Internship 2021 • Source Code for Multiple Imputation Comparisons: Summary Vignettes

The code below can be used to recreate the tables and figures in the multiple imputation (MI) comparison summary vignettes. Because many tables and figures are replicated for different dependent variables, imputed values (i.e., scale scores or SGPs), and so forth, only select example code is presented here. Variable names can be easily changed for subsequent analyses.

Set-Up

The code below prepares the data from the simulation without a COVID-19 impact. Given that the MI methods differed between the “impact” and “no impact” simulations, make sure to change the data names and imputation method labels accordingly.

```{r setup, include = FALSE}
knitr::opts_chunk$set(cache   = FALSE, 
                      echo    = FALSE, 
                      fig.align = "center",
                      fig.topcation = TRUE) 

# Set working directory
setwd("./FilePath")

# kable options
options(knitr.kable.NA = '')

# Load R libraries
require(pacman)
pacman::p_load(kableExtra, ggplot2, glmnet, data.table, fixest)

# Set data directory
datadir = "./DataPath"

# Missingness percentages
missperc = c(30, 50, 70)

# Missingness types
misstype = c("MCAR", "STATUS_w_DEMOG", "STATUS_w_GROWTH")

# For-loop to create full data tables
MIsummary.GC = MIsummary.School =  NULL
for(mp in missperc) {
  
  for(mt in misstype) {
    
    # Create file path
    fp = paste0(datadir, "/", mp, " Percent Missing/", mt, "/")
    
    # Load summary data
    load(paste0(fp, "L2PAN_Summaries.rda"))
    load(paste0(fp, "L2PAN_LONG_Summaries.rda"))
    load(paste0(fp, "L2LMER_Summaries.rda"))
    load(paste0(fp, "L2LMER_LONG_Summaries.rda"))
    load(paste0(fp, "PMM_Summaries.rda"))
    load(paste0(fp, "RQ_Summaries.rda"))
    
    # Create "observed" cases
    Observed.GC = copy(PMM_Summaries[["SCHOOL"]][["GRADE_CONTENT"]][["Evaluation"]])[, IMP_METHOD := "Observed"]
    Observed.GC[, SS_Raw_Bias := SS_Obs_Raw_Bias]
    Observed.GC[, SGP_Raw_Bias := SGP_Obs_Raw_Bias]
    Observed.GC[, SGPB_Raw_Bias := SGPB_Obs_Raw_Bias]
    
    Observed.School = copy(PMM_Summaries[["SCHOOL"]][["GLOBAL"]][["Evaluation"]])[, IMP_METHOD := "Observed"]
    Observed.School[, SS_Raw_Bias := SS_Obs_Raw_Bias]
    Observed.School[, SGP_Raw_Bias := SGP_Obs_Raw_Bias]
    Observed.School[, SGPB_Raw_Bias := SGPB_Obs_Raw_Bias]
    
    # Create long data.table combining imputation methods 
    # By grade/content area
    data.temp.gc = rbindlist(list(
      
      # L2PAN
      data.table(L2PAN_Summaries$SCHOOL$GRADE_CONTENT$Evaluation, "IMP_METHOD" = "L2PAN"),
      
      # L2PAN LONG
      data.table(L2PAN_LONG_Summaries$SCHOOL$GRADE_CONTENT$Evaluation, "IMP_METHOD" = "L2PAN_LONG"),
      
       # L2LMER
      data.table(L2LMER_Summaries$SCHOOL$GRADE_CONTENT$Evaluation, "IMP_METHOD" = "L2LMER"),
      
      # L2LMER LONG
      data.table(L2LMER_LONG_Summaries$SCHOOL$GRADE_CONTENT$Evaluation, "IMP_METHOD" = "L2LMER_LONG"),
      
      # PMM
      data.table(PMM_Summaries$SCHOOL$GRADE_CONTENT$Evaluation, "IMP_METHOD" = "PMM"),
      
      # RQ
      data.table(RQ_Summaries$SCHOOL$GRADE_CONTENT$Evaluation, "IMP_METHOD" = "RQ"),
      
      # Observed
      data.table(Observed.GC))
  
      )
    
    # Convert imputation method, grade, and content area variables to class "factor"
    data.temp.gc[, IMP_METHOD := factor(IMP_METHOD, 
                   levels = rev(c("Observed", "PMM", "RQ", "L2PAN", "L2LMER", "L2PAN_LONG", "L2LMER_LONG")))]
    data.temp.gc[, GRADE := as.factor(GRADE)]
    data.temp.gc[, CONTENT_AREA := as.factor(CONTENT_AREA)]
    
    # Create missingness type and percentage variables
    data.temp.gc[, MISS_TYPE := mt]
    data.temp.gc[, MISS_PERC := mp]
    
    # Combine with previous data
    MIsummary.GC = rbind(MIsummary.GC, data.temp.gc)
    
    # Create long data.table combining imputation methods 
    # By school
    data.temp.sc = rbindlist(list(
      
      # L2PAN
      data.table(L2PAN_Summaries$SCHOOL$GLOBAL$Evaluation, "IMP_METHOD" = "L2PAN"),
      
      # L2PAN LONG
      data.table(L2PAN_LONG_Summaries$SCHOOL$GLOBAL$Evaluation, "IMP_METHOD" = "L2PAN_LONG"),
      
       # L2LMER
      data.table(L2LMER_Summaries$SCHOOL$GLOBAL$Evaluation, "IMP_METHOD" = "L2LMER"),
      
      # L2LMER LONG
      data.table(L2LMER_LONG_Summaries$SCHOOL$GLOBAL$Evaluation, "IMP_METHOD" = "L2LMER_LONG"),
      
      # PMM
      data.table(PMM_Summaries$SCHOOL$GLOBAL$Evaluation, "IMP_METHOD" = "PMM"),
      
      # RQ
      data.table(RQ_Summaries$SCHOOL$GLOBAL$Evaluation, "IMP_METHOD" = "RQ"),
      
      # Observed
      data.table(Observed.School))
  
      )
    
    # Convert imputation method, grade, and content area variables to class "factor"
    data.temp.sc[, IMP_METHOD := factor(IMP_METHOD, 
                   levels = rev(c("Observed", "PMM", "RQ", "L2PAN", "L2LMER", "L2PAN_LONG", "L2LMER_LONG")))]
    
    # Create missingness type and percentage variables
    data.temp.sc[, MISS_TYPE := mt]
    data.temp.sc[, MISS_PERC := mp]
    
    # Combine with previous data
    MIsummary.School = rbind(MIsummary.School, data.temp.sc)
        
  } # end for mt in misstype
  
} # end for mp in missperc

# Convert missing type and percentage to factor variables
MIsummary.GC[, MISS_TYPE := factor(MISS_TYPE, 
               levels = c("MCAR", "STATUS_w_DEMOG", "STATUS_w_GROWTH"), 
               labels = c("MCAR", "DEMOG", "GROWTH"))]
MIsummary.GC[, MISS_PERC := factor(MISS_PERC, 
               levels = c("30", "50", "70"), 
               labels = c("30% Missing", "50% Missing", "70% Missing"))]
MIsummary.School[, MISS_TYPE := factor(MISS_TYPE, 
                   levels = c("MCAR", "STATUS_w_DEMOG", "STATUS_w_GROWTH"), 
                   labels = c("MCAR", "DEMOG", "GROWTH"))]
MIsummary.School[, MISS_PERC := factor(MISS_PERC, 
                   levels = c("30", "50", "70"), 
                   labels = c("30% Missing", "50% Missing", "70% Missing"))]

# Create classification variables based on the F1 (simplified) p-value
# 1 = Statistically significant (reject the null) based on alphaval
alphaval = 0.1
MIsummary.GC[, SS_F1_pSimp_Class := ifelse(SS_F1_pSimp < alphaval, 1, 0)]
MIsummary.GC[, SGP_F1_pSimp_Class := ifelse(SGP_F1_pSimp < alphaval, 1, 0)]
MIsummary.School[, SS_F1_pSimp_Class := ifelse(SS_F1_pSimp < alphaval, 1, 0)]
MIsummary.School[, SGP_F1_pSimp_Class := ifelse(SGP_F1_pSimp < alphaval, 1, 0)]

# Remove observations with N < 10
MIsummary.GC = MIsummary.GC[N >= 10]
MIsummary.School = MIsummary.School[N >= 10]
setkey(MIsummary.GC, IMP_METHOD); setkey(MIsummary.School, IMP_METHOD)

# Create factor variable grouping observations into quantile of grade/content area size
MIsummary.GC[, N_QUANT := cut(N, breaks = c(quantile(N, seq(0, 1, by = 0.25))),
                                                     labels = c("1", "2", "3", "4"),
                                                     include.lowest = T)]
MIsummary.School[, N_QUANT := cut(N, breaks = c(quantile(N, seq(0, 1, by = 0.25))),
                                                         labels = c("1", "2", "3", "4"),
                                                         include.lowest = T)]
```

Imputation Method Comparison: Summary Tables

Code for two tables is given: (a) for one missingness type at the grade/content area level, and (b) for all missingness types at the school level.

```{r}
# Summary by imputation method, grade, and missingness percentage for MCAR data at grade/content area level
cbind(
  
  MIsummary.GC[IMP_METHOD == "L2PAN" & MISS_TYPE == "MCAR",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_PERC, GRADE)][,-1],
  MIsummary.GC[IMP_METHOD == "L2PAN_LONG" & MISS_TYPE == "MCAR",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_PERC, GRADE)][,3:6],
  MIsummary.GC[IMP_METHOD == "L2LMER" & MISS_TYPE == "MCAR",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_PERC, GRADE)][,3:6],
  MIsummary.GC[IMP_METHOD == "L2LMER_LONG" & MISS_TYPE == "MCAR",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_PERC, GRADE)][,3:6],
  MIsummary.GC[IMP_METHOD == "PMM" & MISS_TYPE == "MCAR",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_PERC, GRADE)][,3:6],
  MIsummary.GC[IMP_METHOD == "RQ" & MISS_TYPE == "MCAR",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_PERC, GRADE)][,3:6],
  MIsummary.GC[IMP_METHOD == "Observed" & MISS_TYPE == "MCAR",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_PERC, GRADE)][,3:6]
  
) %>%
  kable(format = "html", digits = 3, booktabs = T,
        col.names = c("Grade", rep(c("SS", "SGP"), 14)),
        caption = "Mean percent bias and confidence interval coverage rates for scale score (SS) and 
                   student growth percentiles (SGPs) with MCAR data, grade-content area level") %>%
  kable_classic_2("hover", full_width = F) %>%
  pack_rows("30% Missing", 1, 6) %>%
  pack_rows("50% Missing", 7, 12) %>%
  pack_rows("70% Missing", 13, 18) %>%
  add_header_above(c(" ", "Percent Bias" = 2, "CR" = 2, "Percent Bias" = 2, "CR" = 2,
                   "Percent Bias" = 2, "CR" = 2, "Percent Bias" = 2, "CR" = 2, 
                   "Percent Bias" = 2, "CR" = 2, "Percent Bias" = 2, "CR" = 2,
                   "Percent Bias" = 2, "CR" = 2)) %>%
  add_header_above(c(" ", "L2PAN" = 4, "L2PAN_LONG" = 4, "LMER" = 4, "LMER_LONG" = 4, 
                          "PMM" = 4, "RQ" = 4, "Observed" = 4)) %>%
  scroll_box(width = "900px")
```

```{r}
# Summary by imputation method, missingness type, and missingness percentage at the school level
cbind(
  
  MIsummary.School[IMP_METHOD == "L2PAN",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_TYPE, MISS_PERC)][,-1],
  MIsummary.School[IMP_METHOD == "L2PAN_LONG",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_TYPE, MISS_PERC)][,3:6],
  MIsummary.School[IMP_METHOD == "L2LMER",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_TYPE, MISS_PERC)][,3:6],
  MIsummary.School[IMP_METHOD == "L2LMER_LONG",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_TYPE, MISS_PERC)][,3:6],
  MIsummary.School[IMP_METHOD == "PMM",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_TYPE, MISS_PERC)][,3:6],
  MIsummary.School[IMP_METHOD == "RQ",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_TYPE, MISS_PERC)][,3:6],
  MIsummary.School[IMP_METHOD == "Observed",
            .("mean_pb_ss" = mean(SS_Pct_Bias, na.rm = T),
              "mean_pb_sgp" = mean(SGP_Pct_Bias, na.rm = T),
              "mean_cr_ss" = mean(SS_Coverage_Simp, na.rm = T),
              "mean_cr_sgp" = mean(SGP_Coverage_Simp, na.rm = T)),
            keyby = .(MISS_TYPE, MISS_PERC)][,3:6]
  
) %>%
  kable(format = "html", digits = 3, booktabs = T,
        col.names = c("Percent Missing", rep(c("SS", "SGP"), 14)),
        caption = "Mean percent bias and confidence interval coverage rates for scale score (SS) and 
                   student growth percentiles (SGPs) at the school level") %>%
  kable_classic_2("hover", full_width = F) %>%
  pack_rows("MCAR", 1, 3) %>%
  pack_rows("MAR (Status with Demographics)", 4, 6) %>%
  pack_rows("MAR (Status with Growth)", 7, 9) %>%
  add_header_above(c(" ", "Percent Bias" = 2, "CR" = 2, "Percent Bias" = 2, "CR" = 2,
                   "Percent Bias" = 2, "CR" = 2, "Percent Bias" = 2, "CR" = 2, 
                   "Percent Bias" = 2, "CR" = 2, "Percent Bias" = 2, "CR" = 2,
                   "Percent Bias" = 2, "CR" = 2)) %>%
  add_header_above(c(" ", "L2PAN" = 4, "L2PAN_LONG" = 4, "LMER" = 4, "LMER_LONG" = 4, 
                           "PMM" = 4, "RQ" = 4, "Observed" = 4))  %>%
  scroll_box(width = "900px")
```

Imputation Method Comparison: Summary Figures

The following figures are given for scale scores at the grade/content area level. To examine at the school level, change MIsummary.GC to MIsummary.School. To examine SGPs instead, change SS_ to SGP_ for the various dependent variables.

```{r fig.cap = "Scale score percent bias by imputation method, missingness percentage, and missingness type"}
# Faceted box plot, grouping by imputation method
# Faceting by missing percentage and missingness type
ggplot(MIsummary.GC, aes(x = IMP_METHOD, y = SS_Pct_Bias, fill = IMP_METHOD)) +
  geom_boxplot() + coord_flip() +
  labs(x = "Imputation Method", y = "Percent Bias") +
  facet_wrap(MISS_PERC ~ MISS_TYPE) + 
  scale_fill_brewer(palette="Dark2") +
  theme(legend.position = "none") 
```

```{r fig.cap = "Scale score coverage rate by imputation method, missingness percentage, and missingness type"}
# Faceted box plot, grouping by imputation method
# Faceting by missing percentage and missingness type
ggplot(MIsummary.GC, aes(x = IMP_METHOD, y = SS_Coverage_Simp, fill = IMP_METHOD)) +
  geom_boxplot() + coord_flip() +
  labs(x = "Imputation Method", y = "Simplified CI Coverage Rate") +
  facet_wrap(MISS_PERC ~ MISS_TYPE) + 
  scale_fill_brewer(palette="Dark2") +
  theme(legend.position = "none") 
```

```{r fig.cap = "Scatterplot of scale score percent bias as a function of grade/content area size"}
# Scatterplot faceted by imputation method
MIsummary.GC[order(N)] %>%
ggplot(aes(x = N, y = SS_Pct_Bias, color = IMP_METHOD)) +
  geom_point() +
  labs(x = "Grade/Content Area Size", y = "Percent Bias") +
  theme(legend.position = "none") +
  facet_wrap(MISS_TYPE ~ IMP_METHOD, ncol = 7) +
  scale_color_brewer(palette="Dark2") +
  scale_x_continuous(breaks=seq(0, 500, by=250))
```

```{r fig.cap = "Scatterplot of scale score coverage rate as a function of grade/content area size"}
# Scatterplot faceted by imputation method
MIsummary.GC[order(N)] %>%
ggplot(aes(x = N, y = SS_Coverage_Simp, color = IMP_METHOD)) +
  geom_point() +
  labs(x = "Grade/Content Area Size", y = "Simplified CI Coverage Rate") +
  theme(legend.position = "none") +
  facet_wrap(MISS_TYPE ~ IMP_METHOD, ncol = 7) +
  scale_color_brewer(palette="Dark2") +
  scale_x_continuous(breaks=seq(0, 500, by=250))
```

```{r fig.cap = "Proportion of times that the imputed SS was found to differ from the true value based on the simplified F1 statistic"}
# Bar plot, grouping by imputation method
# Faceting by missingness type and percentage
MIsummary.GC[, .("prop_p" = mean(SS_F1_pSimp_Class)), keyby = .(IMP_METHOD, MISS_PERC, MISS_TYPE)] %>%
ggplot(aes(x = IMP_METHOD, y = prop_p, fill = IMP_METHOD)) +
  geom_bar(stat = "identity") +
  theme(legend.position = "none") +
  facet_wrap(MISS_TYPE ~ MISS_PERC, ncol = 3) +
  scale_color_brewer(palette="Dark2") +
  labs(x = "Imputation Method", y = "Proportion Significant Differences") +
  theme(axis.text.x = element_text(angle = 90))
```

```{r fig.cap = "Density plot of rejected null hypotheses for the simplified F1 statistic as a function of grade/content area size"}
# Density plot faceted by imputation method
MIsummary.GC[SS_F1_pSimp_Class == 1, ] %>%
ggplot(aes(x = N)) +
  geom_density(aes(fill = "aquamarine", alpha = 0.5)) +
  labs(x = "Grade/Content Area Size", y = "Density") +
  facet_wrap(MISS_TYPE ~ IMP_METHOD, ncol = 7) + 
  scale_fill_brewer(palette="Dark2") +
  theme(legend.position = "none") +
  scale_x_continuous(breaks=seq(0, 500, by=250))
```

```{r fig.cap = "Density plot of rejected null hypotheses for the simplified F1 statistic as a function of percent missing"}
# Density plot faceted by imputation method
MIsummary.GC[SS_F1_pSimp_Class == 1, ] %>%
ggplot(aes(x = Percent_Missing)) +
  geom_density(aes(fill = "aquamarine", alpha = 0.5)) +
  labs(x = "Percent Missing", y = "Density") +
  facet_wrap(MISS_TYPE ~ IMP_METHOD, ncol = 7) + 
  scale_fill_brewer(palette="Dark2") +
  theme(legend.position = "none") +
  scale_x_continuous(breaks=seq(0, 100, by=25))
```

Imputation Method Comparison: Regression Models

The code below runs a series of basic fixed-effects regression models for raw bias either at the grade/content area or school level.

```{r}
# Fit models using fixest
gc.raw.add.ss = feols(SS_Raw_Bias ~ N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed") | 
                      GRADE^CONTENT_AREA, data = MIsummary.GC)
gc.raw.int.ss = feols(SS_Raw_Bias ~ (N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed"))^2 | 
                      GRADE^CONTENT_AREA, data = MIsummary.GC)
gc.raw.add.sgp = feols(SGP_Raw_Bias ~ N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed") | 
                       GRADE^CONTENT_AREA, data = MIsummary.GC[GRADE %in% 5:8])
gc.raw.int.sgp = feols(SGP_Raw_Bias ~ (N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed"))^2 | 
                       GRADE^CONTENT_AREA, data = MIsummary.GC[GRADE %in% 5:8])

# Create table of results
etable(gc.raw.add.ss, gc.raw.int.ss, gc.raw.add.sgp, gc.raw.int.sgp, 
       subtitles = c("SS Add", "SS Int", "SGP Add", "SGP Int"))[-c(1,2),] %>%
  kable(format = "html", booktabs = T, row.names = T,
        col.names = c("Scale Scores: Additive", "Scale Scores: Interaction", 
                      "SGPs: Additive", "SGPs: Interaction"),
        caption = "Linear fixed-effect regression models for raw bias at the grade/content area level") %>%
  kable_classic_2("hover", full_width = F) %>%
  scroll_box(height = "700px")
```

```{r}
# Fit models using fixest
sc.raw.add.ss = feols(SS_Raw_Bias ~ N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed"), 
                      data = MIsummary.School)
sc.raw.int.ss = feols(SS_Raw_Bias ~ (N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed"))^2, 
                      data = MIsummary.School)
sc.raw.add.sgp = feols(SGP_Raw_Bias ~ N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed"), 
                       data = MIsummary.School)
sc.raw.int.sgp = feols(SGP_Raw_Bias ~ (N + MISS_PERC + MISS_TYPE + i(IMP_METHOD, ref = "Observed"))^2, 
                       data = MIsummary.School)

# Create table of results
etable(sc.raw.add.ss, sc.raw.int.ss, sc.raw.add.sgp, sc.raw.int.sgp, 
       subtitles = c("SS Add", "SS Int", "SGP Add", "SGP Int"))[-c(1,2),] %>%
  kable(format = "html", booktabs = T, row.names = T,
        col.names = c("Scale Scores: Additive", "Scale Scores: Interaction", 
                      "SGPs: Additive", "SGPs: Interaction"),
        caption = "Linear fixed-effect regression models for raw bias at the school level") %>%
  kable_classic_2("hover", full_width = F) %>%
  scroll_box(height = "700px")
```

Analyses Specific to an MI Method

The code below is for data corresponding to a specific MI method. For example, in the vignettes provided, the data is subset to examine cross-sectional L2PAN. To look at a different MI method, modify the first two lines of code below. Here, we provide code to recreate the heat maps for scale scores at the grade/content area level.

```{r}
# Create data set just for L2PAN
MIsummary.l2pan.GC = MIsummary.GC[IMP_METHOD == "L2PAN"]
MIsummary.l2pan.School = MIsummary.School[IMP_METHOD == "L2PAN"]
```

```{r fig.cap = "Average SS percent bias by grade/content area quantile, grade, and missingness characteristics"}
# Heat map for average PB by grade and N quantile
MIsummary.l2pan.GC[, .("mean_pb" = mean(SS_Pct_Bias, na.rm = T)),
                keyby = .(N_QUANT, GRADE, MISS_PERC, MISS_TYPE)] %>%
ggplot(aes(x = N_QUANT, y = GRADE)) + 
    geom_tile(aes(fill = mean_pb)) + 
    scale_fill_gradient(low = "white", high = "aquamarine3", name = "Mean Percent Bias") + 
    facet_wrap(MISS_TYPE ~ MISS_PERC) +
    geom_text(aes(label = round(mean_pb, 2)), size = 3) +
    labs(x = "Grade/Content Area Size Quantile", y = "Grade") 
```

```{r fig.cap = "Average SS coverage rate by grade/content area quantile, grade, and missingness characteristics"}
# Heat map for average CR by grade and N quantile
MIsummary.l2pan.GC[, .("mean_cr" = mean(SS_Coverage_Simp, na.rm = T)),
                keyby = .(N_QUANT, GRADE, MISS_PERC, MISS_TYPE)] %>%
ggplot(aes(x = N_QUANT, y = GRADE)) + 
    geom_tile(aes(fill = mean_cr)) + 
    scale_fill_gradient(low = "aquamarine3", high = "white", name = "Mean CI \nCoverage Rate") + 
    facet_wrap(MISS_TYPE ~ MISS_PERC) +
    geom_text(aes(label = round(mean_cr, 2)), size = 3) +
    labs(x = "Grade/Content Area Size Quantile", y = "Grade") 
```

```{r fig.cap = "Proportion of cases where a significant difference between the imputed and true SS value was found using the F1 statistic"}
# Heat map for proportion significant p-values by grade and N quantile
MIsummary.l2pan.GC[, .("prop_f1p" = mean(SS_F1_pSimp_Class, na.rm = T)),
                keyby = .(N_QUANT, GRADE, MISS_PERC, MISS_TYPE)] %>%
ggplot(aes(x = N_QUANT, y = GRADE)) + 
    geom_tile(aes(fill = prop_f1p)) + 
    scale_fill_gradient(low = "white", high = "aquamarine3", name = "Proportion \nNulls Rejected") + 
    facet_wrap(MISS_TYPE ~ MISS_PERC) +
    geom_text(aes(label = round(prop_f1p, 2)), size = 3) +
    labs(x = "Grade/Content Area Size Quantile", y = "Grade") 
```

We also include code to create the classification models for “flagged” observations with L2PAN based on the SGPs.

```{r}
# Generate binary outcome
MIsummary.l2pan.sgp.GC = MIsummary.l2pan.GC[!is.na(SGP_Pct_Bias)]
MIsummary.l2pan.sgp.GC[, SGP_FLAG := ifelse(SGP_Pct_Bias > 5 & 
                                            SGP_Coverage_Simp < .9 & 
                                            SGP_F1_pSimp_Class == 1, 1, 0)]
MIsummary.l2pan.sgp.School = MIsummary.l2pan.School[!is.na(SGP_Pct_Bias)]
MIsummary.l2pan.sgp.School[, SGP_FLAG := ifelse(SGP_Pct_Bias > 5 & 
                                                SGP_Coverage_Simp < .9 & 
                                                SGP_F1_pSimp_Class == 1, 1, 0)]

# Fit fixed effects model for grade/content area
fe.flag.gc = feglm(SGP_FLAG ~ N + MISS_TYPE + MISS_PERC + 
                             i(N, MISS_TYPE) + i(N, MISS_PERC) + i(MISS_TYPE, MISS_PERC) | CONTENT_AREA^GRADE, 
                   data = MIsummary.l2pan.sgp.GC, family = "binomial")

# Fit school-level model
fe.flag.school = feglm(SGP_FLAG ~ N + MISS_TYPE + MISS_PERC + 
                             i(N, MISS_TYPE) + i(N, MISS_PERC) + i(MISS_TYPE, MISS_PERC), 
                   data = MIsummary.l2pan.sgp.School, family = "binomial")
  
# Create table of results
etable(fe.flag.gc, fe.flag.school, subtitles = c("Grade/Content Area", "School"))[-c(1,2),] %>%
  kable(format = "html", booktabs = T, row.names = T,
        col.names = c("Grade/Content Area", "School"),
        caption = "Logistic fixed-effect regression models for grade/content area or 
                   school-level flagged observations") %>%
  kable_classic_2("hover", full_width = F) %>%
  scroll_box(height = "700px")
```

Source Code for Multiple Imputation Comparisons: Summary Vignettes

Allie Cooperman, Adam Van Iwaarden, and Damian Betebenner

June 16, 2021

Set-Up

Imputation Method Comparison: Summary Tables

Imputation Method Comparison: Summary Figures

Imputation Method Comparison: Regression Models

Analyses Specific to an MI Method

Contents