Example datasets for the mdmb package.

data(data.mb01)
data(data.mb02)
data(data.mb03)
data(data.mb04)
data(data.mb05)

Format

  • Dataset data.mb01. Simulated dataset with missing values. Variables Y, X and Z are continuous.

    List of 2
    $ complete:'data.frame': 4000 obs. of 3 variables:
    ..$ X: num [1:4000] -1.08 0.57 -0.32 0.34 1.21 -0.44 -1.07 -0.29 0.76 -1.75 ...
    ..$ Z: num [1:4000] -0.02 0.26 -1.45 1.24 0.98 -2.36 0.84 -1.08 -0.15 -1.36 ...
    ..$ Y: num [1:4000] 0.88 1.75 -0.82 -1.81 -1.58 -3.34 -3.35 -0.29 1.47 0.23 ...
    $ missing :'data.frame': 4000 obs. of 3 variables:
    ..$ X: num [1:4000] -1.08 0.57 NA NA 1.21 NA -1.07 -0.29 0.76 NA ...
    ..$ Z: num [1:4000] -0.02 0.26 -1.45 1.24 0.98 -2.36 0.84 -1.08 -0.15 -1.36 ...
    ..$ Y: num [1:4000] 0.88 1.75 -0.82 -1.81 -1.58 -3.34 -3.35 -0.29 1.47 0.23 ...

  • Dataset data.mb02. Simulated dataset with missing values. The variables Z and Y are dichotomous.

    List of 2
    $ complete:'data.frame': 2000 obs. of 3 variables:
    ..$ X: num [1:2000] -0.93 0.3 -0.93 0.7 0.52 -1.38 -0.14 0.09 0.23 -1.64 ...
    ..$ Z: num [1:2000] 1 0 1 0 0 1 1 1 1 1 ...
    ..$ Y: num [1:2000] 1 1 0 1 1 0 1 0 0 1 ...
    $ missing :'data.frame': 2000 obs. of 3 variables:
    ..$ X: num [1:2000] -0.93 0.3 -0.93 0.7 0.52 NA -0.14 0.09 0.23 -1.64 ...
    ..$ Z: num [1:2000] 1 0 1 NA NA 1 NA 1 1 1 ...
    ..$ Y: num [1:2000] 1 1 0 1 1 0 1 0 0 1 ...

  • Dataset data.mb03. This dataset is from Enders, Baraldi & Cham (2014) and contains three variables: primary school reading (x), primary school learning problems (z) and middle school reading (y), all of which have missing values.

    'data.frame': 74 obs. of 3 variables:
    $ x: num NA NA NA NA NA 8.34 NA 8.36 6.89 8.56 ...
    $ z: num 8.81 4.5 6.31 4.7 5.1 4 6.11 3.7 6.81 6.31 ...
    $ y: num 5 5.1 6.3 9 9 9.3 NA 10.7 6.2 NA ...

  • Dataset data.mb04. This multilevel dataset contains the cluster identifier idcluster and three substantive variables: the level-1 variables y and x and the level-2 variable w.

    'data.frame': 500 obs. of 4 variables:
    $ idcluster: int 1 1 1 1 1 2 2 2 2 2 ...
    $ x : num NA NA -1.15 -1.65 0.25 ...
    $ w : num -0.552 -0.552 -0.552 -0.552 -0.552 ...
    $ y : num NA NA -0.0711 0.7165 -0.1917 ...

  • Dataset data.mb05. This dataset contains selected (and transformed) variables of the German PISA 2012 data.

    'data.frame': 5001 obs. of 13 variables:
    $ idschool : num 1001 1001 1001 1001 1001 ...
    $ idstud : num 1e+05 1e+05 1e+05 1e+05 1e+05 ...
    $ female : num 1 1 0 0 0 1 1 1 0 0 ...
    $ books : num NA 3 3 1 NA 2 NA 1 NA 2 ...
    $ hisced : num NA 6 6 2 NA 2 NA 2 NA 2 ...
    $ hisei : num NA 30.6 57.7 26.9 NA ...
    $ hisei10 : num NA 0.257 0.596 0.211 NA ...
    $ native : num NA NA 1 0 NA 0 NA 1 NA 1 ...
    $ ANCINTMAT: num NA 0.644 -0.096 1.057 NA ...
    $ MATHEFF : num NA 0.34 0.54 -0.18 NA 0.15 NA NA NA NA ...
    $ READ : num -0.25 -0.503 0.421 -1.664 -0.894 ...
    $ MATH : num -0.565 -0.854 0.384 -0.896 -0.534 ...
    $ W_FSTUWT : num 140 140 140 140 140 ...

References

Enders, C. K., Baraldi, A. N., & Cham, H. (2014). Estimating interaction effects with incomplete predictor variables. Psychological Methods, 19(1), 39-55. doi:10.1037/a0035314

Examples

if (FALSE) {
#############################################################################
# EXAMPLE 1: Linear interaction example from Enders et al. (2014)
#############################################################################
#
# Workflow:
#   1. load and center the data.mb03 dataset
#   2. create starting values for the missing entries with mice
#   3. draw multiple imputations from a factored regression model (frm_fb)
#   4. fit y ~ x*z on every imputed dataset and pool the results, once with
#      mice::pool() and once with sandwich variances via mitools::MIcombine()

# load packages
# (miceadds is called below via miceadds:: and therefore must be installed,
#  but it does not have to be attached)
library(mdmb)
library(mice)
library(mitools)
library(sandwich)

#--- attach example dataset (Enders et al., 2014) from mdmb package
data(data.mb03, package="mdmb")
dat <- data.mb03

#--- center data which speeds convergence of Bayesian estimation
#--- of the imputation model
# loop over all columns instead of hard-coding their number
for (vv in seq_len(ncol(dat))){
    M_vv <- mean(dat[,vv], na.rm=TRUE)
    dat[,vv] <- dat[,vv] - M_vv
}

#--- generate initial imputed values with mice package
# only the first completed dataset is used as starting values (action=1),
# so a single imputation is sufficient
imp <- mice::mice(dat, m=1, maxit=20)
data_init <- mice::complete(imp, action=1)

#--- define number of iterations and number of imputed datasets
iter <- 50000
burnin <- 5000
Nimp <- 100

#******* imputation model M3 with quadratic effects

# model for dependent variable
dep <- list("model"="linreg", "formula"=y ~ x*z + I(x^2) + I(z^2))
# covariate models
ind_x <- list("model"="linreg", "formula"=x ~ z + I(z^2))
ind_z <- list("model"="linreg", "formula"=z ~ 1)
ind <- list(x=ind_x, z=ind_z)

# generate imputations
imp <- mdmb::frm_fb(dat=dat, dep=dep, ind=ind, burnin=burnin, iter=iter,
              data_init=data_init, Nimp=Nimp)

#--- create list of multiply imputed datasets
datlist <- mdmb::frm2datlist(imp)

#-------------------------------
#--- analyze imputed datasets with mice package

# convert into object of class mids
imp2 <- miceadds::datlist2mids(datlist)
# estimate linear model on multiply imputed datasets
mod1 <- with(imp2, stats::lm(y ~ x*z))
summary(mice::pool(mod1))

#-------------------------------
#--- analyze imputed datasets using sandwich standard errors

# take the number of datasets from the list itself and preallocate the
# containers for the per-dataset estimates and covariance matrices
Nimp <- length(datlist)
results <- vector("list", Nimp)
variances <- vector("list", Nimp)
for (ii in seq_len(Nimp)){
    mod_ii <- stats::lm(y ~ x*z, data=datlist[[ii]])
    variances[[ii]] <- sandwich::vcovHC(mod_ii)
    results[[ii]] <- stats::coef(mod_ii)
}

# NOTE(review): df.complete=69 is kept from the original example; with 74
# observations and the 4 coefficients of y ~ x*z the residual degrees of
# freedom would be 70 -- confirm which value is intended
mod2 <- mitools::MIcombine(results=results, variances=variances, df.complete=69)
summary(mod2)
}