BIFIEdata
ObjectsBIFIE.data.transform.Rd
Computes a data transformation for BIFIEdata
objects.
BIFIE.data.transform( bifieobj, transform.formula, varnames.new=NULL )
Object of class BIFIEdata
R formula object for data transformation.
Optional vector of names for new defined variables.
An object of class BIFIEdata
. Additional values are
Added variables in data transformation
Indices of added variables
library(miceadds)
#############################################################################
# EXAMPLE 1: Data transformations for TIMSS data
#############################################################################
data(data.timss2)
data(data.timssrep)
# create BIFIEdata object
bifieobj1 <- BIFIEsurvey::BIFIE.data( data.timss2, wgt=data.timss2[[1]]$TOTWGT,
wgtrep=data.timssrep[,-1] )
# create BIFIEdata object in compact way (cdata=TRUE)
bifieobj2 <- BIFIEsurvey::BIFIE.data( data.timss2, wgt=data.timss2[[1]]$TOTWGT,
wgtrep=data.timssrep[,-1], cdata=TRUE)
#****************************
#*** Transformation 1: Squared and cubic book variable
transform.formula <- ~ I( books^2 ) + I( books^3 )
# as.character(transform.formula)
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj1,
transform.formula=transform.formula)
bifieobj$variables
# rename added variables
bifieobj$varnames[ bifieobj$varsindex.added ] <- c("books_sq", "books_cub")
# check descriptive statistics
res1 <- BIFIEsurvey::BIFIE.univar( bifieobj, vars=c("books_sq", "books_cub" ) )
summary(res1)
if (FALSE) {
#****************************
#*** Transformation 2: Create dummy variables for variable book
transform.formula <- ~ as.factor(books)
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=transform.formula )
## Included 5 variables: as.factor(books)1 as.factor(books)2 as.factor(books)3
## as.factor(books)4 as.factor(books)5
bifieobj$varnames[ bifieobj$varsindex.added ] <- paste0("books_D", 1:5)
#****************************
#*** Transformation 3: Discretized mathematics score
hi3a <- BIFIEsurvey::BIFIE.hist( bifieobj, vars="ASMMAT" )
plot(hi3a)
transform.formula <- ~ I( as.numeric(cut( ASMMAT, breaks=seq(200,800,100) )) )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=transform.formula, varnames.new="ASMMAT_discret")
hi3b <- BIFIEsurvey::BIFIE.hist( bifieobj, vars="ASMMAT_discret", breaks=1:7 )
plot(hi3b)
# check frequencies
fr3b <- BIFIEsurvey::BIFIE.freq( bifieobj, vars="ASMMAT_discret", se=FALSE )
summary(fr3b)
#****************************
#*** Transformation 4: include standardization variables for book variable
# start with testing the transformation function on a single dataset
dat1 <- bifieobj$dat1
stats::weighted.mean( dat1[,"books"], dat1[,"TOTWGT"], na.rm=TRUE)
sqrt( Hmisc::wtd.var( dat1[,"books"], dat1[,"TOTWGT"], na.rm=TRUE) )
# z standardization
transform.formula <- ~ I( ( books - weighted.mean( books, TOTWGT, na.rm=TRUE) )/
sqrt( Hmisc::wtd.var( books, TOTWGT, na.rm=TRUE) ))
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=transform.formula, varnames.new="z_books" )
# standardize variable books with M=500 and SD=100
transform.formula <- ~ I(
500 + 100*( books - stats::weighted.mean( books, w=TOTWGT, na.rm=TRUE) ) /
sqrt( Hmisc::wtd.var( books, weights=TOTWGT, na.rm=TRUE) ) )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=transform.formula, varnames.new="z500_books" )
# standardize variable books with respect to M and SD of ALL imputed datasets
res <- BIFIEsurvey::BIFIE.univar( bifieobj, vars="books" )
summary(res)
## var Nweight Ncases M M_SE M_fmi M_VarMI M_VarRep SD SD_SE SD_fmi
## 1 books 76588.72 4554 2.945 0.04 0 0 0.002 1.146 0.015 0
M <- round(res$output$mean1,5)
SD <- round(res$output$sd1,5)
transform.formula <- paste0( " ~ I( ( books - ", M, " ) / ", SD, ")" )
## > transform.formula
## [1] " ~ I( ( books - 2.94496 ) / 1.14609)"
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=stats::as.formula(transform.formula),
varnames.new="zall_books" )
# check statistics
res4 <- BIFIEsurvey::BIFIE.univar( bifieobj,
vars=c("z_books", "z500_books", "zall_books") )
summary(res4)
#****************************
#*** Transformation 5: include rank transformation for variable ASMMAT
# calculate percentage ranks using wtd.rank function from Hmisc package
dat1 <- bifieobj$dat1
100 * Hmisc::wtd.rank( dat1[,"ASMMAT"], w=dat1[,"TOTWGT"] ) / sum( dat1[,"TOTWGT"] )
# define an auxiliary function for calculating percentage ranks
wtd.percrank <- function( x, w ){
100 * Hmisc::wtd.rank( x, w, na.rm=TRUE ) / sum( w, na.rm=TRUE )
}
wtd.percrank( dat1[,"ASMMAT"], dat1[,"TOTWGT"] )
# define transformation formula
transform.formula <- ~ I( wtd.percrank( ASMMAT, TOTWGT ) )
# add ranks to BIFIEdata object
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=transform.formula, varnames.new="ASMMAT_rk")
# check statistic
res5 <- BIFIEsurvey::BIFIE.univar( bifieobj, vars=c("ASMMAT_rk" ) )
summary(res5)
#****************************
#*** Transformation 6: recode variable books
library(car)
# recode variable books according to "1,2=0, 3,4=1, 5=2"
dat1 <- bifieobj$dat1
# use Recode function from car package
car::Recode( dat1[,"books"], "1:2='0'; c(3,4)='1';5='2'")
# define transformation formula
transform.formula <- ~ I( car::Recode( books, "1:2='0'; c(3,4)='1';5='2'") )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=transform.formula, varnames.new="book_rec" )
res6 <- BIFIEsurvey::BIFIE.freq( bifieobj, vars=c("book_rec" ) )
summary(res6)
#****************************
#*** Transformation 7: include some variables aggregated to the school level
dat1 <- as.data.frame(bifieobj$dat1)
# at first, create school ID in the dataset by transforming the student ID
dat1$idschool <- as.numeric(substring( dat1$IDSTUD, 1, 5 ))
transform.formula <- ~ I( as.numeric( substring( IDSTUD, 1, 5 ) ) )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=transform.formula, varnames.new="idschool" )
#*** test function for a single dataset bifieobj$dat1
dat1 <- as.data.frame(bifieobj$dat1)
gm <- miceadds::GroupMean( data=dat1$ASMMAT, group=dat1$idschool, extend=TRUE)[,2]
# add school mean ASMMAT
tformula <- ~ I( miceadds::GroupMean( ASMMAT, group=idschool, extend=TRUE)[,2] )
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj, transform.formula=tformula,
varnames.new="M_ASMMAT" )
# add within group centered mathematics values of ASMMAT
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=~ 0 + I( ASMMAT - M_ASMMAT ),
varnames.new="WC_ASMMAT" )
# add school mean books
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj,
transform.formula=~ 0 + I( add.groupmean( books, idschool ) ),
varnames.new="M_books" )
#****************************
#*** Transformation 8: include fitted values and residuals from a linear model
# create new BIFIEdata object
data(data.timss1)
bifieobj3 <- BIFIEsurvey::BIFIE.data( data.timss1, wgt=data.timss1[[1]]$TOTWGT,
wgtrep=data.timssrep[,-1] )
# specify transformation
transform.formula <- ~ I( fitted( stats::lm( ASMMAT ~ migrant + female ) ) ) +
I( residuals( stats::lm( ASMMAT ~ migrant + female ) ) )
# Note that lm omits cases in regression by listwise deletion.
# add fitted values and residual to BIFIEdata object
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj3,
transform.formula=transform.formula )
bifieobj$varnames[ bifieobj$varsindex.added ] <- c("math_fitted1", "math_resid1")
#****************************
#*** Transformation 9: Including principal component scores in BIFIEdata object
# define auxiliary function for extracting PCA scores
BIFIE.princomp <- function( formula, Ncomp ){
X <- stats::princomp( formula, cor=TRUE)
Xp <- X$scores[, 1:Ncomp ]
return(Xp)
}
# define transformation formula
transform.formula <- ~ I( BIFIE.princomp( ~ migrant + female + books + lang + ASMMAT, 3 ))
# apply transformation
bifieobj <- BIFIEsurvey::BIFIE.data.transform( bifieobj3,
transform.formula=transform.formula )
bifieobj$varnames[ bifieobj$varsindex.added ] <- c("pca_sc1", "pca_sc2","pca_sc3")
# check descriptive statistics
res9 <- BIFIEsurvey::BIFIE.univar( bifieobj, vars="pca_sc1", se=FALSE)
summary(res9)
res9$output$mean1M
# The transformation formula can also be conveniently generated by string operations
vars <- c("migrant", "female", "books", "lang" )
transform.formula2 <- as.formula( paste0( "~ 0 + I ( BIFIE.princomp( ~ ",
paste0( vars, collapse="+" ), ", 3 ) )") )
## > transform.formula2
## ~ I(BIFIE.princomp(~migrant + female + books + lang, 3))
#****************************
#*** Transformation 10: Overwriting variables books and migrant
bifieobj4 <- BIFIEsurvey::BIFIE.data.transform( bifieobj3,
transform.formula=~ I( 1*(books >=1 ) ) + I(2*migrant),
varnames.new=c("books","migrant") )
summary(bifieobj4)
}