BIFIE.linreg.Rd
Computes linear regression.
Object of class BIFIEdata
String for the dependent variable in the regression model
Vector of predictor variables. To include an intercept, add the
variable "one" to this vector (see Examples).
An R formula object which can be used instead of providing dep and pre.
Note that additional computation time is needed for model matrix creation.
Optional grouping variable(s)
Optional vector of grouping values. This can be omitted and grouping values will be determined automatically.
Optional logical indicating whether statistical inference based on replication should be employed.
Object of class BIFIE.linreg
Number of digits for rounding output
Further arguments to be passed
A list with the following entries
Data frame with unstandardized and standardized regression coefficients, residual standard deviation and \(R^2\)
Extensive output with all replicated statistics
More values
Alternative implementations: survey::svyglm, intsvy::timss.reg, intsvy::timss.reg.pv, stats::lm
See BIFIE.logistreg for logistic regression.
#############################################################################
# EXAMPLE 1: Imputed TIMSS dataset
#############################################################################
# Load the imputed TIMSS datasets and the accompanying replicate weights
data(data.timss1)
data(data.timssrep)

# Build the BIFIEdata object: the weights are taken from the first imputed
# dataset, the replicate weights are all columns of data.timssrep but the first
bdat <- BIFIEsurvey::BIFIE.data(
    data.list = data.timss1,
    wgt = data.timss1[[1]]$TOTWGT,
    wgtrep = data.timssrep[, -1]
)

#**** Model 1: Linear regression for mathematics score
# "one" requests the intercept; "female" is used as the grouping variable
mod1 <- BIFIEsurvey::BIFIE.linreg(
    bdat,
    dep = "ASMMAT",
    pre = c("one", "books", "migrant"),
    group = "female"
)
summary(mod1)
if (FALSE) {
# Model 1 once more, now specified through an R formula and with the
# grouping values given explicitly
mod1a <- BIFIEsurvey::BIFIE.linreg(
    bdat,
    formula = ASMMAT ~ books + migrant,
    group = "female",
    group_values = 0:1
)
summary(mod1a)

# Cross-check with stats::lm, using only the first imputed dataset
dat1 <- data.timss1[[1]]
mod1b <- stats::lm(
    ASMMAT ~ 0 + as.factor(female) + as.factor(female):books +
        as.factor(female):migrant,
    data = dat1,
    weights = dat1$TOTWGT
)
summary(mod1b)

#**** Model 2: Like Model 1, but books is now treated as a factor
mod2 <- BIFIEsurvey::BIFIE.linreg(
    bdat,
    formula = ASMMAT ~ as.factor(books) + migrant
)
summary(mod2)
#############################################################################
# EXAMPLE 2: PISA data | Nonlinear regression models
#############################################################################
data(data.pisaNLD)

#--- Create BIFIEdata object immediately using BIFIE.data.jack function
# The dataset is handed over directly, so no intermediate copy is needed.
bdat <- BIFIEsurvey::BIFIE.data.jack(
    data.pisaNLD,
    jktype = "RW_PISA",
    cdata = TRUE
)
summary(bdat)

#****************************************************
#*** Model 1: linear regression
mod1 <- BIFIEsurvey::BIFIE.linreg(bdat, formula = MATH ~ HISEI)
summary(mod1)

#****************************************************
#*** Model 2: Cubic regression
# I() protects the powers so they are evaluated arithmetically in the formula
mod2 <- BIFIEsurvey::BIFIE.linreg(
    bdat,
    formula = MATH ~ HISEI + I(HISEI^2) + I(HISEI^3)
)
summary(mod2)
#****************************************************
#*** Model 3: B-spline regression
# Inspect the B-spline design matrix over the HISEI values 16, ..., 90
dfr <- data.frame("HISEI" = 16:90)
des <- stats::model.frame(~ splines::bs(HISEI, df = 5), dfr)
# The model frame stores the whole basis as its single (matrix) column.
# Extract it by position: `des$splines` would only work through partial
# matching of the column name "splines::bs(HISEI, df = 5)".
des <- des[[1]]
# Draw the first basis function, then overlay the remaining ones
plot(dfr$HISEI, des[, 1], type = "l", lwd = 2, ylim = c(0, 1))
for (vv in seq_len(ncol(des))[-1]) {
    lines(dfr$HISEI, des[, vv], lty = vv, col = vv, lwd = 2)
}
# Fit the B-spline regression through the formula interface of BIFIE.linreg
mod3 <- BIFIEsurvey::BIFIE.linreg(
    bdat,
    formula = MATH ~ splines::bs(HISEI, df = 5)
)
summary(mod3)

#*** Add the B-spline transformed HISEI values as new variables to bdat
bdat2 <- BIFIEsurvey::BIFIE.data.transform(
    bdat,
    ~ 0 + splines::bs(HISEI, df = 5)
)
# Rename the added variables to HISEI_bsdes1, HISEI_bsdes2, ...
bdat2$varnames[bdat2$varsindex.added] <- paste0(
    "HISEI_bsdes",
    seq(1, length(bdat2$varsindex.added))
)
#****************************************************
#*** Model 4: Nonparametric regression using BIFIE.by
?BIFIE.by
#---- (1) test function with one dataset
# Pull the two analysis variables and the weights out of the BIFIEdata object
vars <- c("MATH", "HISEI")
dat1 <- bdat$dat1
w <- bdat$wgt
X <- as.data.frame(dat1[, vars])
# weighted loess fit of MATH on HISEI
mod <- stats::loess(MATH ~ HISEI, weights = w, data = X)
# predicted MATH values on the HISEI grid 16, ..., 90
hisei_val <- data.frame("HISEI" = seq(16, 90))
y_pred <- stats::predict(mod, hisei_val)
graphics::plot(hisei_val$HISEI, y_pred, type = "l")
#--- (2) define loess function
# User function for BIFIE.by: weighted loess fit of MATH on HISEI.
#   X  data with the columns listed in the global `vars` (in that order)
#   w  weights, one per row of X
# Returns the loess predictions at the HISEI values in the global `hisei_val`.
loess_fct <- function(X, w) {
    obs <- stats::setNames(data.frame(X, w), c(vars, "wgt"))
    # loess is fitted on complete rows only
    obs <- stats::na.omit(obs)
    # (a weighted stats::lm with the same formula is the linear counterpart)
    fit <- stats::loess(MATH ~ HISEI, weights = obs$wgt, data = obs)
    stats::predict(fit, hisei_val)
}
#--- (3) estimate model
# Apply the loess user function through BIFIE.by
mod4 <- BIFIEsurvey::BIFIE.by(bdat, vars, userfct = loess_fct)
summary(mod4)

# Plot the pointwise loess estimates (solid line) ...
graphics::plot(
    hisei_val$HISEI,
    mod4$stat$est,
    type = "l",
    lwd = 2,
    xlab = "HISEI",
    ylab = "PVMATH",
    ylim = c(430, 670)
)
# ... and the pointwise bands at estimate -/+ 1.96 standard errors (dotted)
graphics::lines(
    hisei_val$HISEI,
    mod4$stat$est - 1.96 * mod4$stat$SE,
    lty = 3
)
graphics::lines(
    hisei_val$HISEI,
    mod4$stat$est + 1.96 * mod4$stat$SE,
    lty = 3
)
}