# R code for the sixth lecture on September 17
#
# Walks through the basic quantities of a fitted linear model on the body fat
# data: fitted values, residuals, residual degrees of freedom, the residual
# sum of squares and R-squared. Expects "BodyFat.csv" in the working directory.

body <- read.csv("BodyFat.csv", header = TRUE)
head(body, 10)

# Linear model fitting ----
mod1 <- lm(BODYFAT ~ AGE + WEIGHT + HEIGHT + THIGH, data = body)
summary(mod1)

# Illustrating fitted values ----
names(mod1)
mod1$fitted.values

# Calculating fitted values manually ----
# Design matrix: a column of ones (intercept) followed by the four
# explanatory variables, in the same order as in the formula of mod1.
xmat <- cbind(1, body$AGE, body$WEIGHT, body$HEIGHT, body$THIGH)
yvec <- body$BODYFAT

# Least squares estimate: solve the normal equations (X'X) beta = X'y.
# Solving the system directly avoids forming the explicit inverse of X'X.
beta.est <- solve(crossprod(xmat), crossprod(xmat, yvec))
f.val <- xmat %*% beta.est

# The two columns should agree.
cbind(mod1$fitted.values, f.val)

# Fitted values do not change by a reparametrization of the model ----
# The new regressors are invertible linear combinations of the old ones, so
# both design matrices have the same column space.
mod2 <- lm(
  BODYFAT ~ I(AGE + 2 * WEIGHT) + I(WEIGHT - AGE) +
    I(HEIGHT + 8 * THIGH) + THIGH,
  data = body
)
summary(mod2)
# The parameter estimates are obviously different, the fitted values are not.
cbind(mod1$fitted.values, mod2$fitted.values)

# The residuals ----
names(mod1)
mod1$residuals

# The residuals are orthogonal to every column of the design matrix (including
# the column of ones) and hence to the fitted values, so each of these sums is
# zero up to floating point error.
sum(mod1$residuals)
sum(mod1$residuals * body$THIGH)
sum(mod1$residuals * body$AGE)
sum(mod1$residuals * mod1$fitted.values)

# Residual degrees of freedom ----
# This is n - p - 1. n here is 252 and p is 4, so the residual degrees of
# freedom here is 247.
mod1$df.residual

# Residual sum of squares ----
sum(mod1$residuals^2)

# Decreases when more explanatory variables are added in the model:
mod2 <- lm(BODYFAT ~ AGE + WEIGHT + HEIGHT + THIGH + WRIST, data = body)
sum(mod2$residuals^2)

# Increases when explanatory variables are removed from the model:
mod3 <- lm(BODYFAT ~ AGE + WEIGHT + HEIGHT, data = body)
sum(mod3$residuals^2)

# R-squared or the coefficient of determination ----
names(summary(mod1))
summary(mod1)$r.squared

# Manual calculation: one minus the ratio of the residual sum of squares to
# the total sum of squares of the response about its mean.
tss <- sum((yvec - mean(yvec))^2)
rss <- sum(mod1$residuals^2)
rsq <- 1 - (rss / tss)
rsq
all.equal(rsq, summary(mod1)$r.squared)

# R-squared increases when more explanatory variables are added to the model
# (mod2 is the model with WRIST fitted above):
summary(mod2)$r.squared

# R-squared decreases when explanatory variables are removed from the model
# (mod3 is the model without THIGH fitted above):
summary(mod3)$r.squared