### Exam 2 R codes ### data=read.csv("HealthExam.csv",header=T) attach(data) ## part (a) results=lm(BodyMass~Arm) coefficients(results) # y.hat=-5.635212+1.007401*Arm ## part(b) - Bonferroni C.I. for beta0 and beta1 confint(results,level=(1-.05/2)) # 1.25 % 98.75 % #(Intercept) -9.8001293 -1.470295 #Arm 0.8754987 1.139304 ## part(c) - W-H procedure, constuct 95% joint CI for the mean response at the 3 levels. new=data.frame(Arm=c(25,36,42)) ci=predict(results,new,interval="confidence",se.fit=T) W2=2*qf(1-.05,2,78) # W2=6.227585 wh.lwr=ci$fit[,1]-sqrt(W2)*ci$se wh.upr=ci$fit[,1]+sqrt(W2)*ci$se round(cbind(wh.lwr,wh.upr),digits=2) # wh.lwr wh.upr #1 18.45 20.65 #2 29.70 31.56 #3 35.01 38.34 ## part (d) - Scheffe Procedure, construct 95% joint prediction intervals S=sqrt(3*qf(1-.05,3,78)) # S=2.857508 MSE=anova(results)$Mean[2] # MSE=5.076992 se.fit.pred=sqrt(MSE*(1+(ci$se.fit^2/MSE))) s.lwr=ci$fit[,1]-S*se.fit.pred s.upr=ci$fit[,1]+S*se.fit.pred round(cbind(s.lwr,s.upr),digits=2) # s.lwr s.upr #1 12.99 26.11 #2 24.11 37.16 #3 29.96 43.39 ## part (e) - Lack of Fit Test x=Arm y=BodyMass reduced.mod=lm(y~x) full.mod=lm(y~factor(x)) anova(reduced.mod,full.mod) # Res.Df RSS Df Sum of Sq F Pr(>F) #1 78 396.01 #2 19 70.50 59 325.5 1.4868 0.1699 # SSPE = 70.50 --> MSPE = 70.50/19=3.710526 # SSLF = 325.5 --> MSLF = 325.5/59=5.516949 # F = 1.4868 --> p-value = 0.1699 # Since the p-value is not less than 0.01, we don't reject null hypothesis # that the relationship between Arm and BodyMass is linear. # part (f) - Regression through the origin reg.origin=lm(BodyMass~0+Arm) # slope=0.8306 # part (g) - Brown-Forysthe Test e=reg.origin$residuals x.med=median(x) e1=e[x<=x.med]; n1=length(e1) e2=e[x>x.med]; n2=length(e2) d1=abs(e1-median(e1)) d2=abs(e2-median(e2)) s2=(sum((d1-mean(d1))^2)+sum((d2-mean(d2))^2))/(n1+n2-2) t.bf=(mean(d1)-mean(d2))/(sqrt(s2)*sqrt(1/n1+1/n2)) # t.bf=-1.882942 p.val=2*pt(abs(t.bf),df=(n1+n2-2),lower.tail=F) # p-val=0.06343652 plot(reg.origin$fit,reg.origin$res) ### Multiple Regression ## part (h) results2=lm(BodyMass~Arm+Waist) coef(results2) # BodyMass.hat=-6.9086747+0.5874524*Arm+0.1634134*Waist ## part (i) summary(results2)$r.sq # R2=0.8478479 # About 84.78% of the variability of BodyMass can be explained by the first-order # regression line involving the predictors Arm and Waist. ## part (j) new2=data.frame(Arm=c(25,36,42),Waist=c(78,102,108)) predict(results2,new2) # 1 2 3 #20.52388 30.90778 35.41297 ## part (k) predict(results2,new2,interval="prediction",level=(1-.05/3)) # fit lwr upr #1 20.52388 15.61528 25.43248 #2 30.90778 26.04587 35.76968 #3 35.41297 30.37464 40.45131 ## part (l) results3=lm(BodyMass~Arm+Waist+Age) coef(results3) #(Intercept) Arm Waist Age #-7.10755959 0.56057848 0.18578714 -0.02716573 ## part (m) anova(results3) # Df Sum Sq Mean Sq F value Pr(>F) #Arm 1 1546.91 1546.91 407.0686 < 2.2e-16 *** #Waist 1 100.39 100.39 26.4169 2.072e-06 *** #Age 1 6.81 6.81 1.7919 0.1847 #Residuals 76 288.81 3.80 ## part (n) # ssr(Age|Arm,Waist) = 6.81 # check anova(results3) # ssr(Age,Waist|Arm) = 100.39+6.81=107.2 ## part (o) # check anova(results3): f_obs=1.79 and p-value=0.1847. Therefore, do not reject Ho. ## part(p) # f_obs=(102.7/2)/3.8=14.105 # p_val=1-pf(14.105,2,76) = 6.171056e-06. # Therefore, we reject Ho. #Or you can use these commands: red=lm(BodyMass~Arm) full=lm(BodyMass~Arm+Waist+Age) anova(red,full) # Partial F-Test for Ho: beta_2=0 and beta_3=0 # Res.Df RSS Df Sum of Sq F Pr(>F) #1 78 396.01 #2 76 288.81 2 107.2 14.104 6.174e-06 ***