### Exam 2 R codes ###

data=read.csv("HealthExam.csv",header=T)
attach(data)


## part (a) 
results=lm(BodyMass~Arm)
coefficients(results)     # y.hat=-5.635212+1.007401*Arm


## part(b) - Bonferroni C.I. for beta0 and beta1
confint(results,level=(1-.05/2))  
#                1.25 %   98.75 %
#(Intercept) -9.8001293 -1.470295
#Arm          0.8754987  1.139304


## part(c) - W-H procedure, constuct 95% joint CI for the mean response at the 3 levels.
new=data.frame(Arm=c(25,36,42))
ci=predict(results,new,interval="confidence",se.fit=T)
W2=2*qf(1-.05,2,78)       # W2=6.227585

wh.lwr=ci$fit[,1]-sqrt(W2)*ci$se
wh.upr=ci$fit[,1]+sqrt(W2)*ci$se
round(cbind(wh.lwr,wh.upr),digits=2)
#  wh.lwr wh.upr
#1  18.45  20.65
#2  29.70  31.56
#3  35.01  38.34


## part (d) - Scheffe Procedure, construct 95% joint prediction intervals
S=sqrt(3*qf(1-.05,3,78))		# S=2.857508
MSE=anova(results)$Mean[2]  # MSE=5.076992
se.fit.pred=sqrt(MSE*(1+(ci$se.fit^2/MSE)))

s.lwr=ci$fit[,1]-S*se.fit.pred
s.upr=ci$fit[,1]+S*se.fit.pred
round(cbind(s.lwr,s.upr),digits=2)
#  s.lwr s.upr
#1 12.99 26.11
#2 24.11 37.16
#3 29.96 43.39


## part (e) - Lack of Fit Test
x=Arm
y=BodyMass

reduced.mod=lm(y~x)
full.mod=lm(y~factor(x))
anova(reduced.mod,full.mod)
#  Res.Df    RSS Df Sum of Sq      F Pr(>F)
#1     78 396.01                           
#2     19  70.50 59     325.5 1.4868 0.1699

# SSPE = 70.50  --> MSPE = 70.50/19=3.710526
# SSLF = 325.5  --> MSLF = 325.5/59=5.516949
# F = 1.4868    --> p-value = 0.1699
# Since the p-value is not less than 0.01, we don't reject null hypothesis
# that the relationship between Arm and BodyMass is linear.


# part (f) - Regression through the origin
reg.origin=lm(BodyMass~0+Arm)     # slope=0.8306


# part (g) - Brown-Forysthe Test
e=reg.origin$residuals
x.med=median(x)

e1=e[x<=x.med]; n1=length(e1)
e2=e[x>x.med]; n2=length(e2)

d1=abs(e1-median(e1))
d2=abs(e2-median(e2))
s2=(sum((d1-mean(d1))^2)+sum((d2-mean(d2))^2))/(n1+n2-2)

t.bf=(mean(d1)-mean(d2))/(sqrt(s2)*sqrt(1/n1+1/n2))   # t.bf=-1.882942
p.val=2*pt(abs(t.bf),df=(n1+n2-2),lower.tail=F)       # p-val=0.06343652

plot(reg.origin$fit,reg.origin$res)


### Multiple Regression
## part (h)
results2=lm(BodyMass~Arm+Waist)
coef(results2)      # BodyMass.hat=-6.9086747+0.5874524*Arm+0.1634134*Waist

## part (i)
summary(results2)$r.sq    # R2=0.8478479
# About 84.78% of the variability of BodyMass can be explained by the first-order
# regression line involving the predictors Arm and Waist.


## part (j)
new2=data.frame(Arm=c(25,36,42),Waist=c(78,102,108))
predict(results2,new2)
#       1        2        3 
#20.52388 30.90778 35.41297 


## part (k)
predict(results2,new2,interval="prediction",level=(1-.05/3))
#       fit      lwr      upr
#1 20.52388 15.61528 25.43248
#2 30.90778 26.04587 35.76968
#3 35.41297 30.37464 40.45131


## part (l)
results3=lm(BodyMass~Arm+Waist+Age)
coef(results3)
#(Intercept)         Arm       Waist         Age 
#-7.10755959  0.56057848  0.18578714 -0.02716573 


## part (m) 
anova(results3)
#          Df  Sum Sq Mean Sq  F value    Pr(>F)    
#Arm        1 1546.91 1546.91 407.0686 < 2.2e-16 ***
#Waist      1  100.39  100.39  26.4169 2.072e-06 ***
#Age        1    6.81    6.81   1.7919    0.1847    
#Residuals 76  288.81    3.80             


## part (n)
# ssr(Age|Arm,Waist) = 6.81  # check anova(results3)
# ssr(Age,Waist|Arm) = 100.39+6.81=107.2


## part (o)
# check anova(results3): f_obs=1.79 and p-value=0.1847. Therefore, do not reject Ho.


## part(p)
# f_obs=(102.7/2)/3.8=14.105
# p_val=1-pf(14.105,2,76) = 6.171056e-06.
# Therefore, we reject Ho.

#Or you can use these commands:
red=lm(BodyMass~Arm)
full=lm(BodyMass~Arm+Waist+Age)
anova(red,full)			# Partial F-Test for Ho: beta_2=0 and beta_3=0
#  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
#1     78 396.01                                  
#2     76 288.81  2     107.2 14.104 6.174e-06 ***