# R program for additional exercise 7.7 (VHM 802)

surv <- read.csv("r:/hs07_7.csv")

#Q1
surv.full <- lm(survival~ as.factor(poison)*as.factor(treat), data=surv)
summary(surv.full)
plot(fitted(surv.full), rstandard(surv.full))
# clearly unsatisfactory, so try a Box-Cox analysis

library(MASS)
bc <- boxcox(survival~ as.factor(poison)*as.factor(treat), data=surv, lambda=seq(-1.5, 1, 0.01))
bc # lowest value for obs # 67 ~ lambda=-0.84

surv$invsurv <- 1/surv$survival
surv.invfull <- lm(invsurv~ as.factor(poison)*as.factor(treat), data=surv)
summary(surv.invfull)
surv.diag <- surv #all diagnostics to be stored in surv.diag (for convenience, not at all necessary)
surv.diag$fit <- fitted(surv.invfull) 
surv.diag$stdres <- rstandard(surv.invfull)
plot(stdres~fit, data=surv.diag)
hist(surv.diag$stdres, nclass=20) # no overlaid normal distribution
qqnorm(surv.diag$stdres)
qqline(surv.diag$stdres)
shapiro.test(surv.diag$stdres)
# everything looks fine

#Q2
surv.diag$delres <- rstudent(surv.invfull) 
print(surv.diag[abs(surv.diag$stdres)>2 & !is.na(surv.diag$stdres),]) #no values listed
2*48*pt(2.786144, df.residual(surv.invfull)-1, lower.tail=FALSE)
# most extreme residual clearly non-significant

#Q3
anova(surv.invfull)
# model reduction to facilitate contrasts and pairwise comparisons
surv.invred <- lm(invsurv~ as.factor(poison) +as.factor(treat), data=surv)
summary(surv.invred)
anova(surv.invred)

# contrast of poisons
g<-3 # 3 categories for poison
coef <- coef(surv.invred)
vcov <- vcov(surv.invred)
w<- matrix(c(0,0.5,-1,0,0,0), ncol=1); w
est<- t(w)%*%coef
var <- diag(t(w)%*%vcov%*%w)
se <- matrix(sqrt(var), ncol=1)
t<- est/se
p<- 2*pt(abs(t), df.residual(surv.invred), lower.tail=FALSE)
ss<- (t^2)*(mse<- deviance(surv.invred)/df.residual(surv.invred))
F <- t^2/(g-1) # Scheffe test
pF <- 1-pf(F, g-1, df.residual(surv.invred))
data.frame(est,se,t,p,ss,F,pF)

# pairwise comparisons of treatments
w<- matrix(c(0,0,0,-1,0,0, 0,0,0,0,-1,0, 0,0,0,0,0,-1, 0,0,0,1,-1,0, 0,0,0,1,0,-1, 0,0,0,0,1,-1), ncol=6); w
est<- t(w)%*%coef
var <- diag(t(w)%*%vcov%*%w)
se <- matrix(sqrt(var), ncol=1)
t<- est/se
p<- 2*pt(abs(t), df.residual(surv.invred), lower.tail=FALSE)
t(p) # unadjusted P-values
p.adjust(p, method="bonferroni")

#Q4
# setup of two estimations
w<- matrix(c(1,0,1,1/4,1/4,1/4, 1,0,1,0,0,0), ncol=2); w
est<- t(w)%*%coef
var <- diag(t(w)%*%vcov%*%w)
se <- matrix(sqrt(var), ncol=1)
tstar <- qt(.025, df.residual(surv.invred), lower.tail=FALSE)
low <- est-tstar*se; upp <- est+tstar*se
est.surv <- 1/est
low.surv <- 1/upp; upp.surv <- 1/low
data.frame(est,se,low,upp,est.surv,low.surv,upp.surv)