# R program for additional exercise 2.7 (VHM 802)

crude <- read.csv("r:/data_csv/hs02_7.csv")
summary(crude)
cor(crude)

# full regression, with model checks
crude.full<- lm(y~x1+x2+x3+x4, data=crude)
summary(crude.full)
crude.diag <- crude #all diagnostics to be stored in btb.diag (for convenience, not at all necessary)
crude.diag$fit <- fitted(crude.full) 
crude.diag$stdres <- rstandard(crude.full)
plot(stdres~fit, data=crude.diag)
qqnorm(crude.diag$stdres)
qqline(crude.diag$stdres)
shapiro.test(crude.diag$stdres)
crude.diag$delres <- rstudent(crude.full)
crude.diag$hat <- hatvalues(crude.full)
crude.diag$cook <- cooks.distance(crude.full)
summary(crude.diag)
library(car) #for VIF (reliable source)
vif(crude.full)
print(v<-vcov(crude.full))
v/sqrt(diag(v)%o% diag(v))
# residual plots against each predictor
plot(stdres~x1, data=crude.diag)
plot(stdres~x2, data=crude.diag)
plot(stdres~x3, data=crude.diag)
plot(stdres~x4, data=crude.diag)

# model reductions and final model
crude.red<- lm(y~x1+x3+x4, data=crude)
summary(crude.red)
vif(crude.red)
# stepwise elimination based on AIC
step(crude.full, direction="both") # no model reduction

# x3 as categorical
crude.cat <- lm(y~ as.factor(x3)+x4, data=crude)
anova(crude.full, crude.cat)

# Box-Cox analyses
library(MASS)
boxcox(y ~ x1+x2+x3+x4, data=crude, lambda=seq(0, 1, 0.01))
boxcox(y ~ as.factor(x3)+x4, data=crude, lambda=seq(0, 1, 0.01))

