# R program for linear regression exercise # 2 (VER 14.2)

# Load the btb_episodes data and append intvl.ln, the natural logarithm of
# intvl (log() defaults to base e); intvl.ln is the response in the
# regressions that follow.
btb <- read.csv(file = "h:/vhm/vhm802/data_csv/btb_episodes.csv")
btb[["intvl.ln"]] <- log(x = btb$intvl)

#Q2
# Three single-predictor models of intvl.ln that differ only in how p_year
# is coded.

# p_year entered as a continuous (linear) term
year.linear <- lm(formula = intvl.ln ~ p_year, data = btb)
summary(year.linear)

# p_year entered as a categorical predictor (one level per year)
year.factor <- lm(formula = intvl.ln ~ as.factor(p_year), data = btb)
summary(year.factor)

# p_year entered as continuous, shifted so that 1989 is the origin
year.shifted <- lm(formula = intvl.ln ~ I(p_year - 1989), data = btb)
summary(year.shifted)

#Q3
# Scatter plot of intvl.ln against p_year with a lowess smoother drawn on top.
plot(x = btb$p_year, y = btb$intvl.ln)
year.smooth <- lowess(x = btb$p_year, y = btb$intvl.ln)
lines(year.smooth)

# Quadratic model in the raw (uncentered) year.
quadr.year <- lm(formula = intvl.ln ~ p_year + I(p_year^2), data = btb)
summary(quadr.year)

#Q4
library(car) # supplies vif()

# Variance inflation factors for the quadratic model in raw p_year.
vif(quadr.year)

# Same quadratic model, but with p_year shifted by 1998 before squaring.
quadrct.year <- lm(
  formula = intvl.ln ~ I(p_year - 1998) + I((p_year - 1998)^2),
  data = btb
)
summary(quadrct.year)

# Variance inflation factors again, for comparison with the raw-year model.
vif(quadrct.year)

#Q5
# hdsize as the only predictor of intvl.ln
hdsize.only <- lm(formula = intvl.ln ~ hdsize, data = btb)
summary(hdsize.only)

# hdsize together with the quadratic in p_year (shifted by 1998)
hdsize.year <- lm(
  formula = intvl.ln ~ hdsize + I(p_year - 1998) + I((p_year - 1998)^2),
  data = btb
)
summary(hdsize.year)

#Q6
# Interaction model: the indicator (p_year >= 1999), p_rct, and their product,
# so the p_rct slope may differ between the two periods.
period.rct <- lm(formula = intvl.ln ~ I(p_year >= 1999) * p_rct, data = btb)
summary(period.rct)

#Q7
# Multiple regression: intvl.ln on hdsize plus the interaction between the
# period indicator (p_year >= 1999) and p_rct.
multreg <- lm(intvl.ln~ hdsize+I(p_year>=1999)*p_rct, data=btb)
summary(multreg)

# Prediction grid: hdsize held at 50, p_rct = 0, 10, ..., 40, evaluated for
# one year on either side of the 1999 cut-off. Built with data.frame()
# directly; routing through cbind() first only creates an intermediate matrix.
new <- data.frame(hdsize=rep(50,10),
                  p_year=rep(1998:1999, each=5),
                  p_rct=rep(seq(0,40,by=10), 2))

# predict() returns columns fit, lwr and upr (on the log scale); bind them to
# the grid so each prediction sits next to its covariate values.
btb.ci <- cbind(new, predict(multreg, new, interval="confidence"))
head(btb.ci,10) #just checking

# exp() back-transforms the log-scale predictions for plotting. The empty
# plot (type="n") sets axes covering both periods; each period is then drawn
# with its own line type and identified in a legend, since two identical
# lines cannot be told apart.
plot(exp(fit) ~ p_rct, data=btb.ci, type="n")
lines(exp(fit) ~ p_rct, data=btb.ci, subset=p_year==1998, lty=1)
lines(exp(fit) ~ p_rct, data=btb.ci, subset=p_year==1999, lty=2)
legend("topright", legend=c("p_year < 1999", "p_year >= 1999"),
       lty=c(1,2), bty="n")
#confidence limits (columns lwr and upr of btb.ci) may be added

