# R program for linear regression exercise # 1 (VER 14.1)

# Load the episode data into the data frame `btb`, which every later
# section reads from.
# NOTE(review): absolute, drive-specific path -- confirm that r:/ is
# mapped on the machine running this script.
btb <- read.csv(file = "r:/btb_episodes.csv")

# Q1 ----
# Describe `intvl` on its original scale: numeric summary + histogram.
summary(btb$intvl)
hist(btb$intvl)

# Add the natural-log transform as a new column (log() defaults to base e);
# this column is the response in every model below.
btb$intvl.ln <- log(btb$intvl)

# Same description for the transformed variable.
summary(btb$intvl.ln)
hist(btb$intvl.ln)

# Q2 ----
# Three single-predictor regressions of log(intvl), one per covariate.
summary(lm(intvl.ln ~ p_rct, data = btb))
summary(lm(intvl.ln ~ p_year, data = btb))

# The hdsize fit is stored under a name rather than summarised inline.
simple.hdsize <- lm(intvl.ln ~ hdsize, data = btb)
summary(simple.hdsize)

# Q3 ----
# Multivariable model: all three covariates entered together.
summary(
  lm(intvl.ln ~ p_rct + p_year + hdsize, data = btb)
)

# Q4 ----
# Refit the single-predictor hdsize model so this section runs on its own.
simple.hdsize <- lm(intvl.ln ~ hdsize, data = btb)
summary(simple.hdsize)

# Grid of hdsize values at which to evaluate the fitted line and its bands.
# NOTE(review): 472 is hard-coded -- presumably the largest hdsize in the
# data; confirm if the input file changes.
new <- data.frame(hdsize = seq(from = 1, to = 472, by = 1))

# predict() with an interval returns a matrix with columns fit, lwr, upr.
btb.pred <- predict(simple.hdsize, newdata = new, interval = "prediction")
btb.ci <- predict(simple.hdsize, newdata = new, interval = "confidence")

# Scatter of the observed data, then overlay the model curves.
plot(btb$hdsize, btb$intvl.ln)

# Fitted regression line (default colour).
lines(new$hdsize, btb.ci[, "fit"])

# Confidence band for the mean response (colour 2).
lines(new$hdsize, btb.ci[, "lwr"], col = 2)
lines(new$hdsize, btb.ci[, "upr"], col = 2)

# Prediction band for a new observation (colour 3).
lines(new$hdsize, btb.pred[, "lwr"], col = 3)
lines(new$hdsize, btb.pred[, "upr"], col = 3)

# Q5 ----
# Two-predictor model: the Q3 model with p_rct left out.
summary(
  lm(intvl.ln ~ p_year + hdsize, data = btb)
)

