* do-file for lecture 4a of VHM 802/812, Winter 2023
version 17 /* works also with versions 14-16, except for table command */
set more off
cd "r:\"

use nocardia.dta, clear
tabulate casecont dclox, chi2 row exact lrchi2
cc casecont dclox
* logistic regression for case-control design
logit casecont dclox
logit casecont dclox, or /* displays ORs */
logistic casecont dclox  /* same using logistic command */
logit dclox casecont /* to demonstrate same log(OR) */

* logistic regression for categorical predictor
tabulate casecont dbarn, chi2 row exact lrchi2
logit casecont i.dbarn
logistic casecont i.dbarn
testparm i.dbarn /* multiple Wald test for dbarn */

* multiple logistic regression 
logit casecont dneo dclox dcpct 
logistic casecont dneo dclox dcpct /* shows ORs */
testparm dclox /* Wald test as chi^2-test, same P as z-test */
testparm dneo dclox /* Wald test for dneo+dclox */
* profile likelihood CI for dclox, using logprof add-on command
logprof dclox

* display of iterations of ML estimation
logit casecont dneo dclox dcpct, trace 

* likelihood-ratio tests 
logit casecont dneo dclox dcpct
estimates store full
logit casecont dneo dcpct
lrtest full /* likelihood-ratio test for dclox */
logit casecont dcpct
lrtest full /* likelihood-ratio test for dneo+dclox */
lrtest full, stats /* same with all fit statistics */
estimates store red
logit casecont
estimates store null
estimates stats *
estimates table *

use mice.dta, clear
logit dead dose
estimates store red
* does not work with non-integer dose values: logit dead i.dose 
egen Dose=group(dose), label /* create dose groups (in new variable Dose)  */
logit dead i.Dose /* dose as categorical predictor */
logit dead i.Dose, asis /* avoids exclusion of dose=0.1413 */
logit dead i.Dose, asis trace /* showing convergence process */
testparm i.Dose /* note: multiple Wald test for dose fails here, LR-test = 33.70 */
lrtest red, stats /* likelihood-ratio (goodness-of-fit) test ok */

* prediction after logistic regression for Nocardia data
use nocardia.dta, clear
* effects on probability scale
logit casecont i.dneo i.dclox dcpct
predict phat
predict logitphat, xb
* plots of predicted values - for demonstration only (not valid in case-control study)
twoway (connected logitphat dcpct if dneo==1 & dclox==0, sort) (connected logitphat dcpct if dneo==0 & dclox==0, sort)
twoway (connected phat dcpct if dneo==1 & dclox==0, sort) (connected phat dcpct if dneo==0 & dclox==0, sort)
* manually created prediction values and plots
set obs 119
replace dneo=0 in 109/119
replace dclox=0 in 109/119
replace dcpct=0+(_n-109)*10 in 109/119
capture drop phat logitphat
predict phat
predict logitphat, xb
twoway (connected logitphat dcpct in 109/119, sort)
twoway (connected phat dcpct in 109/119, sort)

* estimation and plots by margins and marginsplot commands
* logit scale
margins, at(dcpct=(0(10)100) dneo=1 dclox=0) at(dcpct=(0(10)100) dneo=0 dclox=0) predict(xb) plot
* probability scale, the default
margins, at(dcpct=(0(10)100) dneo=1 dclox=0) at(dcpct=(0(10)100) dneo=0 dclox=0) 
marginsplot, noci /* CIs not quite correct anyway */
margins, over(dcpct) at(dneo=1 dclox=0) at(dneo=0 dclox=0) 
marginsplot, noci /* CIs not quite correct anyway */
* figure in lecture
margins, over(dcpct) at(dneo=0 dclox=0) at(dneo=1 dclox=0) at(dneo=0 dclox=1) at(dneo=1 dclox=1) 
marginsplot, noci /* CIs not quite correct anyway */
* illustration of weighting
margins, at(dcpct=(0(10)100)) predict(xb)
table dneo dclox /* Stata 16: table dneo dclox, row col */
lincom _cons+0*dcpct+1.dclox*27/108+1.dneo*74/108 /* dcpct=0 */
margins, at(dcpct=(0(10)100)) predict(xb) atmeans /* same as above */
margins, at(dcpct=(0(10)100)) predict(xb) over(dneo dclox)
di ((-2.984272)*22+(-4.396788)*12+(-.7717085)*59+(-2.184225)*15)/108 /* dcpct=0 again */
margins, at(dcpct=(0(10)100)) atmeans /* backtransformed */
di invlogit(-1.821385) /* dcpct=0, transformed to probability scale */
* weighting on probability scale
margins, at(dcpct=(0(10)100)) /* not the same! */
margins, at(dcpct=(0(10)100)) expression(invlogit(predict(xb))) /* same as preceding line */
margins, at(dcpct=(0(10)100)) predict(pr) /* same as preceding line */
margins, at(dcpct=(0(10)100)) over(dneo dclox)
di (.0481415*22+.012167*12+.3161096*59+.1011761*15)/108 /* dcpct=0, weighted at probability scale */
