* do-file for lecture 10 of VHM 802, Winter 2021
* slightly updated on the matrix commands
version 16 /* works also with versions 14-15 */
set more off
cd "r:\"

* Manly Example 6.1 - sparrow data
import delimited r:\sparrow.csv, clear
pca total_length-l_keel_sternum
predict scor1-scor5, score
scoreplot, comp(2) mlabel(survivorship)
loadingplot
screeplot
biplot total_length- l_keel_sternum, std
* note: a separate command, not a postestimation command
encode survivorship, g(surv)
replace surv=surv-1
logit surv total_length-l_keel_sternum
logit surv scor1-scor5

* Manly Table 9.7 - Steneryd data
import delimited "r:\steneryd.csv", clear varnames(1)
* include code to relabel spec1-spec25 as s1-s25
foreach j of numlist 1(1)25 {
   rename spec`j' s`j'
   }
tabstat s1-s25, statistics( mean min max sd var )
pwcorr s1-s25
pca s1-s25
screeplot
scoreplot, comp(3) mlabel(plot)
loadingplot
predict sco1-sco4, score
pwcorr sco1-sco4 light-nitrogen
scatter light sco1, mlabel(plot)
scatter moisture sco1, mlabel(plot)
biplot s1-s25, std xneg yneg alpha(1) stretch(10)

* added code to demonstrate equivalence with mds (without standardization)
pca s1-s25, comp(5) cov
scoreplot, comp(2) mlabel(plot)
mds s1-s25, config noplot id(plot)
* note: eigenvalues multiplied by (n-1)
mdsconfig, xneg yneg

* calf data
use calf, clear
sort case
* note: polychoric is an add-on command; Stata has a tetrachoric command for binary variables
polychoric age sex-umb /* add verbose option to see lots of details! */
matrix polycorr=r(R)
sort case /* the polychoric command resorts the data, so it's nice to reset */
correlate age sex-umb /* note: comparison is not with pwcorr */
* for display of correlations only

pcamat polycorr, n(213) /* n=number of complete rows */
foreach var of varlist age sex-umb {
  egen s`var'=std(`var')
  }
mkmat sage-sumb, matrix(sdata)
*matrix list sdata
matrix load=e(L)
matrix scomat = sdata*load
*matrix list scomat
svmat scomat
twoway (scatter scomat2 scomat1 if sepsis==0, msymbol(smx)) (scatter scomat2 scomat1 if sepsis==1, msymbol(smcircle_hollow)), ytitle(2nd principal component) xtitle(1st principal component) title(Polychoric PCA (x=no sepsis, o=sepsis), size(medium)) legend(off)

pca age sex-umb
predict sco1-sco6
list sco1-sco5 scomat1-scomat5 in 250/254
list age sex-umb in 250/254
twoway (scatter sco2 sco1 if sepsis==0, msymbol(smx)) (scatter sco2 sco1 if sepsis==1, msymbol(smcircle_hollow)), ytitle(2nd principal component) xtitle(1st principal component) title(Ordinary PCA (x=no sepsis, o=sepsis), size(medium)) legend(off)

tabstat sco1-sco6 scomat1-scomat6, statistics( mean semean ) by(sepsis)
foreach var of varlist sco1-sco6 scomat1-scomat6 {
  ttest `var', by(sepsis) unpaired unequal
  }
logit sepsis sco1-sco6
logit sepsis sco1-sco3
logit sepsis sco1-sco2
logit sepsis scomat1-scomat6
logit sepsis scomat1-scomat3
logit sepsis scomat1-scomat2

* factor analysis
factormat polycorr, n(213) pcf
loadingplot
rotate, varimax
loadingplot
matrix rload=e(L)
matrix fsco=sdata*rload*inv(rload'*rload) /* formula (7.4) in Manly */
svmat fsco
twoway (scatter fsco2 fsco1 if sepsis==0, msymbol(smx)) (scatter sco2 sco1 if sepsis==1, msymbol(smcircle_hollow)), ytitle(2nd rotated factor) xtitle(1st rotated factor) title(Polychoric FA, varimax rotation (x=no sepsis, o=sepsis), size(medium)) legend(off)

* sparrow data revisited for factor analysis
import delimited r:\sparrow.csv, clear
factor total_length-l_keel_sternum, pcf /* only one factor */
factor total_length-l_keel_sternum, pcf mineigen(0.5)
