Example 1: Dichotomous and Categorical
use http://www.philender.com/courses/data/apilog, clear tabulate cred Full | Credent | Teachers, | Lo Med Hi | Freq. Percent Cum. ------------+----------------------------------- low | 382 31.83 31.83 medium | 325 27.08 58.92 high | 493 41.08 100.00 ------------+----------------------------------- Total | 1200 100.00 generate hicred = cred==3 tabulate pared hicred Parents | Education, | hicred Lo Med Hi | 0 1 | Total -----------+----------------------+---------- low | 265 132 | 397 medium | 252 169 | 421 high | 190 192 | 382 -----------+----------------------+---------- Total | 707 493 | 1200 xi3: logit hiqual i.pared i.hicred, nolog i.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) i.hicred _Ihicred_0-1 (naturally coded; _Ihicred_0 omitted) Logit estimates Number of obs = 1200 LR chi2(3) = 134.24 Prob > chi2 = 0.0000 Log likelihood = -690.30767 Pseudo R2 = 0.0886 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- _Ipared_2 | .062312 .1610005 0.39 0.699 -.2532432 .3778672 _Ipared_3 | .1184453 .1635732 0.72 0.469 -.2021523 .4390429 _Ihicred_1 | 1.456637 .1319549 11.04 0.000 1.19801 1.715264 _cons | -1.471349 .131101 -11.22 0.000 -1.728302 -1.214396 ------------------------------------------------------------------------------ postgr3 hicred, by(pared) egen sum = sum(hiqual), by(pared hicred) egen count = count(hiqual), by(pared hicred) generate diff = count - sum generate logit = log(sum/diff) tabdisp pared hicred, cell(logit) -------------------------------- Parents | hicred Education | 0 1 ----------+--------------------- low | -1.850203 .3991559 medium | -1.207415 -.1541507 high | -1.199417 0 -------------------------------- xi3: logit hiqual i.pared*i.hicred, nolog i.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) i.hicred _Ihicred_0-1 (naturally coded; _Ihicred_0 omitted) Logit estimates Number of obs = 1200 LR chi2(5) = 149.43 Prob > chi2 = 0.0000 Log likelihood = -682.71264 Pseudo R2 = 0.0986 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- _Ipared_2 | .6427879 .2335393 2.75 0.006 .1850593 1.100517 _Ipared_3 | .6507861 .2484375 2.62 0.009 .1638576 1.137715 _Ihicred_1 | 2.249359 .2523306 8.91 0.000 1.7548 2.743918 _Ipa2Xhi1 | -1.196095 .3314759 -3.61 0.000 -1.845775 -.5464137 _Ipa3Xhi1 | -1.049942 .3377583 -3.11 0.002 -1.711936 -.3879479 _cons | -1.850203 .1792891 -10.32 0.000 -2.201603 -1.498803 ------------------------------------------------------------------------------ describe _Ipared_2 _Ipared_3 _Ihicred_1 _Ipa2Xhi1 _Ipa3Xhi1 storage display value variable name type format label variable label ------------------------------------------------------------------------------- _Ipared_2 byte %8.0g pared=2 _Ipared_3 byte %8.0g pared=3 _Ihicred_1 byte %8.0g hicred=1 _Ipa2Xhi1 float %9.0g pared=2*hicred=1 _Ipa3Xhi1 float %9.0g pared=3*hicred=1 test _Ipa2Xhi1 _Ipa3Xhi1 ( 1) _Ipa2Xhi1 = 0 ( 2) _Ipa3Xhi1 = 0 chi2( 2) = 14.67 Prob > chi2 = 0.0007 test _Ipared_2 _Ipared_3 ( 1) _Ipared_2 = 0 ( 2) _Ipared_3 = 0 chi2( 2) = 9.30 Prob > chi2 = 0.0095 xi3: logit hiqual r.pared*r.hicred, nolog r.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) r.hicred _Ihicred_0-1 (naturally coded; _Ihicred_0 omitted) Logit estimates Number of obs = 1200 LR chi2(5) = 149.43 Prob > chi2 = 0.0000 Log likelihood = -682.71264 Pseudo R2 = 0.0986 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- _Ipared_2 | .0447406 .165738 0.27 0.787 -.2800998 .3695811 _Ipared_3 | .1034447 .139534 0.74 0.458 -.1700369 .3769264 _Ihicred_1 | 1.50068 .1334525 11.25 0.000 1.239118 1.762242 _Ipa2Xhi1 | -1.196095 .3314759 -3.61 0.000 -1.845775 -.5464137 _Ipa3Xhi1 | -.4518947 .2790681 -1.62 0.105 -.9988581 .0950686 _cons | -.6686717 .0667263 -10.02 0.000 -.7994527 -.5378906 ------------------------------------------------------------------------------ describe _Ipared_2 _Ipared_3 _Ihicred_1 _Ipa2Xhi1 _Ipa3Xhi1 storage display value variable name type format label variable label ------------------------------------------------------------------------------- _Ipared_2 double %10.0g pared(2 vs. 1) _Ipared_3 double %10.0g pared(3 vs. 2-) _Ihicred_1 double %10.0g hicred(1 vs. 0) _Ipa2Xhi1 float %9.0g pared(2 vs. 1)*hicred(1 vs. 0) _Ipa3Xhi1 float %9.0g pared(3 vs. 2-)*hicred(1 vs. 0) test _Ipa2Xhi1 _Ipa3Xhi1 ( 1) _Ipa2Xhi1 = 0 ( 2) _Ipa3Xhi1 = 0 chi2( 2) = 14.67 Prob > chi2 = 0.0007 test _Ipared_2 _Ipared_3 ( 1) _Ipared_2 = 0 ( 2) _Ipared_3 = 0 chi2( 2) = 0.59 Prob > chi2 = 0.7445 postgr3 hicred, by(pared) xi3: logit hiqual r.pared@i.hicred, nolog r.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) i.hicred _Ihicred_0-1 (naturally coded; _Ihicred_0 omitted) Logit estimates Number of obs = 1200 LR chi2(5) = 149.43 Prob > chi2 = 0.0000 Log likelihood = -682.71264 Pseudo R2 = 0.0986 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- _Ihicred_1 | 1.50068 .1334525 11.25 0.000 1.239118 1.762242 _Ipa2Whi0 | .6427879 .2335393 2.75 0.006 .1850593 1.100517 _Ipa2Whi1 | -.5533066 .2352354 -2.35 0.019 -1.014359 -.0922538 _Ipa3Whi0 | .3293921 .2078743 1.58 0.113 -.0780341 .7368183 _Ipa3Whi1 | -.1225026 .1861914 -0.66 0.511 -.4874311 .2424259 _cons | -1.419012 .0966765 -14.68 0.000 -1.608494 -1.229529 ------------------------------------------------------------------------------ describe _Ihicred_1 _Ipa2Whi0 _Ipa2Whi1 _Ipa3Whi0 _Ipa3Whi1 storage display value variable name type format label variable label ------------------------------------------------------------------------------- _Ihicred_1 byte %8.0g hicred=1 _Ipa2Whi0 double %10.0g pared(2 vs. 1) @ hicred==0 _Ipa2Whi1 double %10.0g pared(2 vs. 1) @ hicred==1 _Ipa3Whi0 double %10.0g pared(3 vs. 2-) @ hicred==0 _Ipa3Whi1 double %10.0g pared(3 vs. 2-) @ hicred==1 /* test of simple main effect of pared at hicred=0 */ test _Ipa2Whi0 _Ipa3Whi0 ( 1) _Ipa2Whi0 = 0 ( 2) _Ipa3Whi0 = 0 chi2( 2) = 9.30 Prob > chi2 = 0.0095 /* test of simple main effect of pared at hicred=1 */ test _Ipa2Whi1 _Ipa3Whi1 ( 1) _Ipa2Whi1 = 0 ( 2) _Ipa3Whi1 = 0 chi2( 2) = 5.74 Prob > chi2 = 0.0568
Example 2: Dichotomous and Continuous
use http://www.ats.ucla.edu/stat/data/hsbdemo, clear generate honors = write>=60 tab1 honors female -> tabulation of honors honors | Freq. Percent Cum. ------------+----------------------------------- 0 | 147 73.50 73.50 1 | 53 26.50 100.00 ------------+----------------------------------- Total | 200 100.00 -> tabulation of female female | Freq. Percent Cum. ------------+----------------------------------- male | 91 45.50 45.50 female | 109 54.50 100.00 ------------+----------------------------------- Total | 200 100.00 xi3: logit honors i.female socst, nolog i.female _Ifemale_0-1 (naturally coded; _Ifemale_0 omitted) Logit estimates Number of obs = 200 LR chi2(2) = 38.63 Prob > chi2 = 0.0000 Log likelihood = -96.330397 Pseudo R2 = 0.1670 ------------------------------------------------------------------------------ honors | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- _Ifemale_1 | .7181606 .3665387 1.96 0.050 -.000242 1.436563 socst | .1093791 .021713 5.04 0.000 .0668224 .1519358 _cons | -7.465113 1.298135 -5.75 0.000 -10.00941 -4.920815 ------------------------------------------------------------------------------ estimates store M1 postgr3 socst, by(female) xi3: logit honors i.female*socst, nolog i.female _Ifemale_0-1 (naturally coded; _Ifemale_0 omitted) Logit estimates Number of obs = 200 LR chi2(3) = 42.54 Prob > chi2 = 0.0000 Log likelihood = -94.374049 Pseudo R2 = 0.1839 ------------------------------------------------------------------------------ honors | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- _Ifemale_1 | 6.406617 3.162907 2.03 0.043 .2074326 12.6058 socst | .1774272 .0468625 3.79 0.000 .0855783 .269276 _Ife1Xso | -.0974514 .0529988 -1.84 0.066 -.2013271 .0064244 _cons | -11.5045 2.838139 -4.05 0.000 -17.06715 -5.941854 ------------------------------------------------------------------------------ postgr3 socst, by(female) /* postgr3 is available from UCLA ATS */ lrtest M1 likelihood-ratio test LR chi2(1) = 3.91 (Assumption: M1 nested in .) Prob > chi2 = 0.0479 logit honors socst if female==0 Logit estimates Number of obs = 91 LR chi2(1) = 26.03 Prob > chi2 = 0.0000 Log likelihood = -32.242503 Pseudo R2 = 0.2876 ------------------------------------------------------------------------------ honors | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- socst | .1774272 .0468627 3.79 0.000 .0855779 .2692764 _cons | -11.5045 2.838151 -4.05 0.000 -17.06718 -5.941829 ------------------------------------------------------------------------------ logit, or /* header deleted */ Logit estimates Number of obs = 91 LR chi2(1) = 26.03 Prob > chi2 = 0.0000 Log likelihood = -32.242503 Pseudo R2 = 0.2876 ------------------------------------------------------------------------------ honors | Odds Ratio Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- socst | 1.194141 .0559607 3.79 0.000 1.089346 1.309017 ------------------------------------------------------------------------------ logit honors socst if female==1 Logit estimates Number of obs = 109 LR chi2(1) = 12.57 Prob > chi2 = 0.0004 Log likelihood = -62.131547 Pseudo R2 = 0.0919 ------------------------------------------------------------------------------ honors | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- socst | .0799758 .0247537 3.23 0.001 .0314595 .1284921 _cons | -5.097887 1.396009 -3.65 0.000 -7.834013 -2.36176 ------------------------------------------------------------------------------ logit, or /* header deleted */ ------------------------------------------------------------------------------ honors | Odds Ratio Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- socst | 1.083261 .0268147 3.23 0.001 1.03196 1.137112 ------------------------------------------------------------------------------ /* rerun logit model with interaction */ quietly xi3: logit honors i.female*socst listcoef logit (N=200): Factor Change in Odds Odds of: 1 vs 0 ---------------------------------------------------------------------- honors | b z P>|z| e^b e^bStdX SDofX -------------+-------------------------------------------------------- _Ifemale_1 | 6.40662 2.026 0.043 605.8408 24.4912 0.4992 socst | 0.17743 3.786 0.000 1.1941 6.7182 10.7358 _Ife1Xso | -0.09745 -1.839 0.066 0.9071 0.0688 27.4723 ---------------------------------------------------------------------- listcoef, percent logit (N=200): Percentage Change in Odds Odds of: 1 vs 0 ---------------------------------------------------------------------- honors | b z P>|z| % %StdX SDofX -------------+-------------------------------------------------------- _Ifemale_1 | 6.40662 2.026 0.043 60484.1 2349.1 0.4992 socst | 0.17743 3.786 0.000 19.4 571.8 10.7358 _Ife1Xso | -0.09745 -1.839 0.066 -9.3 -93.1 27.4723 ---------------------------------------------------------------------- prchange min->max 0->1 -+1/2 -+sd/2 MargEfct _Ifemale_1 0.7940 0.7940 0.8339 0.4763 0.9402 socst 0.8529 0.0000 0.0260 0.2831 0.0260 _Ife1Xso -0.7798 -0.0170 -0.0143 -0.3995 -0.0143 0 1 Pr(y|x) 0.8213 0.1787 _Ifemale_1 socst _Ife1Xso x= .545 52.405 28.84 sd(x)= .49922 10.7358 27.4723 predict p1 (option p 
assumed; Pr(honors)) table socst female, cont(mean p1) ------------------------------ social | studies | female score | male female ----------+------------------- 26 | .0010155 .0465964 31 | .0024621 .0679486 32 | .0029387 33 | .0788058 36 | .0059575 .0980787 37 | .1053843 39 | .1214437 41 | .0143438 .1395686 42 | .017081 43 | .1599062 44 | .024181 46 | .03413 .1948186 47 | .0404877 48 | .221138 51 | .0790214 .2651989 52 | .2810734 56 | .1724187 .3499542 57 | .1992235 58 | .3871546 61 | .3359387 .4453785 66 | .5512419 .5450073 71 | .7489135 .6411588 ------------------------------ summarize socst Variable | Obs Mean Std. Dev. Min Max -------------+----------------------------------------------------- socst | 200 52.405 10.73579 26 71 generate csoc = socst-r(mean) xi3: logit honors i.female*csoc, nolog i.female _Ifemale_0-1 (naturally coded; _Ifemale_0 omitted) Logit estimates Number of obs = 200 LR chi2(3) = 42.54 Prob > chi2 = 0.0000 Log likelihood = -94.374049 Pseudo R2 = 0.1839 ------------------------------------------------------------------------------ honors | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- _Ifemale_1 | 1.299679 .5328923 2.44 0.015 .2552294 2.344129 csoc | .1774272 .0468625 3.79 0.000 .0855783 .269276 _Ife1Xcs | -.0974514 .0529988 -1.84 0.066 -.2013271 .0064244 _cons | -2.206433 .4795982 -4.60 0.000 -3.146428 -1.266437 ------------------------------------------------------------------------------ listcoef logit (N=200): Factor Change in Odds Odds of: 1 vs 0 ---------------------------------------------------------------------- honors | b z P>|z| e^b e^bStdX SDofX -------------+-------------------------------------------------------- _Ifemale_1 | 1.29968 2.439 0.015 3.6681 1.9133 0.4992 csoc | 0.17743 3.786 0.000 1.1941 6.7182 10.7358 _Ife1Xcs | -0.09745 -1.839 0.066 0.9071 0.4794 7.5439 ----------------------------------------------------------------------
Example 3: Categorical and Continuous
use http://www.philender.com/courses/data/apilog, clear summarize meals Variable | Obs Mean Std. Dev. Min Max -------------+----------------------------------------------------- meals | 1200 52.15 31.23653 0 100 generate mealcent = meals-r(mean) univar meals mealcent -------------- Quantiles -------------- Variable n Mean S.D. Min .25 Mdn .75 Max ------------------------------------------------------------------------------- meals 1200 52.15 31.24 0.00 24.00 53.00 80.00 100.00 mealcent 1200 -0.00 31.24 -52.15 -28.15 0.85 27.85 47.85 ------------------------------------------------------------------------------- codebook pared pared -------------------------------------------- Parents Education, Lo Med Hi type: numeric (byte) label: lmh range: [1,3] units: 1 unique values: 3 coded missing: 0 / 1200 tabulation: Freq. Numeric Label 397 1 low 421 2 medium 382 3 high xi3: logit hiqual i.pared meals, nolog i.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) Logit estimates Number of obs = 1200 LR chi2(3) = 897.02 Prob > chi2 = 0.0000 Log likelihood = -308.91779 Pseudo R2 = 0.5921 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- _Ipared_2 | -.2201914 .3091861 -0.71 0.476 -.826185 .3858022 _Ipared_3 | -.6882049 .2975907 -2.31 0.021 -1.271472 -.1049379 meals | -.1090413 .0064922 -16.80 0.000 -.1217658 -.0963168 _cons | 3.984179 .3452765 11.54 0.000 3.307449 4.660908 ------------------------------------------------------------------------------ estimates store M1 postgr3 meals, by(pared) xi3: logit hiqual meals, nolog Logit estimates Number of obs = 1200 LR chi2(1) = 889.70 Prob > chi2 = 0.0000 Log likelihood = -312.57619 Pseudo R2 = 0.5873 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- meals | -.1079217 .0064131 -16.83 0.000 -.1204911 -.0953523 _cons | 3.536434 .2353569 15.03 0.000 3.075143 3.997725 ------------------------------------------------------------------------------ lrtest M1 likelihood-ratio test LR chi2(2) = 7.32 (Assumption: . nested in M1) Prob > chi2 = 0.0258 xi3: logit hiqual i.pared mealcent, nolog i.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) Logit estimates Number of obs = 1200 LR chi2(3) = 897.02 Prob > chi2 = 0.0000 Log likelihood = -308.91779 Pseudo R2 = 0.5921 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- _Ipared_2 | -.2201914 .3091861 -0.71 0.476 -.826185 .3858022 _Ipared_3 | -.6882049 .2975907 -2.31 0.021 -1.271472 -.1049379 mealcent | -.1090413 .0064922 -16.80 0.000 -.1217658 -.0963168 _cons | -1.704234 .2784723 -6.12 0.000 -2.250029 -1.158438 ------------------------------------------------------------------------------ xi3: logit hiqual r.pared mealcent, nolog r.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) Logit estimates Number of obs = 1200 LR chi2(3) = 897.02 Prob > chi2 = 0.0000 Log likelihood = -308.91779 Pseudo R2 = 0.5921 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- _Ipared_2 | -.2201914 .3091861 -0.71 0.476 -.826185 .3858022 _Ipared_3 | -.5781091 .2159999 -2.68 0.007 -1.001461 -.154757 mealcent | -.1090413 .0064922 -16.80 0.000 -.1217658 -.0963168 _cons | -2.007032 .1630359 -12.31 0.000 -2.326577 -1.687488 ------------------------------------------------------------------------------ estimates store M2 describe _Ipared_2 _Ipared_3 storage display value variable name type format label variable label ------------------------------------------------------------------------------- _Ipared_2 float %9.0g pared(2 vs. 1) _Ipared_3 float %9.0g pared(3 vs. 2-) test _Ipared_2 _Ipared_3 ( 1) _Ipared_2 = 0.0 ( 2) _Ipared_3 = 0.0 chi2( 2) = 7.18 Prob > chi2 = 0.0276 xi3: logit hiqual r.pared*mealcent, nolog r.pared _Ipared_1-3 (naturally coded; _Ipared_1 omitted) Logit estimates Number of obs = 1200 LR chi2(5) = 907.67 Prob > chi2 = 0.0000 Log likelihood = -303.59105 Pseudo R2 = 0.5992 ------------------------------------------------------------------------------ hiqual | Coef. Std. Err. z P>|z| [95% Conf. Interval] -------------+---------------------------------------------------------------- _Ipared_2 | .4336538 .5717637 0.76 0.448 -.6869825 1.55429 _Ipared_3 | .2808648 .3667777 0.77 0.444 -.4380063 .9997359 mealcent | -.1180247 .0088005 -13.41 0.000 -.1352733 -.1007761 _Ipa2Xme | .0369404 .0247695 1.49 0.136 -.0116069 .0854877 _Ipa3Xme | .0454167 .0153912 2.95 0.003 .0152506 .0755829 _cons | -2.138486 .2054024 -10.41 0.000 -2.541068 -1.735905 ------------------------------------------------------------------------------ describe _Ipa2Xme _Ipa3Xme storage display value variable name type format label variable label ------------------------------------------------------------------------------- _Ipa2Xme float %9.0g pared(2 vs. 1)*mealcent _Ipa3Xme float %9.0g pared(3 vs. 
2-)*mealcent postgr3 mealcent, by(pared) lrtest M2 likelihood-ratio test LR chi2(2) = 10.65 (Assumption: M2 nested in .) Prob > chi2 = 0.0049
Categorical Data Analysis Course
Phil Ender