[Stata Program]

* Compares three ways of handling missing values in pctmeal and avged when
* modelling api99: listwise deletion, mean substitution, and regression
* substitution. One command per line so the script can be run as a do-file.

use http://www.gseis.ucla.edu/courses/data/missdata
summarize

* --- Mean substitution ------------------------------------------------------
* The constants are the observed means of pctmeal and avged as reported by
* the -summarize- command above.
generate pctmeal2=pctmeal
replace pctmeal2=55.65104 if pctmeal==.
label variable pctmeal2 "pctmeal mean substitution"

generate avged2=avged
replace avged2=2.715862 if avged==.
label variable avged2 "avged mean substitution"

* --- Regression substitution ------------------------------------------------
* Forward stepwise selection (entry threshold p < .05) picks the predictors;
* the fitted equation is then typed in by hand to fill the missing cases.
sw regress pctmeal pctel pctcred avged, pe(.05)
generate pctmeal3=pctmeal
* FIX: the pctcred coefficient must be -.2860316, the value reported in the
* coefficient table of the regression above. It was previously typed as
* -.2680316 (two digits transposed). The [Stata Output] log that accompanies
* this program was evidently produced with the mistyped value, so its
* pctmeal3 figures will differ slightly from a rerun of this script.
replace pctmeal3=-24.65785*avged -.2860316*pctcred +.2681044*pctel +135.0125 if pctmeal==.
label variable pctmeal3 "pctmeal regression substitution"

sw regress avged pctmeal pctel pctcred, pe(.05)
generate avged3=avged
* Only pctel and pctmeal entered the stepwise model for avged.
replace avged3= -.018986*pctel -.009055*pctmeal +3.663708 if avged==.
label variable avged3 "avged regression substitution"

* Compare the original variables with their two filled-in versions.
summarize pctmeal pctmeal2 pctmeal3 avged avged2 avged3

* --- Do cases with missing pctmeal differ from complete cases? ---------------
* miss = 1 when pctmeal is missing, 0 otherwise.
generate miss=0 if pctmeal!=.
replace miss=1 if pctmeal==.
ttest avged, by(miss) unequal
ttest pctel, by(miss) unequal
ttest pctcred, by(miss) unequal

* --- Full models: original (listwise deletion), mean, regression substitution
regress api99 pctmeal pctel pctcred avged
regress api99 pctmeal2 pctel pctcred avged2
regress api99 pctmeal3 pctel pctcred avged3

* --- Stepwise models for the same three variable sets ------------------------
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
sw regress api99 pctmeal2 pctel pctcred avged2, pe(.05)
sw regress api99 pctmeal3 pctel pctcred avged3, pe(.05)

[Stata Output]

use http://www.gseis.ucla.edu/courses/data/missdata
summarize

    Variable |     Obs        Mean   Std. Dev.       Min        Max
-------------+-----------------------------------------------------
       schid |     236     6052145   26119.35    6011225    6115794
       api99 |     236    582.5636   130.7217        345        901
      strank |     236    4.427966   2.936038          1         10
     pctmeal |     192    55.65104   27.18673          1         98
       pctel |     236    26.02542   16.92987          1         69
       avged |     232    2.715862   .6675309        1.4       4.52
     pctcred |     236    78.52119   12.83527         33        100
    district |     236           1          0          1          1

generate pctmeal2 = pctmeal
replace pctmeal2 = 55.65104 if pctmeal==.
generate avged2 = avged
replace avged2 = 2.715862 if avged==.
label variable avged2 "avged mean substitution" sw regress pctmeal pctel pctcred avged, pe(.05) begin with empty model p = 0.0000 < 0.0500 adding avged p = 0.0093 < 0.0500 adding pctcred p = 0.0336 < 0.0500 adding pctel Source | SS df MS Number of obs = 188 -------------+------------------------------ F( 3, 184) = 83.28 Model | 80214.3302 3 26738.1101 Prob > F = 0.0000 Residual | 59073.3879 184 321.051021 R-squared = 0.5759 -------------+------------------------------ Adj R-squared = 0.5690 Total | 139287.718 187 744.854107 Root MSE = 17.918 ------------------------------------------------------------------------------ pctmeal | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- avged | -24.65785 3.452969 -7.14 0.000 -31.47035 -17.84535 pctcred | -.2860316 .1233665 -2.32 0.022 -.5294264 -.0426368 pctel | .2681044 .1251983 2.14 0.034 .0210956 .5151132 _cons | 135.0125 14.73126 9.17 0.000 105.9486 164.0764 ------------------------------------------------------------------------------ generate pctmeal3=pctmeal replace pctmeal3 = -24.65785*avged -.2680316*pctcred +.2681044*pctel +135.0125 if pctmeal==. [Note: the pctcred coefficient typed in this command, -.2680316, transposes two digits of the fitted value -.2860316 reported in the coefficient table above; the pctmeal3 results that follow were computed with the value as typed.] label variable pctmeal3 "pctmeal regression substitution" sw regress avged pctmeal pctel pctcred, pe(.05) begin with empty model p = 0.0000 < 0.0500 adding pctel p = 0.0000 < 0.0500 adding pctmeal Source | SS df MS Number of obs = 188 -------------+------------------------------ F( 2, 185) = 220.50 Model | 50.4861884 2 25.2430942 Prob > F = 0.0000 Residual | 21.1791794 185 .114482051 R-squared = 0.7045 -------------+------------------------------ Adj R-squared = 0.7013 Total | 71.6653678 187 .383237261 Root MSE = .33835 ------------------------------------------------------------------------------ avged | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- pctel | -.018986 .0019249 -9.86 0.000 -.0227836 -.0151884 pctmeal | -.009055 .0011999 -7.55 0.000 -.0114222 -.0066878 _cons | 3.663708 .0568459 64.45 0.000 3.551558 3.775857 ------------------------------------------------------------------------------ generate avged3=avged replace avged3= -.018986*pctel -.009055*pctmeal +3.663708 if avged==. label variable avged3 "avged regression substitution" summarize pctmeal pctmeal2 pctmeal3 avged avged2 avged3 Variable | Obs Mean Std. Dev. Min Max -------------+----------------------------------------------------- pctmeal | 192 55.65104 27.18673 1 98 pctmeal2 | 236 55.65104 24.5098 1 98 pctmeal3 | 236 53.1193 27.28459 -.8314203 98 avged | 232 2.715862 .6675309 1.4 4.52 avged2 | 236 2.715862 .6618254 1.4 4.52 avged3 | 236 2.710821 .6641614 1.4 4.52 generate miss=0 if pctmeal!=. replace miss=1 if pctmeal==. ttest avged, by(miss) unequal Two-sample t test with unequal variances ------------------------------------------------------------------------------ Group | Obs Mean Std. Err. Std. Dev. [95% Conf. 
Interval] ---------+-------------------------------------------------------------------- 0 | 188 2.63133 .0451497 .6190616 2.542262 2.720398 1 | 44 3.077045 .1130593 .7499506 2.84904 3.305051 ---------+-------------------------------------------------------------------- combined | 232 2.715862 .0438256 .6675309 2.629513 2.802211 ---------+-------------------------------------------------------------------- diff | -.4457157 .1217411 -.6894547 -.2019767 ------------------------------------------------------------------------------ Satterthwaite's degrees of freedom: 57.4725 Ho: mean(0) - mean(1) = diff = 0 Ha: diff < 0 Ha: diff ~= 0 Ha: diff > 0 t = -3.6612 t = -3.6612 t = -3.6612 P < t = 0.0003 P > |t| = 0.0005 P > t = 0.9997 ttest pctel, by(miss) unequal Two-sample t test with unequal variances ------------------------------------------------------------------------------ Group | Obs Mean Std. Err. Std. Dev. [95% Conf. Interval] ---------+-------------------------------------------------------------------- 0 | 192 28.06771 1.22033 16.90939 25.66065 30.47476 1 | 44 17.11364 2.113136 14.01696 12.85209 21.37518 ---------+-------------------------------------------------------------------- combined | 236 26.02542 1.102041 16.92987 23.85428 28.19657 ---------+-------------------------------------------------------------------- diff | 10.95407 2.440195 6.092524 15.81562 ------------------------------------------------------------------------------ Satterthwaite's degrees of freedom: 74.596 Ho: mean(0) - mean(1) = diff = 0 Ha: diff < 0 Ha: diff ~= 0 Ha: diff > 0 t = 4.4890 t = 4.4890 t = 4.4890 P < t = 1.0000 P > |t| = 0.0000 P > t = 0.0000 ttest pctcred, by(miss) unequal Two-sample t test with unequal variances ------------------------------------------------------------------------------ Group | Obs Mean Std. Err. Std. Dev. [95% Conf. 
Interval] ---------+-------------------------------------------------------------------- 0 | 192 78 .8467614 11.73307 76.3298 79.6702 1 | 44 80.79545 2.5332 16.80335 75.68677 85.90414 ---------+-------------------------------------------------------------------- combined | 236 78.52119 .8355051 12.83527 76.87515 80.16722 ---------+-------------------------------------------------------------------- diff | -2.795455 2.670975 -8.152759 2.56185 ------------------------------------------------------------------------------ Satterthwaite's degrees of freedom: 52.9969 Ho: mean(0) - mean(1) = diff = 0 Ha: diff < 0 Ha: diff ~= 0 Ha: diff > 0 t = -1.0466 t = -1.0466 t = -1.0466 P < t = 0.1500 P > |t| = 0.3000 P > t = 0.8500 regress api99 pctmeal pctel pctcred avged Source | SS df MS Number of obs = 188 -------------+------------------------------ F( 4, 183) = 280.87 Model | 2353840.29 4 588460.072 Prob > F = 0.0000 Residual | 383409.627 183 2095.13457 R-squared = 0.8599 -------------+------------------------------ Adj R-squared = 0.8569 Total | 2737249.91 187 14637.7001 Root MSE = 45.773 ------------------------------------------------------------------------------ api99 | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- pctmeal | -.637294 .1883259 -3.38 0.001 -1.008863 -.2657247 pctel | -.565281 .3237896 -1.75 0.083 -1.204122 .0735596 pctcred | 2.510879 .3197197 7.85 0.000 1.880068 3.14169 avged | 120.5732 9.968544 12.10 0.000 100.9051 140.2413 _cons | 106.7768 45.41669 2.35 0.020 17.1691 196.3845 ------------------------------------------------------------------------------ regress api99 pctmeal2 pctel pctcred avged2 Source | SS df MS Number of obs = 236 -------------+------------------------------ F( 4, 231) = 398.17 Model | 3507064.46 4 876766.115 Prob > F = 0.0000 Residual | 508655.587 231 2201.97224 R-squared = 0.8733 -------------+------------------------------ Adj R-squared = 0.8711 Total | 4015720.05 235 17088.1704 Root MSE = 46.925 ------------------------------------------------------------------------------ api99 | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- pctmeal2 | -.4582449 .1619505 -2.83 0.005 -.7773339 -.1391559 pctel | -.6843467 .3073912 -2.23 0.027 -1.289995 -.0786979 pctcred | 2.357826 .2759607 8.54 0.000 1.814104 2.901547 avged2 | 131.9708 8.195027 16.10 0.000 115.8242 148.1173 _cons | 82.32209 35.59079 2.31 0.022 12.19803 152.4461 ------------------------------------------------------------------------------ regress api99 pctmeal3 pctel pctcred avged3 Source | SS df MS Number of obs = 236 -------------+------------------------------ F( 4, 231) = 420.65 Model | 3530961.55 4 882740.387 Prob > F = 0.0000 Residual | 484758.498 231 2098.52164 R-squared = 0.8793 -------------+------------------------------ Adj R-squared = 0.8772 Total | 4015720.05 235 17088.1704 Root MSE = 45.81 ------------------------------------------------------------------------------ api99 | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- pctmeal3 | -.6404214 .187665 -3.41 0.001 -1.010175 -.2706676 pctel | -.5843708 .3002018 -1.95 0.053 -1.175854 .0071127 pctcred | 2.18274 .2747013 7.95 0.000 1.6415 2.72398 avged3 | 125.2458 9.012319 13.90 0.000 107.4889 143.0026 _cons | 120.8806 40.76409 2.97 0.003 40.56361 201.1975 ------------------------------------------------------------------------------ sw regress api99 pctmeal pctel pctcred avged, pe(.05) begin with empty model p = 0.0000 < 0.0500 adding avged p = 0.0000 < 0.0500 adding pctcred p = 0.0003 < 0.0500 adding pctmeal Source | SS df MS Number of obs = 188 -------------+------------------------------ F( 3, 184) = 369.37 Model | 2347454.49 3 782484.831 Prob > F = 0.0000 Residual | 389795.423 184 2118.45338 R-squared = 0.8576 -------------+------------------------------ Adj R-squared = 0.8553 Total | 2737249.91 187 14637.7001 Root MSE = 46.027 ------------------------------------------------------------------------------ api99 | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- avged | 130.5665 8.206563 15.91 0.000 114.3755 146.7576 pctcred | 2.574684 .3193867 8.06 0.000 1.944553 3.204815 pctmeal | -.6885639 .1870544 -3.68 0.000 -1.057611 -.3195166 _cons | 62.5067 37.88756 1.65 0.101 -12.2432 137.2566 ------------------------------------------------------------------------------ sw regress api99 pctmeal2 pctel pctcred avged2, pe(.05) begin with empty model p = 0.0000 < 0.0500 adding avged2 p = 0.0000 < 0.0500 adding pctcred p = 0.0010 < 0.0500 adding pctmeal2 p = 0.0270 < 0.0500 adding pctel Source | SS df MS Number of obs = 236 -------------+------------------------------ F( 4, 231) = 398.17 Model | 3507064.46 4 876766.115 Prob > F = 0.0000 Residual | 508655.587 231 2201.97224 R-squared = 0.8733 -------------+------------------------------ Adj R-squared = 0.8711 Total | 4015720.05 235 17088.1704 Root MSE = 46.925 ------------------------------------------------------------------------------ api99 | Coef. Std. Err. t P>|t| [95% Conf. 
Interval] -------------+---------------------------------------------------------------- avged2 | 131.9708 8.195027 16.10 0.000 115.8242 148.1173 pctcred | 2.357826 .2759607 8.54 0.000 1.814104 2.901547 pctmeal2 | -.4582449 .1619505 -2.83 0.005 -.7773339 -.1391559 pctel | -.6843467 .3073912 -2.23 0.027 -1.289995 -.0786979 _cons | 82.32209 35.59079 2.31 0.022 12.19803 152.4461 ------------------------------------------------------------------------------ sw regress api99 pctmeal3 pctel pctcred avged3, pe(.05) begin with empty model p = 0.0000 < 0.0500 adding avged3 p = 0.0000 < 0.0500 adding pctcred p = 0.0002 < 0.0500 adding pctmeal3 Source | SS df MS Number of obs = 236 -------------+------------------------------ F( 3, 232) = 552.95 Model | 3523009.78 3 1174336.59 Prob > F = 0.0000 Residual | 492710.271 232 2123.75117 R-squared = 0.8773 -------------+------------------------------ Adj R-squared = 0.8757 Total | 4015720.05 235 17088.1704 Root MSE = 46.084 ------------------------------------------------------------------------------ api99 | Coef. Std. Err. t P>|t| [95% Conf. Interval] -------------+---------------------------------------------------------------- avged3 | 134.8376 7.591207 17.76 0.000 119.8811 149.7941 pctcred | 2.221682 .2756139 8.06 0.000 1.678656 2.764708 pctmeal3 | -.7003186 .1862347 -3.76 0.000 -1.067246 -.3333911 _cons | 79.79433 35.08332 2.27 0.024 10.67171 148.917 ------------------------------------------------------------------------------