Missing Data Stata Example


[Stata Program]

* Missing-data example: compares listwise deletion, mean substitution and
* regression substitution for two predictors with missing values
* (pctmeal and avged) when modelling api99.

use http://www.gseis.ucla.edu/courses/data/missdata

* Inspect every variable; the Obs column shows which ones have missing values
* and the Mean column supplies the constants used for mean substitution below.
summarize

* --- Mean substitution ------------------------------------------------------
* Copy each variable, then fill its missing cases with the observed mean
* reported by -summarize- (typed in by hand).
generate pctmeal2=pctmeal
replace pctmeal2=55.65104 if pctmeal==.
label variable pctmeal2 "pctmeal mean substitution"
generate avged2=avged
replace avged2=2.715862 if avged==.
label variable avged2 "avged mean substitution"

* --- Regression substitution for pctmeal ------------------------------------
* Forward stepwise selection (entry threshold p < .05) of predictors of pctmeal.
sw regress pctmeal pctel pctcred avged, pe(.05)
generate pctmeal3=pctmeal
* Fill missing pctmeal with the fitted value; the coefficients are copied by
* hand from the regression table above. The pctcred coefficient is -.2860316
* (it was previously mistyped as -.2680316).
replace pctmeal3=-24.65785*avged -.2860316*pctcred +.2681044*pctel +135.0125 if pctmeal==.
label variable pctmeal3 "pctmeal regression substitution"

* --- Regression substitution for avged --------------------------------------
* Same approach for avged; only the predictors retained by the stepwise
* procedure (pctel and pctmeal) appear in the substitution formula.
sw regress avged pctmeal pctel pctcred, pe(.05)
generate avged3=avged
* NOTE(review): the formula uses the original pctmeal, so a case missing both
* avged and pctmeal would stay missing in avged3.
replace avged3= -.018986*pctel -.009055*pctmeal +3.663708 if avged==.
label variable avged3 "avged regression substitution"

* Compare the original variables with their two imputed versions.
summarize pctmeal pctmeal2 pctmeal3 avged avged2 avged3

* --- Is missingness on pctmeal related to the other predictors? -------------
* miss = 1 when pctmeal is missing, 0 otherwise.
generate miss=0 if pctmeal!=.
replace miss=1 if pctmeal==.
* Two-sample t tests that do not assume equal variances in the two groups.
ttest avged, by(miss) unequal
ttest pctel, by(miss) unequal
ttest pctcred, by(miss) unequal

* --- Full models under each missing-data treatment --------------------------
* Original variables (cases with any missing predictor are dropped),
* then mean substitution, then regression substitution.
regress api99 pctmeal pctel pctcred avged
regress api99 pctmeal2 pctel pctcred avged2
regress api99 pctmeal3 pctel pctcred avged3

* --- Stepwise models under each missing-data treatment ----------------------
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
sw regress api99 pctmeal2 pctel pctcred avged2, pe(.05)
sw regress api99 pctmeal3 pctel pctcred avged3, pe(.05)
   
[Stata Output]
  
use http://www.gseis.ucla.edu/courses/data/missdata
 
summarize
 
    Variable |     Obs        Mean   Std. Dev.       Min        Max
-------------+-----------------------------------------------------
       schid |     236     6052145   26119.35    6011225    6115794
       api99 |     236    582.5636   130.7217        345        901
      strank |     236    4.427966   2.936038          1         10
     pctmeal |     192    55.65104   27.18673          1         98
       pctel |     236    26.02542   16.92987          1         69
       avged |     232    2.715862   .6675309        1.4       4.52
     pctcred |     236    78.52119   12.83527         33        100
    district |     236           1          0          1          1
 
generate pctmeal2 = pctmeal
replace pctmeal2 = 55.65104 if pctmeal==.
label variable pctmeal2 "pctmeal mean substitution"
generate avged2 = avged
replace avged2 = 2.715862 if avged==.
label variable avged2 "avged mean substitution"
 
sw regress pctmeal pctel pctcred avged, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   avged
p = 0.0093 <  0.0500  adding   pctcred
p = 0.0336 <  0.0500  adding   pctel
 
      Source |       SS       df       MS              Number of obs =     188
-------------+------------------------------           F(  3,   184) =   83.28
       Model |  80214.3302     3  26738.1101           Prob > F      =  0.0000
    Residual |  59073.3879   184  321.051021           R-squared     =  0.5759
-------------+------------------------------           Adj R-squared =  0.5690
       Total |  139287.718   187  744.854107           Root MSE      =  17.918

------------------------------------------------------------------------------
     pctmeal |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       avged |  -24.65785   3.452969    -7.14   0.000    -31.47035   -17.84535
     pctcred |  -.2860316   .1233665    -2.32   0.022    -.5294264   -.0426368
       pctel |   .2681044   .1251983     2.14   0.034     .0210956    .5151132
       _cons |   135.0125   14.73126     9.17   0.000     105.9486    164.0764
------------------------------------------------------------------------------
 
generate pctmeal3=pctmeal
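replace pctmeal3 = -24.65785*avged -.2680316*pctcred +.2681044*pctel +135.0125 if pctmeal==. 
* Note: the pctcred coefficient typed in the command above (-.2680316) transposes
* two digits of the fitted value shown in the regression table (-.2860316). The
* pctmeal3 results that follow presumably reflect the coefficient as typed.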
label variable pctmeal3 "pctmeal regression substitution"
 
sw regress avged pctmeal pctel pctcred, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   pctel
p = 0.0000 <  0.0500  adding   pctmeal

      Source |       SS       df       MS              Number of obs =     188
-------------+------------------------------           F(  2,   185) =  220.50
       Model |  50.4861884     2  25.2430942           Prob > F      =  0.0000
    Residual |  21.1791794   185  .114482051           R-squared     =  0.7045
-------------+------------------------------           Adj R-squared =  0.7013
       Total |  71.6653678   187  .383237261           Root MSE      =  .33835

------------------------------------------------------------------------------
       avged |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       pctel |   -.018986   .0019249    -9.86   0.000    -.0227836   -.0151884
     pctmeal |   -.009055   .0011999    -7.55   0.000    -.0114222   -.0066878
       _cons |   3.663708   .0568459    64.45   0.000     3.551558    3.775857
------------------------------------------------------------------------------
 
generate avged3=avged
replace avged3= -.018986*pctel -.009055*pctmeal +3.663708 if avged==.
label variable avged3 "avged regression substitution"
 
summarize pctmeal pctmeal2 pctmeal3 avged avged2 avged3

    Variable |     Obs        Mean   Std. Dev.       Min        Max
-------------+-----------------------------------------------------
     pctmeal |     192    55.65104   27.18673          1         98
    pctmeal2 |     236    55.65104    24.5098          1         98
    pctmeal3 |     236     53.1193   27.28459  -.8314203         98
       avged |     232    2.715862   .6675309        1.4       4.52
      avged2 |     236    2.715862   .6618254        1.4       4.52
      avged3 |     236    2.710821   .6641614        1.4       4.52
 
generate miss=0 if pctmeal!=.
replace miss=1 if pctmeal==.
 
ttest avged, by(miss) unequal
 
Two-sample t test with unequal variances

------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. Err.   Std. Dev.   [95% Conf. Interval]
---------+--------------------------------------------------------------------
       0 |     188     2.63133    .0451497    .6190616    2.542262    2.720398
       1 |      44    3.077045    .1130593    .7499506     2.84904    3.305051
---------+--------------------------------------------------------------------
combined |     232    2.715862    .0438256    .6675309    2.629513    2.802211
---------+--------------------------------------------------------------------
    diff |           -.4457157    .1217411               -.6894547   -.2019767
------------------------------------------------------------------------------
Satterthwaite's degrees of freedom:  57.4725
 
                      Ho: mean(0) - mean(1) = diff = 0

     Ha: diff < 0               Ha: diff ~= 0              Ha: diff > 0
       t =  -3.6612                t =  -3.6612              t =  -3.6612
   P < t =   0.0003          P > |t| =   0.0005          P > t =   0.9997
 
ttest pctel, by(miss) unequal
 
Two-sample t test with unequal variances

------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. Err.   Std. Dev.   [95% Conf. Interval]
---------+--------------------------------------------------------------------
       0 |     192    28.06771     1.22033    16.90939    25.66065    30.47476
       1 |      44    17.11364    2.113136    14.01696    12.85209    21.37518
---------+--------------------------------------------------------------------
combined |     236    26.02542    1.102041    16.92987    23.85428    28.19657
---------+--------------------------------------------------------------------
    diff |            10.95407    2.440195                6.092524    15.81562
------------------------------------------------------------------------------
Satterthwaite's degrees of freedom:   74.596
 
                      Ho: mean(0) - mean(1) = diff = 0

     Ha: diff < 0               Ha: diff ~= 0              Ha: diff > 0
       t =   4.4890                t =   4.4890              t =   4.4890
   P < t =   1.0000          P > |t| =   0.0000          P > t =   0.0000
 
ttest pctcred, by(miss) unequal
 
Two-sample t test with unequal variances

------------------------------------------------------------------------------
   Group |     Obs        Mean    Std. Err.   Std. Dev.   [95% Conf. Interval]
---------+--------------------------------------------------------------------
       0 |     192          78    .8467614    11.73307     76.3298     79.6702
       1 |      44    80.79545      2.5332    16.80335    75.68677    85.90414
---------+--------------------------------------------------------------------
combined |     236    78.52119    .8355051    12.83527    76.87515    80.16722
---------+--------------------------------------------------------------------
    diff |           -2.795455    2.670975               -8.152759     2.56185
------------------------------------------------------------------------------
Satterthwaite's degrees of freedom:  52.9969
 
                      Ho: mean(0) - mean(1) = diff = 0

     Ha: diff < 0               Ha: diff ~= 0              Ha: diff > 0
       t =  -1.0466                t =  -1.0466              t =  -1.0466
   P < t =   0.1500          P > |t| =   0.3000          P > t =   0.8500
 
regress api99 pctmeal pctel pctcred avged
 
      Source |       SS       df       MS              Number of obs =     188
-------------+------------------------------           F(  4,   183) =  280.87
       Model |  2353840.29     4  588460.072           Prob > F      =  0.0000
    Residual |  383409.627   183  2095.13457           R-squared     =  0.8599
-------------+------------------------------           Adj R-squared =  0.8569
       Total |  2737249.91   187  14637.7001           Root MSE      =  45.773

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
     pctmeal |   -.637294   .1883259    -3.38   0.001    -1.008863   -.2657247
       pctel |   -.565281   .3237896    -1.75   0.083    -1.204122    .0735596
     pctcred |   2.510879   .3197197     7.85   0.000     1.880068     3.14169
       avged |   120.5732   9.968544    12.10   0.000     100.9051    140.2413
       _cons |   106.7768   45.41669     2.35   0.020      17.1691    196.3845
------------------------------------------------------------------------------
 
regress api99 pctmeal2 pctel pctcred avged2
 
      Source |       SS       df       MS              Number of obs =     236
-------------+------------------------------           F(  4,   231) =  398.17
       Model |  3507064.46     4  876766.115           Prob > F      =  0.0000
    Residual |  508655.587   231  2201.97224           R-squared     =  0.8733
-------------+------------------------------           Adj R-squared =  0.8711
       Total |  4015720.05   235  17088.1704           Root MSE      =  46.925

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
    pctmeal2 |  -.4582449   .1619505    -2.83   0.005    -.7773339   -.1391559
       pctel |  -.6843467   .3073912    -2.23   0.027    -1.289995   -.0786979
     pctcred |   2.357826   .2759607     8.54   0.000     1.814104    2.901547
      avged2 |   131.9708   8.195027    16.10   0.000     115.8242    148.1173
       _cons |   82.32209   35.59079     2.31   0.022     12.19803    152.4461
------------------------------------------------------------------------------
 
regress api99 pctmeal3 pctel pctcred avged3
 
      Source |       SS       df       MS              Number of obs =     236
-------------+------------------------------           F(  4,   231) =  420.65
       Model |  3530961.55     4  882740.387           Prob > F      =  0.0000
    Residual |  484758.498   231  2098.52164           R-squared     =  0.8793
-------------+------------------------------           Adj R-squared =  0.8772
       Total |  4015720.05   235  17088.1704           Root MSE      =   45.81

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
    pctmeal3 |  -.6404214    .187665    -3.41   0.001    -1.010175   -.2706676
       pctel |  -.5843708   .3002018    -1.95   0.053    -1.175854    .0071127
     pctcred |    2.18274   .2747013     7.95   0.000       1.6415     2.72398
      avged3 |   125.2458   9.012319    13.90   0.000     107.4889    143.0026
       _cons |   120.8806   40.76409     2.97   0.003     40.56361    201.1975
------------------------------------------------------------------------------
 
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   avged
p = 0.0000 <  0.0500  adding   pctcred
p = 0.0003 <  0.0500  adding   pctmeal
 
      Source |       SS       df       MS              Number of obs =     188
-------------+------------------------------           F(  3,   184) =  369.37
       Model |  2347454.49     3  782484.831           Prob > F      =  0.0000
    Residual |  389795.423   184  2118.45338           R-squared     =  0.8576
-------------+------------------------------           Adj R-squared =  0.8553
       Total |  2737249.91   187  14637.7001           Root MSE      =  46.027

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       avged |   130.5665   8.206563    15.91   0.000     114.3755    146.7576
     pctcred |   2.574684   .3193867     8.06   0.000     1.944553    3.204815
     pctmeal |  -.6885639   .1870544    -3.68   0.000    -1.057611   -.3195166
       _cons |    62.5067   37.88756     1.65   0.101     -12.2432    137.2566
------------------------------------------------------------------------------
 
sw regress api99 pctmeal2 pctel pctcred avged2, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   avged2
p = 0.0000 <  0.0500  adding   pctcred
p = 0.0010 <  0.0500  adding   pctmeal2
p = 0.0270 <  0.0500  adding   pctel
 
      Source |       SS       df       MS              Number of obs =     236
-------------+------------------------------           F(  4,   231) =  398.17
       Model |  3507064.46     4  876766.115           Prob > F      =  0.0000
    Residual |  508655.587   231  2201.97224           R-squared     =  0.8733
-------------+------------------------------           Adj R-squared =  0.8711
       Total |  4015720.05   235  17088.1704           Root MSE      =  46.925

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
      avged2 |   131.9708   8.195027    16.10   0.000     115.8242    148.1173
     pctcred |   2.357826   .2759607     8.54   0.000     1.814104    2.901547
    pctmeal2 |  -.4582449   .1619505    -2.83   0.005    -.7773339   -.1391559
       pctel |  -.6843467   .3073912    -2.23   0.027    -1.289995   -.0786979
       _cons |   82.32209   35.59079     2.31   0.022     12.19803    152.4461
------------------------------------------------------------------------------
 
sw regress api99 pctmeal3 pctel pctcred avged3, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   avged3
p = 0.0000 <  0.0500  adding   pctcred
p = 0.0002 <  0.0500  adding   pctmeal3
 
      Source |       SS       df       MS              Number of obs =     236
-------------+------------------------------           F(  3,   232) =  552.95
       Model |  3523009.78     3  1174336.59           Prob > F      =  0.0000
    Residual |  492710.271   232  2123.75117           R-squared     =  0.8773
-------------+------------------------------           Adj R-squared =  0.8757
       Total |  4015720.05   235  17088.1704           Root MSE      =  46.084

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
      avged3 |   134.8376   7.591207    17.76   0.000     119.8811    149.7941
     pctcred |   2.221682   .2756139     8.06   0.000     1.678656    2.764708
    pctmeal3 |  -.7003186   .1862347    -3.76   0.000    -1.067246   -.3333911
       _cons |   79.79433   35.08332     2.27   0.024     10.67171     148.917
------------------------------------------------------------------------------