Cross Validation Stata Example


[Stata Program]

/*  Step 1  */

/*  Fit the prediction model on the first sample (cross1).
    ", clear" lets this script be re-run in the same session: once Step 2
    has added variables, a plain "use" refuses to discard the changed data.
    The option replaces only the dataset, not stored estimation results.  */
use http://www.gseis.ucla.edu/courses/data/cross1, clear
 
/*  Correlations of the outcome (api99) with the four candidate predictors  */
corr api99 pctmeal pctel pctcred avged
/*  Stepwise selection starting from the empty model; a predictor is
    added when its p-value is below .05  */
sw regress api99 pctmeal pctel pctcred avged, pe(.05)

/*  Step 2  */

/*  Load the second sample (cross2).  The Step 1 regression results stay
    in memory, so the model can be applied to these new observations.  */
use http://www.gseis.ucla.edu/courses/data/cross2, clear

/*  Predicted api99 for the cross2 schools from the Step 1 estimates  */
predict api99la1
/*  The same prediction written out by hand.  The constant and slopes are
    copied from the Step 1 coefficient table (_cons, avged, pctcred,
    pctmeal) and must be updated manually if the Step 1 model changes.  */
generate api99la2 = 62.5067 + 130.5665*avged + 2.574684*pctcred - .6885639*pctmeal
label variable api99la1 "OC api99 score using LA schools using predict"
label variable api99la2 "OC api99 score using equation for LA schools"
 
/*  Cross-validation check: correlation between the observed api99 in
    cross2 and the two (equivalent) predictions from the Step 1 model  */
corr api99 api99la1 api99la2
/*  For comparison, repeat the Step 1 analysis on the cross2 sample itself  */
corr api99 pctmeal pctel pctcred avged
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
   
[Stata Output]
  
/*  Step 1  */
 
use http://www.gseis.ucla.edu/courses/data/cross1
  
corr api99 pctmeal pctel pctcred avged
 
             |    api99  pctmeal    pctel  pctcred    avged
-------------+---------------------------------------------
       api99 |   1.0000
     pctmeal |  -0.7559   1.0000
       pctel |  -0.7556   0.6551   1.0000
     pctcred |   0.5909  -0.4211  -0.4041   1.0000
       avged |   0.8863  -0.7410  -0.7833   0.4114   1.0000
 
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   avged
p = 0.0000 <  0.0500  adding   pctcred
p = 0.0003 <  0.0500  adding   pctmeal

      Source |       SS       df       MS              Number of obs =     188
-------------+------------------------------           F(  3,   184) =  369.37
       Model |  2347454.49     3  782484.831           Prob > F      =  0.0000
    Residual |  389795.423   184  2118.45338           R-squared     =  0.8576
-------------+------------------------------           Adj R-squared =  0.8553
       Total |  2737249.91   187  14637.7001           Root MSE      =  46.027

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       avged |   130.5665   8.206563    15.91   0.000     114.3755    146.7576
     pctcred |   2.574684   .3193867     8.06   0.000     1.944553    3.204815
     pctmeal |  -.6885639   .1870544    -3.68   0.000    -1.057611   -.3195166
       _cons |    62.5067   37.88756     1.65   0.101     -12.2432    137.2566
------------------------------------------------------------------------------
 
/*  Step 2  */
 
use http://www.gseis.ucla.edu/courses/data/cross2

predict api99la1
generate api99la2 = 62.5067 + 130.5665*avged + 2.574684*pctcred - .6885639*pctmeal
label variable api99la1 "OC api99 score using LA schools using predict"
label variable api99la2 "OC api99 score using equation for LA schools"
 
corr api99 api99la1 api99la2
(obs=52)
 
             |    api99 api99la1 api99la2
-------------+---------------------------
       api99 |   1.0000
    api99la1 |   0.9312   1.0000
    api99la2 |   0.9312   1.0000   1.0000
  
corr api99 pctmeal pctel pctcred avged
(obs=52)
 
             |    api99  pctmeal    pctel  pctcred    avged
-------------+---------------------------------------------
       api99 |   1.0000
     pctmeal |  -0.9103   1.0000
       pctel |  -0.9293   0.9431   1.0000
     pctcred |   0.5772  -0.5138  -0.4906   1.0000
       avged |   0.9191  -0.9308  -0.9092   0.5342   1.0000
 
sw regress api99 pctmeal pctel pctcred avged, pe(.05)
                       begin with empty model
p = 0.0000 <  0.0500  adding   pctel
p = 0.0003 <  0.0500  adding   avged
p = 0.0310 <  0.0500  adding   pctcred

      Source |       SS       df       MS              Number of obs =      52
-------------+------------------------------           F(  3,    48) =  152.62
       Model |  690783.164     3  230261.055           Prob > F      =  0.0000
    Residual |  72420.2779    48  1508.75579           R-squared     =  0.9051
-------------+------------------------------           Adj R-squared =  0.8992
       Total |  763203.442    51  14964.7734           Root MSE      =  38.843

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       pctel |  -2.835101   .5641433    -5.03   0.000    -3.969387   -1.700815
       avged |   59.32612   17.71861     3.35   0.002     23.70046    94.95178
     pctcred |   2.645574   1.190507     2.22   0.031      .251899    5.039249
       _cons |   336.6347   118.8344     2.83   0.007      97.7021    575.5672
------------------------------------------------------------------------------