Cross Validation Stata Example


[Stata Program]

/*  Cross-validation example: fit a stepwise regression for api99 in one
    district, then check how well its predicted values track the observed
    api99 scores in the other district.  */

/*  Both samples are appended into one data file  */

use http://www.gseis.ucla.edu/courses/data/cross1
append using http://www.gseis.ucla.edu/courses/data/cross2
 
/*  Correlations among the outcome (api99) and the four candidate
    predictors, restricted to district 1  */
corr api99 pctmeal pctel pctcred avged if district==1
/*  Stepwise regression on district 1 only; pe(.05) is the p-value
    threshold a candidate predictor must beat to be added to the model  */
sw regress api99 pctmeal pctel pctcred avged  if district==1, pe(.05)

/*  predict has no "if" qualifier, so it is not limited to the district 1
    rows used in the fit; the district 2 values are the ones examined below  */
predict api99la
/*  NOTE(review): the label text suggests district 1 is the LA sample -- confirm  */
label variable api99la "predicted api99 score using LA schools using predict"
 
/*  Cross-validation check: correlate observed api99 with the predictions
    from the district 1 model, within district 2  */
corr api99 api99la if district==2
/*  For comparison, repeat the correlations and the stepwise regression
    using district 2 on its own  */
corr api99 pctmeal pctel pctcred avged if district==2
sw regress api99 pctmeal pctel pctcred avged  if district==2, pe(.05)                            
   
[Stata Output]
  
use http://www.gseis.ucla.edu/courses/data/cross1
append using http://www.gseis.ucla.edu/courses/data/cross2
  
corr api99 pctmeal pctel pctcred avged if district==1
 
             |    api99  pctmeal    pctel  pctcred    avged
-------------+---------------------------------------------
       api99 |   1.0000
     pctmeal |  -0.7559   1.0000
       pctel |  -0.7556   0.6551   1.0000
     pctcred |   0.5909  -0.4211  -0.4041   1.0000
       avged |   0.8863  -0.7410  -0.7833   0.4114   1.0000
 
sw regress api99 pctmeal pctel pctcred avged if district==1, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   avged
p = 0.0000 <  0.0500  adding   pctcred
p = 0.0003 <  0.0500  adding   pctmeal

      Source |       SS       df       MS              Number of obs =     188
-------------+------------------------------           F(  3,   184) =  369.37
       Model |  2347454.49     3  782484.831           Prob > F      =  0.0000
    Residual |  389795.423   184  2118.45338           R-squared     =  0.8576
-------------+------------------------------           Adj R-squared =  0.8553
       Total |  2737249.91   187  14637.7001           Root MSE      =  46.027

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       avged |   130.5665   8.206563    15.91   0.000     114.3755    146.7576
     pctcred |   2.574684   .3193867     8.06   0.000     1.944553    3.204815
     pctmeal |  -.6885639   .1870544    -3.68   0.000    -1.057611   -.3195166
       _cons |    62.5067   37.88756     1.65   0.101     -12.2432    137.2566
------------------------------------------------------------------------------
 
predict api99la
label variable api99la "predicted api99 score using LA schools using predict"
 
corr api99 api99la if district==2
(obs=52)
 
             |    api99  api99la
-------------+------------------
       api99 |   1.0000
     api99la |   0.9312   1.0000
 
corr api99 pctmeal pctel pctcred avged if district==2
(obs=52)
  
             |    api99  pctmeal    pctel  pctcred    avged
-------------+---------------------------------------------
       api99 |   1.0000
     pctmeal |  -0.9103   1.0000
       pctel |  -0.9293   0.9431   1.0000
     pctcred |   0.5772  -0.5138  -0.4906   1.0000
       avged |   0.9191  -0.9308  -0.9092   0.5342   1.0000
 
sw regress api99 pctmeal pctel pctcred avged if district==2, pe(.05)
                      begin with empty model
p = 0.0000 <  0.0500  adding   pctel
p = 0.0003 <  0.0500  adding   avged
p = 0.0310 <  0.0500  adding   pctcred
  
      Source |       SS       df       MS              Number of obs =      52
-------------+------------------------------           F(  3,    48) =  152.62
       Model |  690783.164     3  230261.055           Prob > F      =  0.0000
    Residual |  72420.2779    48  1508.75579           R-squared     =  0.9051
-------------+------------------------------           Adj R-squared =  0.8992
       Total |  763203.442    51  14964.7734           Root MSE      =  38.843

------------------------------------------------------------------------------
       api99 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       pctel |  -2.835101   .5641433    -5.03   0.000    -3.969387   -1.700815
       avged |   59.32612   17.71861     3.35   0.002     23.70046    94.95178
     pctcred |   2.645574   1.190507     2.22   0.031      .251899    5.039249
       _cons |   336.6347   118.8344     2.83   0.007      97.7021    575.5672
------------------------------------------------------------------------------