Yet Another Blog in Statistical Computing

I can calculate the motion of heavenly bodies but not the madness of people. -Isaac Newton

SAS Macro Calculating LOO Predictions for GLM

Back in last year, I wrote a R function calculating the Leave-One-Out (LOO) prediction in GLM (https://statcompute.wordpress.com/2015/12/13/calculate-leave-one-out-prediction-for-glm). Below is a SAS macro implementing the same function with Gamma regression as the example. However, it is trivial to extend to models with other distributional assumptions.

%macro loo_gamma(data = , y = , x = , out = , out_var = _loo);
**********************************************************************;
* SAS MACRO CALCULATING LEAVE-ONE-OUT PREDICTIONS WITH THE TRAINING  *;
* SAMPLE AND PRESENTING DISTRIBUTIONS OF LOO PARAMETER ESTIMATES     *;
* ================================================================== *;
* INPUT PARAMETERS:                                                  *;
*  DATA   : INPUT SAS DATA TABLE                                     *;
*  Y      : DEPENDENT VARIABLE IN THE GAMMA MODEL                    *;
*  X      : NUMERIC INDEPENDENT VARIABLES IN THE MODEL               *;
*  OUT    : OUTPUT SAS DATA TABLE WITH LOO PREDICTIONS               *;
*  OUT_VAR: VARIABLE NAME OF LOO PREDICTIONS                         *;
* ================================================================== *;
* OUTPUTS:                                                           *;
* 1. A TABLE SHOWING DISTRIBUTIONS OF LOO PARAMETER ESTIMATES        *;
* 2. A SAS DATA TABLE WITH LOO PREDICTIONS                           *;
**********************************************************************;
options nocenter nonumber nodate mprint mlogic symbolgen;

data _1;
  retain &x &y;
  set &data (keep = &x &y);
  where &y ~= .;
  Intercept = 1;
  _i + 1;
run;

data _2;
  set _1 (keep = _i &x Intercept);
  array _x Intercept &x;
  do over _x;
    _name = upcase(vname(_x));
    _value = _x;
    output;
  end;
run;

proc sql noprint;
  select max(_i) into :nobs from _1;
quit;

%do i = 1 %to &nobs;

data _3;
  set _1;
  where _i ~= &i;
run;

ods listing close;
ods output ParameterEstimates = _est;
proc glimmix data = _last_;
  model &y = &x / solution dist = gamma link = log; 
run; 
ods listing;

proc sql;
create table
  _pred1 as
select
  a._i                  as _i,
  upcase(a._name)       as _name,
  a._value              as _value,
  b.estimate            as estimate,
  a._value * b.estimate as _xb
from
  _2 as a, _est as b
where
  a._i = &i and upcase(a._name) = upcase(b.effect); quit;

%if &i = 1 %then %do;
  data _pred2;
    set _pred1;
  run;
%end;
%else %do;
  data _pred2;
    set _pred2 _pred1;
  run;
%end;

%end;

proc summary data = _pred2 nway;
  class _name;
  output out = _eff (drop = _freq_ _type_)
  min(estimate)    = beta_min
  p5(estimate)     = beta_p05
  p10(estimate)    = beta_p10
  median(estimate) = beta_med
  p90(estimate)    = beta_p90
  p95(estimate)    = beta_p95
  max(estimate)    = beta_max
  mean(estimate)   = beta_avg
  stddev(estimate) = beta_std;
run;

title;
proc report data = _eff spacing = 1 headline nowindows split = "*";
  column(" * DISTRIBUTIONS OF LEAVE-ONE-OUT COEFFICIENTS *
             ESTIMATED FROM GAMMA REGRESSIONS * "
          _name beta_:);
  where upcase(_name) ~= 'INTERCEPT';
  define _name    / "BETA"    width = 20;
  define beta_min / "MIN"     width = 10 format = 10.4;
  define beta_p05 / '5%ILE'   width = 10 format = 10.4;
  define beta_p10 / '10%ILE'  width = 10 format = 10.4;
  define beta_med / 'MEDIAN'  width = 10 format = 10.4;
  define beta_p90 / '90%ILE'  width = 10 format = 10.4;
  define beta_p95 / '95%ILE'  width = 10 format = 10.4;
  define beta_max / "MAX"     width = 10 format = 10.4;
  define beta_avg / "AVERAGE" width = 10 format = 10.4;
  define beta_std / "STD DEV" width = 10 format = 10.4; 
run;

proc sql;
create table
  &out as
select
  a.*,
  b.out_var      as _xb,
  exp(b.out_var) as &out_var
from
  _1 (drop = intercept) as a,
  (select _i, sum(_xb) as out_var from _pred2 group by _i) as b where
  a._i = b._i;
quit;

%mend loo_gamma;
Advertisements

Written by statcompute

March 12, 2016 at 2:38 pm

Posted in SAS

Tagged with

%d bloggers like this: