## SAS Macro Calculating LOO Predictions for GLM

Back in last year, I wrote a R function calculating the Leave-One-Out (LOO) prediction in GLM (https://statcompute.wordpress.com/2015/12/13/calculate-leave-one-out-prediction-for-glm). Below is a SAS macro implementing the same function with Gamma regression as the example. However, it is trivial to extend to models with other distributional assumptions.

%macro loo_gamma(data = , y = , x = , out = , out_var = _loo); **********************************************************************; * SAS MACRO CALCULATING LEAVE-ONE-OUT PREDICTIONS WITH THE TRAINING *; * SAMPLE AND PRESENTING DISTRIBUTIONS OF LOO PARAMETER ESTIMATES *; * ================================================================== *; * INPUT PARAMETERS: *; * DATA : INPUT SAS DATA TABLE *; * Y : DEPENDENT VARIABLE IN THE GAMMA MODEL *; * X : NUMERIC INDEPENDENT VARIABLES IN THE MODEL *; * OUT : OUTPUT SAS DATA TABLE WITH LOO PREDICTIONS *; * OUT_VAR: VARIABLE NAME OF LOO PREDICTIONS *; * ================================================================== *; * OUTPUTS: *; * 1. A TABLE SHOWING DISTRIBUTIONS OF LOO PARAMETER ESTIMATES *; * 2. A SAS DATA TABLE WITH LOO PREDICTIONS *; **********************************************************************; options nocenter nonumber nodate mprint mlogic symbolgen; data _1; retain &x &y; set &data (keep = &x &y); where &y ~= .; Intercept = 1; _i + 1; run; data _2; set _1 (keep = _i &x Intercept); array _x Intercept &x; do over _x; _name = upcase(vname(_x)); _value = _x; output; end; run; proc sql noprint; select max(_i) into :nobs from _1; quit; %do i = 1 %to &nobs; data _3; set _1; where _i ~= &i; run; ods listing close; ods output ParameterEstimates = _est; proc glimmix data = _last_; model &y = &x / solution dist = gamma link = log; run; ods listing; proc sql; create table _pred1 as select a._i as _i, upcase(a._name) as _name, a._value as _value, b.estimate as estimate, a._value * b.estimate as _xb from _2 as a, _est as b where a._i = &i and upcase(a._name) = upcase(b.effect); quit; %if &i = 1 %then %do; data _pred2; set _pred1; run; %end; %else %do; data _pred2; set _pred2 _pred1; run; %end; %end; proc summary data = _pred2 nway; class _name; output out = _eff (drop = _freq_ _type_) min(estimate) = beta_min p5(estimate) = beta_p05 p10(estimate) = beta_p10 median(estimate) = beta_med p90(estimate) = beta_p90 p95(estimate) = beta_p95 max(estimate) = beta_max mean(estimate) = beta_avg stddev(estimate) = beta_std; run; title; proc report data = _eff spacing = 1 headline nowindows split = "*"; column(" * DISTRIBUTIONS OF LEAVE-ONE-OUT COEFFICIENTS * ESTIMATED FROM GAMMA REGRESSIONS * " _name beta_:); where upcase(_name) ~= 'INTERCEPT'; define _name / "BETA" width = 20; define beta_min / "MIN" width = 10 format = 10.4; define beta_p05 / '5%ILE' width = 10 format = 10.4; define beta_p10 / '10%ILE' width = 10 format = 10.4; define beta_med / 'MEDIAN' width = 10 format = 10.4; define beta_p90 / '90%ILE' width = 10 format = 10.4; define beta_p95 / '95%ILE' width = 10 format = 10.4; define beta_max / "MAX" width = 10 format = 10.4; define beta_avg / "AVERAGE" width = 10 format = 10.4; define beta_std / "STD DEV" width = 10 format = 10.4; run; proc sql; create table &out as select a.*, b.out_var as _xb, exp(b.out_var) as &out_var from _1 (drop = intercept) as a, (select _i, sum(_xb) as out_var from _pred2 group by _i) as b where a._i = b._i; quit; %mend loo_gamma;

Advertisements