I can calculate the motion of heavenly bodies but not the madness of people. -Isaac Newton

## V-Fold Cross Validation to Pick GRNN Smoothing Parameter

On 06/23, I posted two SAS macros implementing GRNN (https://statcompute.wordpress.com/2013/06/23/prototyping-a-general-regression-neural-network-with-sas). However, in order to use these macros in a production environment, we still need a scheme to automatically choose the optimal value of the smoothing parameter. In practice, v-fold or holdout cross validation is often used to accomplish this task. Below is a SAS macro implementing v-fold cross validation to automatically select an optimal value of the smoothing parameter based on the highest K-S statistic in a binary classification case.

```
%macro grnn_cv(data = , y = , x = , v = , sigmas = );
********************************************************;
* THIS MACRO IS TO DO THE V-FOLD CROSS VALIDATION TO   *;
* PICK THE OPTIMAL VALUE OF SMOOTHING PARAMETER IN A   *;
* BINARY CLASSIFICATION PROBLEM                        *;
*------------------------------------------------------*;
* INPUT PARAMETERS:                                    *;
*  DATA  : INPUT SAS DATASET                           *;
*  X     : A LIST OF PREDICTORS IN THE NUMERIC FORMAT  *;
*  Y     : A RESPONSE VARIABLE IN THE NUMERIC FORMAT   *;
*  V     : NUMBER OF FOLDS FOR CROSS-VALIDATION        *;
*  SIGMAS: A LIST OF SIGMA VALUES TO TEST              *;
*------------------------------------------------------*;
* OUTPUT:                                              *;
*  SAS PRINT-OUT OF CROSS VALIDATION RESULT IN KS      *;
*  STATISTICS                                          *;
*------------------------------------------------------*;
* AUTHOR:                                              *;
*  WENSUI.LIU@53.COM                                   *;
********************************************************;

/*
  Processing outline:
    1. Keep rows with a non-missing response and no missing predictor,
       attach a uniform random number, and rank it into V groups that
       serve as the folds (values 0 to V - 1 of _rank_).
    2. For every sigma in SIGMAS and every fold, train on the other
       folds with the grnn_learn macro, score the held-out fold with the
       grnn_pred macro, and stack the out-of-fold predictions in _cv5.
    3. Run PROC NPAR1WAY on the stacked predictions and keep the value
       of nvalue2 from the first row of the KolSmir2Stats ODS table,
       multiplied by 100 (presumably the two-sample K-S D statistic;
       confirm against the NPAR1WAY ODS table documentation).
    4. Print one row per sigma from the work data set _ks3.

  Dependencies: grnn_learn.sas and grnn_pred.sas must be reachable from
  the current directory, since they are included by relative name.

  Side effects: resets several global system options, and leaves the
  work data sets _cv1 to _cv5, _grnn, _pred and _ks1 to _ks3 behind.
*/

/* Declare the loop variables as local BEFORE assigning them, so that a
   same-named variable in the calling scope is neither overwritten nor
   shadowed by an empty local copy after the assignment. */
%local i j sigma;

/* Stop early with a clear message instead of failing deep inside the
   data steps when an argument was left blank. */
%if %length(%superq(data)) = 0 or %length(%superq(y)) = 0 or
    %length(%superq(x)) = 0 or %length(%superq(v)) = 0 or
    %length(%superq(sigmas)) = 0 %then %do;
  %put ERROR: GRNN_CV requires DATA, Y, X, V and SIGMAS to be specified.;
  %return;
%end;

options nocenter nonumber nodate mprint mlogic symbolgen
        orientation = landscape ls = 125 formchar = "|----|+|---+=|-/\<>*";

/* Complete cases only. missing() also catches special missing values,
   which a plain comparison with a single dot lets through. */
data _data_;
  set &data (keep = &x &y);
  where not missing(&y);
  array _x_ &x;
  _miss_ = 0;
  do _i_ = 1 to dim(_x_);
    if missing(_x_[_i_]) then _miss_ = 1;
  end;
  /* Fixed seed keeps the fold assignment reproducible; the same value
     is later passed to grnn_pred as the row id and used as join key. */
  _rand_ = ranuni(1);
  if _miss_ = 0 then output;
run;

/* Split the random number into V rank groups: the folds. */
proc rank data = _last_ out = _cv1 groups = &v;
  var _rand_;
  ranks _rank_;
run;

%inc "grnn_learn.sas";
%inc "grnn_pred.sas";

/* Walk the sigma list token by token. The delimiter argument is kept
   exactly as before (a blank and the double quote character), so a list
   passed in double quotes is still split the same way. The emptiness
   test uses the token length rather than a NE comparison, because the
   implicit integer evaluation of such a comparison fails on tokens like
   1e-1 that contain an arithmetic operator. */
%let i = 1;
%let sigma = %scan(&sigmas, &i, " ");

%do %while (%length(&sigma) > 0);

  %do j = 0 %to %eval(&v - 1);
    %put &sigma | &i | &j;

    /* _cv2 = training folds, _cv3 = held-out fold j. */
    data _cv2 _cv3;
      set _cv1;
      if _rank_ ~= &j then output _cv2;
      else output _cv3;
    run;

    %grnn_learn(data = _cv2, x = &x, y = &y, sigma = &sigma, nn_out = _grnn);

    %grnn_pred(data = _cv3, x = &x, nn_in = _grnn, id = _rand_, out = _pred);

    /* Put the actual response next to its out-of-fold prediction. */
    proc sql;
      create table
        _cv4 as
      select
        a.&y as _y_,
        b._pred_
      from
        _cv3 as a inner join _pred as b on a._rand_ = b._id_;
    quit;

    /* First fold starts a fresh stack, later folds are appended. */
    %if &j = 0 %then %do;
      data _cv5;
        set _cv4;
      run;
    %end;
    %else %do;
      data _cv5;
        set _cv5 _cv4;
      run;
    %end;

  %end;

  /* Capture the K-S table through ODS while keeping it out of the
     listing output. */
  ods listing close;
  ods output kolsmir2stats = _ks1;
  proc npar1way wilcoxon edf data = _cv5;
    class _y_;
    var _pred_;
  run;
  ods listing;

  data _ks2 (keep = sigma ks);
    set _ks1;
    if _n_ = 1 then do;
      sigma = &sigma;
      ks = nvalue2 * 100;
      output;
    end;
  run;

  /* First sigma starts the result table, later ones are appended. */
  %if &i = 1 %then %do;
    data _ks3;
      set _ks2;
    run;
  %end;
  %else %do;
    data _ks3;
      set _ks3 _ks2;
    run;
  %end;

  %let i = %eval(&i + 1);
  %let sigma = %scan(&sigmas, &i, " ");
%end;

title "&v._fold cross validation outcomes";
proc print data = _ks3 noobs;
run;
/* Clear the title so it does not leak into output produced later. */
title;

********************************************************;
*              END OF THE MACRO                        *;
********************************************************;
%mend grnn_cv;
```