Yet Another Blog in Statistical Computing

I can calculate the motion of heavenly bodies but not the madness of people. -Isaac Newton

V-Fold Cross Validation to Pick GRNN Smoothing Parameter

On 06/23, I posted two SAS macros implementing GRNN (https://statcompute.wordpress.com/2013/06/23/prototyping-a-general-regression-neural-network-with-sas). However, in order to use these macros in a production environment, we still need a scheme to automatically choose the optimal value of the smoothing parameter. In practice, v-fold or holdout cross validation is often used to accomplish this task. Below is a SAS macro implementing v-fold cross validation to automatically select an optimal value of the smoothing parameter based on the highest K-S statistic in a binary classification case.

%macro grnn_cv(data = , y = , x = , v = , sigmas = );
********************************************************;
* THIS MACRO IS TO DO THE V-FOLD CROSS VALIDATION TO   *;
* PICK THE OPTIMAL VALUE OF SMOOTHING PARAMETER IN A   *;
* BINARY CLASSIFICATION PROBLEM                        *;
*------------------------------------------------------*;
* INPUT PARAMETERS:                                    *;
*  DATA  : INPUT SAS DATASET                           *;
*  X     : A LIST OF PREDICTORS IN THE NUMERIC FORMAT  *;
*  Y     : A RESPONSE VARIABLE IN THE NUMERIC FORMAT   *;
*  V     : NUMBER OF FOLDS FOR CROSS-VALIDATION        *;
*  SIGMAS: A BLANK-SEPARATED LIST OF SIGMA VALUES TO   *;
*          TEST                                        *;
*------------------------------------------------------*;
* OUTPUT:                                              *;
*  SAS PRINT-OUT OF CROSS VALIDATION RESULT IN KS      *;
*  STATISTICS (ONE ROW PER SIGMA, DATASET _KS3)        *;
*------------------------------------------------------*;
* DEPENDENCIES:                                        *;
*  GRNN_LEARN.SAS AND GRNN_PRED.SAS MUST BE REACHABLE  *;
*  FROM THE CURRENT WORKING DIRECTORY                  *;
*------------------------------------------------------*;
* AUTHOR:                                              *;
*  WENSUI.LIU@53.COM                                   *;
********************************************************;

/* Declare every loop/work macro variable as local BEFORE assigning it,
   so that same-named variables in the calling scope are never
   overwritten and the local copies are never left empty. */
%local i j sigma;
%let i = 1;

options nocenter nonumber nodate mprint mlogic symbolgen         
        orientation = landscape ls = 125 formchar = "|----|+|---+=|-/\<>*"; 

/* Keep only complete cases: response non-missing and every predictor
   non-missing. _rand_ is a uniform random number with a fixed seed; it
   drives the fold assignment below and is also passed to grnn_pred as
   the record id used to join predictions back to the actuals. */
data _data_;
  set &data (keep = &x &y);
  where &y ~= .;
  array _x_ &x;
  _miss_ = 0;
  do _i_ = 1 to dim(_x_);
    if _x_[_i_] = . then _miss_ = 1; 
  end;
  _rand_ = ranuni(1);
  if _miss_ = 0 then output;
run;

/* Split the records into v groups by the rank of _rand_.
   With the GROUPS= option the group index _rank_ runs from 0 to v - 1,
   which is why the fold loop below starts at 0. */
proc rank data = _last_ out = _cv1 groups = &v;
  var _rand_;
  ranks _rank_;
run;

%inc "grnn_learn.sas";
%inc "grnn_pred.sas";

/* Outer loop: one pass per sigma value in the blank-separated list.
   The delimiter is given as %str( ) because quote characters are literal
   in macro code and would otherwise be treated as delimiters as well.
   The loop ends when the scan returns an empty token. */
%do %while (%length(%scan(&sigmas, &i, %str( ))) > 0);
%let sigma = %scan(&sigmas, &i, %str( ));

  /* Inner loop: one pass per fold. */
  %do j = 0 %to %eval(&v - 1);
  %put &sigma | &i | &j;
   
  /* _cv2 = training folds, _cv3 = the single held-out fold j. */
  data _cv2 _cv3;
    set _cv1;
    if _rank_ ~= &j then output _cv2;
    else output _cv3;
  run;
  
  %grnn_learn(data = _cv2, x = &x, y = &y, sigma = &sigma, nn_out = _grnn);
 
  %grnn_pred(data = _cv3, x = &x, nn_in = _grnn, id = _rand_, out = _pred);

  /* Attach the actual response to each held-out prediction.
     NOTE(review): assumes grnn_pred returns columns _id_ and _pred_ -
     confirm against grnn_pred.sas. */
  proc sql;
  create table
    _cv4 as
  select
    a.&y as _y_,
    b._pred_  
  from
    _cv3 as a inner join _pred as b on a._rand_ = b._id_;
  quit;

  /* Accumulate out-of-fold predictions across the folds in _cv5;
     the first fold resets it so each sigma starts from scratch. */
  %if &j = 0 %then %do;
  data _cv5;
    set _cv4;
  run;
  %end;
  %else %do;
  data _cv5;
    set _cv5 _cv4;
  run;
  %end;

  %end;

/* Two-sample EDF test of the pooled out-of-fold predictions by class;
   only the ODS table is captured, the listing output is suppressed. */
ods listing close;
ods output kolsmir2stats = _ks1;
proc npar1way wilcoxon edf data = _cv5;
  class _y_;
  var _pred_;
run;
ods listing;

/* Keep the first row of the ODS table and scale nvalue2 by 100.
   NOTE(review): presumably nvalue2 on row 1 is the KS D statistic -
   confirm against the KolSmir2Stats table layout. */
data _ks2 (keep = sigma ks);
  set _ks1;
  if _n_ = 1 then do;
    sigma = &sigma;
    ks = nvalue2 * 100;
    output;
  end;
run;

/* Accumulate one result row per sigma in _ks3. */
%if &i = 1 %then %do;
data _ks3;
  set _ks2;
run;
%end;
%else %do;
data _ks3;
  set _ks3 _ks2;
run;
%end;

%let i = %eval(&i + 1); 
%end;

title "&v._fold cross validation outcomes";
proc print data = _ks3 noobs;
run;

********************************************************;
*              END OF THE MACRO                        *;
********************************************************;
%mend grnn_cv;
Advertisements

Written by statcompute

July 4, 2013 at 11:53 pm

%d bloggers like this: