Yet Another Blog in Statistical Computing

I can calculate the motion of heavenly bodies but not the madness of people. -Isaac Newton

A SAS Macro for Scorecard Performance Evaluation

%macro separation(data = , score = , y = );
***********************************************************;
* THE MACRO IS TO EVALUATE THE SEPARATION POWER OF A      *;
* SCORECARD                                               *; 
* ------------------------------------------------------- *;
* PARAMETERS:                                             *;
*  DATA : INPUT DATASET                                   *;
*  SCORE: SCORECARD VARIABLE                              *;
*  Y    : RESPONSE VARIABLE IN (0, 1)                     *;
* ------------------------------------------------------- *;
* OUTPUTS:                                                *;
*  SEPARATION_REPORT.TXT                                  *;
*  A SEPARATION SUMMARY REPORT IN TXT FORMAT              *;
*  NAMED AS THE ABOVE WITH PREDICTIVE MEASURES INCLUDING  *;
*  KS, AUC, GINI, AND DIVERGENCE                          *;
* ------------------------------------------------------- *;
* CONTACT:                                                *;
*  WENSUI.LIU@53.COM                                      *;
***********************************************************;

options nonumber nodate orientation = landscape linesize = 160 nocenter 
        formchar = "|----|+|---+=|-/\<>*" formdlim=' ' ls = 150; 

*** DEFAULT GROUP NUMBER FOR REPORT ***;
%let grp = 10;

data _tmp1 (keep = &y &score);
  set &data;
  where &y in (1, 0)  and &score ~= .;
run;

filename lst_out temp;

proc printto new print = lst_out;
run;

*** CONDUCT NON-PARAMETRIC TESTS ***; 
ods output wilcoxonscores = _wx;
ods output kolsmir2stats = _ks;
proc npar1way wilcoxon edf data = _tmp1;
  class &y;
  var &score;
run;

proc printto;
run;

proc sort data = _wx;
  by class;
run;

*** CALCULATE ROC AND GINI ***;
data _null_;
  set _wx end = eof;
  by class;

  array a{2, 3} _temporary_;
  if _n_ = 1 then do;
    a[1, 1] = n;
    a[1, 2] = sumofscores;
    a[1, 3] = expectedsum;
  end;
  else do;
    a[2, 1] = n;
  end;
  if eof then do;
    auc  = (a[1, 2] - a[1, 3]) / (a[1, 1] * a[2, 1])  + 0.5;
    if auc <= 0.5 then auc = 1 - auc;
    gini = 2 * (auc - 0.5);  
    call execute('%let auc = '||put(auc, 10.4)||';');
    call execute('%let gini = '||put(gini, 10.4)||';');
  end;
run;

*** CALCULATE KS ***;
data _null_;
  set _ks;

  if _n_ = 1 then do;
    ks = nvalue2 * 100;
    call execute('%let ks = '||put(ks, 10.4)||';');
  end;
run;

*** CAPTURE SCORE POINT FOR MAX KS ***;
data _null_;
  infile lst_out;
  input @" at Maximum = " ks_score;
  output;
  call execute('%let ks_score = '||put(ks_score, 10.4)||';');
  stop;
run;

proc summary data = _tmp1 nway;
  class &y;
  output out = _data_ (drop = _type_ _freq_)
  mean(&score.) = mean var(&score.) = variance;
run;

*** CALCULATE DIVERGENCE ***;
data _null_;
  set _last_ end = eof;
  array a{2, 2} _temporary_;
  if _n_ = 1 then do;
    a[1, 1] = mean;
    a[1, 2] = variance;
  end;
  else do;
    a[2, 1] = mean;
    a[2, 2] = variance;
  end;
  if eof then do;
    divergence = (a[1, 1] - a[2, 1]) ** 2 / ((a[1, 2] + a[2, 2]) / 2);
    call execute('%let divergence = '||put(divergence, 10.4)||';'); 
  end; 
run;

*** CAPTURE THE DIRECTION OF SCORE ***;
ods listing close;
ods output spearmancorr = _cor;
proc corr data = _tmp1 spearman;
  var &y;
  with &score;
run;
ods listing;
 
data _null_;
  set _cor;
  if &y >= 0 then do;
    call symput('desc', 'descending');
  end;
  else do;
    call symput('desc', ' ');
  end;
run;
%put &desc;

proc rank data = _tmp1 out = _tmp2 groups = &grp ties = low;
  var &score;
  ranks rank;
run;

proc summary data = _last_ nway;
  class rank;
  output out = _data_ (drop = _type_ rename = (_freq_ = freq))
  min(&score) = min_score max(&score) = max_score
  sum(&y) = bads;
run;

proc sql noprint;
  select sum(bads) into :bcnt from _last_;
  select sum(freq) - sum(bads) into :gcnt from _last_;
quit;

proc sort data = _last_ (drop = rank);
  by &desc min_score;
run;

data _data_;
  set _last_;
  by &desc min_score;

  i + 1; 
  percent = i / 100; 
  good  = freq - bads;
  odds  = good / bads;

  hit_rate = bads / freq;
  retain cum_bads cum_freq;
  cum_bads + bads;
  cum_freq + freq;
  cum_hit_rate = cum_bads / cum_freq;

  cat_rate = bads / &bcnt;
  retain cum_cat_rate;
  cum_cat_rate + cat_rate; 

  format symbol $4.;
  if i = 1 then symbol = 'BAD';
  else if i = &grp - 1 then symbol = 'V';
  else if i = &grp then symbol = 'GOOD';
  else symbol = '|';
run;

proc printto print = "%upcase(%trim(&score))_SEPARATION_REPORT.TXT" new;
run;

proc report data = _last_ spacing = 1 split = "/" headline nowd;
  column("GOOD BAD SEPARATION REPORT FOR %upcase(%trim(&score)) IN DATA %upcase(%trim(&data))/
          MAXIMUM KS = %trim(&ks) AT SCORE POINT %trim(&ks_score)/   
          ( AUC STATISTICS = %trim(&auc), GINI COEFFICIENT = %trim(&gini), DIVERGENCE = %trim(&divergence) )/ /"
         percent symbol min_score max_score good bads freq odds hit_rate cum_hit_rate cat_rate cum_cat_rate);

  define percent      / noprint order order = data;
  define symbol       / "" center               width = 5 center;
  define min_score    / "MIN/SCORE"             width = 10 format = 9.4        analysis min center;
  define max_score    / "MAX/SCORE"             width = 10 format = 9.4        analysis max center;
  define good         / "GOOD/#"                width = 10 format = comma9.    analysis sum;
  define bads         / "BAD/#"                 width = 10 format = comma9.    analysis sum;
  define freq         / "TOTAL/#"               width = 10 format = comma9.    analysis sum;
  define odds         / "ODDS"                  width = 10 format = 8.2        order;
  define hit_rate     / "BAD/RATE"              width = 10 format = percent9.2 order center;
  define cum_hit_rate / "CUMULATIVE/BAD RATE"   width = 10 format = percent9.2 order;
  define cat_rate     / "BAD/PERCENT"           width = 10 format = percent9.2 order center;
  define cum_cat_rate / "CUMU. BAD/PERCENT"     width = 10 format = percent9.2 order; 

  rbreak after / summarize dol skip;
run; 

proc printto;
run;

***********************************************************;
*                     END OF THE MACRO                    *;
***********************************************************; 
%mend separation;

libname data 'C:\Documents and Settings\liuwensui\Desktop\fraction_models\test';

%separation(data = data.accepts, score = bureau_score, y = bad);

Sample Output:

                          GOOD BAD SEPARATION REPORT FOR BUREAU_SCORE IN DATA DATA.ACCEPTS                          
                                    MAXIMUM KS = 35.5477 AT SCORE POINT 677.0000                                    
                     ( AUC STATISTICS = 0.7389, GINI COEFFICIENT = 0.4778, DIVERGENCE = 0.8027 )                    
                                                                                                                    
          MIN        MAX           GOOD        BAD      TOTAL               BAD     CUMULATIVE    BAD      CUMU. BAD
         SCORE      SCORE             #          #          #       ODDS    RATE      BAD RATE  PERCENT      PERCENT
 -------------------------------------------------------------------------------------------------------------------
  BAD   443.0000   620.0000         310        252        562       1.23   44.84%      44.84%    23.10%      23.10% 
   |    621.0000   645.0000         365        201        566       1.82   35.51%      40.16%    18.42%      41.52% 
   |    646.0000   661.0000         359        173        532       2.08   32.52%      37.71%    15.86%      57.38% 
   |    662.0000   677.0000         441        125        566       3.53   22.08%      33.74%    11.46%      68.84% 
   |    678.0000   692.0000         436         99        535       4.40   18.50%      30.79%     9.07%      77.91% 
   |    693.0000   708.0000         469         89        558       5.27   15.95%      28.29%     8.16%      86.07% 
   |    709.0000   725.0000         492         66        558       7.45   11.83%      25.92%     6.05%      92.12% 
   |    726.0000   747.0000         520         42        562      12.38    7.47%      23.59%     3.85%      95.97% 
   V    748.0000   772.0000         507         30        537      16.90    5.59%      21.64%     2.75%      98.72% 
 GOOD   773.0000   848.0000         532         14        546      38.00    2.56%      19.76%     1.28%     100.00% 
       ========== ========== ========== ========== ==========                                                       
        443.0000   848.0000       4,431      1,091      5,522 
Advertisements

Written by statcompute

September 28, 2012 at 11:42 pm

%d bloggers like this: