Yet Another Blog in Statistical Computing

I can calculate the motion of heavenly bodies but not the madness of people. -Isaac Newton

Fitting A Logistic Regression with Python

In [1]: from pandas import *

In [2]: import statsmodels.api as sm

In [3]: # LOAD EXTERNAL DATA

In [4]: data = read_table('C:\\data\\credit_count.txt', sep = ',')

In [5]: data
Out[5]:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 13444 entries, 0 to 13443
Data columns:
CARDHLDR     13444  non-null values
DEFAULT      13444  non-null values
AGE          13444  non-null values
ACADMOS      13444  non-null values
ADEPCNT      13444  non-null values
MAJORDRG     13444  non-null values
MINORDRG     13444  non-null values
OWNRENT      13444  non-null values
INCOME       13444  non-null values
SELFEMPL     13444  non-null values
INCPER       13444  non-null values
EXP_INC      13444  non-null values
SPENDING     13444  non-null values
LOGSPEND     13444  non-null values
dtypes: float64(4), int64(8), object(2)

In [6]: # DEFINE RESPONSE

In [7]: Y = data[data.CARDHLDR == 1].DEFAULT

In [8]: # SUMMARIZE RESPONSE VARIABLE

In [9]: Y.describe()
Out[9]:
count    10499.000000
mean         0.094866
std          0.293044
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          1.000000

In [10]: # DEFINE PREDICTORS

In [11]: X = sm.add_constant(data[data.CARDHLDR == 1][['AGE', 'ADEPCNT', 'MAJORDRG', 'MINORDRG', 'INCOME', 'OWNRENT']])

In [12]: # SUMMARIZE PREDICTORS

In [13]: X.describe()
Out[13]:
                AGE       ADEPCNT      MAJORDRG      MINORDRG        INCOME       OWNRENT  const
count  10499.000000  10499.000000  10499.000000  10499.000000  10499.000000  10499.000000  10499
mean      33.674945      0.990380      0.143252      0.220688   2606.125933      0.479093      1
std       10.290998      1.273887      0.461568      0.637142   1287.983386      0.499587      0
min        0.000000      0.000000      0.000000      0.000000     50.000000      0.000000      1
25%       25.750000      0.000000      0.000000      0.000000   1750.000000      0.000000      1
50%       31.666666      0.000000      0.000000      0.000000   2291.666667      0.000000      1
75%       39.750000      2.000000      0.000000      0.000000   3041.666667      1.000000      1
max       88.666664      9.000000      6.000000      7.000000   8333.250000      1.000000      1

In [14]: # DEFINE A MODEL

In [15]: model = sm.GLM(Y, X, family = sm.families.Binomial())

In [16]: # FIT A MODEL

In [17]: result = model.fit()

In [18]: # PRINT RESULTS

In [19]: print result.summary()
                 Generalized Linear Model Regression Results
==============================================================================
Dep. Variable:                DEFAULT   No. Observations:                10499
Model:                            GLM   Df Residuals:                    10492
Model Family:                Binomial   Df Model:                            6
Link Function:                  logit   Scale:                             1.0
Method:                          IRLS   Log-Likelihood:                -3175.8
Date:                Thu, 08 Nov 2012   Deviance:                       6351.7
Time:                        23:24:02   Pearson chi2:                 1.11e+04
No. Iterations:                     7
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
AGE           -0.0095      0.004     -2.450      0.014        -0.017    -0.002
ADEPCNT        0.1338      0.029      4.655      0.000         0.077     0.190
MAJORDRG       0.2103      0.070      3.016      0.003         0.074     0.347
MINORDRG       0.2007      0.048      4.178      0.000         0.107     0.295
INCOME        -0.0005   4.19e-05    -11.057      0.000        -0.001    -0.000
OWNRENT       -0.2263      0.077     -2.924      0.003        -0.378    -0.075
const         -0.9648      0.133     -7.245      0.000        -1.226    -0.704
==============================================================================
Advertisements

Written by statcompute

November 8, 2012 at 11:29 pm

%d bloggers like this: