Multivariate Adaptive Regression Splines with Python

In [1]: import statsmodels.datasets as datasets

In [2]: import sklearn.metrics as metrics

In [3]: from numpy import log

In [4]: from pyearth import Earth as earth

In [5]: boston = datasets.get_rdataset("Boston", "MASS").data

In [6]: x = boston.ix[:, 0:boston.shape[1] - 1]

In [7]: xlabel = list(x.columns)

In [8]: y = boston.ix[:, boston.shape[1] - 1]

In [9]: model = earth(enable_pruning = True, penalty = 3, minspan_alpha = 0.05, endspan_alpha = 0.05)

In [10]: model.fit(x, log(y), xlabels = xlabel)
Out[10]:
Earth(allow_linear=None, check_every=None, enable_pruning=True, endspan=None,
   endspan_alpha=0.05, max_degree=None, max_terms=None,
   min_search_points=None, minspan=None, minspan_alpha=0.05, penalty=3,
   smooth=None, thresh=None)

In [11]: print model.summary()
Earth Model
--------------------------------------
Basis Function   Pruned  Coefficient
--------------------------------------
(Intercept)      No      2.93569
h(lstat-5.9)     No      -0.0249319
h(5.9-lstat)     No      0.067697
h(rm-6.402)      No      0.230063
h(6.402-rm)      Yes     None
crim             Yes     None
h(dis-1.5004)    No      -0.0369272
h(1.5004-dis)    No      1.70517
h(ptratio-18.6)  No      -0.0393493
h(18.6-ptratio)  No      0.0278253
nox              No      -0.767146
h(indus-25.65)   No      -0.147471
h(25.65-indus)   Yes     None
h(black-395.24)  Yes     None
h(395.24-black)  Yes     None
h(crim-24.8017)  Yes     None
h(24.8017-crim)  No      0.0292657
rad              No      0.00807783
h(rm-7.853)      Yes     None
h(7.853-rm)      Yes     None
chas             Yes     None
--------------------------------------
MSE: 0.0265, GCV: 0.0320, RSQ: 0.8409, GRSQ: 0.8090

In [12]: metrics.r2_score(log(y), model.predict(x))
Out[12]: 0.840861083407211