I'm using scikit-learn's Ridge linear regression. The documentation says that the alpha parameter should be small.
However, I'm getting the best model performance at alpha = 6060. Am I doing something wrong?
This is the description from the documentation:
alpha : {float, array-like} shape = [n_targets]
    Small positive values of alpha improve the conditioning of the problem
    and reduce the variance of the estimates.
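As far as I understand, Ridge minimizes the unnormalized objective ||y - Xw||^2 + alpha * ||w||^2, so the residual term grows with the number of samples while the penalty term does not, which makes "small" relative to the data. A minimal sketch on synthetic data checking this against the closed-form solution (my own example; fit_intercept=False to keep the algebra simple):

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = X.dot(rng.randn(5)) + 0.1 * rng.randn(200)

alpha = 10.0
# sklearn solves: min_w ||y - Xw||^2 + alpha * ||w||^2
w_sklearn = Ridge(alpha=alpha, fit_intercept=False).fit(X, y).coef_
# same problem in closed form: w = (X'X + alpha*I)^(-1) X'y
w_manual = np.linalg.solve(X.T.dot(X) + alpha * np.eye(5), X.T.dot(y))
print(np.allclose(w_sklearn, w_manual))  # True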
Here is my code:
import pandas as pd
import numpy as np
import custom_metrics as cmetric
from sklearn import preprocessing
from sklearn import model_selection
from sklearn import linear_model
# Read data files:
df_train = pd.read_csv(path + "/input/train.csv")
df_test = pd.read_csv(path + "/input/test.csv")
#print(df_train.shape)
#(50999, 34)
#convert categorical features into integers
feature_cols_obj = [col for col in df_train.columns if df_train[col].dtypes == 'object']
le = preprocessing.LabelEncoder()
for col in feature_cols_obj:
    df_train[col] = le.fit_transform(df_train[col])
    df_test[col] = le.transform(df_test[col])
#Scale the data so that each feature has zero mean and unit std
feature_cols = [col for col in df_train.columns if col not in ['Hazard','Id']]
scaler = preprocessing.StandardScaler().fit(df_train[feature_cols])
df_train[feature_cols] = scaler.transform(df_train[feature_cols])
df_test[feature_cols] = scaler.transform(df_test[feature_cols])
#polynomial features/interactions
X_train = df_train[feature_cols]
X_test = df_test[feature_cols]
y = df_train['Hazard']
test_ids = df_test['Id']
poly = preprocessing.PolynomialFeatures(2)
X_train = poly.fit_transform(X_train)
X_test = poly.transform(X_test)  # transform only; do not re-fit on test data
#do grid search to find the best value for alpha
#(a log-scale variant of this search is sketched after the listing)
#alphas = np.arange(-10,3,1)
#clf = linear_model.RidgeCV(10**alphas)
alphas = np.arange(100, 10000, 10)
clf = linear_model.RidgeCV(alphas=alphas)
clf.fit(X_train, y)
print(clf.alpha_)
#clf.alpha=6060
cv = model_selection.KFold(n_splits=10)
mse = []
mse_train = []
fold_count = 0
for train, test in cv.split(df_train):
    print("Processing fold %s" % fold_count)
    train_fold = df_train.iloc[train]
    test_fold = df_train.iloc[test]
    # Get training examples
    X_train = train_fold[feature_cols]
    y = train_fold['Hazard']
    X_test = test_fold[feature_cols]
    # interactions
    poly = preprocessing.PolynomialFeatures(2)
    X_train = poly.fit_transform(X_train)
    X_test = poly.transform(X_test)
    # Fit Ridge linear regression
    cfr = linear_model.Ridge(alpha=6060)
    cfr.fit(X_train, y)
    # Check error on test set
    pred = cfr.predict(X_test)
    mse.append(cmetric.normalized_gini(test_fold.Hazard, pred))
    # Check error on training set (resubstitution error)
    mse_train.append(cmetric.normalized_gini(y, cfr.predict(X_train)))
    # Done with the fold
    fold_count += 1
#print model coefficients
print(cfr.coef_)
print(pd.DataFrame(mse).mean())
#0.311794
print(pd.DataFrame(mse_train).mean())
#0.344775
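For reference, the commented-out log-scale search mentioned in the listing would look something like this spelled out (just a sketch; the grid bounds are my own choice, and X_train and y are the variables from the listing above):

import numpy as np
from sklearn import linear_model

alphas = np.logspace(-3, 5, num=50)  # log-spaced grid instead of the linear one
clf = linear_model.RidgeCV(alphas=alphas)
clf.fit(X_train, y)
print(clf.alpha_)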
These are my model's coefficients:
[ 0.00000000e+00 5.01056266e-02 3.38358145e-01 1.30415614e-01
1.96089173e-01 1.25423106e-01 -1.72319456e-02 1.02133523e-01
2.81574892e-01 8.95633136e-02 -5.88384438e-03 1.47409573e-01
1.33623390e-01 -1.23180872e-02 -1.46668969e-01 -4.92436419e-02
1.99181255e-01 -4.04964277e-03 -1.53413757e-01 -1.44825780e-01
-3.91212516e-03 3.31216145e-03 -6.26732347e-02 2.88351008e-02
-2.06225972e-03 -5.62389494e-02 -1.36303417e-01 -9.71481638e-03
-2.50177992e-02 -5.66878847e-03 5.27927411e-03 8.52720405e-02
2.06771941e-01 1.56008577e-01 6.40581708e-04 9.92281016e-03
-9.19795609e-02 3.12156134e-02 5.99317391e-03 2.97288547e-02
8.18623392e-02 2.29032549e-02 -2.73972788e-02 1.51645073e-02
3.23438207e-02 3.88545534e-02 2.09627935e-02 6.96394351e-02
-9.16980407e-03 -2.18354808e-02 5.07216880e-03 3.17494225e-02
-2.09772938e-02 7.49790681e-02 1.64625955e-02 1.62684403e-02
1.75895590e-02 -1.75381993e-02 3.30949594e-02 1.68392658e-02
-4.66582083e-02 -1.31719587e-02 2.87477287e-02 3.09746600e-02
4.02686924e-02 9.92987348e-02 6.68115596e-02 1.37739367e-02
2.59549211e-02 -2.38058399e-02 2.65402587e-02 -6.92529979e-04
1.29576451e-02 1.25610137e-02 -9.68450401e-03 -2.27944079e-03
8.09259777e-03 3.30342301e-02 -1.66721756e-02 2.23839435e-02
3.32432426e-02 -8.04561602e-04 -1.76497454e-01 3.88186056e-02
3.37774181e-02 2.17565099e-02 5.30233710e-03 -9.24771708e-03
-2.73679575e-02 1.85809743e-02 1.29401582e-02 -3.19690493e-02
6.66584177e-03 4.49614784e-02 1.54566766e-02 -7.53477425e-02
5.81757746e-02 -1.76431610e-02 2.75413678e-02 -2.55708758e-02
-1.45602981e-02 1.81886720e-02 -2.32100056e-02 -3.80762864e-02
1.23351328e-02 -1.66785206e-02 1.56719872e-02 4.68276318e-03
9.40509132e-04 3.57552385e-02 -1.58076069e-02 -5.53826979e-02
-6.65141956e-03 -2.88814865e-03 1.50011930e-02 2.13362782e-02
-2.81424369e-03 -2.31808199e-02 -7.58749682e-04 -2.14478348e-02
-2.51214499e-02 -1.79191990e-02 -4.81870440e-03 -1.46611980e-02
-4.94190983e-03 3.70234969e-02 -1.02883622e-01 2.76857570e-02
1.79941196e-02 1.32435722e-02 4.76814155e-02 3.65520203e-02
-3.40269596e-02 2.28209650e-02 2.64274614e-02 -1.27865165e-02
-2.27199591e-02 8.70615230e-02 6.13932119e-02 -1.08140405e-02
-4.25732617e-02 2.77774841e-02 2.61014304e-02 -2.11770868e-02
-3.23724937e-02 -1.89128329e-02 -1.05251080e-02 1.83862325e-02
2.23534204e-05 -1.20347566e-02 -9.01096911e-03 4.02046530e-02
1.98012305e-02 1.58194352e-02 1.12816659e-02 2.20555788e-02
3.63227509e-03 8.58770347e-03 1.02248600e-02 9.08000210e-03
-3.46537486e-03 3.12890495e-02 -1.43673284e-02 3.14534787e-02
-2.75188217e-02 -7.18691836e-03 -7.07891187e-03 2.31015874e-03
1.35687890e-03 1.19905667e-02 1.67455167e-02 -4.81069735e-02
-9.68752624e-03 1.07210195e-02 2.21967422e-02 -1.01291741e-02
5.25316073e-02 -6.66390427e-03 1.14009388e-02 -2.01902980e-02
7.12558770e-02 7.83356049e-03 2.38237767e-02 -4.15273084e-03
8.77559520e-03 -9.00060067e-03 1.60980039e-02 -1.08296154e-02
-4.42603447e-02 -1.09092326e-02 -1.44739014e-02 -8.60951329e-03
2.49086682e-02 -1.92284756e-02 -1.89749837e-02 -2.77355424e-02
9.82524416e-04 5.67031203e-03 -2.54535190e-03 4.75299754e-03
2.71211354e-02 3.27688397e-03 1.85424999e-02 -2.26283972e-03
-1.79189346e-02 1.71016295e-02 1.93496703e-02 -8.69306494e-03
-9.58453162e-03 1.20178271e-02 -2.26289764e-02 2.15726732e-03
8.10821412e-03 2.47074350e-02 2.95059846e-02 1.21555107e-03
4.98131914e-03 2.43000118e-03 1.95785508e-02 -1.57990583e-02
-1.16102797e-02 7.46115157e-03 -8.06557912e-03 2.15298282e-02
2.29129769e-03 3.25367516e-02 1.99525909e-02 -3.92835829e-05
-2.05836804e-02 5.04866199e-03 1.24307792e-02 2.53079097e-02
1.96925968e-02 -2.64611443e-02 -6.82689419e-03 -1.49852524e-02
-3.93645529e-02 9.99089648e-03 1.09631668e-02 -1.51040704e-02
1.67164079e-02 1.26766125e-02 -5.86334604e-02 2.40496926e-02
1.54186622e-02 -6.12083319e-02 2.96323772e-02 -2.44415176e-02
1.11052819e-02 -2.57457149e-02 -2.49398749e-02 3.32951848e-02
3.85385282e-02 -5.16258286e-02 9.54478785e-03 -1.72063013e-02
2.23740124e-02 -5.52438260e-03 -1.31083373e-02 4.06350280e-02
2.55388980e-02 -1.41308581e-02 -5.75406964e-03 -2.16662792e-02
1.62563844e-02 -2.24345285e-02 -3.11828721e-02 1.44461933e-02
-1.30242777e-02 5.54219131e-03 7.95690283e-03 1.76646739e-03
1.41692278e-02 1.86635350e-02 -1.97979179e-02 -4.63186884e-03
1.20542175e-02 3.20413779e-02 -4.57051394e-02 -2.36441701e-02
-3.04932172e-02 3.87911664e-02 4.78728082e-02 -1.88170992e-02
2.63657803e-03 -3.86012566e-03 3.97224532e-03 1.39442494e-02
-1.88336565e-02 -2.70551779e-02 -4.66568493e-03 -1.33610215e-03
-8.18744988e-03 8.46266586e-03 1.48045340e-02 -6.83182810e-03
4.27071232e-04 -2.06468268e-03 -4.97468097e-03 1.48996773e-02
1.07555873e-03 -9.22835391e-03 8.99547441e-03 4.82968523e-03
9.98661438e-03 -1.65016157e-02 4.92553539e-03 -1.16019345e-03
-8.61715302e-02 3.61199006e-02 -3.42490963e-02 1.90392013e-03
-2.28637135e-02 -5.21567166e-02 2.52407362e-02 -4.18485080e-02
-1.10058841e-02 1.08156107e-02 2.75855699e-02 3.42679472e-02
-2.20825398e-02 3.10296716e-02 -2.75477891e-02 -3.07610994e-04
-9.01535833e-03 1.83193047e-02 -1.62208155e-02 -3.10351309e-02
2.48818137e-02 8.18365382e-03 -1.60809925e-02 1.01836062e-02
-2.05476464e-02 6.85855700e-03 2.57900195e-02 -1.49888744e-02
-1.74511929e-03 1.24756224e-02 6.52823373e-03 -8.66673208e-03
4.31158765e-02 -3.38250310e-02 3.18997087e-02 -1.37758806e-02
-5.33139333e-03 -8.79443447e-03 -1.44610591e-02 2.52537246e-02
7.87391233e-02 1.27012172e-02 -1.56102214e-02 -2.67977090e-02
-4.62736835e-04 2.56238335e-02 -8.09176226e-03 1.91149252e-03
2.22919180e-03 -1.68315172e-02 7.52355012e-04 8.54417905e-03
4.70626447e-03 3.26721691e-02 -6.66386713e-03 -3.62252305e-02
-1.58279947e-03 9.68094966e-04 -1.48804245e-02 7.77251715e-03
3.10671736e-02 -2.88045626e-02 -4.11328551e-03 -8.85415876e-03
1.00068277e-02 -1.91264954e-02 2.67619648e-03 4.45828413e-02
3.10793047e-02 -5.73633264e-03 -1.42399778e-02 -1.64262324e-02
3.46889578e-03 -2.07657571e-02 4.23753762e-02 -1.34629372e-04
1.29942385e-02 2.76672570e-02 2.42359462e-02 -3.10531938e-02
-6.55599208e-03 -2.99614420e-02 -1.91772543e-03 -2.02543378e-02
2.21573145e-02 -1.59205200e-02 -4.01668972e-02 1.68476566e-02
2.37997259e-02 9.68827220e-03 2.31875156e-02 6.79334701e-03
8.50905759e-03 2.75432711e-02 -1.15121814e-02 -3.73346961e-02
-1.38350227e-02 1.80944227e-03 -1.99144252e-02 -4.50820362e-02
-8.51953547e-02 1.05289990e-03 -5.73683988e-02 -2.28014261e-03
8.79845471e-04 -1.78715467e-02 3.80136044e-02 5.91779029e-03
-2.78439138e-02 1.17008295e-02 -3.42645883e-02 2.34195563e-02
-4.81212125e-03 -3.66954676e-03 3.90228979e-02 -1.70824631e-02
1.26012065e-02 -6.31776451e-03 -4.50539802e-02 4.56076309e-03
1.44345103e-04 7.18085486e-03 -5.65887042e-03 -8.93522131e-03
1.10250325e-02 -7.96661764e-03 3.41027635e-03 1.22731705e-02
8.47707142e-03 -1.49932019e-02 2.66278446e-02 -1.91671698e-02
-1.26043505e-02 6.35412651e-02 -1.96684538e-02 1.54777089e-02
1.91255149e-02 5.31808863e-02 5.35289710e-04 -2.08611895e-03
-4.96139883e-05 3.58330048e-02 -4.11211604e-03 -2.71141250e-02
-3.69387375e-02 1.32678215e-02 1.03065894e-02 -1.16026233e-01
9.05902436e-03 -4.11355240e-03 -5.76609367e-03 -3.03489660e-02
-2.28466980e-02 -4.07422105e-03 -1.14981380e-02 -2.00718306e-02
2.82733363e-02 -5.85023917e-03 3.73266757e-02 -6.81585169e-02
-5.58717156e-02 -3.08019223e-02 -2.96969413e-03 -2.47161214e-02
-4.22694385e-03 -7.48483026e-03 -5.56253994e-03 7.40543585e-04
3.01216667e-02 -5.56430481e-03 5.64963486e-03 -3.23841390e-02
8.53283403e-03 1.11517051e-02 9.92444066e-03 -1.23128623e-01
9.55844602e-03 -1.99321384e-02 1.57922080e-02 1.19673131e-02
1.68849528e-02 -6.34044465e-03 1.06775644e-02 2.47265340e-02
-3.25711720e-02 -9.76594327e-03 -5.08742553e-02 5.24289526e-02
2.91111239e-02 -1.39398617e-02 2.42465106e-02 -4.15577108e-02
-2.58480889e-02 4.62777932e-02 1.20306488e-02 2.75909133e-03
-1.01712845e-02 -5.46886148e-02 -2.46527009e-02 -1.01558015e-02
1.56187669e-02 3.52148277e-03 2.19565752e-02 -8.63359919e-03
-1.76395758e-02 1.96950103e-02 1.48984342e-02 -3.16258423e-03
-5.85331096e-03 9.65977546e-03 4.50183244e-03 -2.47884005e-03
-1.69232421e-02 -5.99840290e-04 6.71455978e-04 1.08295297e-02
1.95363677e-02 9.41925140e-03 -1.81041432e-02 -1.40310535e-02
7.22752555e-04 2.96587727e-02 -3.23319552e-02 -3.06897437e-02
-1.84134215e-02 6.59667420e-02 2.65303333e-02 3.17956491e-02
1.27969978e-02 -2.03537673e-02 -2.66071290e-02 -7.43361712e-03
-2.48459980e-02 3.64371936e-03 1.64657605e-02 -4.65377776e-02
-8.32882233e-03 2.52378813e-02 2.51187743e-02 -1.62291204e-02
-1.84450879e-02 3.42146322e-02 -3.21809348e-02 -9.33033795e-03
-9.45292979e-03 -3.45051567e-02 -3.55155679e-02 -1.25766013e-01
-2.02891251e-02 5.37952007e-03 1.27094363e-02 2.36198128e-02
2.32853839e-02]
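One thing I wonder about: maybe "small" is relative to the scale of X'X rather than an absolute number. With ~51,000 rows and standardized base columns, each diagonal entry of X'X should be roughly n_samples, so the relevant scale is on the order of 10^4-10^5. A rough check (a sketch, assuming X_train from the listing after the polynomial expansion; interaction columns are not exactly standardized, so this is only approximate):

import numpy as np

# diag(X'X): about n_samples per standardized column
gram_diag = (X_train ** 2).sum(axis=0)
print(gram_diag.mean())           # roughly n_samples
print(6060.0 / gram_diag.mean())  # alpha relative to that scale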