from sklearn.linear_model import LogisticRegression as MetaEstimator
import numpy as np
from sklearn.model_selection import cross_val_predict, KFold
import sklearn.metrics
from skopt import gp_minimize
from config import log

# from catboost import CatBoostRegressor, CatBoostClassifier
#
#
# class MyCatBoostRegressor(CatBoostRegressor):
#     def set_params(self, **params):
#         for k in params:
#             if isinstance(params[k], np.integer):
#                 params[k] = int(params[k])
#             elif isinstance(params[k], np.floating):
#                 params[k] = float(params[k])
#             elif isinstance(params[k], np.ndarray):
#                 params[k] = params[k].tolist()
#         super().set_params(**params)
#
#
# class MyCatBoostClassifier(CatBoostClassifier):
#     def set_params(self, **params):
#         for k in params:
#             if isinstance(params[k], np.integer):
#                 params[k] = int(params[k])
#             elif isinstance(params[k], np.floating):
#                 params[k] = float(params[k])
#             elif isinstance(params[k], np.ndarray):
#                 params[k] = params[k].tolist()
#         super().set_params(**params)
#
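# The two wrappers above repeat the same numpy-to-native conversion. A
# possible shared helper is sketched below, kept commented out like the
# wrappers themselves; the name `_to_native` is illustrative only and not
# part of the original module:
#
# def _to_native(params):
#     for k in params:
#         if isinstance(params[k], np.integer):
#             params[k] = int(params[k])
#         elif isinstance(params[k], np.floating):
#             params[k] = float(params[k])
#         elif isinstance(params[k], np.ndarray):
#             params[k] = params[k].tolist()
#     return params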


def tuning(predictor, X, y, params, cv, gp, scoring='log_loss'):
    """Tune `predictor` with Bayesian optimisation (skopt.gp_minimize),
    then refit it on the full data with the best parameters found."""
    log(0x25, 'tuning: predictor=', predictor)
    log(0x25, 'tuning: x=', type(X), X.shape)
    log(0x25, 'tuning: y=', type(y), y.shape)
    log(0x25, 'tuning: params=', params)
    log(0x25, 'tuning: cv=', cv)
    log(0x25, 'tuning: gp=', gp)
    # Split the search space into parameter names and skopt dimensions.
    space = []
    names = []
    for key in params:
        names.append(key)
        space.append(params[key])

    def objective(p):
        # gp_minimize hands back numpy scalars; convert them to native
        # Python types before passing them to set_params().
        for k, v in enumerate(p):
            if isinstance(v, np.integer):
                p[k] = int(v)
            elif isinstance(v, np.floating):
                p[k] = float(v)
        print('Set:', dict(zip(names, p)))
        predictor.set_params(**dict(zip(names, p)))
        prediction = cross_val_predict(predictor, X, y, **cv)
        return getattr(sklearn.metrics, scoring)(y, prediction)

    result = gp_minimize(objective, space, **gp)
    predictor.set_params(**dict(zip(names, result.x)))
    predictor.fit(X, y)
    return result
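
# A minimal usage sketch for tuning(), kept commented out like the blocks
# above. It assumes a RandomForestClassifier and skopt dimension objects;
# the estimator, search space, and budgets are illustrative choices, not
# part of this module:
#
# from sklearn.ensemble import RandomForestClassifier
# from skopt.space import Integer
#
# clf = RandomForestClassifier(random_state=0)
# result = tuning(
#     clf, X_train, y_train,
#     params={'n_estimators': Integer(50, 500),
#             'min_samples_leaf': Integer(1, 20)},
#     cv={'cv': 5, 'method': 'predict_proba'},  # splatted into cross_val_predict
#     gp={'n_calls': 30, 'random_state': 0},    # splatted into gp_minimize
#     scoring='log_loss',                       # looked up on sklearn.metrics
# )
# print('best score:', result.fun, 'best params:', result.x)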


class Ensemble:
    """Stacking ensemble: out-of-fold class-0 probabilities of the base
    estimators become the features of a logistic-regression meta-estimator."""

    def __init__(self, base_estimators=None, random_state=0):
        self.base_estimators = base_estimators
        self.estimator = MetaEstimator()
        self.random_state = random_state

    def fit(self, X, y):
        cv = KFold(n_splits=5, shuffle=True, random_state=self.random_state)
        predictions = []
        for estimator in self.base_estimators:
            # Out-of-fold predictions keep the meta-estimator's features
            # from leaking the training labels.
            prediction = cross_val_predict(estimator, X, y, cv=cv, method='predict_proba')
            print('prediction of', estimator.__class__.__name__)
            print(prediction)
            # predictions.extend(prediction.T)
            predictions.append(prediction.T[0])
        print('all predictions')
        print(np.array(predictions), y)
        self.estimator.fit(np.array(predictions).T, y)
        # Refit every base estimator on the full data for predict time.
        for estimator in self.base_estimators:
            estimator.fit(X, y)

    def predict(self, X, margin):
        return np.array(self.predict_proba(X)) > margin

    def predict_proba(self, X):
        predictions = []
        for estimator in self.base_estimators:
            # predictions.extend(estimator.predict_proba(X).T)
            predictions.append(estimator.predict_proba(X).T[0])
        return self.estimator.predict_proba(np.array(predictions).T)
#
#
# An earlier version of Ensemble, kept for reference: it trained each base
# estimator on a different fold instead of using out-of-fold predictions.
#
# class Ensemble:
#     def __init__(self, base_estimators=None, random_state=0):
#         self.base_estimators = base_estimators
#         self.estimator = LogisticRegression()
#         self.random_state = random_state
#
#     def fit(self, X, y):
#         folds = KFold(n_splits=len(self.base_estimators), shuffle=True, random_state=self.random_state)
#         predictions = []
#         idx = 0
#         for train, test in folds.split(X):
#             estimator = self.base_estimators[idx]
#             idx += 1
#             estimator.fit(X[train], y[train])
#             prediction = estimator.predict_proba(X)
#             print('prediction of', estimator.__class__.__name__)
#             print(prediction)
#             predictions.extend(prediction.T)
#         print('all predictions')
#         print(np.array(predictions), y)
#         self.estimator.fit(np.array(predictions).T, y)
#         for estimator in self.base_estimators:
#             estimator.fit(X, y)
#
#     def predict(self, X, margin):
#         return np.array(self.predict_proba(X)) > margin
#
#     def predict_proba(self, X):
#         predictions = []
#         for estimator in self.base_estimators:
#             predictions.extend(estimator.predict_proba(X).T)
#         return self.estimator.predict_proba(np.array(predictions).T)
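

# A minimal demo of the Ensemble class on synthetic data; the base estimators
# and margin below are illustrative choices, not prescribed by this module.
if __name__ == '__main__':
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.naive_bayes import GaussianNB

    X, y = make_classification(n_samples=200, n_features=10, random_state=0)
    ensemble = Ensemble(base_estimators=[
        RandomForestClassifier(n_estimators=50, random_state=0),
        GaussianNB(),
    ])
    ensemble.fit(X, y)
    # predict() thresholds the meta-estimator's probabilities at `margin`;
    # it returns one boolean column per class.
    print(ensemble.predict(X, margin=0.5))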