-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch_parameter_space.py
executable file
·317 lines (229 loc) · 9.92 KB
/
search_parameter_space.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Imputer
from preprocessings import *
from methods import *
import ea_search, grid_search, randomized_search, simulated_annealing_search
import load_openml_datasets
import openml_datasets
from numpy import NaN
import sys
import os
from _config import *
# Optionally swap in the narrowed hyperparameter intervals: the star import
# re-binds the method modules imported above.  `use_narrowed_intervals`,
# `verbose` and `results_dir` come from the `_config` star import.
if use_narrowed_intervals:
    from methods_narrowed import *
if verbose > 0:
    print("Verbose:", verbose, "\n")

# Create the results, pre-results, datasets and out folders.
# os.makedirs(..., exist_ok=True) replaces the original
# "if not os.path.exists(d): os.makedirs(d)" pattern, which is a
# check-then-create race (two processes starting at once could both pass
# the exists() check and one would crash in makedirs()).
for _needed_dir in (results_dir, "pre_" + results_dir, "datasets", "out"):
    os.makedirs(_needed_dir, exist_ok=True)
#----------
# List of used space search methods
#----------
# Names of the parameter-space search strategies; each name must match a
# same-named factory function defined further below in this file.
search_list = ["grid_search_cv",
"evolutionary_search", "evolutionary_search_cv",
"randomized_search_cv", "simulated_annealing_search_cv"]
# grid_search_cv ... Use scikit-learn grid search
# evolutionary_search_no_missing_values ... Use deap evolutionary search
# evolutionary_search ... Use deap evolutionary search with replaced
# missing values
# evolutionary_search_cv ... Use deap evolutionary search with replaced
# missing values and with cross validation
# randomized_search_cv ... Use scikit-learn randomized search
#----------
# List of available data-mining methods
#----------
# list of names of the modules containing methods (in 'methods' directory)
method_list = ["knn", "decision_tree", "adaboost",
"linear_svc", "sgd", "passive_aggressive",
"random_forest", "bernoulli_nb", "svm_svc", "extra_trees",
"gradient_boosting", "lda", "qda"]
#----------
# List of preprocessings
#----------
# list of names of the modules containing preprocessings (in the
# 'preprocessings' package imported above) -- TODO confirm directory name
preproc_list = ["pca", "scale", "normalize", "map_to_uniform", "map_to_gaussian"]
#----------
# Get command line args
#----------
# argv[1] = search-algorithm index (or "?" for usage),
# argv[2] = dash-separated chain of preprocessing indices ending with a
#           method index (e.g. "0-3" = pca then adaboost, "3" = adaboost only),
# argv[3] = OpenML dataset id.
if sys.argv[1] == "?":
    print()
    print("Usage: search_parameter_space.py search preprocessings-method-chain dataset")
    print()
    print("Parameter-space search algorithms:")
    for i, s in enumerate(search_list):
        print("\t", i, "-", s)
    print("\tUse config files to set the searchparameters.")
    print()
    print("Methods:")
    for i, m in enumerate(method_list):
        print("\t", i, "-", m)
    print()
    print("Preprocessings:")
    # BUG FIX: the original indexed an undefined name `preprocessing_list`
    # here, so printing the usage crashed with a NameError.
    for i, p in enumerate(preproc_list):
        print("\t", i, "-", p)
    print()
    print()
    print("Dataset: OpenML dataset ID")
    print("\t(iris id is 61)")
    print()
    print()
    print("For general settings see '_config.py' file")
    sys.exit()
search_index = int(sys.argv[1])
preprocs_method_chain = sys.argv[2]  # e.g. "0-3" or just "3"
did = int(sys.argv[3])  # OpenML dataset id
# str.split("-") already returns [chain] when no "-" is present, so the
# original explicit `if "-" in ...` branch was redundant.
preprocs_method = preprocs_method_chain.split("-")
method_index = preprocs_method[-1]           # last item is the method
preprocs_indices = preprocs_method[:-1]      # all but the last
preprocs = [preproc_list[int(p)] for p in preprocs_indices]
method = method_list[int(method_index)]      # only one method (for now)
search = search_list[search_index]
if verbose > 0:
    print("Search:", search)
    print("Methods:", method)
    print("Preprocs:", preprocs)
#----------
# Load datasets from OpenML
#----------
# AD HOC solution, use selected methods from "openml_datasets.py":
# did = openml_datasets.classification[dataset_index-1]
# did = 61
# get_dataset(did) yields (X, y, dataset_name) triples consumed by the
# main loop below -- presumably a single-element iterable for one id;
# TODO confirm against load_openml_datasets.
datasets = load_openml_datasets.get_dataset(did)
#----------
# Search algorithms
#----------
def grid_search_cv(pipeline, chain_names, chain_hyperparameter_space, dataset_name, verbose=0):
    """Build an exhaustive grid search over the chain's hyperparameter space.

    Mirrors scikit-learn GridSearchCV semantics: the parameter grid is a
    dict (or list of dicts) mapping parameter names to the list of values
    to try, and every combination in the grid is explored.

    Returns a grid_search.GridSearch instance ready to be fit.
    """
    param_grid = chain_hyperparameter_space.get_grid_parameters()
    if verbose > 0:
        print("Parameters : values to test")
        for name, values in param_grid.items():
            print(" --", name, ":", values)
    return grid_search.GridSearch(pipeline, chain_names, param_grid,
                                  dataset_name, verbose=verbose)
def evolutionary_search(m, dataset_name, verbose=0):
    """Deprecated entry point kept so old chain indices still resolve.

    The non-CV evolutionary search was superseded by
    evolutionary_search_cv(); this stub only prints a deprecation
    message and returns None.  The original body after the early
    ``return`` (an Imputer + EvolutionarySearch pipeline) was
    unreachable dead code and has been removed.
    """
    print("evolutionary_search no longer supported. Use evolutionary_search_cv instead.")
    return None
def evolutionary_search_cv(pipeline, chain_names, chain_hyperparameter_space, dataset_name, verbose=0):
    """Build a DEAP-based evolutionary search with cross validation.

    Returns an ea_search.EvolutionarySearchCV over the whole
    preprocessing+method pipeline and its joint hyperparameter space.
    (A commented-out variant that wrapped the search in an Imputer
    pipeline was removed as dead code.)
    """
    return ea_search.EvolutionarySearchCV(pipeline,
                                          chain_names,
                                          chain_hyperparameter_space,
                                          dataset_name,
                                          verbose=verbose)
def randomized_search_cv(pipeline, chain_names, chain_hyperparameter_space, dataset_name, verbose=0):
    """Build a randomized parameter-space search (scikit-learn style).

    Returns a randomized_search.RandomizedSearch instance ready to be fit.
    """
    searcher = randomized_search.RandomizedSearch(
        pipeline, chain_names, chain_hyperparameter_space,
        dataset_name, verbose=verbose)
    return searcher
def simulated_annealing_search_cv(pipeline, chain_names, chain_hyperparameter_space, dataset_name, verbose=0):
    """Build a simulated-annealing parameter-space search.

    Returns a simulated_annealing_search.SimulatedAnnealingSearch
    instance ready to be fit.
    """
    searcher = simulated_annealing_search.SimulatedAnnealingSearch(
        pipeline, chain_names, chain_hyperparameter_space,
        dataset_name, verbose=verbose)
    return searcher
#----------
# Main cycle: for each dataset go through all methods
#----------
# Main cycle: for every loaded dataset, assemble the preprocessing+method
# pipeline, join the hyperparameter spaces, pick the search strategy and fit.
for X,y, dataset_name in datasets:
    #print("X: ", X)
    #print("y: ", y)
    #----------
    # Get preprocessing(s) method chain
    #----------
    print("---")
    # eval() maps module-name strings (e.g. "pca") to the module objects
    # brought in by the star imports at the top of the file.
    p_names = [eval(p).get_name() for p in preprocs]  # only for the printout
    print("Preprocessings:")
    [print("---", p) for p in p_names]
    m = eval(method)
    print("Method:", m.get_name(), "(", method ,")")
    print("---")
    # create the chain
    # example: Pipeline(steps=[('pca', pca), ('logistic', logistic)])
    p_chain = [(p, eval(p).get_model_class()() ) for p in preprocs]
    m_chain = [(method, m.get_model_class()() )]
    chain = p_chain + m_chain
    # print (chain)
    pipeline = Pipeline(steps=chain)
    chain_names = preprocs+[method]
    # join parameters spaces of all chain items
    chain_hyperparameter_space = hyperparameters.HyperparameterSpace()
    for name in chain_names:
        # join all hyperparams spaces
        if name == "knn":
            # knn's space depends on the dataset size (len(y)) --
            # presumably to bound the n_neighbors range; TODO confirm
            hs = eval(name).get_hyperparameter_search_space(len(y))
        else:
            hs = eval(name).get_hyperparameter_search_space()
        chain_hyperparameter_space.add_hyperparameter_space(hs)
    #----------
    # Select the search algorithm
    #----------
    # construct function name from args:
    # grid_search_cv(...), evolutionary_search(...) ...
    # `search` is one of the factory names defined above in this file.
    create_model_function = eval(search)
    model = create_model_function(pipeline, chain_names, chain_hyperparameter_space, str(dataset_name), verbose=verbose)
    #----------
    # Train the search
    #----------
    model.fit(X, y)
    ##----------
    ## Evaluate the best estimator found by the search
    ##----------
    # print()
    # print("Detailed classification report:")
    # y_true, y_pred = y_test, model.predict(X_test)
    #
    # print(classification_report(y_true, y_pred))
    print()
    print()