solved hyperparameter issues
felix committed Oct 7, 2024
1 parent 4efe050 commit c97d8d6
Showing 5 changed files with 42 additions and 47 deletions.
python/naiveautoml/algorithm_selection/_sklearn_factory.py: 2 changes (1 addition, 1 deletion)
@@ -663,7 +663,7 @@ def score_func(X, y):
         return sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis(**params)

     if clazz == sklearn.linear_model.LogisticRegression:
-        params["dual"] = check_for_bool(params["dual"])
+        #params["dual"] = check_for_bool(params["dual"])  # disabled now
         return sklearn.linear_model.LogisticRegression(**params)

     if clazz == sklearn.neural_network.MLPClassifier:
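Note on the disabled line: check_for_bool is presumably the small helper that turns the string-encoded choices from the search-space JSON ("True"/"False") into real Python booleans before the estimator is constructed. A minimal sketch of such a helper, written here only for illustration (the actual implementation in naiveautoml may differ):

    def check_for_bool(value):
        # Illustrative only: coerce the string choices used in the search-space
        # JSON to Python booleans; pass real booleans through unchanged.
        if isinstance(value, bool):
            return value
        if value == "True":
            return True
        if value == "False":
            return False
        raise ValueError(f"Cannot interpret {value!r} as a boolean")

Since the dual hyperparameter is removed from the classification search space in this same commit (see the JSON change below), params no longer carries a "dual" key, so the old line would raise a KeyError, which is presumably why it is commented out here.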
python/naiveautoml/naiveautoml.py: 7 changes (5 additions, 2 deletions)
@@ -234,8 +234,11 @@ def fit(self, X, y, categorical_features=None):

     # get candidate descriptor
     as_result_for_best_candidate = relevant_history.sort_values(self.task.scoring["name"]).iloc[-1]
+    config_space = self.algorithm_selector.get_config_space(as_result_for_best_candidate)

-    if (
+    if len(config_space) == 0:
+        self.logger.info(f"The selected algorithms {as_result_for_best_candidate} have no hyperparameters.")
+    elif (
         deadline is None or
         deadline is not None and deadline - time.time() >= as_result_for_best_candidate["runtime"] + 5
     ):
@@ -244,7 +247,7 @@ def fit(self, X, y, categorical_features=None):
         self.hp_optimizer.reset(
             task=self.task,
             runtime_of_default_config=as_result_for_best_candidate["runtime"],
-            config_space=self.algorithm_selector.get_config_space(as_result_for_best_candidate),
+            config_space=config_space,
             history_descriptor_creation_fun=lambda hp_config: self.algorithm_selector.create_history_descriptor(
                 as_result_for_best_candidate,
                 hp_config
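Note on the new branch: hyperparameter optimization is now skipped when the selected pipeline exposes no tunable hyperparameters, and otherwise only started if enough budget remains. A compact sketch of that control flow, with the naiveautoml internals replaced by placeholders (run_hpo and logger are illustrative names, not part of the library):

    import time

    def maybe_run_hpo(config_space, deadline, runtime_of_default_config, run_hpo, logger):
        # Skip HPO if there is nothing to optimize.
        if len(config_space) == 0:
            logger.info("Selected algorithms have no hyperparameters; skipping HPO.")
            return
        # Otherwise require that at least one more evaluation of roughly the
        # default configuration's runtime (plus a 5s margin) fits into the budget.
        if deadline is None or deadline - time.time() >= runtime_of_default_config + 5:
            run_hpo(config_space)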
python/naiveautoml/searchspace-classification.json: 20 changes (0 additions, 20 deletions)
@@ -1304,26 +1304,6 @@
     "class": "sklearn.linear_model.LogisticRegression",
     "params": {
         "hyperparameters": [
-            {
-                "name": "penalty",
-                "type": "categorical",
-                "choices": [
-                    "l1",
-                    "l2",
-                    "elasticnet",
-                    "None"
-                ],
-                "default_value": "l2"
-            },
-            {
-                "name": "dual",
-                "type": "categorical",
-                "choices": [
-                    "True",
-                    "False"
-                ],
-                "default_value": "False"
-            },
             {
                 "name": "C",
                 "type": "uniform_float",
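Note on the removed entries: they encode None, True and False as strings, which recent scikit-learn versions reject during parameter validation; the literal string "None" is not a valid penalty, whereas Python's None is. A quick way to see the difference, assuming scikit-learn >= 1.2:

    from sklearn.linear_model import LogisticRegression

    # Accepted: no regularization at all.
    LogisticRegression(penalty=None).fit([[0.0], [1.0]], [0, 1])

    # Rejected at fit time on recent versions: "None" is just an unknown string.
    # LogisticRegression(penalty="None").fit([[0.0], [1.0]], [0, 1])  # raises InvalidParameterError

Dropping penalty and dual from the search space leaves LogisticRegression with its defaults for both, which also makes the check_for_bool coercion shown above unnecessary.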
python/naiveautoml/searchspace-regression.json: 2 changes (1 addition, 1 deletion)
@@ -906,7 +906,7 @@
             "default_value": 1e-06
         },
         {
-            "name": "n_iter",
+            "name": "max_iter",
             "type": "constant",
             "value": 300
         },
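Note on the rename: this block (the 1e-06 default above and the constant value 300 suggest one of the Bayesian linear models, e.g. BayesianRidge) tracks a scikit-learn change in which n_iter was deprecated in 1.3 and removed in 1.5 in favour of max_iter, so the old key would now be rejected as an unknown constructor argument. A quick check, assuming BayesianRidge is indeed the estimator configured here:

    from sklearn.linear_model import BayesianRidge

    BayesianRidge(max_iter=300)   # accepted on current scikit-learn releases
    # BayesianRidge(n_iter=300)   # TypeError on scikit-learn >= 1.5: unexpected keyword argument 'n_iter'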
python/test/test_naiveautoml.py: 58 changes (35 additions, 23 deletions)
@@ -272,37 +272,39 @@ def test_constant_algorithms_in_hpo_phase(self):
     X, y = get_dataset(61)

     # run naml
-    np.random.seed(round(time.time()))
+    np.random.seed(0)  # round(time.time()))
     naml = naiveautoml.NaiveAutoML(
         logger_name="naml",
         timeout_overall=60,
         max_hpo_iterations=10,
         show_progress=True,
-        evaluation_fun=evaluate_randomly
+        evaluation_fun=evaluate_randomly,
+        random_state=0
     )
     naml.fit(X, y)
     print(naml.history[["learner_class", "neg_log_loss"]])

     # check that there is only one combination of algorithms in the HPO phase
     history = naml.history.iloc[naml.steps_after_which_algorithm_selection_was_completed:]
-    self.assertTrue(len(pd.unique(history["learner_class"])) == 1)
-    self.assertTrue(len(pd.unique(history["data-pre-processor_class"])) == 1)
-    self.assertTrue(len(pd.unique(history["feature-pre-processor_class"])) == 1)
-
-    # get best solution from phase 1
-    phase_1_solutions = naml.history.iloc[:naml.steps_after_which_algorithm_selection_was_completed]
-    phase_1_solutions = phase_1_solutions[phase_1_solutions[naml.task.scoring["name"]].notna()]
-    best_solution_in_phase_1 = phase_1_solutions.sort_values(naml.task.scoring["name"]).iloc[-1]
-
-    for step in ["data-pre-processor", "feature-pre-processor", "learner"]:
-        field = f"{step}_class"
-        class_in_phase1 = best_solution_in_phase_1[field]
-        class_in_phase2 = pd.unique(history[field])[0]
-        self.assertEqual(
-            class_in_phase1,
-            class_in_phase2,
-            f"Choice for {step} should coincide but is {class_in_phase1} in AS phase and {class_in_phase2} in HPO."
-        )
+    if len(history) > 0:
+        self.assertTrue(len(pd.unique(history["learner_class"])) == 1)
+        self.assertTrue(len(pd.unique(history["data-pre-processor_class"])) == 1)
+        self.assertTrue(len(pd.unique(history["feature-pre-processor_class"])) == 1)
+
+        # get best solution from phase 1
+        phase_1_solutions = naml.history.iloc[:naml.steps_after_which_algorithm_selection_was_completed]
+        phase_1_solutions = phase_1_solutions[phase_1_solutions[naml.task.scoring["name"]].notna()]
+        best_solution_in_phase_1 = phase_1_solutions.sort_values(naml.task.scoring["name"]).iloc[-1]
+
+        for step in ["data-pre-processor", "feature-pre-processor", "learner"]:
+            field = f"{step}_class"
+            class_in_phase1 = best_solution_in_phase_1[field]
+            class_in_phase2 = pd.unique(history[field])[0]
+            self.assertEqual(
+                class_in_phase1,
+                class_in_phase2,
+                f"Choice for {step} should coincide but is {class_in_phase1} in AS phase and {class_in_phase2} in HPO."
+            )


"""
@@ -654,7 +656,7 @@ def update(self, pl, results):
 def test_searchspaces(self):

     for openmlid, task_type in {
-        #61: "classification", # iris
+        61: "classification", # iris
         531: "regression" # boston housing
     }.items():

@@ -701,10 +703,15 @@ def test_searchspaces(self):
         })

         # get HPO process for supposed selection
+        config_space = helper.get_config_space_for_selected_algorithms(selection)
+        if len(config_space) == 0:
+            self.logger.info("Config space is empty, nothing to check.")
+            continue
+
         hp_optimizer.reset(
             task=task,
             runtime_of_default_config=0,
-            config_space=helper.get_config_space_for_selected_algorithms(selection),
+            config_space=config_space,
             history_descriptor_creation_fun=lambda hp_config: naml.algorithm_selector.create_history_descriptor(faked_as_info, hp_config),
             evaluator=naml.evaluator,
             is_pipeline_forbidden=naml.algorithm_selector.is_pipeline_forbidden,
@@ -745,7 +752,12 @@ def test_process_leak(self, openmlid):
     X, y = get_dataset(openmlid)
     self.logger.info(f"Start test of individual stateful evaluation function on dataset {openmlid}.")

-    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.8)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+        X,
+        y,
+        train_size=10,
+        test_size=10
+    )
     for i in range(1, 21):
         self.logger.info(f"Run {i}-th instance")
         automl = naiveautoml.NaiveAutoML(
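Note on the split change: train_test_split interprets integer sizes as absolute sample counts, so the new call uses 10 training and 10 test rows regardless of dataset size instead of an 80/20 split, presumably to keep this process-leak stress test cheap across its 20 iterations. A minimal illustration of the two size semantics on toy data:

    import numpy as np
    from sklearn.model_selection import train_test_split

    X = np.arange(100).reshape(50, 2)
    y = np.arange(50) % 2

    # Float: a fraction of the data -> 40 train / 10 test rows here.
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, train_size=0.8)
    print(len(X_tr), len(X_te))   # 40 10

    # Int: absolute row counts, independent of the dataset size.
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, train_size=10, test_size=10)
    print(len(X_tr), len(X_te))   # 10 10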
