Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom metrics do not work with AutoSklearn2Classifier #1734

Open
ViktorooReps opened this issue Jul 29, 2024 · 0 comments
Open

Custom metrics do not work with AutoSklearn2Classifier #1734

ViktorooReps opened this issue Jul 29, 2024 · 0 comments

Comments

@ViktorooReps
Copy link

Describe the bug

I am creating a custom MCC scorer for a binary classification problem and am encountering the following error:

FileNotFoundError: [Errno 2] No such file or directory: '/home/[email protected]/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'

To Reproduce

Replace the dataset loading logic with any other dataset. `matthews_corrcoef` is imported from scikit-learn.

scorer = autosklearn.metrics.make_scorer(
    name='mcc',
    score_func=matthews_corrcoef,
    optimum=1,
    greater_is_better=True,
    needs_proba=False,
    needs_threshold=False,
)

train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)

classifier = AutoSklearn2Classifier(
    time_left_for_this_task=24 * 60 * 60,  # 1d
    per_run_time_limit=15 * 60,
    memory_limit=20 * 1024,
    n_jobs=4, 
    max_models_on_disc=50,
    ensemble_size=50,
    seed=42,
    metric=scorer
)
classifier.fit(train_x, train_y, valid_x, valid_y)

Expected behavior

No error

Actual behavior, stacktrace or logfile

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[21], line 13
     10 for target in all_target_columns:
     11     train_x, train_y, valid_x, valid_y, _, _ = load_dataset(target)
---> 13     classifier = AutoSklearn2Classifier(
     14         # tmp_folder=cache_path / 'tnp2',
     15         time_left_for_this_task=24 * 60 * 60,  # 1d
     16         per_run_time_limit=15 * 60,
     17         memory_limit=20 * 1024,
     18         n_jobs=4,
     19         max_models_on_disc=50,
     20         ensemble_size=50,
     21         seed=42,
     22         metric=scorer
     23     )
     24     classifier.fit(train_x, train_y, valid_x, valid_y)
     25     save_model(classifier, 'autosklearn', target)

File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:311, in AutoSklearn2Classifier.__init__(self, time_left_for_this_task, per_run_time_limit, ensemble_size, ensemble_class, ensemble_kwargs, ensemble_nbest, max_models_on_disc, seed, memory_limit, tmp_folder, delete_tmp_folder_after_terminate, n_jobs, dask_client, disable_evaluator_output, smac_scenario_args, logging_config, metric, scoring_functions, load_models, dataset_compression, allow_string_features)
    306 include_preprocessors = ["no_preprocessing"]
    307 include = {
    308     "classifier": include_estimators,
    309     "feature_preprocessor": include_preprocessors,
    310 }
--> 311 self.train_selectors(selected_metric=metric)
    312 super().__init__(
    313     time_left_for_this_task=time_left_for_this_task,
    314     per_run_time_limit=per_run_time_limit,
   (...)
    339     allow_string_features=allow_string_features,
    340 )

File ~/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/askl2.py:356, in AutoSklearn2Classifier.train_selectors(self, selected_metric)
    352 for metric in metric_list:
    353     training_data_file = (
    354         self.this_directory / metric.name / "askl2_training_data.json"
    355     )
--> 356     with open(training_data_file) as fh:
    357         training_data = json.load(fh)
    358         fh.seek(0)

FileNotFoundError: [Errno 2] No such file or directory: '/home/[email protected]/PycharmProjects/laion-copyright/venv_new/lib/python3.9/site-packages/autosklearn/experimental/mcc/askl2_training_data.json'

Sorry for the broken encoding. The problem arises when `training_data_file` is opened at line 356 of askl2.py. The tmp_folder does not exist.

Environment and installation:

  • Red Hat Enterprise Linux 8.8 (Ootpa)
  • venv
  • Python 3.9

pip freeze:

anyio==4.4.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==2.4.1
async-lru==2.0.4
attrs==23.2.0
auto-sklearn==0.15.0
Babel==2.15.0
beautifulsoup4==4.12.3
bleach==6.1.0
certifi==2024.7.4
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
cloudpickle==3.0.0
comm==0.2.2
ConfigSpace==0.4.21
Cython==3.0.10
dask==2024.7.1
debugpy==1.8.2
decorator==5.1.1
defusedxml==0.7.1
distributed==2024.7.1
distro==1.9.0
emcee==3.1.6
exceptiongroup==1.2.2
executing==2.0.1
fastjsonschema==2.20.0
fqdn==1.5.1
fsspec==2024.6.1
h11==0.14.0
httpcore==1.0.5
httpx==0.27.0
idna==3.7
importlib_metadata==8.2.0
ipykernel==6.29.5
ipython==8.18.1
ipywidgets==8.1.3
isoduration==20.11.0
jedi==0.19.1
Jinja2==3.1.4
joblib==1.4.2
json5==0.9.25
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
jupyter==1.0.0
jupyter-console==6.6.3
jupyter-events==0.10.0
jupyter-lsp==2.2.5
jupyter_client==8.6.2
jupyter_core==5.7.2
jupyter_server==2.14.2
jupyter_server_terminals==0.5.3
jupyterlab==4.2.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
jupyterlab_widgets==3.0.11
liac-arff==2.5.0
locket==1.0.0
MarkupSafe==2.1.5
matplotlib-inline==0.1.7
mistune==3.0.2
msgpack==1.0.8
nbclient==0.10.0
nbconvert==7.16.4
nbformat==5.10.4
nest-asyncio==1.6.0
notebook==7.2.1
notebook_shim==0.2.4
numpy==1.23.3
overrides==7.7.0
packaging==24.1
pandas==1.5.3
pandocfilters==1.5.1
parso==0.8.4
partd==1.4.2
pexpect==4.9.0
platformdirs==4.2.2
prometheus_client==0.20.0
prompt_toolkit==3.0.47
psutil==6.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pycparser==2.22
Pygments==2.18.0
pynisher==0.6.4
pyparsing==3.1.2
pyrfr==0.8.3
python-dateutil==2.9.0.post0
python-json-logger==2.0.7
pytz==2024.1
PyYAML==6.0.1
pyzmq==26.0.3
qtconsole==5.5.2
QtPy==2.4.1
referencing==0.35.1
requests==2.32.3
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rpds-py==0.19.1
scikit-learn==0.24.2
scipy==1.13.1
Send2Trash==1.8.3
six==1.16.0
smac==1.2
sniffio==1.3.1
sortedcontainers==2.4.0
soupsieve==2.5
stack-data==0.6.3
tblib==3.0.0
terminado==0.18.1
threadpoolctl==3.5.0
tinycss2==1.3.0
tomli==2.0.1
toolz==0.12.1
tornado==6.4.1
tqdm==4.66.4
traitlets==5.14.3
types-python-dateutil==2.9.0.20240316
typing_extensions==4.12.2
uri-template==1.3.0
urllib3==2.2.2
wcwidth==0.2.13
webcolors==24.6.0
webencodings==0.5.1
websocket-client==1.8.0
widgetsnbextension==4.0.11
zict==3.0.0
zipp==3.19.2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant