diff --git a/setup.py b/setup.py index c496a48c1..1d8e47ba5 100755 --- a/setup.py +++ b/setup.py @@ -47,6 +47,7 @@ "codecov", "pep8", "mypy", + "openml" ], "examples": [ "matplotlib", diff --git a/test/conftest.py b/test/conftest.py index f05f573a7..c8ff6529e 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -8,6 +8,8 @@ import numpy as np +import openml + import pandas as pd import pytest @@ -23,6 +25,42 @@ from autoPyTorch.utils.pipeline import get_dataset_requirements +@pytest.fixture(scope="session") +def callattr_ahead_of_alltests(request): + """ + This procedure will run at the start of the pytest session. + It will prefetch several task that are going to be used by + the testing face, and it does so in a robust way, until the openml + API provides the desired resources + """ + tasks_used = [ + 146818, # Australian + 2295, # cholesterol + 2075, # abalone + 2071, # adult + 3, # kr-vs-kp + 9981, # cnae-9 + 146821, # car + 146822, # Segment + 2, # anneal + 53, # vehicle + 5136, # tecator + 4871, # sensory + 4857, # boston + 3916, # kc1 + ] + + # Populate the cache + # This will make the test fail immediately rather than + # Waiting for a openml fetch timeout + openml.populate_cache(task_ids=tasks_used) + # Also the bunch + for task in tasks_used: + fetch_openml(data_id=openml.tasks.get_task(task).dataset_id, + return_X_y=True) + return + + def slugify(text): return re.sub(r'[\[\]]+', '-', text.lower())