diff --git a/.flake8 b/.flake8 new file mode 100644 index 000000000..3548ad6a4 --- /dev/null +++ b/.flake8 @@ -0,0 +1,7 @@ +[flake8] + max-line-length = 120 + show-source = True + application-import-names = autoPyTorch + exclude = + venv + build diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml new file mode 100644 index 000000000..b278a8563 --- /dev/null +++ b/.github/workflows/examples.yml @@ -0,0 +1,34 @@ +name: Examples + +on: [push, pull_request] + +jobs: + ubuntu: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.8] + fail-fast: false + max-parallel: 2 + + steps: + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install test dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[examples] + which python + pip freeze + - name: Store repository status + id: status-before + run: | + echo "::set-output name=BEFORE::$(git status --porcelain -b)" + - name: Run tests + run: | + python examples/example_tabular_classification.py + python examples/example_image_classification.py diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml new file mode 100644 index 000000000..eabada7e8 --- /dev/null +++ b/.github/workflows/pre-commit.yaml @@ -0,0 +1,20 @@ +name: pre-commit + +on: [push, pull_request] + +jobs: + run-all-files: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install pre-commit + run: | + pip install pre-commit + pre-commit install + - name: Run pre-commit + run: | + pre-commit run --all-files diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml new file mode 100644 index 000000000..3f398c464 --- /dev/null +++ b/.github/workflows/pytest.yml @@ -0,0 +1,49 @@ +name: Tests + +on: [push, pull_request] + +jobs: + ubuntu: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + fail-fast: false + max-parallel: 2 + + steps: + - uses: actions/checkout@v2 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install test dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[test] + - name: Store repository status + id: status-before + run: | + echo "::set-output name=BEFORE::$(git status --porcelain -b)" + - name: Run tests + run: | + if [ ${{ matrix.code-cov }} ]; then codecov='--cov=autoPyTorch --cov-report=xml'; fi + python -m pytest -n 2 --timeout=600 --timeout-method=thread --dist load test -sv $codecov + - name: Check for files left behind by test + if: ${{ always() }} + run: | + before="${{ steps.status-before.outputs.BEFORE }}" + after="$(git status --porcelain -b)" + if [[ "$before" != "$after" ]]; then + echo "git status from before: $before" + echo "git status from after: $after" + echo "Not all generated files have been deleted!" 
+ exit 1 + fi + - name: Upload coverage + if: matrix.code-cov && always() + uses: codecov/codecov-action@v1 + with: + fail_ci_if_error: true + verbose: true diff --git a/.gitignore b/.gitignore index 88339db1b..6709d3188 100644 --- a/.gitignore +++ b/.gitignore @@ -1,51 +1,140 @@ -# Visual Studio -*.vs/* - -# Visual Studio Code -*.vscode/* - -# Python -*__pycache__* -*.pyc -.ipynb_checkpoints* - -# Zipped -*.tar.gz - -# Temp -*tmp_models/ - -#Results -benchmark_results/ -benchmark_results_cluster/ -ns_credentials*/ -configs.json -results.json -outputs/ -jobs.txt -.pylintrc -*worker_logs* - -# Build -*build/ -*autoPyTorch.egg-info -*.simg -.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints -# Meta GPU -*meta_logs/ -runs.log -runs.log.lock -logs* +# IPython +profile_default/ +ipython_config.py -# ensemble data -predictions_for_ensemble.npy -test_predictions_for_ensemble.npy -catboost_info +# pyenv +.python-version -# testing -tests.ipynb +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock -# venv +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Idea workspace and task +**/.idea/workspace.xml +**/.idea/tasks.xml + +# Dask +dask-worker-space/ + +# Test output +tmp/ +.tmp_evaluation diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..a8229b218 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,23 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.761 + hooks: + - id: mypy + args: [--show-error-codes] + name: mypy AutoPyTorch + files: autoPyTorch/.* + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.3 + hooks: + - id: flake8 + name: flake8 AutoPyTorch + files: autoPyTorch/.* + additional_dependencies: + - flake8-print==3.1.4 + - flake8-import-order + - id: flake8 + name: flake8 tests + files: test/.* + additional_dependencies: + - flake8-print==3.1.4 + - flake8-import-order diff --git a/LICENSE b/LICENSE index 58b3499b2..261eeb9e9 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [2019] The Contributors + Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/MANIFEST.in b/MANIFEST.in old mode 100644 new mode 100755 index ab30e9ace..d42ab8a2b --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,10 @@ -include *.txt +include requirements.txt +include autoPyTorch/utils/logging.yaml +include autoPyTorch/configs/default_pipeline_options.json +include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/catboost.json +include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/rotation_forest.json +include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/random_forest.json +include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/knn.json +include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/svm.json +include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/extra_trees.json +include autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/lgb.json diff --git a/README.md b/README.md old mode 100644 new mode 100755 index dc10911bf..3d61245ee --- a/README.md +++ b/README.md @@ -2,136 +2,57 @@ Copyright (C) 2019 [AutoML Group Freiburg](http://www.automl.org/) -This a very early pre-alpha version of our upcoming Auto-PyTorch. -So far, Auto-PyTorch supports featurized data (classification, regression) and image data (classification). +This is an alpha version of Auto-PyTorch. +So far, Auto-PyTorch supports tabular data (classification, regression), image data (classification) and time-series data (TODO). -The newest features in Auto-PyTorch for tabular data are described in the paper ["Auto-PyTorch Tabular: Multi-Fidelity MetaLearning for Efficient and Robust AutoDL"](https://arxiv.org/abs/2006.13799).
## Installation -Clone repository +### Pip +```sh +$ pip install autoPyTorch +``` +### Manually ```sh $ cd install/path $ git clone https://github.com/automl/Auto-PyTorch.git $ cd Auto-PyTorch +$ cat requirements.txt | xargs -n 1 -L 1 pip install +$ python setup.py install ``` -If you want to contribute to this repository switch to our current develop branch -```sh -$ git checkout develop -``` -Install pytorch: -https://pytorch.org/ +## Contributing -Install Auto-PyTorch: +If you want to contribute to Auto-PyTorch, clone the repository and check out our current development branch ```sh -$ cat requirements.txt | xargs -n 1 -L 1 pip install -$ python setup.py install +$ git checkout development ``` ## Examples -Code for the [paper](https://arxiv.org/abs/2006.13799) is available under `examples/ensemble`. - For a detailed tutorial, please refer to the jupyter notebook in https://github.com/automl/Auto-PyTorch/tree/master/examples/basics. In a nutshell: ```py -from autoPyTorch import AutoNetClassification - -# data and metric imports -import sklearn.model_selection -import sklearn.datasets -import sklearn.metrics -X, y = sklearn.datasets.load_digits(return_X_y=True) -X_train, X_test, y_train, y_test = \ - sklearn.model_selection.train_test_split(X, y, random_state=1) - -# running Auto-PyTorch -autoPyTorch = AutoNetClassification("tiny_cs", # config preset - log_level='info', - max_runtime=300, - min_budget=30, - max_budget=90) - -autoPyTorch.fit(X_train, y_train, validation_split=0.3) -y_pred = autoPyTorch.predict(X_test) - -print("Accuracy score", sklearn.metrics.accuracy_score(y_test, y_pred)) +from autoPyTorch import TODO ``` -More examples with datasets: +For more examples, check out `examples/`. -```sh -$ cd examples/ - -``` ## Configuration -How to configure Auto-PyTorch for your needs: +### Pipeline configuration -```py - -# Print all possible configuration options. -AutoNetClassification().print_help() - -# You can use the constructor to configure Auto-PyTorch. -autoPyTorch = AutoNetClassification(log_level='info', max_runtime=300, min_budget=30, max_budget=90) - -# You can overwrite this configuration in each fit call. -autoPyTorch.fit(X_train, y_train, log_level='debug', max_runtime=900, min_budget=50, max_budget=150) - -# You can use presets to configure the config space. -# Available presets: full_cs, medium_cs (default), tiny_cs. -# These are defined in autoPyTorch/core/presets. -# tiny_cs is recommended if you want fast results with few resources. -# full_cs is recommended if you have many resources and a very high search budget. -autoPyTorch = AutoNetClassification("full_cs") - -# Enable or disable components using the Auto-PyTorch config: -autoPyTorch = AutoNetClassification(networks=["resnet", "shapedresnet", "mlpnet", "shapedmlpnet"]) - -# You can take a look at the search space. -# Each hyperparameter belongs to a node in Auto-PyTorch's ML Pipeline. -# The names of the hyperparameters are prefixed with the name of the node: NodeName:hyperparameter_name. -# If a hyperparameter belongs to a component: NodeName:component_name:hyperparameter_name. -# Call with the same arguments as fit.
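Since the "In a nutshell" snippet above is still a `TODO` placeholder, the following is a minimal, hypothetical sketch of how the new API added in this patch (`TabularClassificationTask`, see `autoPyTorch/api/tabular_classification.py` further below) could be used end to end. The `TabularDataset` constructor arguments and the `'accuracy'` metric name are assumptions, as neither is spelled out in this diff:

```py
import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.tabular_dataset import TabularDataset

# A small tabular classification problem for illustration
X, y = sklearn.datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=1)

# Assumption: TabularDataset can be built directly from numpy arrays
dataset = TabularDataset(X=X_train, Y=y_train, X_test=X_test, Y_test=y_test)

api = TabularClassificationTask(seed=1)
# search() optimises pipeline configurations with SMAC and builds an ensemble
api.search(dataset=dataset, optimize_metric='accuracy',
           total_walltime_limit=300, func_eval_time_limit=60)

y_pred = api.predict(X_test)
print(api.score(y_pred, y_test))
```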
-autoPyTorch.get_hyperparameter_search_space(X_train, y_train, validation_split=0.3) - -# You can configure the search space of every hyperparameter of every component: -from autoPyTorch import HyperparameterSearchSpaceUpdates -search_space_updates = HyperparameterSearchSpaceUpdates() - -search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedresnet:activation", - value_range=["relu", "sigmoid"]) -search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedresnet:blocks_per_group", - value_range=[2,5], - log=False) -autoPyTorch = AutoNetClassification(hyperparameter_search_space_updates=search_space_updates) -``` - -Enable ensemble building (for featurized data): - -```py -from autoPyTorch import AutoNetEnsemble -autoPyTorchEnsemble = AutoNetEnsemble(AutoNetClassification, "tiny_cs", max_runtime=300, min_budget=30, max_budget=90) +### Search space -``` - -Disable pynisher if you experience issues when using cuda: - -```py -autoPyTorch = AutoNetClassification("tiny_cs", log_level='info', max_runtime=300, min_budget=30, max_budget=90, cuda=True, use_pynisher=False) +### Fitting single configurations -``` ## License @@ -147,7 +68,7 @@ along with this program (see LICENSE file). ## Reference -```bibtex +``` @incollection{mendoza-automlbook18a, author = {Hector Mendoza and Aaron Klein and Matthias Feurer and Jost Tobias Springenberg and Matthias Urban and Michael Burkart and Max Dippel and Marius Lindauer and Frank Hutter}, title = {Towards Automatically-Tuned Deep Neural Networks}, diff --git a/autoPyTorch/__init__.py b/autoPyTorch/__init__.py index 3e5048fdc..e69de29bb 100644 --- a/autoPyTorch/__init__.py +++ b/autoPyTorch/__init__.py @@ -1,8 +0,0 @@ -import sys, os -hpbandster = os.path.abspath(os.path.join(__file__, '..', '..', 'submodules', 'HpBandSter')) -sys.path.append(hpbandster) - -from autoPyTorch.core.autonet_classes import AutoNetClassification, AutoNetMultilabel, AutoNetRegression, AutoNetImageClassification, AutoNetImageClassificationMultipleDatasets -from autoPyTorch.data_management.data_manager import DataManager -from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates -from autoPyTorch.core.ensemble import AutoNetEnsemble diff --git a/autoPyTorch/components/__init__.py b/autoPyTorch/api/__init__.py similarity index 100% rename from autoPyTorch/components/__init__.py rename to autoPyTorch/api/__init__.py diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py new file mode 100644 index 000000000..de4d49ac0 --- /dev/null +++ b/autoPyTorch/api/base_task.py @@ -0,0 +1,1117 @@ +import copy +import json +import logging.handlers +import math +import multiprocessing +import os +import sys +import tempfile +import time +import typing +import unittest.mock +import warnings +from abc import abstractmethod +from typing import Any, Callable, Dict, List, Optional, Union, cast + +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace + +import dask + +import joblib + +import numpy as np + +import pandas as pd + +from smac.runhistory.runhistory import RunHistory +from smac.stats.stats import Stats +from smac.tae import StatusType + +from autoPyTorch.constants import ( + REGRESSION_TASKS, + STRING_TO_OUTPUT_TYPES, + STRING_TO_TASK_TYPES, +) +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.resampling_strategy import CrossValTypes, HoldoutValTypes +from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager +from 
autoPyTorch.ensemble.ensemble_selection import EnsembleSelection +from autoPyTorch.ensemble.singlebest_ensemble import SingleBest +from autoPyTorch.evaluation.abstract_evaluator import fit_and_suppress_warnings +from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash +from autoPyTorch.optimizer.smbo import AutoMLSMBO +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models import get_available_classifiers +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score, get_metrics +from autoPyTorch.utils.backend import Backend, create +from autoPyTorch.utils.common import FitRequirement, replace_string_bool_to_bool +from autoPyTorch.utils.logging_ import ( + PicklableClientLogger, + get_named_client_logger, + setup_logger, + start_log_server, +) +from autoPyTorch.utils.pipeline import get_configuration_space, get_dataset_requirements +from autoPyTorch.utils.stopwatch import StopWatch + + +def _pipeline_predict(pipeline: BasePipeline, + X: Union[np.ndarray, pd.DataFrame], + batch_size: int, + logger: PicklableClientLogger, + task: int) -> np.ndarray: + @typing.no_type_check + def send_warnings_to_log( + message, category, filename, lineno, file=None, line=None): + logger.debug('%s:%s: %s:%s' % (filename, lineno, category.__name__, message)) + return + + X_ = X.copy() + with warnings.catch_warnings(): + warnings.showwarning = send_warnings_to_log + if task in REGRESSION_TASKS: + prediction = pipeline.predict(X_, batch_size=batch_size) + else: + # Voting classifier predict proba does not support batch size + prediction = pipeline.predict_proba(X_) + # Check that all probability values lie between 0 and 1. + if not ((prediction >= 0).all() and (prediction <= 1).all()): + np.set_printoptions(threshold=sys.maxsize) + raise ValueError("For {}, prediction probability not within [0, 1]: {}/{}!".format( + pipeline, + prediction, + np.sum(prediction, axis=1) + )) + + if len(prediction.shape) < 1 or len(X_.shape) < 1 or \ + X_.shape[0] < 1 or prediction.shape[0] != X_.shape[0]: + logger.warning( + "Prediction shape for model %s is %s while X_.shape is %s", + pipeline, str(prediction.shape), str(X_.shape) + ) + return prediction + + +class BaseTask: + """ + Base class for the tasks that serve as API to the pipelines. + Args: + seed (int), (default=1): seed to be used for reproducibility. + n_jobs (int), (default=1): number of consecutive processes to spawn. + logging_config (Optional[Dict]): specifies configuration + for logging, if None, it is loaded from the logging.yaml + ensemble_size (int), (default=50): Number of models added to the ensemble built by + Ensemble selection from libraries of models. + Models are drawn with replacement. + ensemble_nbest (int), (default=50): only consider the ensemble_nbest + models to build the ensemble + max_models_on_disc (int), (default=50): maximum number of models saved to disc. + Also, controls the size of the ensemble as any additional models will be deleted. + Must be greater than or equal to 1. + temporary_directory (str): folder to store configuration output and log file + output_directory (str): folder to store predictions for optional test set + delete_tmp_folder_after_terminate (bool): determines whether to delete the temporary directory, + when finished + include_components (Optional[Dict]): If None, all possible components are used. 
+ Otherwise specifies set of components to use. + exclude_components (Optional[Dict]): If None, all possible components are used. + Otherwise specifies set of components not to use. Incompatible with include + components + """ + + def __init__( + self, + seed: int = 1, + n_jobs: int = 1, + logging_config: Optional[Dict] = None, + ensemble_size: int = 50, + ensemble_nbest: int = 50, + max_models_on_disc: int = 50, + temporary_directory: Optional[str] = None, + output_directory: Optional[str] = None, + delete_tmp_folder_after_terminate: bool = True, + delete_output_folder_after_terminate: bool = True, + include_components: Optional[Dict] = None, + exclude_components: Optional[Dict] = None, + backend: Optional[Backend] = None, + ) -> None: + self.seed = seed + self.n_jobs = n_jobs + self.ensemble_size = ensemble_size + self.ensemble_nbest = ensemble_nbest + self.max_models_on_disc = max_models_on_disc + self.logging_config: Optional[Dict] = logging_config + self.include_components: Optional[Dict] = include_components + self.exclude_components: Optional[Dict] = exclude_components + self._temporary_directory = temporary_directory + self._output_directory = output_directory + if backend is not None: + self._backend = backend + else: + self._backend = create( + temporary_directory=self._temporary_directory, + output_directory=self._output_directory, + delete_tmp_folder_after_terminate=delete_tmp_folder_after_terminate, + delete_output_folder_after_terminate=delete_output_folder_after_terminate, + ) + self._stopwatch = StopWatch() + + self.pipeline_options = replace_string_bool_to_bool(json.load(open( + os.path.join(os.path.dirname(__file__), '../configs/default_pipeline_options.json')))) + + self.search_space: Optional[ConfigurationSpace] = None + self._dataset_requirements: Optional[List[FitRequirement]] = None + self.task_type: Optional[str] = None + self._metric: Optional[autoPyTorchMetric] = None + self._logger: Optional[PicklableClientLogger] = None + self.run_history: Optional[RunHistory] = None + self.trajectory: Optional[List] = None + self.dataset_name: Optional[str] = None + self.cv_models_: Dict = {} + + # By default try to use the TCP logging port or get a new port + self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT + + # Store the resampling strategy from the dataset, to load models as needed + self.resampling_strategy = None # type: Optional[Union[CrossValTypes, HoldoutValTypes]] + + self.stop_logging_server = None # type: Optional[multiprocessing.synchronize.Event] + + @abstractmethod + def _get_required_dataset_properties(self, dataset: BaseDataset) -> Dict[str, Any]: + """ + given a pipeline type, this function returns the + dataset properties required by the dataset object + """ + raise NotImplementedError + + @abstractmethod + def build_pipeline(self, dataset_properties: Dict[str, Any]) -> BasePipeline: + """ + Build pipeline according to current task + and for the passed dataset properties + Args: + dataset_properties (Dict[str,Any]): + + Returns: + + """ + raise NotImplementedError + + def set_pipeline_config( + self, + **pipeline_config_kwargs: Any) -> None: + """ + Check whether arguments are valid and + then sets them to the current pipeline + configuration. 
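A short sketch of how the constructor options documented above and this method can be combined. `BaseTask` is abstract, so the tabular subclass added in this patch is used for illustration; all values are hypothetical, and the valid pipeline-config keys are the ones listed in the docstring below:

```py
from autoPyTorch.api.tabular_classification import TabularClassificationTask

api = TabularClassificationTask(
    seed=1,
    n_jobs=1,
    ensemble_size=50,        # models drawn (with replacement) by ensemble selection
    ensemble_nbest=50,       # candidates considered while building the ensemble
    temporary_directory='./tmp_autopytorch',   # hypothetical path
    delete_tmp_folder_after_terminate=False,
)

# Known keys update self.pipeline_options; unknown keys raise a ValueError
api.set_pipeline_config(device='cpu', epochs=20, torch_num_threads=2)
print(api.get_pipeline_options())
```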
+ Args: + **pipeline_config_kwargs: Valid config options include "job_id", + "device", "budget_type", "epochs", "runtime", "torch_num_threads", + "early_stopping", "use_tensorboard_logger", "use_pynisher", + "metrics_during_training" + + Returns: + None + """ + unknown_keys = [] + for option, value in pipeline_config_kwargs.items(): + if option in self.pipeline_options.keys(): + pass + else: + unknown_keys.append(option) + + if len(unknown_keys) > 0: + raise ValueError("Invalid configuration arguments given {}," + " expected arguments to be in {}". + format(unknown_keys, self.pipeline_options.keys())) + + self.pipeline_options.update(pipeline_config_kwargs) + + def get_pipeline_options(self) -> dict: + """ + Returns the current pipeline configuration. + """ + return self.pipeline_options + + # def set_search_space(self, search_space: ConfigurationSpace) -> None: + # """ + # Update the search space. + # """ + # raise NotImplementedError + # + def get_search_space(self, dataset: BaseDataset = None) -> ConfigurationSpace: + """ + Returns the current search space as ConfigurationSpace object. + """ + if self.search_space is not None: + return self.search_space + elif dataset is not None: + dataset_requirements = get_dataset_requirements( + info=self._get_required_dataset_properties(dataset)) + return get_configuration_space(info=dataset.get_dataset_properties(dataset_requirements), + include=self.include_components, + exclude=self.exclude_components) + raise Exception("No search space initialised and no dataset passed. " + "Can't create default search space without the dataset") + + def _get_logger(self, name: str) -> PicklableClientLogger: + """ + Instantiates the logger used throughout the experiment + Args: + name (str): name of the log file, + usually the dataset name + + Returns: + PicklableClientLogger + """ + logger_name = 'AutoPyTorch:%s:%d' % (name, self.seed) + + # Setup the configuration for the logger + # This is gonna be honored by the server + # Which is created below + setup_logger( + filename='%s.log' % str(logger_name), + logging_config=self.logging_config, + output_dir=self._backend.temporary_directory, + ) + + # As Auto-sklearn works with distributed process, + # we implement a logger server that can receive tcp + # pickled messages. 
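A stripped-down, standard-library-only illustration of the client side of this pattern: each worker attaches a `SocketHandler`, which pickles every `LogRecord` and ships it to the server owned by the main process. The autoPyTorch helpers `get_named_client_logger`/`start_log_server` wrap the same mechanism; the snippet below is only a sketch of the underlying idea, not the project's API:

```py
import logging
import logging.handlers

# Worker-side logger: records are pickled and sent over TCP to whatever
# server is listening on the default logging port of the main process.
logger = logging.getLogger('AutoPyTorch:worker-example')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.handlers.SocketHandler(
    'localhost', logging.handlers.DEFAULT_TCP_LOGGING_PORT))

logger.info('emitted in a worker process, handled centrally by the log server')
```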
They are unpickled and processed locally + # under the above logging configuration setting + # We need to specify the logger_name so that received records + # are treated under the logger_name ROOT logger setting + context = multiprocessing.get_context('spawn') + self.stop_logging_server = context.Event() + port = context.Value('l') # be safe by using a long + port.value = -1 + + # "BaseContext" has no attribute "Process" motivates to ignore the attr check + self.logging_server = context.Process( # type: ignore [attr-defined] + target=start_log_server, + kwargs=dict( + host='localhost', + logname=logger_name, + event=self.stop_logging_server, + port=port, + filename='%s.log' % str(logger_name), + logging_config=self.logging_config, + output_dir=self._backend.temporary_directory, + ), + ) + + self.logging_server.start() + + while True: + with port.get_lock(): + if port.value == -1: + time.sleep(0.01) + else: + break + + self._logger_port = int(port.value) + + return get_named_client_logger( + name=logger_name, + host='localhost', + port=self._logger_port, + ) + + def _clean_logger(self) -> None: + """ + cleans the logging server created + Returns: + + """ + if not hasattr(self, 'stop_logging_server') or self.stop_logging_server is None: + return + + # Clean up the logger + if self.logging_server.is_alive(): + self.stop_logging_server.set() + + # We try to join the process, after we sent + # the terminate event. Then we try a join to + # nicely join the event. In case something + # bad happens with nicely trying to kill the + # process, we execute a terminate to kill the + # process. + self.logging_server.join(timeout=5) + self.logging_server.terminate() + del self.stop_logging_server + + def _create_dask_client(self) -> None: + """ + creates the dask client that is used to parallelize + the training of pipelines + Returns: + None + """ + self._is_dask_client_internally_created = True + dask.config.set({'distributed.worker.daemon': False}) + self._dask_client = dask.distributed.Client( + dask.distributed.LocalCluster( + n_workers=self.n_jobs, + processes=True, + threads_per_worker=1, + # We use the temporal directory to save the + # dask workers, because deleting workers + # more time than deleting backend directories + # This prevent an error saying that the worker + # file was deleted, so the client could not close + # the worker properly + local_directory=tempfile.gettempdir(), + # Memory is handled by the pynisher, not by the dask worker/nanny + memory_limit=0, + ), + # Heartbeat every 10s + heartbeat_interval=10000, + ) + + def _close_dask_client(self) -> None: + """ + Closes the created dask client + Returns: + None + """ + if ( + hasattr(self, '_is_dask_client_internally_created') + and self._is_dask_client_internally_created + and self._dask_client + ): + self._dask_client.shutdown() + self._dask_client.close() + del self._dask_client + self._dask_client = None + self._is_dask_client_internally_created = False + del self._is_dask_client_internally_created + + def _load_models(self, resampling_strategy: Optional[Union[CrossValTypes, HoldoutValTypes]] + ) -> bool: + + """ + Loads the models saved in the temporary directory + during the smac run and the final ensemble created + Args: + resampling_strategy (Union[CrossValTypes, HoldoutValTypes]): resampling strategy used to split the data + and to validate the performance of a candidate pipeline + + Returns: + None + """ + if resampling_strategy is None: + raise ValueError("Resampling strategy is needed to determine what models to load") 
+ self.ensemble_ = self._backend.load_ensemble(self.seed) + + # If no ensemble is loaded, try to get the best performing model + if not self.ensemble_: + self.ensemble_ = self._load_best_individual_model() + + if self.ensemble_: + identifiers = self.ensemble_.get_selected_model_identifiers() + self.models_ = self._backend.load_models_by_identifiers(identifiers) + if isinstance(resampling_strategy, CrossValTypes): + self.cv_models_ = self._backend.load_cv_models_by_identifiers(identifiers) + + if isinstance(resampling_strategy, CrossValTypes): + if len(self.cv_models_) == 0: + raise ValueError('No models fitted!') + + elif 'pipeline' not in self._disable_file_output: + model_names = self._backend.list_all_models(self.seed) + + if len(model_names) == 0: + raise ValueError('No models fitted!') + + self.models_ = {} + + else: + self.models_ = {} + + return True + + def _load_best_individual_model(self) -> SingleBest: + """ + In case of failure during ensemble building, + this method returns the single best model found + by AutoML. + This is a robust mechanism to be able to predict, + even though no ensemble was found by ensemble builder. + """ + + if self._metric is None: + raise ValueError("Providing a metric to AutoPytorch is required to fit a model. " + "A default metric could not be inferred. Please check the log " + "for error messages." + ) + + # SingleBest contains the best model found by AutoML + ensemble = SingleBest( + metric=self._metric, + seed=self.seed, + run_history=self.run_history, + backend=self._backend, + ) + if self._logger is None: + warnings.warn( + "No valid ensemble was created. Please check the log" + "file for errors. Default to the best individual estimator:{}".format( + ensemble.identifiers_ + ) + ) + else: + self._logger.exception( + "No valid ensemble was created. Please check the log" + "file for errors. Default to the best individual estimator:{}".format( + ensemble.identifiers_ + ) + ) + + return ensemble + + def _do_dummy_prediction(self, num_run: int) -> None: + + assert self._metric is not None + assert self._logger is not None + + self._logger.info("Starting to create dummy predictions.") + + memory_limit = self._memory_limit + if memory_limit is not None: + memory_limit = int(math.ceil(memory_limit)) + + scenario_mock = unittest.mock.Mock() + scenario_mock.wallclock_limit = self._time_for_task + # This stats object is a hack - maybe the SMAC stats object should + # already be generated here! + stats = Stats(scenario_mock) + stats.start_timing() + ta = ExecuteTaFuncWithQueue( + backend=self._backend, + seed=self.seed, + metric=self._metric, + logger=self._logger, + cost_for_crash=get_cost_of_crash(self._metric), + abort_on_first_run_crash=False, + initial_num_run=num_run, + stats=stats, + memory_limit=memory_limit, + disable_file_output=True if len(self._disable_file_output) > 0 else False, + all_supported_metrics=self._all_supported_metrics + ) + + status, cost, runtime, additional_info = ta.run(num_run, cutoff=self._time_for_task) + if status == StatusType.SUCCESS: + self._logger.info("Finished creating dummy predictions.") + else: + if additional_info.get('exitcode') == -6: + self._logger.error( + "Dummy prediction failed with run state %s. " + "The error suggests that the provided memory limits were too tight. Please " + "increase the 'ml_memory_limit' and try again. If this does not solve your " + "problem, please open an issue and paste the additional output. 
" + "Additional output: %s.", + str(status), str(additional_info), + ) + # Fail if dummy prediction fails. + raise ValueError( + "Dummy prediction failed with run state %s. " + "The error suggests that the provided memory limits were too tight. Please " + "increase the 'ml_memory_limit' and try again. If this does not solve your " + "problem, please open an issue and paste the additional output. " + "Additional output: %s." % + (str(status), str(additional_info)), + ) + + else: + self._logger.error( + "Dummy prediction failed with run state %s and additional output: %s.", + str(status), str(additional_info), + ) + # Fail if dummy prediction fails. + raise ValueError( + "Dummy prediction failed with run state %s and additional output: %s." + % (str(status), str(additional_info)) + ) + + def _do_traditional_prediction(self, num_run: int, time_for_traditional: int) -> int: + + assert self._metric is not None + assert self._logger is not None + + self._logger.info("Starting to create dummy predictions.") + + memory_limit = self._memory_limit + if memory_limit is not None: + memory_limit = int(math.ceil(memory_limit)) + available_classifiers = get_available_classifiers() + dask_futures = list() + time_for_traditional_classifier_sec = int(time_for_traditional / len(available_classifiers)) + for n_r, classifier in enumerate(available_classifiers, start=num_run): + start_time = time.time() + scenario_mock = unittest.mock.Mock() + scenario_mock.wallclock_limit = time_for_traditional_classifier_sec + # This stats object is a hack - maybe the SMAC stats object should + # already be generated here! + stats = Stats(scenario_mock) + stats.start_timing() + ta = ExecuteTaFuncWithQueue( + backend=self._backend, + seed=self.seed, + metric=self._metric, + logger=self._logger, + cost_for_crash=get_cost_of_crash(self._metric), + abort_on_first_run_crash=False, + initial_num_run=num_run, + stats=stats, + memory_limit=memory_limit, + disable_file_output=True if len(self._disable_file_output) > 0 else False, + all_supported_metrics=self._all_supported_metrics + ) + dask_futures.append((classifier, self._dask_client.submit(ta.run, config=classifier, + cutoff=time_for_traditional_classifier_sec))) + + # In the case of a serial execution, calling submit halts the run for a resource + # dynamically adjust time in this case + time_for_traditional_classifier_sec -= int(time.time() - start_time) + num_run = n_r + + for (classifier, future) in dask_futures: + status, cost, runtime, additional_info = future.result() + if status == StatusType.SUCCESS: + self._logger.info("Finished creating predictions for {}".format(classifier)) + else: + if additional_info.get('exitcode') == -6: + self._logger.error( + "Traditional prediction for %s failed with run state %s. " + "The error suggests that the provided memory limits were too tight. Please " + "increase the 'ml_memory_limit' and try again. If this does not solve your " + "problem, please open an issue and paste the additional output. 
" + "Additional output: %s.", + classifier, str(status), str(additional_info), + ) + else: + # TODO: add check for timeout, and provide feedback to user to consider increasing the time limit + self._logger.error( + "Traditional prediction for %s failed with run state %s and additional output: %s.", + classifier, str(status), str(additional_info), + ) + return num_run + + def search( + self, + dataset: BaseDataset, + optimize_metric: str, + budget_type: Optional[str] = None, + budget: Optional[float] = None, + total_walltime_limit: int = 100, + func_eval_time_limit: int = 60, + traditional_per_total_budget: float = 0.1, + memory_limit: Optional[int] = 4096, + smac_scenario_args: Optional[Dict[str, Any]] = None, + get_smac_object_callback: Optional[Callable] = None, + all_supported_metrics: bool = True, + precision: int = 32, + disable_file_output: List = [], + load_models: bool = True, + ) -> 'BaseTask': + """ + Search for the best pipeline configuration for the given dataset. + + Fit both optimizes the machine learning models and builds an ensemble out of them. + To disable ensembling, set ensemble_size==0. + using the optimizer. + Args: + dataset (Dataset): + The argument that will provide the dataset splits. It is + a subclass of the base dataset object which can + generate the splits based on different restrictions. + optimize_metric (str): name of the metric that is used to + evaluate a pipeline. + budget_type (Optional[str]): + Type of budget to be used when fitting the pipeline. + Either 'epochs' or 'runtime'. If not provided, uses + the default in the pipeline config ('epochs') + budget (Optional[float]): + Budget to fit a single run of the pipeline. If not + provided, uses the default in the pipeline config + total_walltime_limit (int), (default=100): Time limit + in seconds for the search of appropriate models. + By increasing this value, autopytorch has a higher + chance of finding better models. + func_eval_time_limit (int), (default=60): Time limit + for a single call to the machine learning model. + Model fitting will be terminated if the machine + learning algorithm runs over the time limit. Set + this value high enough so that typical machine + learning algorithms can be fit on the training + data. + traditional_per_total_budget (float), (default=0.1): + Percent of total walltime to be allocated for + running traditional classifiers. + memory_limit (Optional[int]), (default=4096): Memory + limit in MB for the machine learning algorithm. autopytorch + will stop fitting the machine learning algorithm if it tries + to allocate more than memory_limit MB. If None is provided, + no memory limit is set. In case of multi-processing, memory_limit + will be per job. This memory limit also applies to the ensemble + creation process. + smac_scenario_args (Optional[Dict]): Additional arguments inserted + into the scenario of SMAC. See the + [SMAC documentation] (https://automl.github.io/SMAC3/master/options.html?highlight=scenario#scenario) + for a list of available arguments. + get_smac_object_callback (Optional[Callable]): Callback function + to create an object of class + [smac.optimizer.smbo.SMBO](https://automl.github.io/SMAC3/master/apidoc/smac.optimizer.smbo.html). + The function must accept the arguments scenario_dict, + instances, num_params, runhistory, seed and ta. This is + an advanced feature. Use only if you are familiar with + [SMAC](https://automl.github.io/SMAC3/master/index.html). 
+ all_supported_metrics (bool), (default=True): if True, all + metrics supporting current task will be calculated + for each pipeline and results will be available via cv_results + precision (int), (default=32): Numeric precision used when loading + ensemble data. Can be either '16', '32' or '64'. + disable_file_output (Union[bool, List]): + load_models (bool), (default=True): Whether to load the + models after fitting AutoPyTorch. + + Returns: + self + + """ + if self.task_type != dataset.task_type: + raise ValueError("Incompatible dataset entered for current task," + "expected dataset to have task type :{} got " + ":{}".format(self.task_type, dataset.task_type)) + + # Initialise information needed for the experiment + experiment_task_name = 'runSearch' + dataset_requirements = get_dataset_requirements( + info=self._get_required_dataset_properties(dataset)) + self._dataset_requirements = dataset_requirements + dataset_properties = dataset.get_dataset_properties(dataset_requirements) + self._stopwatch.start_task(experiment_task_name) + self.dataset_name = dataset.dataset_name + self.resampling_strategy = dataset.resampling_strategy + self._logger = self._get_logger(self.dataset_name) + self._all_supported_metrics = all_supported_metrics + self._disable_file_output = disable_file_output + self._memory_limit = memory_limit + self._time_for_task = total_walltime_limit + # Save start time to backend + self._backend.save_start_time(str(self.seed)) + + self._backend.save_datamanager(dataset) + + self._metric = get_metrics( + names=[optimize_metric], dataset_properties=dataset_properties)[0] + + self.search_space = self.get_search_space(dataset) + + budget_config: Dict[str, Union[float, str]] = {} + if budget_type is not None and budget is not None: + budget_config['budget_type'] = budget_type + budget_config[budget_type] = budget + elif budget_type is not None or budget is not None: + raise ValueError( + "budget type was not specified in budget_config" + ) + + if self.task_type is None: + raise ValueError("Cannot interpret task type from the dataset") + + self._create_dask_client() + + # ============> Run dummy predictions + num_run = 1 + dummy_task_name = 'runDummy' + self._stopwatch.start_task(dummy_task_name) + self._do_dummy_prediction(num_run) + self._stopwatch.stop_task(dummy_task_name) + + # ============> Run traditional ml + + traditional_task_name = 'runTraditional' + self._stopwatch.start_task(traditional_task_name) + elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name) + time_for_traditional = int(traditional_per_total_budget * max(0, (self._time_for_task - elapsed_time))) + if time_for_traditional <= 0: + if traditional_per_total_budget > 0: + raise ValueError("Not enough time allocated to run traditional algorithms") + elif traditional_per_total_budget != 0: + num_run = self._do_traditional_prediction(num_run=num_run + 1, time_for_traditional=time_for_traditional) + self._stopwatch.stop_task(traditional_task_name) + + # ============> Starting ensemble + elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name) + time_left_for_ensembles = max(0, total_walltime_limit - elapsed_time) + proc_ensemble = None + if time_left_for_ensembles <= 0: + # Fit only raises error when ensemble_size is not zero but + # time_left_for_ensembles is zero. + if self.ensemble_size > 0: + raise ValueError("Not starting ensemble builder because there " + "is no time left. 
Try increasing the value " + "of time_left_for_this_task.") + elif self.ensemble_size <= 0: + self._logger.info("Not starting ensemble builder as ensemble size is 0") + else: + self._logger.info("Starting ensemble") + ensemble_task_name = 'ensemble' + self._stopwatch.start_task(ensemble_task_name) + proc_ensemble = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=time_left_for_ensembles, + backend=copy.deepcopy(self._backend), + dataset_name=dataset.dataset_name, + output_type=STRING_TO_OUTPUT_TYPES[dataset.output_type], + task_type=STRING_TO_TASK_TYPES[self.task_type], + metrics=[self._metric], + opt_metric=optimize_metric, + ensemble_size=self.ensemble_size, + ensemble_nbest=self.ensemble_nbest, + max_models_on_disc=self.max_models_on_disc, + seed=self.seed, + max_iterations=None, + read_at_most=np.inf, + ensemble_memory_limit=self._memory_limit, + random_state=self.seed, + precision=precision, + logger_port=self._logger_port + ) + self._stopwatch.stop_task(ensemble_task_name) + + # ==> Run SMAC + smac_task_name = 'runSMAC' + self._stopwatch.start_task(smac_task_name) + elapsed_time = self._stopwatch.wall_elapsed(experiment_task_name) + time_left_for_smac = max(0, total_walltime_limit - elapsed_time) + + self._logger.info("Starting SMAC with %5.2f sec time left" % time_left_for_smac) + if time_left_for_smac <= 0: + self._logger.warning(" Not starting SMAC because there is no time left") + else: + + _proc_smac = AutoMLSMBO( + config_space=self.search_space, + dataset_name=dataset.dataset_name, + backend=self._backend, + total_walltime_limit=total_walltime_limit, + func_eval_time_limit=func_eval_time_limit, + dask_client=self._dask_client, + memory_limit=self._memory_limit, + n_jobs=self.n_jobs, + watcher=self._stopwatch, + metric=self._metric, + seed=self.seed, + include=self.include_components, + exclude=self.exclude_components, + disable_file_output=self._disable_file_output, + all_supported_metrics=self._all_supported_metrics, + smac_scenario_args=smac_scenario_args, + get_smac_object_callback=get_smac_object_callback, + pipeline_config={**self.pipeline_options, **budget_config}, + ensemble_callback=proc_ensemble, + logger_port=self._logger_port, + start_num_run=num_run + ) + try: + self.run_history, self.trajectory, budget_type = \ + _proc_smac.run_smbo() + trajectory_filename = os.path.join( + self._backend.get_smac_output_directory_for_run(self.seed), + 'trajectory.json') + saveable_trajectory = \ + [list(entry[:2]) + [entry[2].get_dictionary()] + list(entry[3:]) + for entry in self.trajectory] + with open(trajectory_filename, 'w') as fh: + json.dump(saveable_trajectory, fh) + except Exception as e: + self._logger.exception(str(e)) + raise + # Wait until the ensemble process is finished to avoid shutting down + # while the ensemble builder tries to access the data + self._logger.info("Starting Shutdown") + + if proc_ensemble is not None: + self.ensemble_performance_history = list(proc_ensemble.history) + + # save the ensemble performance history file + if len(self.ensemble_performance_history) > 0: + pd.DataFrame(self.ensemble_performance_history).to_json( + os.path.join(self._backend.internals_directory, 'ensemble_history.json')) + + if len(proc_ensemble.futures) > 0: + future = proc_ensemble.futures.pop() + # Now we need to wait for the future to return as it cannot be cancelled while it + # is running: https://stackoverflow.com/a/49203129 + self._logger.info("Ensemble script still running, waiting for it to finish.") + future.result() + 
self._logger.info("Ensemble script finished, continue shutdown.") + + self._logger.info("Closing the dask infrastructure") + self._close_dask_client() + self._logger.info("Finished closing the dask infrastructure") + + if load_models: + self._logger.info("Loading models...") + self._load_models(dataset.resampling_strategy) + self._logger.info("Finished loading models...") + + # Clean up the logger + self._logger.info("Starting to clean up the logger") + self._clean_logger() + + return self + + def refit( + self, + dataset: BaseDataset, + budget_config: Dict[str, Union[int, str]] = {}, + split_id: int = 0 + ) -> "BaseTask": + """ + Refit all models found with fit to new data. + + Necessary when using cross-validation. During training, autoPyTorch + fits each model k times on the dataset, but does not keep any trained + model and can therefore not be used to predict for new data points. + This methods fits all models found during a call to fit on the data + given. This method may also be used together with holdout to avoid + only using 66% of the training data to fit the final model. + Args: + dataset: (Dataset) + The argument that will provide the dataset splits. It can either + be a dictionary with the splits, or the dataset object which can + generate the splits based on different restrictions. + budget_config: (Optional[Dict[str, Union[int, str]]]) + can contain keys from 'budget_type' and the budget + specified using 'epochs' or 'runtime'. + split_id: (int) + split id to fit on. + Returns: + self + """ + + self._logger = self._get_logger(dataset.dataset_name) + + dataset_requirements = get_dataset_requirements( + info=self._get_required_dataset_properties(dataset)) + dataset_properties = dataset.get_dataset_properties(dataset_requirements) + self._backend.save_datamanager(dataset) + + X: Dict[str, Any] = dict({'dataset_properties': dataset_properties, + 'backend': self._backend, + 'X_train': dataset.train_tensors[0], + 'y_train': dataset.train_tensors[1], + 'X_test': dataset.test_tensors[0] if dataset.test_tensors is not None else None, + 'y_test': dataset.test_tensors[1] if dataset.test_tensors is not None else None, + 'train_indices': dataset.splits[split_id][0], + 'val_indices': dataset.splits[split_id][1], + 'split_id': split_id, + 'job_id': 0 + }) + X.update({**self.pipeline_options, **budget_config}) + if self.models_ is None or len(self.models_) == 0 or self.ensemble_ is None: + self._load_models(dataset.resampling_strategy) + + # Refit is not applicable when ensemble_size is set to zero. + if self.ensemble_ is None: + raise ValueError("Refit can only be called if 'ensemble_size != 0'") + + for identifier in self.models_: + model = self.models_[identifier] + # this updates the model inplace, it can then later be used in + # predict method + + # try to fit the model. If it fails, shuffle the data. This + # could alleviate the problem in algorithms that depend on + # the ordering of the data. + fit_and_suppress_warnings(self._logger, model, X, y=None) + + self._clean_logger() + + return self + + def fit(self, + dataset: BaseDataset, + budget_config: Dict[str, Union[int, str]] = {}, + pipeline_config: Optional[Configuration] = None, + split_id: int = 0) -> BasePipeline: + """ + Fit a pipeline on the given task for the budget. + A pipeline configuration can be specified if None, + uses default + Args: + dataset: (Dataset) + The argument that will provide the dataset splits. 
It can either + be a dictionary with the splits, or the dataset object which can + generate the splits based on different restrictions. + budget_config: (Optional[Dict[str, Union[int, str]]]) + can contain keys from 'budget_type' and the budget + specified using 'epochs' or 'runtime'. + split_id: (int) (default=0) + split id to fit on. + pipeline_config: (Optional[Configuration]) + configuration to fit the pipeline with. If None, + uses default + + Returns: + (BasePipeline): fitted pipeline + """ + self._logger = self._get_logger(dataset.dataset_name) + + # get dataset properties + dataset_requirements = get_dataset_requirements( + info=self._get_required_dataset_properties(dataset)) + dataset_properties = dataset.get_dataset_properties(dataset_requirements) + self._backend.save_datamanager(dataset) + + # build pipeline + pipeline = self.build_pipeline(dataset_properties) + if pipeline_config is not None: + pipeline.set_hyperparameters(pipeline_config) + + # initialise fit dictionary + X: Dict[str, Any] = dict({'dataset_properties': dataset_properties, + 'backend': self._backend, + 'X_train': dataset.train_tensors[0], + 'y_train': dataset.train_tensors[1], + 'X_test': dataset.test_tensors[0] if dataset.test_tensors is not None else None, + 'y_test': dataset.test_tensors[1] if dataset.test_tensors is not None else None, + 'train_indices': dataset.splits[split_id][0], + 'val_indices': dataset.splits[split_id][1], + 'split_id': split_id, + 'job_id': 0 + }) + X.update({**self.pipeline_options, **budget_config}) + + fit_and_suppress_warnings(self._logger, pipeline, X, y=None) + + self._clean_logger() + return pipeline + + def predict( + self, + X_test: np.ndarray, + batch_size: Optional[int] = None, + n_jobs: int = 1 + ) -> np.ndarray: + """Generate the estimator predictions. + Generate the predictions based on the given examples from the test set. + Args: + X_test: (np.ndarray) + The test set examples. + Returns: + Array with estimator predictions. + """ + + # Parallelize predictions across models with n_jobs processes. + # Each process computes predictions in chunks of batch_size rows. + if self._logger is None: + self._logger = self._get_logger("Predict-Logger") + + if self.ensemble_ is None and not self._load_models(self.resampling_strategy): + raise ValueError("No ensemble found. Either fit has not yet " + "been called or no ensemble was fitted") + + # Mypy assert + assert self.ensemble_ is not None, "Load models should error out if no ensemble" + self.ensemble_ = cast(Union[SingleBest, EnsembleSelection], self.ensemble_) + + if isinstance(self.resampling_strategy, HoldoutValTypes): + models = self.models_ + elif isinstance(self.resampling_strategy, CrossValTypes): + models = self.cv_models_ + + all_predictions = joblib.Parallel(n_jobs=n_jobs)( + joblib.delayed(_pipeline_predict)( + models[identifier], X_test, batch_size, self._logger, self.task_type + ) + for identifier in self.ensemble_.get_selected_model_identifiers() + ) + + if len(all_predictions) == 0: + raise ValueError('Something went wrong generating the predictions. 
' + 'The ensemble should consist of the following ' + 'models: %s, the following models were loaded: ' + '%s' % (str(list(self.ensemble_.indices_)), + str(list(self.models_)))) + + predictions = self.ensemble_.predict(all_predictions) + + if self.task_type in REGRESSION_TASKS: + # Make sure prediction probabilities + # are within a valid range + # Individual models are checked in _pipeline_predict + if ( + (predictions >= 0).all() and (predictions <= 1).all() + ): + raise ValueError("For ensemble {}, prediction probability not within [0, 1]!".format( + self.ensemble_) + ) + + self._clean_logger() + + return predictions + + def score( + self, + y_pred: np.ndarray, + y_test: Union[np.ndarray, pd.DataFrame] + ) -> Dict[str, float]: + """Calculate the score on the test set. + Calculate the evaluation measure on the test set. + Args: + y_pred: (np.ndarray) + The test predictions + y_test: (np.ndarray) + The test ground truth labels. + Returns: + Dict[str, float]: Value of the evaluation metric calculated on the test set. + """ + if isinstance(y_test, pd.Series): + y_test = y_test.to_numpy(dtype=np.float) + + if self._metric is None: + raise ValueError("No metric found. Either fit/search has not been called yet " + "or AutoPyTorch failed to infer a metric from the dataset ") + if self.task_type is None: + raise ValueError("AutoPytorch failed to infer a task type from the dataset " + "Please check the log file for related errors. ") + return calculate_score(target=y_test, prediction=y_pred, + task_type=STRING_TO_TASK_TYPES[self.task_type], + metrics=[self._metric]) + + def __getstate__(self) -> Dict[str, Any]: + # Cannot serialize a client! + self._dask_client = None + self.logging_server = None # type: ignore [assignment] + self.stop_logging_server = None + return self.__dict__ + + def __del__(self) -> None: + # Clean up the logger + self._clean_logger() + + self._close_dask_client() + + # When a multiprocessing work is done, the + # objects are deleted. We don't want to delete run areas + # until the estimator is deleted + self._backend.context.delete_directories(force=False) + + @typing.no_type_check + def get_incumbent_results( + self + ): + pass + + @typing.no_type_check + def get_incumbent_config( + self + ): + pass diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py new file mode 100644 index 000000000..70eac1c2a --- /dev/null +++ b/autoPyTorch/api/tabular_classification.py @@ -0,0 +1,85 @@ +from typing import Any, Dict, Optional + +from autoPyTorch.api.base_task import BaseTask +from autoPyTorch.constants import ( + TABULAR_CLASSIFICATION, + TASK_TYPES_TO_STRING, +) +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline +from autoPyTorch.utils.backend import Backend + + +class TabularClassificationTask(BaseTask): + """ + Tabular Classification API to the pipelines. + Args: + seed (int): seed to be used for reproducibility. + n_jobs (int), (default=1): number of consecutive processes to spawn. + logging_config (Optional[Dict]): specifies configuration + for logging, if None, it is loaded from the logging.yaml + ensemble_size (int), (default=50): Number of models added to the ensemble built by + Ensemble selection from libraries of models. + Models are drawn with replacement. 
+ ensemble_nbest (int), (default=50): only consider the ensemble_nbest + models to build the ensemble + max_models_on_disc (int), (default=50): maximum number of models saved to disc. + Also, controls the size of the ensemble as any additional models will be deleted. + Must be greater than or equal to 1. + temporary_directory (str): folder to store configuration output and log file + output_directory (str): folder to store predictions for optional test set + delete_tmp_folder_after_terminate (bool): determines whether to delete the temporary directory, + when finished + include_components (Optional[Dict]): If None, all possible components are used. + Otherwise specifies set of components to use. + exclude_components (Optional[Dict]): If None, all possible components are used. + Otherwise specifies set of components not to use. Incompatible with include + components + """ + def __init__( + self, + seed: int = 1, + n_jobs: int = 1, + logging_config: Optional[Dict] = None, + ensemble_size: int = 50, + ensemble_nbest: int = 50, + max_models_on_disc: int = 50, + temporary_directory: Optional[str] = None, + output_directory: Optional[str] = None, + delete_tmp_folder_after_terminate: bool = True, + delete_output_folder_after_terminate: bool = True, + include_components: Optional[Dict] = None, + exclude_components: Optional[Dict] = None, + backend: Optional[Backend] = None, + ): + super().__init__( + seed=seed, + n_jobs=n_jobs, + logging_config=logging_config, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + max_models_on_disc=max_models_on_disc, + temporary_directory=temporary_directory, + output_directory=output_directory, + delete_tmp_folder_after_terminate=delete_tmp_folder_after_terminate, + delete_output_folder_after_terminate=delete_output_folder_after_terminate, + include_components=include_components, + exclude_components=exclude_components, + backend=backend, + ) + self.task_type = TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION] + + def _get_required_dataset_properties(self, dataset: BaseDataset) -> Dict[str, Any]: + if not isinstance(dataset, TabularDataset): + raise ValueError("Dataset is incompatible for the given task,: {}".format( + type(dataset) + )) + return {'task_type': dataset.task_type, + 'output_type': dataset.output_type, + 'issparse': dataset.issparse, + 'numerical_columns': dataset.numerical_columns, + 'categorical_columns': dataset.categorical_columns} + + def build_pipeline(self, dataset_properties: Dict[str, Any]) -> TabularClassificationPipeline: + return TabularClassificationPipeline(dataset_properties=dataset_properties) diff --git a/autoPyTorch/components/baselines/base_baseline.py b/autoPyTorch/components/baselines/base_baseline.py deleted file mode 100644 index 01901a304..000000000 --- a/autoPyTorch/components/baselines/base_baseline.py +++ /dev/null @@ -1,75 +0,0 @@ -import os as os -import json -import numpy as np -import time -import random -import logging -import pickle -from abc import abstractmethod - -from sklearn.model_selection import train_test_split - - -class BaseBaseline(): - - def __init__(self, name): - - self.configure_logging() - - self.name = name - self.config = self.get_config() - - self.categoricals = None - self.all_nan = None - self.encode_dicts = None - self.num_classes = None - - def configure_logging(self): - self.logger = logging.getLogger(__name__) - self.logger.setLevel(logging.INFO) - - ch = logging.StreamHandler() - ch.setLevel(logging.INFO) - self.logger.addHandler(ch) - - def get_config(self): - dirname = 
os.path.dirname(os.path.abspath(__file__)) - config_path = os.path.join(dirname, "baseline_configs", self.name + ".json") - with open(config_path, "r") as f: - config = json.load(f) - for k,v in config.items(): - if v=="True": - config[k] = True - if v=="False": - config[k] = False - return config - - def save(self, model_path, info_path): - info_dict = {"nan_cols": self.all_nan, - "encode_dict": self.encode_dicts, - "categoricals": self.categoricals, - "model_name": self.name, - "num_classes": self.num_classes} - - pickle.dump(info_dict, open(info_path, "wb")) - pickle.dump(self.model, open(model_path, "wb")) - - def load(self, model_path, info_path): - - info = pickle.load(open(info_path, "rb")) - - #self.name = info["model_name"] - self.all_nan = info["nan_cols"] - self.categoricals = info["categoricals"] - self.encode_dicts = info["encode_dict"] - self.num_classes = info["num_classes"] - - self.model = pickle.load(open(model_path, "rb")) - - @abstractmethod - def fit(self, X_train, y_train, X_val, y_val): - pass - - @abstractmethod - def score(self, X_test, y_test): - pass diff --git a/autoPyTorch/components/baselines/baselines.py b/autoPyTorch/components/baselines/baselines.py deleted file mode 100644 index f57ec4dac..000000000 --- a/autoPyTorch/components/baselines/baselines.py +++ /dev/null @@ -1,469 +0,0 @@ -import numpy as np - -import pickle -from sklearn import metrics -from sklearn.svm import SVC -from sklearn.neighbors import KNeighborsClassifier -from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier -from lightgbm import LGBMClassifier -from catboost import Pool, CatBoostClassifier - -from autoPyTorch.components.baselines.rotation_forest import RotationForestClassifier -from autoPyTorch.components.baselines.base_baseline import BaseBaseline - - -def encode_categoricals(X_train, X_val=None, encode_dicts=None): - - if encode_dicts is None: - encode_dicts = [] - got_encoded_dicts = False - else: - got_encoded_dicts = True - - for ind in range(X_train.shape[1]): - if isinstance(X_train[0, ind], str): - uniques = np.unique(X_train[0,:]) - - if got_encoded_dicts: - cat_to_int_dict = encode_dicts[ind] - else: - cat_to_int_dict = {val:ind for ind,val in enumerate(uniques)} - - converted_column_train = [cat_to_int_dict[v] for v in X_train[0,:]] - x_train[0,:] = converted_column - - if X_val is not None: - converted_column_val = [cat_to_int_dict[v] for v in X_val[0,:]] - x_val[0,:] = converted_column_val - - if not got_encoded_dicts: - encode_dicts.append(cat_to_int_dict) - return X_train, X_val, encode_dicts - - -class LGBBaseline(BaseBaseline): - - def __init__(self): - super(LGBBaseline, self).__init__(name="lgb") - - def fit(self, X_train, y_train, X_val, y_val, categoricals=None): - results = dict() - - self.num_classes = len(np.unique(y_train)) - self.config["num_class"] = self.num_classes - - self.all_nan = np.all(np.isnan(X_train), axis=0) - X_train = X_train[:, ~self.all_nan] - X_val = X_val[:, ~self.all_nan] - - X_train = np.nan_to_num(X_train) - X_val = np.nan_to_num(X_val) - - early_stopping = 150 if X_train.shape[0]>10000 else max(round(150*10000/X_train.shape[0]), 10) - self.config["early_stopping_rounds"] = early_stopping - - categoricals = [ind for ind in range(X_train.shape[1]) if isinstance(X_train[0,ind], str)] - X_train, X_val, self.encode_dicts = encode_categoricals(X_train, X_val, encode_dicts=None) - - self.model = LGBMClassifier(**self.config) - self.model.fit(X_train, y_train, eval_set=[(X_val, y_val)]) - - pred_train = 
self.model.predict_proba(X_train) - pred_val = self.model.predict_proba(X_val) - - # This fixes a bug - if self.num_classes==2: - pred_train = pred_train.transpose()[0:len(y_train)] - pred_val = pred_val.transpose()[0:len(y_val)] - - results["val_preds"] = pred_val.tolist() - results["labels"] = y_val.tolist() - - pred_train = np.argmax(pred_train, axis=1) - pred_val = np.argmax(pred_val, axis=1) - - results["train_acc"] = metrics.accuracy_score(y_train, pred_train) - results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train) - results["val_acc"] = metrics.accuracy_score(y_val, pred_val) - results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val) - - return results - - def score(self, X_test, y_test): - results = dict() - - y_pred = self.predict(X_test) - - results["test_acc"] = metrics.accuracy_score(y_test, y_pred) - results["test_balanced_acc"] = metrics.balanced_accuracy_score(y_test, y_pred) - - return results - - def predict(self, X_test, predict_proba=False): - X_test = X_test[:, ~self.all_nan] - X_test = np.nan_to_num(X_test) - X_test, _, _ = encode_categoricals(X_test, encode_dicts=self.encode_dicts) - - if predict_proba: - y_pred_proba = self.model.predict_proba(X_test) - if self.num_classes==2: - y_pred_proba = y_pred_proba.transpose()[0:len(X_test)] - return y_pred_proba - - y_pred = self.model.predict(X_test) - if self.num_classes==2: - y_pred = y_pred.transpose()[0:len(X_test)] - y_pred = np.argmax(y_pred, axis=1) - return y_pred - - -class CatboostBaseline(BaseBaseline): - - def __init__(self): - super(CatboostBaseline, self).__init__(name="catboost") - - def fit(self, X_train, y_train, X_val, y_val, categoricals=None): - results = dict() - - self.all_nan = np.all(np.isnan(X_train), axis=0) - X_train = X_train[:, ~self.all_nan] - X_val = X_val[:, ~self.all_nan] - - X_train = np.nan_to_num(X_train) - X_val = np.nan_to_num(X_val) - - categoricals = [ind for ind in range(X_train.shape[1]) if isinstance(X_train[0,ind], str)] - - early_stopping = 150 if X_train.shape[0]>10000 else max(round(150*10000/X_train.shape[0]), 10) - - X_train_pooled = Pool(data=X_train, label=y_train, cat_features=categoricals) - X_val_pooled = Pool(data=X_val, label=y_val, cat_features=categoricals) - - self.model = CatBoostClassifier(**self.config) - self.model.fit(X_train_pooled, eval_set=X_val_pooled, use_best_model=True, early_stopping_rounds=early_stopping) - - pred_train = self.model.predict_proba(X_train) - pred_val = self.model.predict_proba(X_val) - - results["val_preds"] = pred_val.tolist() - results["labels"] = y_val.tolist() - - try: - pred_train = np.argmax(pred_train, axis=1) - pred_val = np.argmax(pred_val, axis=1) - except: - print("==> No probabilities provided in predictions") - - results["train_acc"] = metrics.accuracy_score(y_train, pred_train) - results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train) - results["val_acc"] = metrics.accuracy_score(y_val, pred_val) - results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val) - - return results - - def score(self, X_test, y_test): - results = dict() - - y_pred = self.predict(X_test) - - results["test_acc"] = metrics.accuracy_score(y_test, y_pred) - results["test_balanced_acc"] = metrics.balanced_accuracy_score(y_test, y_pred) - - return results - - def predict(self, X_test, predict_proba=False): - X_test = X_test[:, ~self.all_nan] - X_test = np.nan_to_num(X_test) - if predict_proba: - return self.model.predict_proba(X_test) - y_pred = 
self.model.predict(X_test) - return y_pred - - -class RFBaseline(BaseBaseline): - - def __init__(self): - super(RFBaseline, self).__init__(name="random_forest") - - def fit(self, X_train, y_train, X_val, y_val): - results = dict() - - self.all_nan = np.all(np.isnan(X_train), axis=0) - X_train = X_train[:, ~self.all_nan] - X_val = X_val[:, ~self.all_nan] - - X_train = np.nan_to_num(X_train) - X_val = np.nan_to_num(X_val) - - self.config["warm_start"] = False - self.num_classes = len(np.unique(y_train)) - if self.num_classes>2: - print("==> Using warmstarting for multiclass") - final_n_estimators = self.config["n_estimators"] - self.config["n_estimators"] = 8 - self.config["warm_start"] = True - - self.model = RandomForestClassifier(**self.config) - - self.model.fit(X_train, y_train) - if self.config["warm_start"]: - self.model.n_estimators = final_n_estimators - self.model.fit(X_train, y_train) - - pred_val_probas = self.model.predict_proba(X_val) - - pred_train = self.model.predict(X_train) - pred_val = self.model.predict(X_val) - - results["train_acc"] = metrics.accuracy_score(y_train, pred_train) - results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train) - results["val_acc"] = metrics.accuracy_score(y_val, pred_val) - results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val) - results["val_preds"] = pred_val_probas.tolist() - results["labels"] = y_val.tolist() - - return results - - def score(self, X_test, y_test): - results = dict() - - y_pred = self.predict(X_test) - - results["test_acc"] = metrics.accuracy_score(y_test, y_pred) - results["test_balanced_acc"] = metrics.balanced_accuracy_score(y_test, y_pred) - - return results - - def predict(self, X_test, predict_proba=False): - X_test = X_test[:, ~self.all_nan] - X_test = np.nan_to_num(X_test) - if predict_proba: - return self.model.predict_proba(X_test) - y_pred = self.model.predict(X_test) - return y_pred - - -class ExtraTreesBaseline(BaseBaseline): - - def __init__(self): - super(ExtraTreesBaseline, self).__init__(name="extra_trees") - - def fit(self, X_train, y_train, X_val, y_val): - results = dict() - - self.all_nan = np.all(np.isnan(X_train), axis=0) - X_train = X_train[:, ~self.all_nan] - X_val = X_val[:, ~self.all_nan] - - X_train = np.nan_to_num(X_train) - X_val = np.nan_to_num(X_val) - - self.config["warm_start"] = False - self.num_classes = len(np.unique(y_train)) - if self.num_classes>2: - print("==> Using warmstarting for multiclass") - final_n_estimators = self.config["n_estimators"] - self.config["n_estimators"] = 8 - self.config["warm_start"] = True - - self.model = ExtraTreesClassifier(**self.config) - - self.model.fit(X_train, y_train) - if self.config["warm_start"]: - self.model.n_estimators = final_n_estimators - self.model.fit(X_train, y_train) - - - pred_val_probas = self.model.predict_proba(X_val) - - pred_train = self.model.predict(X_train) - pred_val = self.model.predict(X_val) - - results["train_acc"] = metrics.accuracy_score(y_train, pred_train) - results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train) - results["val_acc"] = metrics.accuracy_score(y_val, pred_val) - results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val) - results["val_preds"] = pred_val_probas.tolist() - results["labels"] = y_val.tolist() - - return results - - def score(self, X_test, y_test): - results = dict() - - y_pred = self.predict(X_test) - - results["test_acc"] = metrics.accuracy_score(y_test, y_pred) - results["test_balanced_acc"] 
= metrics.balanced_accuracy_score(y_test, y_pred) - - return results - - def predict(self, X_test, predict_proba=False): - X_test = X_test[:, ~self.all_nan] - X_test = np.nan_to_num(X_test) - if predict_proba: - return self.model.predict_proba(X_test) - y_pred = self.model.predict(X_test) - return y_pred - - -class RotationForestBaseline(BaseBaseline): - - def __init__(self): - super(RotationForestBaseline, self).__init__(name="rotation_forest") - - def fit(self, X_train, y_train, X_val, y_val): - results = dict() - - self.all_nan = np.all(np.isnan(X_train), axis=0) - X_train = X_train[:, ~self.all_nan] - X_val = X_val[:, ~self.all_nan] - - X_train = np.nan_to_num(X_train) - X_val = np.nan_to_num(X_val) - - self.config["warm_start"] = False - self.num_classes = len(np.unique(y_train)) - - self.model = RotationForestClassifier(**self.config) - - self.model.fit(X_train, y_train) - - pred_val_probas = self.model.predict_proba(X_val) - - pred_train = self.model.predict(X_train) - pred_val = self.model.predict(X_val) - - results["train_acc"] = metrics.accuracy_score(y_train, pred_train) - results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train) - results["val_acc"] = metrics.accuracy_score(y_val, pred_val) - results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val) - results["val_preds"] = pred_val_probas.tolist() - results["labels"] = y_val.tolist() - - return results - - def score(self, X_test, y_test): - results = dict() - - y_pred = self.predict(X_test) - - results["test_acc"] = metrics.accuracy_score(y_test, y_pred) - results["test_balanced_acc"] = metrics.balanced_accuracy_score(y_test, y_pred) - - return results - - def predict(self, X_test, predict_proba=False): - X_test = X_test[:, ~self.all_nan] - X_test = np.nan_to_num(X_test) - if predict_proba: - return self.model.predict_proba(X_test) - y_pred = self.model.predict(X_test) - return y_pred - - -class KNNBaseline(BaseBaseline): - - def __init__(self): - super(KNNBaseline, self).__init__(name="knn") - - def fit(self, X_train, y_train, X_val, y_val): - results = dict() - - self.all_nan = np.all(np.isnan(X_train), axis=0) - X_train = X_train[:, ~self.all_nan] - X_val = X_val[:, ~self.all_nan] - - self.categoricals = np.array([isinstance(X_train[0,ind], str) for ind in range(X_train.shape[1])]) - X_train = X_train[:, ~self.categoricals] - X_val = X_val[:, ~self.categoricals] - - X_train = np.nan_to_num(X_train) - X_val = np.nan_to_num(X_val) - - self.num_classes = len(np.unique(y_train)) - - self.model = KNeighborsClassifier(**self.config) - self.model.fit(X_train, y_train) - - pred_val_probas = self.model.predict_proba(X_val) - - pred_train = self.model.predict(X_train) - pred_val = self.model.predict(X_val) - - results["train_acc"] = metrics.accuracy_score(y_train, pred_train) - results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train) - results["val_acc"] = metrics.accuracy_score(y_val, pred_val) - results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val) - results["val_preds"] = pred_val_probas.tolist() - results["labels"] = y_val.tolist() - - return results - - def score(self, X_test, y_test): - results = dict() - - y_pred = self.predict(X_test) - - results["test_acc"] = metrics.accuracy_score(y_test, y_pred) - results["test_balanced_acc"] = metrics.balanced_accuracy_score(y_test, y_pred) - - return results - - def predict(self, X_test, predict_proba=False): - X_test = X_test[:, ~self.all_nan] - X_test = X_test[:, ~self.categoricals] - 
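Every one of the removed baselines repeats the same column handling at fit and predict time: columns that are entirely NaN in the training split are masked out, and any remaining NaNs are zero-filled before the model sees the data. A minimal self-contained sketch of that convention (the helper name is illustrative, not part of the removed code):

import numpy as np

def mask_columns(X_train, X_test):
    """Drop columns that are all-NaN in the training data, then zero-fill remaining NaNs."""
    all_nan = np.all(np.isnan(X_train), axis=0)      # boolean mask of unusable columns
    X_train = np.nan_to_num(X_train[:, ~all_nan])    # leftover NaNs become 0.0
    X_test = np.nan_to_num(X_test[:, ~all_nan])      # reuse the *training* mask on test data
    return X_train, X_test

X_tr = np.array([[1.0, np.nan, np.nan], [2.0, np.nan, 3.0]])
X_te = np.array([[4.0, np.nan, np.nan]])
print(mask_columns(X_tr, X_te))   # the all-NaN middle column is dropped, other NaNs become 0.0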
X_test = np.nan_to_num(X_test) - if predict_proba: - return self.model.predict_proba(X_test) - y_pred = self.model.predict(X_test) - return y_pred - - -class SVMBaseline(BaseBaseline): - - def __init__(self): - super(SVMBaseline, self).__init__(name="svm") - - def fit(self, X_train, y_train, X_val, y_val): - results = dict() - - self.model = SVC(**self.config) - - self.all_nan = np.all(np.isnan(X_train), axis=0) - X_train = X_train[:, ~self.all_nan] - X_val = X_val[:, ~self.all_nan] - - self.model.fit(X_train, y_train) - - pred_val_probas = self.model.predict_proba(X_val) - - pred_train = self.model.predict(X_train) - pred_val = self.model.predict(X_val) - - results["train_acc"] = metrics.accuracy_score(y_train, pred_train) - results["train_balanced_acc"] = metrics.balanced_accuracy_score(y_train, pred_train) - results["val_acc"] = metrics.accuracy_score(y_val, pred_val) - results["val_balanced_acc"] = metrics.balanced_accuracy_score(y_val, pred_val) - results["val_preds"] = pred_val_probas.tolist() - results["labels"] = y_val.tolist() - - return results - - def score(self, X_test, y_test): - results = dict() - - y_pred = self.predict(X_test) - - results["test_acc"] = metrics.accuracy_score(y_test, y_pred) - results["test_balanced_acc"] = metrics.balanced_accuracy_score(y_test, y_pred) - - return results - - def predict(self, X_test, predict_proba=False): - X_test = X_test[:, ~self.all_nan] - X_test = np.nan_to_num(X_test) - if predict_proba: - return self.model.predict_proba(X_test) - y_pred = self.model.predict(X_test) - return y_pred diff --git a/autoPyTorch/components/baselines/rotation_forest.py b/autoPyTorch/components/baselines/rotation_forest.py deleted file mode 100644 index b619723ec..000000000 --- a/autoPyTorch/components/baselines/rotation_forest.py +++ /dev/null @@ -1,150 +0,0 @@ -import numpy as np - -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree._tree import DTYPE -from sklearn.ensemble.forest import ForestClassifier -from sklearn.utils import resample, gen_batches, check_random_state -#from sklearn.utils.extmath import fast_dot -from sklearn.decomposition import PCA - - -def random_feature_subsets(array, batch_size, random_state=1234): - """ Generate K subsets of the features in X """ - #random_state = check_random_state(random_state) - rng = np.random.default_rng(seed=random_state) - features = np.arange(array.shape[1]) - #random_state.shuffle(features) - rng.shuffle(features) - for batch in gen_batches(len(features), batch_size): - yield features[batch] - - -class RotationTreeClassifier(DecisionTreeClassifier): - def __init__(self, - n_features_per_subset=3, - rotation_algo='pca', - criterion="gini", - splitter="best", - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0., - max_features=1.0, - random_state=None, - max_leaf_nodes=None, - class_weight=None, - presort=False): - - self.n_features_per_subset = n_features_per_subset - self.rotation_algo = rotation_algo - - super(RotationTreeClassifier, self).__init__( - criterion=criterion, - splitter=splitter, - max_depth=max_depth, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_features=max_features, - max_leaf_nodes=max_leaf_nodes, - class_weight=class_weight, - random_state=random_state, - presort=presort) - - def rotate(self, X): - if not hasattr(self, 'rotation_matrix'): - raise AttributeError("The estimator has not been fitted") - - return np.dot(X, self.rotation_matrix) - - 
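The rotation trick implemented by _fit_rotation_matrix below amounts to fitting a PCA per random feature subset and writing its components into the matching block of an otherwise zero matrix, so that X @ rotation mixes features only within each subset. A simplified standalone sketch of that construction (without the 75% bootstrap used by the class, and with illustrative names):

import numpy as np
from sklearn.decomposition import PCA

def build_rotation_matrix(X, n_features_per_subset=3, seed=0):
    rng = np.random.default_rng(seed)
    n_features = X.shape[1]
    features = rng.permutation(n_features)
    rotation = np.zeros((n_features, n_features), dtype=np.float32)
    for start in range(0, n_features, n_features_per_subset):
        subset = features[start:start + n_features_per_subset]
        pca = PCA().fit(X[:, subset])                        # PCA restricted to this subset
        rotation[np.ix_(subset, subset)] = pca.components_   # fill the block for the subset
    return rotation

X = np.random.default_rng(1).normal(size=(50, 7))
X_rotated = X @ build_rotation_matrix(X)   # same shape; each tree then trains on a rotated view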
def pca_algorithm(self): - """ Deterimine PCA algorithm to use. """ - if self.rotation_algo == 'randomized': - PCA(svd_solver='randomized', whiten=True) - #return RandomizedPCA(random_state=self.random_state) - elif self.rotation_algo == 'pca': - return PCA() - else: - raise ValueError("`rotation_algo` must be either " - "'pca' or 'randomized'.") - - def _fit_rotation_matrix(self, X): - random_state = check_random_state(self.random_state) - n_samples, n_features = X.shape - self.rotation_matrix = np.zeros((n_features, n_features), - dtype=np.float32) - for i, subset in enumerate( - random_feature_subsets(X, self.n_features_per_subset, - random_state=self.random_state)): - # take a 75% bootstrap from the rows - x_sample = resample(X, n_samples=int(n_samples*0.75), - random_state=10*i) - pca = self.pca_algorithm() - pca.fit(x_sample[:, subset]) - self.rotation_matrix[np.ix_(subset, subset)] = pca.components_ - - def fit(self, X, y, sample_weight=None, check_input=True): - self._fit_rotation_matrix(X) - super(RotationTreeClassifier, self).fit(self.rotate(X), y, - sample_weight, check_input) - - def predict_proba(self, X, check_input=True): - return super(RotationTreeClassifier, self).predict_proba(self.rotate(X), - check_input) - - def predict(self, X, check_input=True): - return super(RotationTreeClassifier, self).predict(self.rotate(X), - check_input) - - def apply(self, X, check_input=True): - return super(RotationTreeClassifier, self).apply(self.rotate(X), - check_input) - - def decision_path(self, X, check_input=True): - return super(RotationTreeClassifier, self).decision_path(self.rotate(X), - check_input) - -class RotationForestClassifier(ForestClassifier): - def __init__(self, - n_estimators=10, - criterion="gini", - n_features_per_subset=3, - rotation_algo='pca', - max_depth=None, - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0., - max_features=1.0, - max_leaf_nodes=None, - bootstrap=False, - oob_score=False, - n_jobs=1, - random_state=None, - verbose=0, - warm_start=False, - class_weight=None): - super(RotationForestClassifier, self).__init__( - base_estimator=RotationTreeClassifier(), - n_estimators=n_estimators, - estimator_params=("n_features_per_subset", "rotation_algo", - "criterion", "max_depth", "min_samples_split", - "min_samples_leaf", "min_weight_fraction_leaf", - "max_features", "max_leaf_nodes", - "random_state"), - bootstrap=bootstrap, - oob_score=oob_score, - n_jobs=n_jobs, - random_state=random_state, - verbose=verbose, - warm_start=warm_start, - class_weight=class_weight) - - self.n_features_per_subset = n_features_per_subset - self.rotation_algo = rotation_algo - self.criterion = criterion - self.max_depth = max_depth - self.min_samples_split = min_samples_split - self.min_samples_leaf = min_samples_leaf - self.min_weight_fraction_leaf = min_weight_fraction_leaf - self.max_features = max_features - self.max_leaf_nodes = max_leaf_nodes diff --git a/autoPyTorch/components/ensembles/abstract_ensemble.py b/autoPyTorch/components/ensembles/abstract_ensemble.py deleted file mode 100644 index bcfde8298..000000000 --- a/autoPyTorch/components/ensembles/abstract_ensemble.py +++ /dev/null @@ -1,81 +0,0 @@ -from abc import ABCMeta, abstractmethod - - -class AbstractEnsemble(object): - """Ensemble interface extracted from auto-sklearn""" - - __metaclass__ = ABCMeta - - @abstractmethod - def fit(self, base_models_predictions, true_targets, model_identifiers): - """Fit an ensemble given predictions of base models and targets. 
- - Ensemble building maximizes performance (in contrast to - hyperparameter optimization)! - - Parameters - ---------- - base_models_predictions : array of shape = [n_base_models, n_data_points, n_targets] - n_targets is the number of classes in case of classification, - n_targets is 0 or 1 in case of regression - - true_targets : array of shape [n_targets] - - model_identifiers : identifier for each base model. - Can be used for practical text output of the ensemble. - - Returns - ------- - self - - """ - - @abstractmethod - def predict(self, base_models_predictions): - """Create ensemble predictions from the base model predictions. - - Parameters - ---------- - base_models_predictions : array of shape = [n_base_models, n_data_points, n_targets] - Same as in the fit method. - - Returns - ------- - array : [n_data_points] - """ - - @abstractmethod - def get_models_with_weights(self, models): - """Return a list of (weight, model) pairs - - Parameters - ---------- - models : dict {identifier : model object} - The identifiers are the same as the one presented to the fit() - method. Models can be used for nice printing. - - Returns - ------- - array : [(weight_1, model_1), ..., (weight_n, model_n)] - """ - - - @abstractmethod - def get_selected_model_identifiers(self): - """Return identifiers of models in the ensemble. - - This includes models which have a weight of zero! - - Returns - ------- - list - """ - - @abstractmethod - def get_validation_performance(self): - """Return validation performance of ensemble. - - Return - ------ - float - """ diff --git a/autoPyTorch/components/ensembles/ensemble_selection.py b/autoPyTorch/components/ensembles/ensemble_selection.py deleted file mode 100644 index 4ec6dcec2..000000000 --- a/autoPyTorch/components/ensembles/ensemble_selection.py +++ /dev/null @@ -1,229 +0,0 @@ -from collections import Counter -import random - -import numpy as np - -from autoPyTorch.components.ensembles.abstract_ensemble import AbstractEnsemble - - -class EnsembleSelection(AbstractEnsemble): - """Ensemble Selection algorithm extracted from auto-sklearn""" - - def __init__(self, ensemble_size, metric, - sorted_initialization_n_best=0, only_consider_n_best=0, - bagging=False, mode='fast'): - self.ensemble_size = ensemble_size - self.metric = metric.get_loss_value - self.sorted_initialization_n_best = sorted_initialization_n_best - self.only_consider_n_best = only_consider_n_best - self.bagging = bagging - self.mode = mode - - def fit(self, predictions, labels, identifiers): - self.ensemble_size = int(self.ensemble_size) - if self.ensemble_size < 1: - raise ValueError('Ensemble size cannot be less than one!') - if self.mode not in ('fast', 'slow'): - raise ValueError('Unknown mode %s' % self.mode) - - if self.bagging: - self._bagging(predictions, labels) - else: - self._fit(predictions, labels) - self._calculate_weights() - self.identifiers_ = identifiers - return self - - def _fit(self, predictions, labels): - if self.mode == 'fast': - self._fast(predictions, labels) - else: - self._slow(predictions, labels) - return self - - def _fast(self, predictions, labels): - """Fast version of Rich Caruana's ensemble selection method.""" - self.num_input_models_ = len(predictions) - - ensemble = [] - trajectory = [] - order = [] - - ensemble_size = self.ensemble_size - - if self.sorted_initialization_n_best > 0: - indices = self._sorted_initialization(predictions, labels, self.sorted_initialization_n_best) - for idx in indices: - ensemble.append(predictions[idx]) - order.append(idx) - 
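The greedy loop that follows (and its _slow counterpart) is Caruana-style ensemble selection: at every step, add with replacement the model whose inclusion minimizes the metric of the averaged ensemble, then turn selection counts into weights. A compact sketch of the same idea, assuming a loss where lower is better (all names here are illustrative):

import numpy as np

def greedy_ensemble(predictions, labels, ensemble_size, loss):
    order = []                                   # chosen model indices, with replacement
    current_sum = np.zeros_like(predictions[0])  # running sum of the chosen predictions
    for step in range(ensemble_size):
        # averaging (current_sum + p) / (step + 1) is the same weighting as the
        # (s / (s + 1)) * mean + (1 / (s + 1)) * pred update used in _fast below
        scores = [loss((current_sum + p) / (step + 1), labels) for p in predictions]
        best = int(np.argmin(scores))
        order.append(best)
        current_sum += predictions[best]
    weights = np.bincount(order, minlength=len(predictions)) / ensemble_size
    return order, weights

rng = np.random.default_rng(0)
labels = rng.integers(0, 2, size=100).astype(float)
preds = [np.clip(labels + rng.normal(0, s, 100), 0, 1) for s in (0.2, 0.5, 0.9)]
order, weights = greedy_ensemble(preds, labels, ensemble_size=10,
                                 loss=lambda p, y: np.mean((p - y) ** 2))
# weights sum to 1 and concentrate on the least noisy model, mirroring _calculate_weights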
ensemble_ = np.array(ensemble).mean(axis=0) - ensemble_performance = self.metric(ensemble_, labels) - trajectory.append(ensemble_performance) - ensemble_size -= self.sorted_initialization_n_best - - only_consider_indices = None - if self.only_consider_n_best > 0: - only_consider_indices = set(self._sorted_initialization(predictions, labels, self.only_consider_n_best)) - - for i in range(ensemble_size): - scores = np.zeros((len(predictions))) - s = len(ensemble) - if s == 0: - weighted_ensemble_prediction = np.zeros(predictions[0].shape) - else: - ensemble_prediction = np.mean(np.array(ensemble), axis=0) - weighted_ensemble_prediction = (s / float(s + 1)) * \ - ensemble_prediction - fant_ensemble_prediction = np.zeros(weighted_ensemble_prediction.shape) - for j, pred in enumerate(predictions): - # TODO: this could potentially be vectorized! - let's profile - # the script first! - if only_consider_indices and j not in only_consider_indices: - scores[j] = float("inf") - continue - fant_ensemble_prediction[:,:] = weighted_ensemble_prediction + \ - (1. / float(s + 1)) * pred - scores[j] = self.metric(fant_ensemble_prediction, labels) - all_best = np.argwhere(scores == np.nanmin(scores)).flatten() - best = np.random.choice(all_best) - ensemble.append(predictions[best]) - trajectory.append(scores[best]) - order.append(best) - - # Handle special case - if len(predictions) == 1: - break - - self.indices_ = order - self.trajectory_ = trajectory - self.train_score_ = trajectory[-1] - - def _slow(self, predictions, labels): - """Rich Caruana's ensemble selection method.""" - self.num_input_models_ = len(predictions) - - ensemble = [] - trajectory = [] - order = [] - - ensemble_size = self.ensemble_size - - if self.sorted_initialization_n_best > 0: - indices = self._sorted_initialization(predictions, labels, self.sorted_initialization_n_best) - for idx in indices: - ensemble.append(predictions[idx]) - order.append(idx) - ensemble_ = np.array(ensemble).mean(axis=0) - ensemble_performance = self.metric(ensemble_, labels) - trajectory.append(ensemble_performance) - ensemble_size -= self.sorted_initialization_n_best - - only_consider_indices = None - if self.only_consider_n_best > 0: - only_consider_indices = set(self._sorted_initialization(predictions, labels, self.only_consider_n_best)) - - for i in range(ensemble_size): - scores = np.zeros([predictions.shape[0]]) - for j, pred in enumerate(predictions): - if only_consider_indices and j not in only_consider_indices: - scores[j] = float("inf") - continue - ensemble.append(pred) - ensemble_prediction = np.mean(np.array(ensemble), axis=0) - scores[j] = self.metric(ensemble_prediction, labels) - ensemble.pop() - best = np.nanargmin(scores) - ensemble.append(predictions[best]) - trajectory.append(scores[best]) - order.append(best) - - # Handle special case - if len(predictions) == 1: - break - - self.indices_ = np.array(order) - self.trajectory_ = np.array(trajectory) - self.train_score_ = trajectory[-1] - - def _calculate_weights(self): - ensemble_members = Counter(self.indices_).most_common() - weights = np.zeros((self.num_input_models_,), dtype=float) - for ensemble_member in ensemble_members: - weight = float(ensemble_member[1]) / self.ensemble_size - weights[ensemble_member[0]] = weight - - if np.sum(weights) < 1: - weights = weights / np.sum(weights) - - self.weights_ = weights - - def _sorted_initialization(self, predictions, labels, n_best): - perf = np.zeros([predictions.shape[0]]) - - for idx, prediction in enumerate(predictions): - perf[idx] = 
self.metric(prediction, labels) - - indices = np.argsort(perf)[:n_best] - return indices - - def _bagging(self, predictions, labels, fraction=0.5, n_bags=20): - """Rich Caruana's ensemble selection method with bagging.""" - raise ValueError('Bagging might not work with class-based interface!') - n_models = predictions.shape[0] - bag_size = int(n_models * fraction) - - for j in range(n_bags): - # Bagging a set of models - indices = sorted(random.sample(range(0, n_models), bag_size)) - bag = predictions[indices, :, :] - self._fit(bag, labels) - - def predict(self, predictions): - if len(predictions) != len(self.weights_): - raise ValueError("Number of model predictions not the same as model weights") - #if len(predictions) < len(self.weights_): - # weights = (weight for weight in self.weights_ if weight > 0) - else: - weights = self.weights_ - - for i, weight in enumerate(weights): - predictions[i] *= weight - return np.sum(predictions, axis=0) - - def __str__(self): - return 'Ensemble Selection:\n\tTrajectory: %s\n\tMembers: %s' \ - '\n\tWeights: %s\n\tIdentifiers: %s' % \ - (' '.join(['%d: %5f' % (idx, performance) - for idx, performance in enumerate(self.trajectory_)]), - self.indices_, self.weights_, - ' '.join([str(identifier) for idx, identifier in - enumerate(self.identifiers_) - if self.weights_[idx] > 0])) - - def get_models_with_weights(self, models): - output = [] - - for i, weight in enumerate(self.weights_): - identifier = self.identifiers_[i] - if weight > 0.0: - model = models[identifier] - output.append((weight, model)) - - output.sort(reverse=True, key=lambda t: t[0]) - - return output - - def get_selected_model_identifiers(self): - output = [] - - for i, weight in enumerate(self.weights_): - identifier = self.identifiers_[i] - if weight > 0.0: - output.append(identifier) - - output.sort(reverse=True, key=lambda t: t[0]) - - return output - - def get_validation_performance(self): - return self.trajectory_[-1] diff --git a/autoPyTorch/components/lr_scheduler/lr_schedulers.py b/autoPyTorch/components/lr_scheduler/lr_schedulers.py deleted file mode 100644 index aee5968cd..000000000 --- a/autoPyTorch/components/lr_scheduler/lr_schedulers.py +++ /dev/null @@ -1,423 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -This file contains the different learning rate schedulers of AutoNet. 
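Each scheduler in this removed module pairs a _get_scheduler factory with a get_config_space() describing its hyperparameters, which the optimizer samples before the wrapped torch scheduler is built. A hedged sketch of that flow for a StepLR-style search space (bounds and variable names are illustrative):

import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH
import torch
import torch.optim.lr_scheduler as lr_scheduler

cs = CS.ConfigurationSpace()
cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('step_size', lower=1, upper=10))
cs.add_hyperparameter(CSH.UniformFloatHyperparameter('gamma', lower=0.001, upper=0.9))
config = cs.sample_configuration()       # one concrete assignment, e.g. step_size=4, gamma=0.37

model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = lr_scheduler.StepLR(optimizer,
                                step_size=config['step_size'],
                                gamma=config['gamma'])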
-""" - -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -import numpy as np -import math -import torch -import torch.optim.lr_scheduler as lr_scheduler -from torch.optim import Optimizer - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -class AutoNetLearningRateSchedulerBase(object): - def __new__(cls, optimizer, config): - """Get a new instance of the scheduler - - Arguments: - cls {class} -- Type of scheduler - optimizer {Optmizer} -- A PyTorch Optimizer - config {dict} -- Sampled lr_scheduler config - - Returns: - AutoNetLearningRateSchedulerBase -- The learning rate scheduler object - """ - scheduler = cls._get_scheduler(cls, optimizer, config) - if not hasattr(scheduler, "allows_early_stopping"): - scheduler.allows_early_stopping = True - if not hasattr(scheduler, "snapshot_before_restart"): - scheduler.snapshot_before_restart = False - return scheduler - - def _get_scheduler(self, optimizer, config): - raise ValueError('Override the method _get_scheduler and do not call the base class implementation') - - @staticmethod - def get_config_space(): - return CS.ConfigurationSpace() - - -class SchedulerNone(AutoNetLearningRateSchedulerBase): - - def _get_scheduler(self, optimizer, config): - return NoScheduling(optimizer=optimizer) - - -class SchedulerStepLR(AutoNetLearningRateSchedulerBase): - """ - Step learning rate scheduler - """ - - def _get_scheduler(self, optimizer, config): - return lr_scheduler.StepLR(optimizer=optimizer, step_size=config['step_size'], gamma=config['gamma'], last_epoch=-1) - - @staticmethod - def get_config_space( - step_size=(1, 10), - gamma=(0.001, 0.9) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'step_size', step_size) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'gamma', gamma) - return cs - - -class SchedulerExponentialLR(AutoNetLearningRateSchedulerBase): - """ - Exponential learning rate scheduler - """ - - def _get_scheduler(self, optimizer, config): - return lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=config['gamma'], last_epoch=-1) - - @staticmethod - def get_config_space( - gamma=(0.8, 0.9999) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'gamma', gamma) - return cs - - -class SchedulerReduceLROnPlateau(AutoNetLearningRateSchedulerBase): - """ - Reduce LR on plateau learning rate scheduler - """ - - def _get_scheduler(self, optimizer, config): - return lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, - factor=config['factor'], - patience=config['patience']) - - @staticmethod - def get_config_space( - factor=(0.05, 0.5), - patience=(3, 10) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'factor', factor) - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'patience', patience) - return cs - - -class SchedulerAdaptiveLR(AutoNetLearningRateSchedulerBase): - """ - Adaptive cosine learning rate scheduler - """ - - def _get_scheduler(self, optimizer, config): - return AdaptiveLR(optimizer=optimizer, - T_max=config['T_max'], - T_mul=config['T_mult'], - patience=config['patience'], - threshold=config['threshold']) - - @staticmethod - def get_config_space( - T_max=(300,1000), - patience=(2,5), - T_mult=(1.0,2.0), - threshold=(0.001, 0.5) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, 
CSH.UniformIntegerHyperparameter, 'T_max', T_max) - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'patience', patience) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'T_mult', T_mult) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'threshold', threshold) - return cs - - -class AdaptiveLR(object): - - def __init__(self, optimizer, mode='min', T_max=30, T_mul=2.0, eta_min=0, patience=3, threshold=0.1, min_lr=0, eps=1e-8, last_epoch=-1): - - if not isinstance(optimizer, Optimizer): - raise TypeError('{} is not an Optimizer'.format( - type(optimizer).__name__)) - - self.optimizer = optimizer - - if last_epoch == -1: - for group in optimizer.param_groups: - group.setdefault('initial_lr', group['lr']) - else: - for i, group in enumerate(optimizer.param_groups): - if 'initial_lr' not in group: - raise KeyError("param 'initial_lr' is not specified " - "in param_groups[{}] when resuming an optimizer".format(i)) - - self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups)) - self.last_epoch = last_epoch - - if isinstance(min_lr, list) or isinstance(min_lr, tuple): - if len(min_lr) != len(optimizer.param_groups): - raise ValueError("expected {} min_lrs, got {}".format( - len(optimizer.param_groups), len(min_lr))) - self.min_lrs = list(min_lr) - else: - self.min_lrs = [min_lr] * len(optimizer.param_groups) - - self.T_max = T_max - self.T_mul = T_mul - self.eta_min = eta_min - self.current_base_lrs = self.base_lrs - self.metric_values = [] - self.threshold = threshold - self.patience = patience - self.steps = 0 - - def step(self, metrics, epoch=None): - if epoch is None: - epoch = self.last_epoch + 1 - self.last_epoch = epoch - - self.metric_values.append(metrics) - if len(self.metric_values) > self.patience: - self.metric_values = self.metric_values[1:] - - if max(self.metric_values) - metrics > self.threshold: - self.current_base_lrs = self.get_lr() - self.steps = 0 - else: - self.steps += 1 - - self.last_metric_value = metrics - - for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): - param_group['lr'] = lr - - def get_lr(self): - ''' - Override this method to the existing get_lr() of the parent class - ''' - if self.steps >= self.T_max: - self.T_max = self.T_max * self.T_mul - self.current_base_lrs = self.base_lrs - self.metric_values = [] - self.steps = 0 - - return [self.eta_min + (base_lr - self.eta_min) * - (1 + math.cos(math.pi * self.steps / self.T_max)) / 2 - for base_lr in self.current_base_lrs] - - -class SchedulerCyclicLR(AutoNetLearningRateSchedulerBase): - """ - Cyclic learning rate scheduler - """ - - def _get_scheduler(self, optimizer, config): - maf = config['max_factor'] - mif = config['min_factor'] - cl = config['cycle_length'] - r = maf - mif - def l(epoch): - if int(epoch//cl) % 2 == 1: - lr = mif + (r * (float(epoch % cl)/float(cl))) - else: - lr = maf - (r * (float(epoch % cl)/float(cl))) - return lr - - return lr_scheduler.LambdaLR(optimizer=optimizer, lr_lambda=l, last_epoch=-1) - - @staticmethod - def get_config_space( - max_factor=(1.0, 2), - min_factor=(0.001, 1.0), - cycle_length=(3, 10) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'max_factor', max_factor) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'min_factor', min_factor) - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'cycle_length', cycle_length) - return cs - - -class CosineAnnealingToFlatLR(lr_scheduler.CosineAnnealingLR): - - def __init__(self, *args, **kwargs): - 
super(CosineAnnealingToFlatLR, self).__init__(*args, **kwargs) - - def get_lr(self): - if self.last_epoch > self.T_max: - return [self.eta_min for base_lr in self.base_lrs] - return super(CosineAnnealingToFlatLR, self).get_lr() - - -class SchedulerCosineAnnealingLR(AutoNetLearningRateSchedulerBase): - """ - Cosine annealing learning rate scheduler - """ - - def _get_scheduler(self, optimizer, config): - return CosineAnnealingToFlatLR(optimizer=optimizer, T_max=config['T_max'], eta_min=config['eta_min'], last_epoch=-1) - - @staticmethod - def get_config_space( - T_max=(10,500), - eta_min=(1e-8, 1e-8) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'T_max', T_max) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'eta_min', eta_min) - return cs - - -class SchedulerCosineAnnealingWithRestartsLR(AutoNetLearningRateSchedulerBase): - """ - Cosine annealing learning rate scheduler with warm restarts - """ - - def _get_scheduler(self, optimizer, config): - scheduler = CosineAnnealingWithRestartsLR(optimizer, T_max=config['T_max'], T_mult=config['T_mult'],last_epoch=-1) - scheduler.allows_early_stopping = False - scheduler.snapshot_before_restart = True - return scheduler - - @staticmethod - def get_config_space( - T_max=(1, 20), - T_mult=(1.0, 2.0) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'T_max', T_max) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'T_mult', T_mult) - return cs - - -class NoScheduling(): - def __init__(self, optimizer): - self.optimizer = optimizer - - def step(self, epoch): - return - - def get_lr(self): - try: - return [self.optimizer.defaults["lr"]] - except: - return [None] - - -class CosineAnnealingWithRestartsLR(torch.optim.lr_scheduler._LRScheduler): - - r"""Copyright: pytorch - Set the learning rate of each parameter group using a cosine annealing - schedule, where :math:`\eta_{max}` is set to the initial lr and - :math:`T_{cur}` is the number of epochs since the last restart in SGDR: - - .. math:: - - \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})(1 + - \cos(\frac{T_{cur}}{T_{max}}\pi)) - - When last_epoch=-1, sets initial lr as lr. - - It has been proposed in - `SGDR: Stochastic Gradient Descent with Warm Restarts`_. This implements - the cosine annealing part of SGDR, the restarts and number of iterations multiplier. - - Args: - optimizer (Optimizer): Wrapped optimizer. - T_max (int): Maximum number of iterations. - T_mult (float): Multiply T_max by this number after each restart. Default: 1. - eta_min (float): Minimum learning rate. Default: 0. - last_epoch (int): The index of last epoch. Default: -1. - - .. 
_SGDR\: Stochastic Gradient Descent with Warm Restarts: - https://arxiv.org/abs/1608.03983 - """ - - def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1, T_mult=1): - self.T_max = T_max - self.T_mult = T_mult - self.restart_every = T_max - self.eta_min = eta_min - self.restarts = 0 - self.restarted_at = 0 - super().__init__(optimizer, last_epoch) - - def restart(self): - self.restart_every *= self.T_mult - self.restarted_at = self.last_epoch - - def cosine(self, base_lr): - return self.eta_min + (base_lr - self.eta_min) * (1 + math.cos(math.pi * self.step_n / self.restart_every)) / 2 - - @property - def step_n(self): - return self.last_epoch - self.restarted_at - - def get_lr(self): - if self.step_n >= self.restart_every: - self.restart() - return [self.cosine(base_lr) for base_lr in self.base_lrs] - - def needs_checkpoint(self): - return self.step_n + 1 >= self.restart_every - - -class SchedulerAlternatingCosineLR(AutoNetLearningRateSchedulerBase): - """ - Alternating cosine learning rate scheduler - """ - - def _get_scheduler(self, optimizer, config): - scheduler = AlternatingCosineLR(optimizer, T_max=config['T_max'], T_mul=config['T_mult'], amplitude_reduction=config['amp_reduction'], last_epoch=-1) - return scheduler - - @staticmethod - def get_config_space( - T_max=(1, 20), - T_mult=(1.0, 2.0), - amp_reduction=(0.1,1) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'T_max', T_max) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'T_mult', T_mult) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'amp_reduction', amp_reduction) - return cs - - -class AlternatingCosineLR(torch.optim.lr_scheduler._LRScheduler): - def __init__(self, optimizer, T_max, T_mul=1, amplitude_reduction=0.9, eta_min=0, last_epoch=-1): - ''' - Here last_epoch actually means last_step since the - learning rate is decayed after each batch step. 
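The restart mechanics described above reduce to a short recurrence: cosine decay from eta_max to eta_min over the current period, after which the phase resets and the period is multiplied by T_mult. A small numeric sketch of that schedule, standalone and not using the classes in this module:

import math

def cosine_with_restarts(eta_max, eta_min, T_max, T_mult, n_steps):
    lrs, restart_every, restarted_at = [], T_max, 0
    for t in range(n_steps):
        if t - restarted_at >= restart_every:   # warm restart: reset the cosine phase...
            restarted_at = t
            restart_every *= T_mult             # ...and stretch the next period
        step = t - restarted_at
        lrs.append(eta_min + (eta_max - eta_min)
                   * (1 + math.cos(math.pi * step / restart_every)) / 2)
    return lrs

schedule = cosine_with_restarts(eta_max=0.1, eta_min=0.0, T_max=10, T_mult=2, n_steps=40)
# decays from 0.1 towards 0 over 10 steps, jumps back to 0.1, then decays over 20 steps, ...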
- ''' - - self.T_max = T_max - self.T_mul = T_mul - self.eta_min = eta_min - self.cumulative_time = 0 - self.amplitude_mult = amplitude_reduction - self.base_lr_mult = 1 - self.frequency_mult = 1 - self.time_offset = 0 - self.last_step = 0 - super(AlternatingCosineLR, self).__init__(optimizer, last_epoch) - - def get_lr(self): - ''' - Override this method to the existing get_lr() of the parent class - ''' - if self.last_epoch >= self.T_max: - self.T_max = self.T_max * self.T_mul - self.time_offset = self.T_max / 2 - self.last_epoch = 0 - self.base_lr_mult *= self.amplitude_mult - self.frequency_mult = 2 - self.cumulative_time = 0 - return [self.eta_min + (base_lr * self.base_lr_mult - self.eta_min) * - (1 + math.cos(math.pi * (self.time_offset + self.cumulative_time) / self.T_max * self.frequency_mult)) / 2 - for base_lr in self.base_lrs] diff --git a/autoPyTorch/components/metrics/__init__.py b/autoPyTorch/components/metrics/__init__.py deleted file mode 100644 index 969b9193c..000000000 --- a/autoPyTorch/components/metrics/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from autoPyTorch.components.metrics.balanced_accuracy import balanced_accuracy -from autoPyTorch.components.metrics.pac_score import pac_metric -from autoPyTorch.components.metrics.standard_metrics import accuracy, auc_metric, mae, rmse, multilabel_accuracy, cross_entropy, top1, top3, top5 diff --git a/autoPyTorch/components/metrics/additional_logs.py b/autoPyTorch/components/metrics/additional_logs.py deleted file mode 100644 index 96afa675d..000000000 --- a/autoPyTorch/components/metrics/additional_logs.py +++ /dev/null @@ -1,99 +0,0 @@ -import numpy as np -import torch - -from sklearn.metrics import accuracy_score - -class test_result_ens(): - def __init__(self, autonet, X_test, Y_test): - self.autonet = autonet - self.X_test = X_test - self.Y_test = Y_test - from autoPyTorch.core.api import AutoNet - self.predict = AutoNet.predict - - def __call__(self, model, epochs): - if self.Y_test is None or self.X_test is None: - return float("nan") - - preds = self.predict(self.autonet, self.X_test, return_probabilities=False) - return accuracy_score(preds, self.Y_test) - -class test_result(): - """Log the performance on the test set""" - def __init__(self, autonet, X_test, Y_test): - self.autonet = autonet - self.X_test = X_test - self.Y_test = Y_test - - def __call__(self, network, epochs): - if self.Y_test is None or self.X_test is None: - return float("nan") - - return self.autonet.score(self.X_test, self.Y_test) - - -class gradient_norm(): - """Log the mean norm of the loss gradients""" - def __init_(self): - pass - - def __call__(self, network, epoch): - total_gradient = 0 - n_params = 0 - - for p in list(filter(lambda p: p.grad is not None, network.parameters())): - total_gradient += p.grad.data.norm(2).item() - n_params += 1 - - # Prevent dividing by 0 - if total_gradient==0: - n_params = 1 - - return total_gradient/n_params - - -class gradient_mean(): - """Log the mean of the loss gradients""" - def __init_(self): - pass - - def __call__(self, network, epoch): - - n_gradients = 0 - sum_of_means = 0 - - for p in list(filter(lambda p: p.grad is not None, network.parameters())): - weight = np.prod(p.grad.data.shape) - n_gradients += weight - sum_of_means += p.grad.data.mean().item() * weight - - # Prevent dividing by 0 - if n_gradients==0: - n_gradients = 1 - - return sum_of_means/n_gradients - - -class gradient_std(): - """Log the norm of the loss gradients""" - - def __init_(self): - pass - - def __call__(self, network, 
epoch): - - n = 0 - total_sum = 0 - sum_sq = 0 - - for p in list(filter(lambda p: p.grad is not None, network.parameters())): - for par in torch.flatten(p.grad.data): - n += 1 - total_sum += par - sum_sq += par*par - - # Prevent dividing by 0 - if n==0: - n = 1 - - return (sum_sq - (total_sum * total_sum) / n) / (n-1) diff --git a/autoPyTorch/components/metrics/balanced_accuracy.py b/autoPyTorch/components/metrics/balanced_accuracy.py deleted file mode 100644 index 64969fb09..000000000 --- a/autoPyTorch/components/metrics/balanced_accuracy.py +++ /dev/null @@ -1,66 +0,0 @@ -import numpy as np -import scipy as sp - -from sklearn.metrics.classification import _check_targets, type_of_target - - -def balanced_accuracy(solution, prediction): - """balanced accuracy implementation of auto-sklearn""" - - y_type, solution, prediction = _check_targets(solution, prediction) - - if y_type not in ["binary", "multiclass", 'multilabel-indicator']: - raise ValueError("{0} is not supported".format(y_type)) - - if y_type == 'binary': - # Do not transform into any multiclass representation - max_value = max(np.max(solution), np.max(prediction)) - min_value = min(np.min(solution), np.min(prediction)) - if max_value == min_value: - return 1.0 - solution = (solution - min_value) / (max_value - min_value) - prediction = (prediction - min_value) / (max_value - min_value) - - elif y_type == 'multiclass': - # Need to create a multiclass solution and a multiclass predictions - max_class = int(np.max((np.max(solution), np.max(prediction)))) - solution_binary = np.zeros((len(solution), max_class + 1)) - prediction_binary = np.zeros((len(prediction), max_class + 1)) - for i in range(len(solution)): - solution_binary[i, int(solution[i])] = 1 - prediction_binary[i, int(prediction[i])] = 1 - solution = solution_binary - prediction = prediction_binary - - elif y_type == 'multilabel-indicator': - solution = solution.toarray() - prediction = prediction.toarray() - else: - raise NotImplementedError('bac_metric does not support task type %s' - % y_type) - - fn = np.sum(np.multiply(solution, (1 - prediction)), axis=0, - dtype=float) - tp = np.sum(np.multiply(solution, prediction), axis=0, dtype=float) - # Bounding to avoid division by 0 - eps = 1e-15 - tp = sp.maximum(eps, tp) - pos_num = sp.maximum(eps, tp + fn) - tpr = tp / pos_num # true positive rate (sensitivity) - - if y_type in ('binary', 'multilabel-indicator'): - tn = np.sum(np.multiply((1 - solution), (1 - prediction)), - axis=0, dtype=float) - fp = np.sum(np.multiply((1 - solution), prediction), axis=0, - dtype=float) - tn = sp.maximum(eps, tn) - neg_num = sp.maximum(eps, tn + fp) - tnr = tn / neg_num # true negative rate (specificity) - bac = 0.5 * (tpr + tnr) - elif y_type == 'multiclass': - label_num = solution.shape[1] - bac = tpr - else: - raise ValueError(y_type) - - return np.mean(bac) # average over all classes diff --git a/autoPyTorch/components/metrics/pac_score.py b/autoPyTorch/components/metrics/pac_score.py deleted file mode 100644 index 198ff247c..000000000 --- a/autoPyTorch/components/metrics/pac_score.py +++ /dev/null @@ -1,186 +0,0 @@ -import numpy as np -import scipy as sp - -from sklearn.metrics.classification import _check_targets, type_of_target - - -def pac_metric(solution, prediction): - """ - Probabilistic Accuracy based on log_loss metric. - We assume the solution is in {0, 1} and prediction in [0, 1]. - Otherwise, run normalize_array. 
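For the binary case, the balanced_accuracy implementation removed above reduces to averaging the recall on each class (sensitivity and specificity), which matches sklearn's balanced_accuracy_score. A tiny standalone sketch with an illustrative function name:

import numpy as np

def balanced_accuracy_binary(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=bool)
    y_pred = np.asarray(y_pred, dtype=bool)
    tpr = np.mean(y_pred[y_true])      # sensitivity: recall on the positive class
    tnr = np.mean(~y_pred[~y_true])    # specificity: recall on the negative class
    return 0.5 * (tpr + tnr)

print(balanced_accuracy_binary([1, 1, 0, 0, 0], [1, 0, 0, 0, 1]))   # 0.5 * (1/2 + 2/3) ≈ 0.583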
- :param solution: - :param prediction: - :param task: - :return: - """ - - def normalize_array(solution, prediction): - """ - Use min and max of solution as scaling factors to normalize prediction, - then threshold it to [0, 1]. - Binarize solution to {0, 1}. This allows applying classification - scores to all cases. In principle, this should not do anything to - properly formatted classification inputs and outputs. - :param solution: - :param prediction: - :return: - """ - # Binarize solution - sol = np.ravel(solution) # convert to 1-d array - maxi = np.nanmax(sol[np.isfinite(sol)]) - mini = np.nanmin(sol[np.isfinite(sol)]) - if maxi == mini: - print('Warning, cannot normalize') - return [solution, prediction] - diff = maxi - mini - mid = (maxi + mini) / 2. - - solution[solution >= mid] = 1 - solution[solution < mid] = 0 - # Normalize and threshold predictions (takes effect only if solution not - # in {0, 1}) - - prediction -= float(mini) - prediction /= float(diff) - - # and if predictions exceed the bounds [0, 1] - prediction[prediction > 1] = 1 - prediction[prediction < 0] = 0 - # Make probabilities smoother - # new_prediction = np.power(new_prediction, (1./10)) - - return [solution, prediction] - - def log_loss(solution, prediction, task): - """Log loss for binary and multiclass.""" - [sample_num, label_num] = solution.shape - # Lower gives problems with float32! - eps = 0.00000003 - - if (task == 'multiclass') and (label_num > 1): - # Make sure the lines add up to one for multi-class classification - norma = np.sum(prediction, axis=1) - for k in range(sample_num): - prediction[k, :] /= sp.maximum(norma[k], eps) - - sample_num = solution.shape[0] - for i in range(sample_num): - j = np.argmax(solution[i, :]) - solution[i, :] = 0 - solution[i, j] = 1 - - solution = solution.astype(np.int32, copy=False) - # For the base prediction, this solution is ridiculous in the - # multi-label case - - # Bounding of predictions to avoid log(0),1/0,... - prediction = sp.minimum(1 - eps, sp.maximum(eps, prediction)) - # Compute the log loss - pos_class_log_loss = -np.mean(solution * np.log(prediction), axis=0) - if (task != 'multiclass') or (label_num == 1): - # The multi-label case is a bunch of binary problems. - # The second class is the negative class for each column. - neg_class_log_loss = -np.mean( - (1 - solution) * np.log(1 - prediction), axis=0) - log_loss = pos_class_log_loss + neg_class_log_loss - # Each column is an independent problem, so we average. - # The probabilities in one line do not add up to one. - # log_loss = mvmean(log_loss) - # print('binary {}'.format(log_loss)) - # In the multilabel case, the right thing i to AVERAGE not sum - # We return all the scores so we can normalize correctly later on - else: - # For the multiclass case the probabilities in one line add up one. - log_loss = pos_class_log_loss - # We sum the contributions of the columns. - log_loss = np.sum(log_loss) - # print('multiclass {}'.format(log_loss)) - return log_loss - - def prior_log_loss(frac_pos, task): - """Baseline log loss. 
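In the binary case, the baseline computed by prior_log_loss is simply the log loss of always predicting the empirical prior of the positive class, i.e. the entropy of that prior. A hedged sketch of that branch (function name is illustrative):

import numpy as np

def binary_prior_log_loss(frac_pos, eps=1e-15):
    p = np.clip(frac_pos, eps, 1 - eps)
    return -(frac_pos * np.log(p) + (1 - frac_pos) * np.log(1 - p))

print(binary_prior_log_loss(0.5))   # log(2) ≈ 0.693, the hardest (balanced) prior
print(binary_prior_log_loss(0.9))   # ≈ 0.325, an imbalanced prior is easier to match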
- For multiplr classes ot labels return the volues for each column - """ - eps = 1e-15 - frac_pos_ = sp.maximum(eps, frac_pos) - if task != 'multiclass': # binary case - frac_neg = 1 - frac_pos - frac_neg_ = sp.maximum(eps, frac_neg) - pos_class_log_loss_ = -frac_pos * np.log(frac_pos_) - neg_class_log_loss_ = -frac_neg * np.log(frac_neg_) - base_log_loss = pos_class_log_loss_ + neg_class_log_loss_ - # base_log_loss = mvmean(base_log_loss) - # print('binary {}'.format(base_log_loss)) - # In the multilabel case, the right thing i to AVERAGE not sum - # We return all the scores so we can normalize correctly later on - else: # multiclass case - fp = frac_pos_ / sum( - frac_pos_ - ) # Need to renormalize the lines in multiclass case - # Only ONE label is 1 in the multiclass case active for each line - pos_class_log_loss_ = -frac_pos * np.log(fp) - base_log_loss = np.sum(pos_class_log_loss_) - return base_log_loss - - y_type = type_of_target(solution) - - if y_type == 'binary': - if len(solution.shape) == 1: - solution = solution.reshape((-1, 1)) - if len(prediction.shape) == 1: - prediction = prediction.reshape((-1, 1)) - if len(prediction.shape) == 2: - if prediction.shape[1] > 2: - raise ValueError('A prediction array with probability values ' - 'for %d classes is not a binary ' - 'classification problem' % prediction.shape[1]) - # Prediction will be copied into a new binary array - no copy - prediction = prediction[:, 1].reshape((-1, 1)) - else: - raise ValueError('Invalid prediction shape %s' % prediction.shape) - - elif y_type == 'multiclass': - if len(solution.shape) == 2: - if solution.shape[1] > 1: - raise ValueError('Solution array must only contain one class ' - 'label, but contains %d' % solution.shape[1]) - elif len(solution.shape) == 1: - pass - else: - raise ValueError('Solution.shape %s' % solution.shape) - - # Need to create a multiclass solution and a multiclass predictions - max_class = int(np.max((np.max(solution), np.max(prediction)))) - solution_binary = np.zeros((len(solution), max_class + 1)) - for i in range(len(solution)): - solution_binary[i, int(solution[i])] = 1 - solution = solution_binary - - elif y_type == 'multilabel-indicator': - solution = solution.copy() - - else: - raise NotImplementedError('pac_score does not support task type %s' - % y_type) - - solution, prediction = normalize_array(solution, prediction.copy()) - - sample_num, _ = solution.shape - - eps = 1e-7 - # Compute the base log loss (using the prior probabilities) - pos_num = 1. * np.sum(solution, axis=0, dtype=float) # float conversion! - frac_pos = pos_num / sample_num # prior proba of positive class - the_base_log_loss = prior_log_loss(frac_pos, y_type) - the_log_loss = log_loss(solution, prediction, y_type) - - # Exponentiate to turn into an accuracy-like score. 
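The last lines of pac_metric turn the two log losses into the final score: exponentiate each into an accuracy-like quantity, then rescale so that the prior baseline maps to 0 and a perfect prediction maps to 1. A minimal numeric sketch of just that step (helper name is illustrative):

import numpy as np

def pac_from_log_loss(model_log_loss, baseline_log_loss, eps=1e-7):
    pac = np.exp(-model_log_loss)           # accuracy-like score of the model
    base_pac = np.exp(-baseline_log_loss)   # same quantity for the prior predictor
    return (pac - base_pac) / max(eps, 1.0 - base_pac)

print(pac_from_log_loss(0.3, np.log(2)))        # beats the uniform binary prior: ≈ 0.48
print(pac_from_log_loss(np.log(2), np.log(2)))  # exactly the prior: 0.0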
- # In the multi-label case, we need to average AFTER taking the exp - # because it is an NL operation - pac = np.mean(np.exp(the_log_loss * -1)) - base_pac = np.mean(np.exp(the_base_log_loss * -1)) - # Normalize: 0 for random, 1 for perfect - score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac)) - - return score \ No newline at end of file diff --git a/autoPyTorch/components/metrics/standard_metrics.py b/autoPyTorch/components/metrics/standard_metrics.py deleted file mode 100644 index 5773c3b0b..000000000 --- a/autoPyTorch/components/metrics/standard_metrics.py +++ /dev/null @@ -1,51 +0,0 @@ -import sklearn.metrics as metrics -import numpy as np - -# classification metrics -def accuracy(y_true, y_pred): - return np.mean(y_true == y_pred) * 100 - -def auc_metric(y_true, y_pred): - return (2 * metrics.roc_auc_score(y_true, y_pred) - 1) - -def cross_entropy(y_true, y_pred): - if y_true==1: - return -np.log(y_pred) - else: - return -np.log(1-y_pred) - -def top1(y_pred, y_true): - return topN(y_pred, y_true, 1) - -def top3(y_pred, y_true): - return topN(y_pred, y_true, 3) - -def top5(y_pred, y_true): - if y_pred.shape[1] < 5: - return -1 - return topN(y_pred, y_true, 5) - -def topN(output, target, topk): - """Computes the accuracy over the k top predictions for the specified values of k""" - with torch.no_grad(): - batch_size = target.size(0) - - _, pred = output.topk(topk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - correct_k = correct[:topk].view(-1).float().sum(0, keepdim=True) - return correct_k.mul_(100.0 / batch_size).item() - - -# multilabel metrics -def multilabel_accuracy(y_true, y_pred): - return np.mean(y_true == (y_pred > 0.5)) - - -# regression metrics -def mae(y_true, y_pred): - return np.mean(np.abs(y_true - y_pred)) - -def rmse(y_true, y_pred): - return np.sqrt(np.mean((y_true - y_pred)**2)) diff --git a/autoPyTorch/components/networks/activations.py b/autoPyTorch/components/networks/activations.py deleted file mode 100644 index 8d7a33d86..000000000 --- a/autoPyTorch/components/networks/activations.py +++ /dev/null @@ -1,23 +0,0 @@ -import torch.nn as nn -import inspect - - -all_activations = { - 'relu' : nn.ReLU, - 'sigmoid' : nn.Sigmoid, - 'tanh' : nn.Tanh, - 'leakyrelu' : nn.LeakyReLU, - 'selu' : nn.SELU, - 'rrelu' : nn.RReLU, - 'tanhshrink' : nn.Tanhshrink, - 'hardtanh' : nn.Hardtanh, - 'elu' : nn.ELU, - 'prelu' : nn.PReLU, -} - -def get_activation(name, inplace=False): - if name not in all_activations: - raise ValueError('Activation ' + str(name) + ' not defined') - activation = all_activations[name] - activation_kwargs = { 'inplace': True } if 'inplace' in inspect.getfullargspec(activation)[0] else dict() - return activation(**activation_kwargs) diff --git a/autoPyTorch/components/networks/base_net.py b/autoPyTorch/components/networks/base_net.py deleted file mode 100644 index 154dc9af9..000000000 --- a/autoPyTorch/components/networks/base_net.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Parent Class of all Networks based on features. -""" - -import torch.nn as nn -from collections import OrderedDict -import ConfigSpace - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class BaseNet(nn.Module): - """ Parent class for all Networks""" - def __init__(self, config, in_features, out_features, final_activation): - """ - Initialize the BaseNet. 
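The topN helper in the removed standard_metrics.py relied on torch without importing it; a corrected, self-contained top-k accuracy along the same lines (illustrative name, logits of shape (batch, n_classes), integer class targets):

import torch

def topk_accuracy(logits, target, k=3):
    with torch.no_grad():
        _, pred = logits.topk(k, dim=1)                    # indices of the k highest logits
        correct = pred.eq(target.view(-1, 1)).any(dim=1)   # hit if the true class is among them
        return correct.float().mean().item() * 100.0

logits = torch.tensor([[2.0, 1.0, 0.1], [0.2, 0.3, 2.5]])
target = torch.tensor([1, 2])
print(topk_accuracy(logits, target, k=1))   # 50.0: only the second sample is a top-1 hit
print(topk_accuracy(logits, target, k=2))   # 100.0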
- """ - - super(BaseNet, self).__init__() - self.layers = nn.Sequential() - self.config = config - self.n_feats = in_features - self.n_classes = out_features - self.epochs_trained = 0 - self.budget_trained = 0 - self.stopped_early = False - self.last_compute_result = None - self.logs = [] - self.num_epochs_no_progress = 0 - self.current_best_epoch_performance = None - self.best_parameters = None - self.final_activation = final_activation - - def forward(self, x): - x = self.layers(x) - if not self.training and self.final_activation is not None: - x = self.final_activation(x) - return x - - def snapshot(self): - self.best_parameters = OrderedDict({key: value.cpu().clone() for key, value in self.state_dict().items()}) - - def load_snapshot(self): - if self.best_parameters is not None: - self.load_state_dict(self.best_parameters) - - @staticmethod - def get_config_space(): - return ConfigSpace.ConfigurationSpace() - - -class BaseFeatureNet(BaseNet): - """ Parent class for MlpNet, ResNet, ... Can use entity embedding for cagtegorical features""" - def __init__(self, config, in_features, out_features, embedding, final_activation): - """ - Initialize the BaseFeatureNet. - """ - - super(BaseFeatureNet, self).__init__(config, in_features, out_features, final_activation) - self.embedding = embedding - - def forward(self, x): - x = self.embedding(x) - return super(BaseFeatureNet, self).forward(x) - - -class BaseImageNet(BaseNet): - def __init__(self, config, in_features, out_features, final_activation): - super(BaseImageNet, self).__init__(config, in_features, out_features, final_activation) - - if len(in_features) == 2: - self.channels = 1 - self.iw = in_features[0] - self.ih = in_features[1] - if len(in_features) == 3: - self.channels = in_features[0] - self.iw = in_features[1] - self.ih = in_features[2] diff --git a/autoPyTorch/components/networks/feature/__init__.py b/autoPyTorch/components/networks/feature/__init__.py deleted file mode 100644 index aaa621e53..000000000 --- a/autoPyTorch/components/networks/feature/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from autoPyTorch.components.networks.feature.embedding import LearnedEntityEmbedding, NoEmbedding -from autoPyTorch.components.networks.feature.resnet import ResNet -from autoPyTorch.components.networks.feature.mlpnet import MlpNet -from autoPyTorch.components.networks.feature.shapedmlpnet import ShapedMlpNet -from autoPyTorch.components.networks.feature.shapedresnet import ShapedResNet \ No newline at end of file diff --git a/autoPyTorch/components/networks/feature/embedding.py b/autoPyTorch/components/networks/feature/embedding.py deleted file mode 100644 index 2e82d546f..000000000 --- a/autoPyTorch/components/networks/feature/embedding.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Class to learn an embedding for categorical hyperparameters. -""" - -import ConfigSpace as CS -import ConfigSpace.conditions as CSC -import ConfigSpace.hyperparameters as CSH -import torch -import torch.nn as nn -import numpy as np - -from autoPyTorch.utils.config_space_hyperparameter import get_hyperparameter, add_hyperparameter -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class LearnedEntityEmbedding(nn.Module): - """ Parent class for MlpNet, ResNet, ... 
Can use entity embedding for cagtegorical features""" - def __init__(self, config, in_features, one_hot_encoder): - """ - Initialize the BaseFeatureNet. - - Arguments: - config: The configuration sampled by the hyperparameter optimizer - in_features: the number of features of the dataset - one_hot_encoder: OneHot encoder, that is used to encode X - """ - super(LearnedEntityEmbedding, self).__init__() - self.config = config - self.n_feats = in_features - self.one_hot_encoder = one_hot_encoder - - self.num_numerical = len([f for f in one_hot_encoder.categorical_features if not f]) - self.num_input_features = [len(c) for c in one_hot_encoder.categories_] - self.embed_features = [num_in >= config["min_unique_values_for_embedding"] for num_in in self.num_input_features] - self.num_output_dimensions = [config["dimension_reduction_" + str(i)] * num_in for i, num_in in enumerate(self.num_input_features)] - self.num_output_dimensions = [int(np.clip(num_out, 1, num_in - 1)) for num_out, num_in in zip(self.num_output_dimensions, self.num_input_features)] - self.num_output_dimensions = [num_out if embed else num_in for num_out, embed, num_in in zip(self.num_output_dimensions, self.embed_features, self.num_input_features)] - self.num_out_feats = self.num_numerical + sum(self.num_output_dimensions) - - self.ee_layers = self._create_ee_layers(in_features) - - - def forward(self, x): - # pass the columns of each categorical feature through entity embedding layer - # before passing it through the model - concat_seq = [] - last_concat = 0 - x_pointer = 0 - layer_pointer = 0 - for num_in, embed in zip(self.num_input_features, self.embed_features): - if not embed: - x_pointer += 1 - continue - if x_pointer > last_concat: - concat_seq.append(x[:, last_concat : x_pointer]) - categorical_feature_slice = x[:, x_pointer : x_pointer + num_in] - concat_seq.append(self.ee_layers[layer_pointer](categorical_feature_slice)) - layer_pointer += 1 - x_pointer += num_in - last_concat = x_pointer - - concat_seq.append(x[:, last_concat:]) - return torch.cat(concat_seq, dim=1) - - def _create_ee_layers(self, in_features): - # entity embeding layers are Linear Layers - layers = nn.ModuleList() - for i, (num_in, embed, num_out) in enumerate(zip(self.num_input_features, self.embed_features, self.num_output_dimensions)): - if not embed: - continue - layers.append(nn.Linear(num_in, num_out)) - return layers - - @staticmethod - def get_config_space( - categorical_features=None, - min_unique_values_for_embedding=((3, 300), True), - dimension_reduction=(0, 1), - **kwargs - ): - # dimension of entity embedding layer is a hyperparameter - if categorical_features is None or not any(categorical_features): - return CS.ConfigurationSpace() - cs = CS.ConfigurationSpace() - min_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, "min_unique_values_for_embedding", min_unique_values_for_embedding) - cs.add_hyperparameter(min_hp) - for i in range(len([x for x in categorical_features if x])): - ee_dimensions_hp = get_hyperparameter(CSH.UniformFloatHyperparameter, "dimension_reduction_" + str(i), - kwargs.pop("dimension_reduction_" + str(i), dimension_reduction)) - cs.add_hyperparameter(ee_dimensions_hp) - assert len(kwargs) == 0, "Invalid hyperparameter updates for learned embedding: %s" % str(kwargs) - return cs - -class NoEmbedding(nn.Module): - def __init__(self, config, in_features, one_hot_encoder): - super(NoEmbedding, self).__init__() - self.config = config - self.n_feats = in_features - self.num_out_feats = self.n_feats - - def 
forward(self, x): - return x - - @staticmethod - def get_config_space(*args, **kwargs): - return CS.ConfigurationSpace() \ No newline at end of file diff --git a/autoPyTorch/components/networks/feature/mlpnet.py b/autoPyTorch/components/networks/feature/mlpnet.py deleted file mode 100644 index b0add95ac..000000000 --- a/autoPyTorch/components/networks/feature/mlpnet.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Basic Implementation of a multi layer perceptron. -""" - -from __future__ import division, print_function - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -import torch.nn as nn - -from autoPyTorch.components.networks.base_net import BaseFeatureNet -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class MlpNet(BaseFeatureNet): - activations = { - 'relu': nn.ReLU, - 'sigmoid': nn.Sigmoid, - 'tanh': nn.Tanh - } - - - def __init__(self, config, in_features, out_features, embedding, final_activation=None): - super(MlpNet, self).__init__(config, in_features, out_features, embedding, final_activation) - self.activation = self.activations[config['activation']] - self.layers = self._build_net(self.n_feats, self.n_classes) - - def _build_net(self, in_features, out_features): - layers = list() - self._add_layer(layers, in_features, self.config["num_units_1"], 1) - - for i in range(2, self.config["num_layers"] + 1): - self._add_layer(layers, self.config["num_units_%d" % (i-1)], self.config["num_units_%d" % i], i) - - layers.append(nn.Linear(self.config["num_units_%d" % self.config["num_layers"]], out_features)) - return nn.Sequential(*layers) - - def _add_layer(self, layers, in_features, out_features, layer_id): - layers.append(nn.Linear(in_features, out_features)) - layers.append(self.activation()) - if self.config["use_dropout"]: - layers.append(nn.Dropout(self.config["dropout_%d" % layer_id])) - - @staticmethod - def get_config_space( - num_layers=((1, 15), False), - num_units=((10, 1024), True), - activation=('sigmoid', 'tanh', 'relu'), - dropout=(0.0, 0.8), - use_dropout=(True, False), - **kwargs - ): - cs = CS.ConfigurationSpace() - - num_layers_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'num_layers', num_layers) - cs.add_hyperparameter(num_layers_hp) - use_dropout_hp = add_hyperparameter(cs, CS.CategoricalHyperparameter, "use_dropout", use_dropout) - - for i in range(1, num_layers[0][1] + 1): - n_units_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, "num_units_%d" % i, kwargs.pop("num_units_%d" % i, num_units)) - cs.add_hyperparameter(n_units_hp) - - if i > num_layers[0][0]: - cs.add_condition(CS.GreaterThanCondition(n_units_hp, num_layers_hp, i - 1)) - - if True in use_dropout: - dropout_hp = get_hyperparameter(CSH.UniformFloatHyperparameter, "dropout_%d" % i, kwargs.pop("dropout_%d" % i, dropout)) - cs.add_hyperparameter(dropout_hp) - dropout_condition_1 = CS.EqualsCondition(dropout_hp, use_dropout_hp, True) - - if i > num_layers[0][0]: - dropout_condition_2 = CS.GreaterThanCondition(dropout_hp, num_layers_hp, i - 1) - cs.add_condition(CS.AndConjunction(dropout_condition_1, dropout_condition_2)) - else: - cs.add_condition(dropout_condition_1) - - add_hyperparameter(cs, CSH.CategoricalHyperparameter,'activation', activation) - assert len(kwargs) == 0, "Invalid hyperparameter updates for mlpnet: %s" % str(kwargs) - return(cs) \ No newline at 
end of file diff --git a/autoPyTorch/components/networks/feature/resnet.py b/autoPyTorch/components/networks/feature/resnet.py deleted file mode 100644 index 74001e796..000000000 --- a/autoPyTorch/components/networks/feature/resnet.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Imlementation of a ResNet with feature data. -""" - -import ConfigSpace -import torch.nn as nn - -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter -from autoPyTorch.components.networks.base_net import BaseFeatureNet -from autoPyTorch.components.regularization.shake import (shake_drop, - shake_drop_get_bl, - shake_get_alpha_beta, - shake_shake) - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class ResNet(BaseFeatureNet): - activations = { - 'relu': nn.ReLU, - 'sigmoid': nn.Sigmoid, - 'tanh': nn.Tanh - } - - def __init__(self, config, in_features, out_features, embedding, final_activation=None): - super(ResNet, self).__init__(config, in_features, out_features, embedding, final_activation) - self.activation = self.activations[config['activation']] - self.layers = self._build_net(self.n_feats, self.n_classes) - - def _build_net(self, in_features, out_features): - - layers = list() - layers.append(nn.Linear(in_features, self.config["num_units_0"])) - - # build num_groups-1 groups each consisting of blocks_per_group ResBlocks - # the output features of each group is defined by num_units_i - for i in range(1, self.config["num_groups"] + 1): - layers.append(self._add_group( in_features=self.config["num_units_%d" % (i-1)], - out_features=self.config["num_units_%d" % i], - last_block_index=(i-1) * self.config["blocks_per_group"], - dropout=self.config["use_dropout"])) - - layers.append(nn.BatchNorm1d(self.config["num_units_%i" % self.config["num_groups"]])) - layers.append(self.activation()) - - layers.append(nn.Linear(self.config["num_units_%i" % self.config["num_groups"]], out_features)) - return nn.Sequential(*layers) - - # Stacking Residual Blocks on the same stage - def _add_group(self, in_features, out_features, last_block_index, dropout): - blocks = list() - blocks.append(ResBlock(self.config, in_features, out_features, last_block_index, dropout, self.activation)) - for i in range(1, self.config["blocks_per_group"]): - blocks.append(ResBlock(self.config, out_features, out_features, last_block_index+i, dropout, self.activation)) - return nn.Sequential(*blocks) - - @staticmethod - def get_config_space( - num_groups=((1, 9), False), - blocks_per_group=((1, 4), False), - num_units=((10, 1024), True), - activation=('sigmoid', 'tanh', 'relu'), - max_shake_drop_probability=(0, 1), - dropout=(0, 1.0), - use_shake_drop=(True, False), - use_shake_shake=(True, False), - use_dropout=(True, False), - **kwargs - ): - cs = ConfigSpace.ConfigurationSpace() - - num_groups_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, "num_groups", num_groups) - cs.add_hyperparameter(num_groups_hp) - blocks_per_group_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, "blocks_per_group", blocks_per_group) - cs.add_hyperparameter(blocks_per_group_hp) - add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter, "activation", activation) - - use_dropout_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, "use_dropout", use_dropout) - cs.add_hyperparameter(use_dropout_hp) - add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter, "use_shake_shake", 
use_shake_shake) - - use_shake_drop_hp = add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter, "use_shake_drop", use_shake_drop) - if True in use_shake_drop: - shake_drop_prob_hp = add_hyperparameter(cs, ConfigSpace.UniformFloatHyperparameter, "max_shake_drop_probability", - max_shake_drop_probability) - cs.add_condition(ConfigSpace.EqualsCondition(shake_drop_prob_hp, use_shake_drop_hp, True)) - - - # it is the upper bound of the nr of groups, since the configuration will actually be sampled. - for i in range(0, num_groups[0][1] + 1): - - n_units_hp = add_hyperparameter(cs, ConfigSpace.UniformIntegerHyperparameter, - "num_units_%d" % i, kwargs.pop("num_units_%d" % i, num_units)) - - if i > 1: - cs.add_condition(ConfigSpace.GreaterThanCondition(n_units_hp, num_groups_hp, i - 1)) - - if True in use_dropout: - dropout_hp = add_hyperparameter(cs, ConfigSpace.UniformFloatHyperparameter, - "dropout_%d" % i, kwargs.pop("dropout_%d" % i, dropout)) - dropout_condition_1 = ConfigSpace.EqualsCondition(dropout_hp, use_dropout_hp, True) - - if i > 1: - - dropout_condition_2 = ConfigSpace.GreaterThanCondition(dropout_hp, num_groups_hp, i - 1) - - cs.add_condition(ConfigSpace.AndConjunction(dropout_condition_1, dropout_condition_2)) - else: - cs.add_condition(dropout_condition_1) - assert len(kwargs) == 0, "Invalid hyperparameter updates for resnet: %s" % str(kwargs) - return cs - - -class ResBlock(nn.Module): - - def __init__(self, config, in_features, out_features, block_index, dropout, activation): - super(ResBlock, self).__init__() - self.config = config - self.dropout = dropout - self.activation = activation - - self.shortcut = None - self.start_norm = None - - # if in != out the shortcut needs a linear layer to match the result dimensions - # if the shortcut needs a layer we apply batchnorm and activation to the shortcut as well (start_norm) - if in_features != out_features: - self.shortcut = nn.Linear(in_features, out_features) - self.start_norm = nn.Sequential(nn.BatchNorm1d(in_features), self.activation()) - - self.block_index = block_index - self.num_blocks = self.config["blocks_per_group"] * self.config["num_groups"] - self.layers = self._build_block(in_features, out_features) - - if config["use_shake_shake"]: - self.shake_shake_layers = self._build_block(in_features, out_features) - - - # each bloack consists of two linear layers with batch norm and activation - def _build_block(self, in_features, out_features): - layers = list() - - if self.start_norm == None: - layers.append(nn.BatchNorm1d(in_features)) - layers.append(self.activation()) - layers.append(nn.Linear(in_features, out_features)) - - layers.append(nn.BatchNorm1d(out_features)) - layers.append(self.activation()) - - if (self.config["use_dropout"]): - layers.append(nn.Dropout(self.dropout)) - layers.append(nn.Linear(out_features, out_features)) - - return nn.Sequential(*layers) - - def forward(self, x): - residual = x - - # if shortcut is not none we need a layer such that x matches the output dimension - if self.shortcut != None: # in this case self.start_norm is also != none - # apply start_norm to x in order to have batchnorm+activation in front of shortcut and layers - # note that in this case layers does not start with batchnorm+activation but with the first linear layer (see _build_block) - # as a result if in_features == out_features -> result = x + W(~D(A(BN(W(A(BN(x)))))) - # if in_features != out_features -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x)))))) - x = self.start_norm(x) - residual = 
self.shortcut(x) - - if self.config["use_shake_shake"]: - x1 = self.layers(x) - x2 = self.shake_shake_layers(x) - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) - x = shake_shake(x1, x2, alpha, beta) - else: - x = self.layers(x) - - if self.config["use_shake_drop"]: - alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) - bl = shake_drop_get_bl(self.block_index, 1 - self.config["max_shake_drop_probability"], self.num_blocks, self.training, x.is_cuda) - x = shake_drop(x, alpha, beta, bl) - - x = x + residual - return x diff --git a/autoPyTorch/components/networks/feature/shapedmlpnet.py b/autoPyTorch/components/networks/feature/shapedmlpnet.py deleted file mode 100644 index 3d515b0d0..000000000 --- a/autoPyTorch/components/networks/feature/shapedmlpnet.py +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Multilayer Perceptrons in fancy shapes. -""" - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -import torch.nn as nn - -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter -from autoPyTorch.components.networks.feature.mlpnet import MlpNet - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class ShapedMlpNet(MlpNet): - def __init__(self, *args, **kwargs): - super(ShapedMlpNet, self).__init__(*args, **kwargs) - - def _build_net(self, in_features, out_features): - layers = list() - neuron_counts = get_shaped_neuron_counts(self.config['mlp_shape'], - in_features, - out_features, - self.config['max_units'], - self.config['num_layers']) - if self.config["use_dropout"] and self.config["max_dropout"]>0.05: - dropout_shape = get_shaped_neuron_counts( self.config['mlp_shape'], 0, 0, 1000, self.config['num_layers']) - - previous = in_features - for i in range(self.config['num_layers']-1): - if (i >= len(neuron_counts)): - break - dropout = dropout_shape[i] / 1000 * self.config["max_dropout"] if (self.config["use_dropout"] and self.config["max_dropout"]>0.05) else 0 - self._add_layer(layers, previous, neuron_counts[i], dropout) - previous = neuron_counts[i] - - layers.append(nn.Linear(previous, out_features)) - return nn.Sequential(*layers) - - def _add_layer(self, layers, in_features, out_features, dropout): - layers.append(nn.Linear(in_features, out_features)) - layers.append(self.activation()) - if self.config["use_dropout"] and self.config["max_dropout"]>0.05: - layers.append(nn.Dropout(dropout)) - - @staticmethod - - def get_config_space( - num_layers=(1, 15), - max_units=((10, 1024), True), - activation=('sigmoid', 'tanh', 'relu'), - mlp_shape=('funnel', 'long_funnel', 'diamond', 'hexagon', 'brick', 'triangle', 'stairs'), - max_dropout=(0, 1.0), - use_dropout=(True, False) - ): - cs = CS.ConfigurationSpace() - - mlp_shape_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'mlp_shape', mlp_shape) - cs.add_hyperparameter(mlp_shape_hp) - - num_layers_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, 'num_layers', num_layers) - cs.add_hyperparameter(num_layers_hp) - max_units_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, "max_units", max_units) - cs.add_hyperparameter(max_units_hp) - - use_dropout_hp = add_hyperparameter(cs, CS.CategoricalHyperparameter, "use_dropout", use_dropout) - - max_dropout_hp = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, "max_dropout", max_dropout) - cs.add_condition(CS.EqualsCondition(max_dropout_hp, use_dropout_hp, True)) - - add_hyperparameter(cs, 
CSH.CategoricalHyperparameter, 'activation', activation) - return cs - - -def get_shaped_neuron_counts(shape, in_feat, out_feat, max_neurons, layer_count): - counts = [] - - if (layer_count <= 0): - return counts - - if (layer_count == 1): - counts.append(out_feat) - return counts - - max_neurons = max(in_feat, max_neurons) - # https://mikkokotila.github.io/slate/#shapes - - if shape == 'brick': - # - # | | - # | | - # | | - # | | - # | | - # |___ ___| - # - for _ in range(layer_count-1): - counts.append(max_neurons) - counts.append(out_feat) - - if shape == 'triangle': - # - # / \ - # / \ - # / \ - # / \ - # / \ - # /_____ _____\ - # - previous = in_feat - step_size = int((max_neurons - previous) / (layer_count-1)) - step_size = max(0, step_size) - for _ in range(layer_count-2): - previous = previous + step_size - counts.append(previous) - counts.append(max_neurons) - counts.append(out_feat) - - if shape == 'funnel': - # - # \ / - # \ / - # \ / - # \ / - # \ / - # \ / - # - previous = max_neurons - counts.append(previous) - - step_size = int((previous - out_feat) / (layer_count-1)) - step_size = max(0, step_size) - for _ in range(layer_count-2): - previous = previous - step_size - counts.append(previous) - - counts.append(out_feat) - - if shape == 'long_funnel': - # - # | | - # | | - # | | - # \ / - # \ / - # \ / - # - brick_layer = int(layer_count / 2) - funnel_layer = layer_count - brick_layer - counts.extend(get_shaped_neuron_counts('brick', in_feat, max_neurons, max_neurons, brick_layer)) - counts.extend(get_shaped_neuron_counts('funnel', in_feat, out_feat, max_neurons, funnel_layer)) - - if (len(counts) != layer_count): - print("\nWarning: long funnel layer count does not match " + str(layer_count) + " != " + str(len(counts)) + "\n") - - if shape == 'diamond': - # - # / \ - # / \ - # / \ - # \ / - # \ / - # \ / - # - triangle_layer = int(layer_count / 2) + 1 - funnel_layer = layer_count - triangle_layer - counts.extend(get_shaped_neuron_counts('triangle', in_feat, max_neurons, max_neurons, triangle_layer)) - remove_triangle_layer = len(counts) > 1 - if (remove_triangle_layer): - counts = counts[0:-2] # remove the last two layers since max_neurons == out_features (-> two layers with the same size) - counts.extend(get_shaped_neuron_counts('funnel', max_neurons, out_feat, max_neurons, funnel_layer + (2 if remove_triangle_layer else 0))) - - if (len(counts) != layer_count): - print("\nWarning: diamond layer count does not match " + str(layer_count) + " != " + str(len(counts)) + "\n") - - if shape == 'hexagon': - # - # / \ - # / \ - # | | - # | | - # \ / - # \ / - # - triangle_layer = int(layer_count / 3) + 1 - funnel_layer = triangle_layer - brick_layer = layer_count - triangle_layer - funnel_layer - counts.extend(get_shaped_neuron_counts('triangle', in_feat, max_neurons, max_neurons, triangle_layer)) - counts.extend(get_shaped_neuron_counts('brick', max_neurons, max_neurons, max_neurons, brick_layer)) - counts.extend(get_shaped_neuron_counts('funnel', max_neurons, out_feat, max_neurons, funnel_layer)) - - if (len(counts) != layer_count): - print("\nWarning: hexagon layer count does not match " + str(layer_count) + " != " + str(len(counts)) + "\n") - - if shape == 'stairs': - # - # | | - # |_ _| - # | | - # |_ _| - # | | - # | | - # - previous = max_neurons - counts.append(previous) - - if layer_count % 2 == 1: - counts.append(previous) - - step_size = 2 * int((max_neurons - out_feat) / (layer_count-1)) - step_size = max(0, step_size) - for _ in range(int(layer_count / 2 - 1)): - 
previous = previous - step_size - counts.append(previous) - counts.append(previous) - - counts.append(out_feat) - - if (len(counts) != layer_count): - print("\nWarning: stairs layer count does not match " + str(layer_count) + " != " + str(len(counts)) + "\n") - - return counts diff --git a/autoPyTorch/components/networks/feature/shapedresnet.py b/autoPyTorch/components/networks/feature/shapedresnet.py deleted file mode 100644 index 8c12f848d..000000000 --- a/autoPyTorch/components/networks/feature/shapedresnet.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -ResNets in fancy shapes. -""" - -from copy import deepcopy - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH - -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter -from autoPyTorch.components.networks.feature.resnet import ResNet -from autoPyTorch.components.networks.feature.shapedmlpnet import get_shaped_neuron_counts - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class ShapedResNet(ResNet): - def __init__(self, config, in_features, out_features, *args, **kwargs): - augmented_config = deepcopy(config) - - neuron_counts = get_shaped_neuron_counts(config['resnet_shape'], - in_features, - out_features, - config['max_units'], - config['num_groups']+2)[:-1] - augmented_config.update( - {"num_units_%d" % (i) : num for i, num in enumerate(neuron_counts)}) - - - if (config['use_dropout'] and config["max_dropout"]>0.05): - dropout_shape = get_shaped_neuron_counts(config['resnet_shape'], 0, 0, 1000, config['num_groups']) - - dropout_shape = [dropout / 1000 * config["max_dropout"] for dropout in dropout_shape] - - augmented_config.update( - {"dropout_%d" % (i+1) : dropout for i, dropout in enumerate(dropout_shape)}) - - super(ShapedResNet, self).__init__(augmented_config, in_features, out_features, *args, **kwargs) - - - @staticmethod - def get_config_space( - num_groups=(1, 9), - blocks_per_group=(1, 4), - max_units=((10, 1024), True), - activation=('sigmoid', 'tanh', 'relu'), - max_shake_drop_probability=(0, 1), - max_dropout=(0, 1.0), - resnet_shape=('funnel', 'long_funnel', 'diamond', 'hexagon', 'brick', 'triangle', 'stairs'), - use_dropout=(True, False), - use_shake_shake=(True, False), - use_shake_drop=(True, False) - ): - cs = CS.ConfigurationSpace() - - num_groups_hp = get_hyperparameter(CS.UniformIntegerHyperparameter, "num_groups", num_groups) - cs.add_hyperparameter(num_groups_hp) - blocks_per_group_hp = get_hyperparameter(CS.UniformIntegerHyperparameter, "blocks_per_group", blocks_per_group) - cs.add_hyperparameter(blocks_per_group_hp) - add_hyperparameter(cs, CS.CategoricalHyperparameter, "activation", activation) - use_dropout_hp = add_hyperparameter(cs, CS.CategoricalHyperparameter, "use_dropout", use_dropout) - add_hyperparameter(cs, CS.CategoricalHyperparameter, "use_shake_shake", use_shake_shake) - - shake_drop_hp = add_hyperparameter(cs, CS.CategoricalHyperparameter, "use_shake_drop", use_shake_drop) - if True in use_shake_drop: - shake_drop_prob_hp = add_hyperparameter(cs, CS.UniformFloatHyperparameter, "max_shake_drop_probability", - max_shake_drop_probability) - cs.add_condition(CS.EqualsCondition(shake_drop_prob_hp, shake_drop_hp, True)) - - add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'resnet_shape', resnet_shape) - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, "max_units", max_units) - - if True in use_dropout: - max_dropout_hp = 
add_hyperparameter(cs, CSH.UniformFloatHyperparameter, "max_dropout", max_dropout) - cs.add_condition(CS.EqualsCondition(max_dropout_hp, use_dropout_hp, True)) - - return cs diff --git a/autoPyTorch/components/networks/image/__init__.py b/autoPyTorch/components/networks/image/__init__.py deleted file mode 100644 index 9a0900524..000000000 --- a/autoPyTorch/components/networks/image/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from autoPyTorch.components.networks.image.convnet import ConvNet -from autoPyTorch.components.networks.image.densenet import DenseNet -from autoPyTorch.components.networks.image.resnet import ResNet -from autoPyTorch.components.networks.image.mobilenet import MobileNet diff --git a/autoPyTorch/components/networks/image/convnet.py b/autoPyTorch/components/networks/image/convnet.py deleted file mode 100644 index b87bc0fd0..000000000 --- a/autoPyTorch/components/networks/image/convnet.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Basic Implementation of a convolutional network. -""" - -from __future__ import division, print_function - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -import torch.nn as nn - -from autoPyTorch.components.networks.base_net import BaseImageNet - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -class ConvNet(BaseImageNet): - def __init__(self, config, in_features, out_features, final_activation, *args, **kwargs): - super(ConvNet, self).__init__(config, in_features, out_features, final_activation) - self.layers = self._build_net(self.n_classes) - - - def forward(self, x): - x = self.layers(x) - x = x.reshape(x.size(0), -1) - x = self.last_layer(x) - if not self.training and self.final_activation is not None: - x = self.final_activation(x) - return x - - def _build_net(self, out_features): - layers = list() - init_filter = self.config["conv_init_filters"] - self._add_layer(layers, self.channels, init_filter, 1) - - cw, ch = self._get_layer_size(self.iw, self.ih) - self.dense_size = init_filter * cw * ch - print(cw, ch, self.dense_size) - for i in range(2, self.config["num_layers"]+1): - cw, ch = self._get_layer_size(cw, ch) - if cw == 0 or ch == 0: - print("> reduce network size due to too small layers.") - break - self._add_layer(layers, init_filter, init_filter * 2, i) - init_filter *= 2 - self.dense_size = init_filter * cw * ch - print(cw, ch, self.dense_size) - - self.last_layer = nn.Linear(self.dense_size, out_features) - nw = nn.Sequential(*layers) - #print(nw) - return nw - - def _get_layer_size(self, w, h): - cw = ((w - self.config["conv_kernel_size"] + 2 * self.config["conv_kernel_padding"]) - //self.config["conv_kernel_stride"]) + 1 - ch = ((h - self.config["conv_kernel_size"] + 2 * self.config["conv_kernel_padding"]) - //self.config["conv_kernel_stride"]) + 1 - cw, ch = cw // self.config["pool_size"], ch // self.config["pool_size"] - return cw, ch - - def _add_layer(self, layers, in_filters, out_filters, layer_id): - layers.append(nn.Conv2d(in_filters, out_filters, - kernel_size=self.config["conv_kernel_size"], - stride=self.config["conv_kernel_stride"], - padding=self.config["conv_kernel_padding"])) - layers.append(nn.BatchNorm2d(out_filters)) - layers.append(self.activation()) - layers.append(nn.MaxPool2d(kernel_size=self.config["pool_size"], stride=self.config["pool_size"])) - - @staticmethod - def get_config_space(user_updates=None): - cs = CS.ConfigurationSpace() - - 
cs.add_hyperparameter(CSH.CategoricalHyperparameter('activation', ['relu'])) #'sigmoid', 'tanh', - num_layers = CSH.UniformIntegerHyperparameter('num_layers', lower=2, upper=5) - cs.add_hyperparameter(num_layers) - cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('conv_init_filters', lower=16, upper=64)) - cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('conv_kernel_size', lower=1, upper=5)) - cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('conv_kernel_stride', lower=1, upper=3)) - cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('conv_kernel_padding', lower=2, upper=3)) - cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('pool_size', lower=2, upper=3)) - - return(cs) diff --git a/autoPyTorch/components/networks/image/darts/darts_worker.py b/autoPyTorch/components/networks/image/darts/darts_worker.py deleted file mode 100644 index c4c4b882c..000000000 --- a/autoPyTorch/components/networks/image/darts/darts_worker.py +++ /dev/null @@ -1,114 +0,0 @@ -import os -import time -import argparse -#from copy import copy, deepcopy - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -from hpbandster.core.worker import Worker -# from .helper import darts_cifar10 - - -PRIMITIVES = [ - #'none', - 'max_pool_3x3', - 'avg_pool_3x3', - 'skip_connect', - 'sep_conv_3x3', - 'sep_conv_5x5', - 'dil_conv_3x3', - 'dil_conv_5x5', -] - - -class DARTSWorker(Worker): - # def __init__(self, *args, **kwargs): - # super().__init__(*args, **kwargs) - # #self.darts_mainsourcepath = '/home/zelaa/Thesis/bohb-darts/workers/lib' - # self.darts_path = os.getcwd() + '/workers/lib/darts_space' - - # def compute(self, config, budget, config_id, working_directory): - # return darts_cifar10(config=config, - # budget=int(budget), - # config_id=config_id, - # directory=working_directory, - # darts_source=self.darts_path) - - @staticmethod - def get_config_space(): - config_space = CS.ConfigurationSpace() - - # here we instantiate one categorical hyperparameter for each edge in - # the DARTS cell - for i in range(14): - config_space.add_hyperparameter(CSH.CategoricalHyperparameter('edge_normal_{}'.format(i), - PRIMITIVES)) - config_space.add_hyperparameter(CSH.CategoricalHyperparameter('edge_reduce_{}'.format(i), - PRIMITIVES)) - # for the intermediate node 2 we add directly the two incoming edges to - # the config_space. All nodes are topologicaly sorted and the labels 0 - # and 1 correspond to the 2 input nodes of the cell. nodes 2, 3, 4, 5 - # are intermediate nodes. 
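# A minimal decoding sketch (standalone, not part of the removed worker) of the
# encoding described in this comment, assuming a hypothetical sampled
# configuration: 'inputs_node_normal_3' selects the two predecessor nodes of
# intermediate node 3, and the 'edge_normal_<k>' hyperparameters kept active by
# the conditions below select the operation on each chosen edge.
sample = {
    'inputs_node_normal_3': '0_2',    # node 3 receives input from nodes 0 and 2
    'edge_normal_2': 'sep_conv_3x3',  # operation on edge 0 -> 3
    'edge_normal_4': 'skip_connect',  # operation on edge 2 -> 3
}
predecessors = [int(p) for p in sample['inputs_node_normal_3'].split('_')]
print(predecessors)                   # [0, 2]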
We define below a CategoricalHyperparameter - # for nodes 3, 4, 5 with each category representing two possible - # predecesor nodes indices (for node 2 there is only one possibility) - pred_nodes = {'3': ['0_1', '0_2', '1_2'], - '4': ['0_1', '0_2', '0_3', '1_2', '1_3', '2_3'], - '5': ['0_1', '0_2', '0_3', '0_4', '1_2', '1_3', '1_4', - '2_3', '2_4', '3_4'] - } - - for i in range(3, 6): - config_space.add_hyperparameter(CSH.CategoricalHyperparameter('inputs_node_normal_{}'.format(i), - pred_nodes[str(i)])) - config_space.add_hyperparameter(CSH.CategoricalHyperparameter('inputs_node_reduce_{}'.format(i), - pred_nodes[str(i)])) - - config_space.add_hyperparameter(CSH.Constant('layers', 20)) - config_space.add_hyperparameter(CSH.Constant('init_channels', 36)) - config_space.add_hyperparameter(CSH.Constant('drop_path_prob', 0.1)) - config_space.add_hyperparameter(CSH.CategoricalHyperparameter('auxiliary', [False])) - - # now we define the conditions constraining the inclusion of the edges - # on the optimization in order to be consistent with the DARTS original - # search space - for cell_type in ['normal', 'reduce']: - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_2'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_3'.format(cell_type)), - values=['0_1', '0_2'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_3'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_3'.format(cell_type)), - values=['0_1', '1_2'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_4'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_3'.format(cell_type)), - values=['0_2', '1_2'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_5'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_4'.format(cell_type)), - values=['0_1', '0_2', '0_3'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_6'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_4'.format(cell_type)), - values=['0_1', '1_2', '1_3'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_7'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_4'.format(cell_type)), - values=['0_2', '1_2', '2_3'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_8'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_4'.format(cell_type)), - values=['0_3', '1_3', '2_3'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_9'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_5'.format(cell_type)), - values=['0_1', '0_2', '0_3', '0_4'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_10'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_5'.format(cell_type)), - values=['0_1', '1_2', '1_3', '1_4'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_11'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_5'.format(cell_type)), - values=['0_2', '1_2', '2_3', '2_4'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_12'.format(cell_type)), - 
parent=config_space.get_hyperparameter('inputs_node_{}_5'.format(cell_type)), - values=['0_3', '1_3', '2_3', '3_4'])) - config_space.add_condition(CS.InCondition(child=config_space.get_hyperparameter('edge_{}_13'.format(cell_type)), - parent=config_space.get_hyperparameter('inputs_node_{}_5'.format(cell_type)), - values=['0_4', '1_4', '2_4', '3_4'])) - - return config_space - - diff --git a/autoPyTorch/components/networks/image/darts/genotypes.py b/autoPyTorch/components/networks/image/darts/genotypes.py deleted file mode 100644 index 2a46099fc..000000000 --- a/autoPyTorch/components/networks/image/darts/genotypes.py +++ /dev/null @@ -1,98 +0,0 @@ -from functools import wraps -from collections import namedtuple -import random -import sys - -Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat') - -PRIMITIVES = [ - #'none', - 'max_pool_3x3', - 'avg_pool_3x3', - 'skip_connect', - 'sep_conv_3x3', - 'sep_conv_5x5', - 'dil_conv_3x3', - 'dil_conv_5x5' -] - -DARTS = Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), - ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), - ('sep_conv_3x3', 1), ('skip_connect', 0), - ('skip_connect', 0), ('dil_conv_3x3', 2)], - normal_concat=[2, 3, 4, 5], reduce=[('max_pool_3x3', 0), - ('max_pool_3x3', 1), - ('skip_connect', 2), - ('max_pool_3x3', 1), - ('max_pool_3x3', 0), - ('skip_connect', 2), - ('skip_connect', 2), - ('max_pool_3x3', 1)], - reduce_concat=[2, 3, 4, 5]) - - -def generate_genotype(gene_function): - @wraps(gene_function) - def wrapper(config=None, steps=4): - concat = range(2, 6) - gene_normal, gene_reduce = gene_function(config, steps).values() - genotype = Genotype( - normal=gene_normal, normal_concat=concat, - reduce=gene_reduce, reduce_concat=concat - ) - return genotype - return wrapper - - -@generate_genotype -def get_gene_from_config(config, steps=4): - gene = {'normal': [], 'reduce': []} - - # node 2 - for cell_type in gene.keys(): - first_edge = (config['edge_{}_0'.format(cell_type)], 0) - second_edge = (config['edge_{}_1'.format(cell_type)], 1) - gene[cell_type].append(first_edge) - gene[cell_type].append(second_edge) - - # nodes 3, 4, 5 - for i, offset in zip(range(3, steps+2), [2, 5, 9]): - for cell_type in gene.keys(): - input_nodes = config['inputs_node_{}_{}'.format(cell_type, i)].split('_') - for node in input_nodes: - edge = (config['edge_{}_{}'.format(cell_type, int(node)+offset)], - int(node)) - gene[cell_type].append(edge) - return gene - - -@generate_genotype -def random_gene(config=None, steps=4): - gene = {'normal': [], 'reduce': []} - - n = 1 - for i in range(steps): - for cell_type in gene.keys(): - first_edge = (random.choice(PRIMITIVES), - random.randint(0, n)) - second_edge = (random.choice(PRIMITIVES), - random.randint(0, n)) - - gene[cell_type].append(first_edge) - gene[cell_type].append(second_edge) - n += 1 - return gene - - -if __name__ == '__main__': - if len(sys.argv) != 2: - print("usage:\n python {} CONFIGS".format(sys.argv[0])) - sys.exit(1) - - with open('genotypes.py', 'a') as f: - _nr_random_genes = sys.argv[1] - for i in range(int(_nr_random_genes)): - gene = random_gene() - f.write('DARTS_%d = %s'%(i, gene)) - f.write('\n') - print(gene) diff --git a/autoPyTorch/components/networks/image/darts/model.py b/autoPyTorch/components/networks/image/darts/model.py deleted file mode 100644 index 2fa9d332f..000000000 --- a/autoPyTorch/components/networks/image/darts/model.py +++ /dev/null @@ -1,238 +0,0 @@ -import torch -import torch.nn as nn -from .operations import * -from .utils import 
drop_path - - -class Cell(nn.Module): - - def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev): - super(Cell, self).__init__() - # print(C_prev_prev, C_prev, C) - - if reduction_prev: - self.preprocess0 = FactorizedReduce(C_prev_prev, C) - else: - self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0) - self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0) - - if reduction: - op_names, indices = zip(*genotype.reduce) - concat = genotype.reduce_concat - else: - op_names, indices = zip(*genotype.normal) - concat = genotype.normal_concat - self._compile(C, op_names, indices, concat, reduction) - - def _compile(self, C, op_names, indices, concat, reduction): - assert len(op_names) == len(indices) - self._steps = len(op_names) // 2 - self._concat = concat - self.multiplier = len(concat) - - self._ops = nn.ModuleList() - for name, index in zip(op_names, indices): - stride = 2 if reduction and index < 2 else 1 - op = OPS[name](C, stride, True) - self._ops += [op] - self._indices = indices - - def forward(self, s0, s1, drop_prob): - s0 = self.preprocess0(s0) - s1 = self.preprocess1(s1) - - states = [s0, s1] - for i in range(self._steps): - h1 = states[self._indices[2*i]] - h2 = states[self._indices[2*i+1]] - op1 = self._ops[2*i] - op2 = self._ops[2*i+1] - h1 = op1(h1) - h2 = op2(h2) - if self.training and drop_prob > 0.: - if not isinstance(op1, Identity): - h1 = drop_path(h1, drop_prob) - if not isinstance(op2, Identity): - h2 = drop_path(h2, drop_prob) - s = h1 + h2 - states += [s] - return torch.cat([states[i] for i in self._concat], dim=1) - - -class AuxiliaryHeadCIFAR(nn.Module): - - def __init__(self, C, num_classes): - """assuming input size 8x8""" - super(AuxiliaryHeadCIFAR, self).__init__() - self.features = nn.Sequential( - nn.ReLU(inplace=True), - nn.AvgPool2d(5, stride=3, padding=0, count_include_pad=False), # image size = 2 x 2 - nn.Conv2d(C, 128, 1, bias=False), - nn.BatchNorm2d(128), - nn.ReLU(inplace=True), - nn.Conv2d(128, 768, 2, bias=False), - nn.BatchNorm2d(768), - nn.ReLU(inplace=True) - ) - self.classifier = nn.Linear(768, num_classes) - - def forward(self, x): - x = self.features(x) - x = self.classifier(x.view(x.size(0),-1)) - return x - - -class AuxiliaryHeadImageNet(nn.Module): - - def __init__(self, C, num_classes): - """assuming input size 14x14""" - super(AuxiliaryHeadImageNet, self).__init__() - self.features = nn.Sequential( - nn.ReLU(inplace=True), - nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False), - nn.Conv2d(C, 128, 1, bias=False), - nn.BatchNorm2d(128), - nn.ReLU(inplace=True), - nn.Conv2d(128, 768, 2, bias=False), - # NOTE: This batchnorm was omitted in my earlier implementation due to a typo. - # Commenting it out for consistency with the experiments in the paper. 
- # nn.BatchNorm2d(768), - nn.ReLU(inplace=True) - ) - self.classifier = nn.Linear(768, num_classes) - - def forward(self, x): - x = self.features(x) - x = self.classifier(x.view(x.size(0),-1)) - return x - - -from autoPyTorch.components.networks.base_net import BaseImageNet -class NetworkCIFAR(BaseImageNet): - - def __init__(self, C, num_classes, layers, auxiliary, genotype): - #super(NetworkCIFAR, self).__init__() - self._layers = layers - self._auxiliary = auxiliary - - stem_multiplier = 3 - C_curr = stem_multiplier*C - self.stem = nn.Sequential( - nn.Conv2d(3, C_curr, 3, padding=1, bias=False), - nn.BatchNorm2d(C_curr) - ) - - C_prev_prev, C_prev, C_curr = C_curr, C_curr, C - self.cells = nn.ModuleList() - reduction_prev = False - for i in range(layers): - if i in [layers//3, 2*layers//3]: - C_curr *= 2 - reduction = True - else: - reduction = False - cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev) - reduction_prev = reduction - self.cells += [cell] - C_prev_prev, C_prev = C_prev, cell.multiplier*C_curr - if i == 2*layers//3: - C_to_auxiliary = C_prev - - if auxiliary: - self.auxiliary_head = AuxiliaryHeadCIFAR(C_to_auxiliary, num_classes) - self.global_pooling = nn.AdaptiveAvgPool2d(1) - self.classifier = nn.Linear(C_prev, num_classes) - - def forward(self, input): - logits_aux = None - s0 = s1 = self.stem(input) - for i, cell in enumerate(self.cells): - s0, s1 = s1, cell(s0, s1, self.drop_path_prob) - if i == 2*self._layers//3: - if self._auxiliary and self.training: - logits_aux = self.auxiliary_head(s1) - out = self.global_pooling(s1) - logits = self.classifier(out.view(out.size(0),-1)) - return logits#, logits_aux - - - -class NetworkImageNet(BaseImageNet): - - def __init__(self, C, num_classes, layers, auxiliary, genotype): - # super(NetworkImageNet, self).__init__() - self._layers = layers - self._auxiliary = auxiliary - - self.stem0 = nn.Sequential( - nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False), - nn.BatchNorm2d(C // 2), - nn.ReLU(inplace=True), - nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False), - nn.BatchNorm2d(C), - ) - - self.stem1 = nn.Sequential( - nn.ReLU(inplace=True), - nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False), - nn.BatchNorm2d(C), - ) - - C_prev_prev, C_prev, C_curr = C, C, C - - self.cells = nn.ModuleList() - reduction_prev = True - for i in range(layers): - if i in [layers // 3, 2 * layers // 3]: - C_curr *= 2 - reduction = True - else: - reduction = False - cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev) - reduction_prev = reduction - self.cells += [cell] - C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr - if i == 2 * layers // 3: - C_to_auxiliary = C_prev - - if auxiliary: - self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes) - self.global_pooling = nn.AdaptiveAvgPool2d((1, 1)) - self.classifier = nn.Linear(C_prev, num_classes) - - def forward(self, input): - logits_aux = None - s0 = self.stem0(input) - s1 = self.stem1(s0) - for i, cell in enumerate(self.cells): - s0, s1 = s1, cell(s0, s1, self.drop_path_prob) - if i == 2 * self._layers // 3: - if self._auxiliary and self.training: - logits_aux = self.auxiliary_head(s1) - out = self.global_pooling(s1) - logits = self.classifier(out.view(out.size(0), -1)) - return logits#, logits_aux - - -from .genotypes import get_gene_from_config -from .darts_worker import DARTSWorker -class DARTSImageNet(NetworkCIFAR): # use cifar10 base as we train ImageNet mostly with 64x64 images - def 
__init__(self, config, in_features, out_features, final_activation, **kwargs): - super(NetworkCIFAR, self).__init__(config, in_features, out_features, final_activation) - - self.drop_path_prob = config['drop_path_prob'] - topology = {key: config[key] for key in config if ('edge' in key) or ('inputs_node' in key)} - genotype = get_gene_from_config(topology) - super(DARTSImageNet, self).__init__(config['init_channels'], out_features, config['layers'], config['auxiliary'], genotype) - - def forward(self, x): - x = super(DARTSImageNet, self).forward(x) - - if not self.training and self.final_activation is not None: - x = self.final_activation(x) - return x - - @staticmethod - def get_config_space(**kwargs): - return DARTSWorker.get_config_space() - diff --git a/autoPyTorch/components/networks/image/darts/operations.py b/autoPyTorch/components/networks/image/darts/operations.py deleted file mode 100644 index b0c62c575..000000000 --- a/autoPyTorch/components/networks/image/darts/operations.py +++ /dev/null @@ -1,105 +0,0 @@ -import torch -import torch.nn as nn - -OPS = { - 'none' : lambda C, stride, affine: Zero(stride), - 'avg_pool_3x3' : lambda C, stride, affine: nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False), - 'max_pool_3x3' : lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1), - 'skip_connect' : lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine), - 'sep_conv_3x3' : lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine), - 'sep_conv_5x5' : lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine), - 'sep_conv_7x7' : lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine), - 'dil_conv_3x3' : lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine), - 'dil_conv_5x5' : lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine), - 'conv_7x1_1x7' : lambda C, stride, affine: nn.Sequential( - nn.ReLU(inplace=False), - nn.Conv2d(C, C, (1,7), stride=(1, stride), padding=(0, 3), bias=False), - nn.Conv2d(C, C, (7,1), stride=(stride, 1), padding=(3, 0), bias=False), - nn.BatchNorm2d(C, affine=affine) - ), -} - -class ReLUConvBN(nn.Module): - - def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True): - super(ReLUConvBN, self).__init__() - self.op = nn.Sequential( - nn.ReLU(inplace=False), - nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False), - nn.BatchNorm2d(C_out, affine=affine) - ) - - def forward(self, x): - return self.op(x) - -class DilConv(nn.Module): - - def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True): - super(DilConv, self).__init__() - self.op = nn.Sequential( - nn.ReLU(inplace=False), - nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False), - nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False), - nn.BatchNorm2d(C_out, affine=affine), - ) - - def forward(self, x): - return self.op(x) - - -class SepConv(nn.Module): - - def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True): - super(SepConv, self).__init__() - self.op = nn.Sequential( - nn.ReLU(inplace=False), - nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False), - nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False), - nn.BatchNorm2d(C_in, affine=affine), - nn.ReLU(inplace=False), - nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1, padding=padding, 
groups=C_in, bias=False), - nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False), - nn.BatchNorm2d(C_out, affine=affine), - ) - - def forward(self, x): - return self.op(x) - - -class Identity(nn.Module): - - def __init__(self): - super(Identity, self).__init__() - - def forward(self, x): - return x - - -class Zero(nn.Module): - - def __init__(self, stride): - super(Zero, self).__init__() - self.stride = stride - - def forward(self, x): - if self.stride == 1: - return x.mul(0.) - return x[:,:,::self.stride,::self.stride].mul(0.) - - -class FactorizedReduce(nn.Module): - - def __init__(self, C_in, C_out, affine=True): - super(FactorizedReduce, self).__init__() - assert C_out % 2 == 0 - self.relu = nn.ReLU(inplace=False) - self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False) - self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False) - self.bn = nn.BatchNorm2d(C_out, affine=affine) - - def forward(self, x): - x = self.relu(x) - out = torch.cat([self.conv_1(x), self.conv_2(x[:,:,1:,1:])], dim=1) - out = self.bn(out) - return out - diff --git a/autoPyTorch/components/networks/image/darts/utils.py b/autoPyTorch/components/networks/image/darts/utils.py deleted file mode 100644 index fd5081dbd..000000000 --- a/autoPyTorch/components/networks/image/darts/utils.py +++ /dev/null @@ -1,166 +0,0 @@ -import os -import numpy as np -import torch -import shutil -import torchvision.transforms as transforms -from torch.autograd import Variable - - -class AvgrageMeter(object): - - def __init__(self): - self.reset() - - def reset(self): - self.avg = 0 - self.sum = 0 - self.cnt = 0 - - def update(self, val, n=1): - self.sum += val * n - self.cnt += n - self.avg = self.sum / self.cnt - - -def accuracy(output, target, topk=(1,)): - maxk = max(topk) - batch_size = target.size(0) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].view(-1).float().sum(0) - res.append(correct_k.mul_(100.0/batch_size)) - return res - - -class Cutout(object): - def __init__(self, length): - self.length = length - - def __call__(self, img): - h, w = img.size(1), img.size(2) - mask = np.ones((h, w), np.float32) - y = np.random.randint(h) - x = np.random.randint(w) - - y1 = np.clip(y - self.length // 2, 0, h) - y2 = np.clip(y + self.length // 2, 0, h) - x1 = np.clip(x - self.length // 2, 0, w) - x2 = np.clip(x + self.length // 2, 0, w) - - mask[y1: y2, x1: x2] = 0. 
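# A minimal numeric sketch (standalone, not part of the removed module) of the
# masking step above, assuming a hypothetical 6x6 image, a cutout length of 4
# and a sampled centre (y=1, x=5): the square is clipped at the image border,
# so fewer than length**2 pixels end up being zeroed.
import numpy as np

h, w, length = 6, 6, 4
y, x = 1, 5                                                  # assumed sampled centre
mask = np.ones((h, w), np.float32)
y1, y2 = np.clip(y - length // 2, 0, h), np.clip(y + length // 2, 0, h)
x1, x2 = np.clip(x - length // 2, 0, w), np.clip(x + length // 2, 0, w)
mask[y1:y2, x1:x2] = 0.
print(int((mask == 0).sum()))                                # 9 zeroed pixels instead of 16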
- mask = torch.from_numpy(mask) - mask = mask.expand_as(img) - img *= mask - return img - - -def _data_transforms_cifar10(args): - CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124] - CIFAR_STD = [0.24703233, 0.24348505, 0.26158768] - - train_transform = transforms.Compose([ - transforms.RandomCrop(32, padding=4), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - transforms.Normalize(CIFAR_MEAN, CIFAR_STD), - ]) - if args.cutout: - train_transform.transforms.append(Cutout(args.cutout_length)) - - valid_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(CIFAR_MEAN, CIFAR_STD), - ]) - return train_transform, valid_transform - - -def count_parameters_in_MB(model): - return np.sum(np.prod(v.size()) for v in model.parameters())/1e6 - - -def save_checkpoint(state, is_best, save): - filename = os.path.join(save, 'checkpoint.pth.tar') - torch.save(state, filename) - if is_best: - best_filename = os.path.join(save, 'model_best.pth.tar') - shutil.copyfile(filename, best_filename) - - -def save(model, model_path): - torch.save(model.state_dict(), model_path) - - -def load(model, model_path, genotype): - pretrained_dict = torch.load(model_path) - model_dict = model.state_dict() - - # keep only the weights for the specified genotype, - # and prune all the other weights from the MixedOps - #pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} - - edge_dict = {(0,2): 0, (0,3): 2, (0,4): 5, (0,5): 9, (1,2): 1, (1,3): 3, (1,4): 6, (1,5): 10, (2,3): 4, (2,4): 7, (3,4): 8, (2,5): 11, (3,5): 12, (4,5): 13} - - for layer in range(8): - first_number = layer - - for p in range(2): - if layer in [3, 6] and p == 0: - key = 'cells.{}.preprocess{}.conv_1.weight'.format(layer, p) - key = 'cells.{}.preprocess{}.conv_2.weight'.format(layer, p) - else: - key = 'cells.{}.preprocess{}.op.1.weight'.format(layer, p) - model_dict[key] = pretrained_dict[key] - - if layer in [2, 5]: - gene = genotype.reduce - else: - gene = genotype.normal - - for i in range(4): - for k in [2*i, 2*i + 1]: - op, j = gene[k] - second_number = edge_dict[(j, i + 2)] - if op == 'sep_conv_3x3': - third_number = 4 - for h in [1, 2, 5, 6]: - key_model = 'cells.{}._ops.{}.op.{}.weight'.format(layer, k, h) - key_pretrained = 'cells.{}._ops.{}._ops.{}.op.{}.weight'.format(first_number, second_number, third_number, h) - model_dict[key_model] = pretrained_dict[key_pretrained] - elif op == 'max_pool_3x3': - third_number = 1 - elif op == 'avg_pool_3x3': - third_number = 2 - - model.load_state_dict(model_dict) - - -def drop_path(x, drop_prob): - if drop_prob > 0.: - keep_prob = 1.-drop_prob - try: - mask = Variable(torch.cuda.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob)) - except: - mask = Variable(torch.FloatTensor(x.size(0), 1, 1, 1).bernoulli_(keep_prob)) - x.div_(keep_prob) - x.mul_(mask) - return x - - -def create_exp_dir(path, scripts_to_save=None): - import time, random - time.sleep(random.uniform(1, 2)) - if not os.path.exists(path): - os.mkdir(path) - print('Experiment dir : {}'.format(path)) - - if scripts_to_save is not None: - os.mkdir(os.path.join(path, 'scripts')) - for script in scripts_to_save: - dst_file = os.path.join(path, 'scripts', os.path.basename(script)) - shutil.copyfile(script, dst_file) - diff --git a/autoPyTorch/components/networks/image/densenet.py b/autoPyTorch/components/networks/image/densenet.py deleted file mode 100644 index 2931be7c0..000000000 --- a/autoPyTorch/components/networks/image/densenet.py +++ /dev/null @@ -1,173 +0,0 @@ -import re 
-import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.model_zoo as model_zoo -from collections import OrderedDict - -import ConfigSpace -from autoPyTorch.components.networks.base_net import BaseImageNet -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -from autoPyTorch.components.networks.base_net import BaseImageNet - -class _DenseLayer(nn.Sequential): - def __init__(self, num_input_features, growth_rate, bn_size, drop_rate): - super(_DenseLayer, self).__init__() - self.add_module('norm1', nn.BatchNorm2d(num_input_features)), - self.add_module('relu1', nn.ReLU(inplace=True)), - self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * - growth_rate, kernel_size=1, stride=1, bias=False)), - self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)), - self.add_module('relu2', nn.ReLU(inplace=True)), - self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate, - kernel_size=3, stride=1, padding=1, bias=False)), - self.drop_rate = drop_rate - - def forward(self, x): - new_features = super(_DenseLayer, self).forward(x) - if self.drop_rate > 0: - new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) - return torch.cat([x, new_features], 1) - - -class _DenseBlock(nn.Sequential): - def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate): - super(_DenseBlock, self).__init__() - for i in range(num_layers): - layer = _DenseLayer(num_input_features + i * growth_rate, growth_rate, bn_size, drop_rate) - self.add_module('denselayer%d' % (i + 1), layer) - - -class _Transition(nn.Sequential): - def __init__(self, num_input_features, num_output_features, pool_size): - super(_Transition, self).__init__() - self.add_module('norm', nn.BatchNorm2d(num_input_features)) - self.add_module('relu', nn.ReLU(inplace=True)) - self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, - kernel_size=1, stride=1, bias=False)) - self.add_module('pool', nn.AvgPool2d(kernel_size=pool_size, stride=pool_size)) - - -class DenseNet(BaseImageNet): - r"""Densenet-BC model class, based on - `"Densely Connected Convolutional Networks" `_ - Args: - growth_rate (int) - how many filters to add each layer (`k` in paper) - block_config (list of 4 ints) - how many layers in each pooling block - num_init_features (int) - the number of filters to learn in the first convolution layer - bn_size (int) - multiplicative factor for number of bottle neck layers - (i.e. 
bn_size * k features in the bottleneck layer) - drop_rate (float) - dropout rate after each dense layer - num_classes (int) - number of classification classes - """ - - def __init__(self, config, in_features, out_features, final_activation, *args, **kwargs): - - super(DenseNet, self).__init__(config, in_features, out_features, final_activation) - - growth_rate = config['growth_rate'] - block_config=[config['layer_in_block_%d' % (i+1)] for i in range(config['blocks'])] - num_init_features = 2 * growth_rate - bn_size = 4 - drop_rate = config['dropout'] if config['use_dropout'] else 0 - num_classes = self.n_classes - - image_size, min_image_size = min(self.iw, self.ih), 1 - - import math - division_steps = math.floor(math.log2(image_size) - math.log2(min_image_size) - 1e-5) + 1 - - if division_steps > len(block_config) + 1: - # First convolution - self.features = nn.Sequential(OrderedDict([ - ('conv0', nn.Conv2d(self.channels, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), - ('norm0', nn.BatchNorm2d(num_init_features)), - ('relu0', nn.ReLU(inplace=True)), - ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), - ])) - division_steps -= 2 - else: - self.features = nn.Sequential(OrderedDict([ - ('conv0', nn.Conv2d(self.channels, num_init_features, kernel_size=3, stride=1, padding=1, bias=False)) - ])) - - - # Each denseblock - num_features = num_init_features - for i, num_layers in enumerate(block_config): - block = _DenseBlock(num_layers=num_layers, num_input_features=num_features, - bn_size=bn_size, growth_rate=growth_rate, drop_rate=drop_rate) - self.features.add_module('denseblock%d' % (i + 1), block) - num_features = num_features + num_layers * growth_rate - if i != len(block_config) - 1: - trans = _Transition(num_input_features=num_features, num_output_features=num_features // 2, pool_size=2 if i > len(block_config) - division_steps else 1) - self.features.add_module('transition%d' % (i + 1), trans) - num_features = num_features // 2 - - # Final batch norm - self.features.add_module('last_norm', nn.BatchNorm2d(num_features)) - - # Linear layer - self.classifier = nn.Linear(num_features, num_classes) - - # Official init from torch repo. 
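Before the torch-style weight initialisation that follows, note how the channel bookkeeping in the constructor above works: the running channel count starts at num_init_features = 2 * growth_rate, each dense block adds num_layers * growth_rate channels (one concatenation per layer), and every transition except the last halves the count. A short worked check with purely illustrative values (nothing below is fixed by the deleted code):

growth_rate = 12
block_config = [6, 12, 24]        # hypothetical layer counts per dense block
num_features = 2 * growth_rate    # 24 channels after the first convolution

for i, num_layers in enumerate(block_config):
    num_features += num_layers * growth_rate    # each layer concatenates growth_rate channels
    if i != len(block_config) - 1:
        num_features //= 2                      # transition layer halves the channel count
print(num_features)  # 384 channels feed the final batch norm and the linear classifier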
- for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight) - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.Linear): - nn.init.constant_(m.bias, 0) - - self.layers = nn.Sequential(self.features) - - def forward(self, x): - features = self.features(x) - out = F.relu(features, inplace=True) - out = F.adaptive_avg_pool2d(out, (1, 1)).view(features.size(0), -1) - out = self.classifier(out) - - if not self.training and self.final_activation is not None: - out = self.final_activation(out) - return out - - @staticmethod - def get_config_space(growth_rate_range=(12, 40), nr_blocks=(3, 4), layer_range=([1, 12], [6, 24], [12, 64], [12, 64]), num_init_features=(32, 128), **kwargs): - - import ConfigSpace as CS - import ConfigSpace.hyperparameters as CSH - from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter - - cs = CS.ConfigurationSpace() - growth_rate_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'growth_rate', growth_rate_range) - cs.add_hyperparameter(growth_rate_hp) - # add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'bn_size', [2, 4]) - # add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'num_init_features', num_init_features, log=True) - # add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'bottleneck', [True, False]) - - blocks_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'blocks', nr_blocks) - cs.add_hyperparameter(blocks_hp) - use_dropout = add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'use_dropout', [True, False]) - dropout = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'dropout', [0.0, 1.0]) - cs.add_condition(CS.EqualsCondition(dropout, use_dropout, True)) - - if type(nr_blocks[0]) == int: - min_blocks = nr_blocks[0] - max_blocks = nr_blocks[1] - else: - min_blocks = nr_blocks[0][0] - max_blocks = nr_blocks[0][1] - - for i in range(1, max_blocks+1): - layer_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'layer_in_block_%d' % i, layer_range[i-1]) - cs.add_hyperparameter(layer_hp) - - if i > min_blocks: - cs.add_condition(CS.GreaterThanCondition(layer_hp, blocks_hp, i-1)) - - return cs - - - diff --git a/autoPyTorch/components/networks/image/densenet_flexible.py b/autoPyTorch/components/networks/image/densenet_flexible.py deleted file mode 100644 index 057d798ed..000000000 --- a/autoPyTorch/components/networks/image/densenet_flexible.py +++ /dev/null @@ -1,247 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Implementation of a Dense Net for image data. 
-""" - -import torch -import torch.nn as nn -import math - -import ConfigSpace -from autoPyTorch.components.networks.base_net import BaseImageNet -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -import inspect -from autoPyTorch.components.networks.base_net import BaseImageNet -from autoPyTorch.utils.modules import Reshape -from autoPyTorch.components.networks.activations import all_activations, get_activation -from .utils.utils import get_layer_params - -# https://github.com/liuzhuang13/DenseNet - -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import logging -logger = logging.getLogger('autonet') - -class PrintNode(nn.Module): - def __init__(self, msg): - super(PrintNode, self).__init__() - self.msg = msg - - def forward(self, x): - logger.debug(self.msg) - return x - - -class _DenseLayer(nn.Sequential): - def __init__(self, nChannels, growth_rate, drop_rate, bottleneck, kernel_size, activation): - super(_DenseLayer, self).__init__() - # self.add_module('p_layer1', PrintNode("layer1")) - self.add_module('norm1', nn.BatchNorm2d(nChannels)) - self.add_module('relu1', get_activation(activation, inplace=True)) - if bottleneck: - self.add_module('conv1', nn.Conv2d(nChannels, 4 * growth_rate, kernel_size=1, stride=1, bias=False)) - nChannels = 4 * growth_rate - if drop_rate > 0: - self.add_module('drop', nn.Dropout2d(p=drop_rate, inplace=True)) - # self.add_module('p_layer2', PrintNode("layer2")) - self.add_module('norm2', nn.BatchNorm2d(nChannels)) - self.add_module('relu2', get_activation(activation, inplace=True)) - self.add_module('conv2', nn.Conv2d(nChannels, growth_rate, kernel_size=kernel_size, stride=1, padding=int((kernel_size-1)/2), bias=False)) - if drop_rate > 0: - self.add_module('drop', nn.Dropout2d(p=drop_rate, inplace=True)) - - def forward(self, x): - new_features = super(_DenseLayer, self).forward(x) - # logger.debug('concat ' + str(x.shape) + ' and ' + str(new_features.shape)) - return torch.cat([x, new_features], 1) - - -class _DenseBlock(nn.Sequential): - def __init__(self, N, nChannels, growth_rate, drop_rate, bottleneck, kernel_size, activation): - super(_DenseBlock, self).__init__() - for i in range(N): - self.add_module('denselayer%d' % (i + 1), _DenseLayer(nChannels, growth_rate, drop_rate, bottleneck, kernel_size, activation)) - nChannels += growth_rate - - - -class _Transition(nn.Sequential): - def __init__(self, nChannels, nOutChannels, drop_rate, last, pool_size, kernel_size, stride, padding, activation): - super(_Transition, self).__init__() - # self.add_module('p_transition', PrintNode("transition")) - self.add_module('norm', nn.BatchNorm2d(nChannels)) - self.add_module('relu', get_activation(activation, inplace=True)) - # self.add_module('p_last', PrintNode("last transition " + str(last))) - if last: - self.add_module('pool', nn.AvgPool2d(kernel_size=pool_size, stride=pool_size)) - self.add_module('reshape', Reshape(nChannels)) - else: - self.add_module('conv', nn.Conv2d(nChannels, nOutChannels, kernel_size=1, stride=1, bias=False)) - if drop_rate > 0: - self.add_module('drop', nn.Dropout2d(p=drop_rate, inplace=True)) - self.add_module('pool', nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=padding)) - - -class DenseNetFlexible(BaseImageNet): - - def __init__(self, config, in_features, out_features, final_activation, *args, **kwargs): - - super(DenseNetFlexible, self).__init__(config, in_features, out_features, final_activation) - - growth_rate=config['growth_rate'] - 
bottleneck=config['bottleneck'] - channel_reduction=config['channel_reduction'] - - in_size = min(self.iw, self.ih) - out_size = max(1, in_size * config['last_image_size']) - size_reduction = math.pow(in_size / out_size, 1 / (config['blocks'] + 1)) - - nChannels= 2 * growth_rate - - self.features = nn.Sequential() - - sizes = [max(1, round(in_size / math.pow(size_reduction, i+1))) for i in range(config['blocks'] + 2)] - - in_size, kernel_size, stride, padding = get_layer_params(in_size, sizes[0], config['first_conv_kernel']) - self.features.add_module('conv0', nn.Conv2d(self.channels, nChannels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)) - self.features.add_module('norm0', nn.BatchNorm2d(nChannels)) - self.features.add_module('activ0', get_activation(config['first_activation'], inplace=True)) - - in_size, kernel_size, stride, padding = get_layer_params(in_size, sizes[1], config['first_pool_kernel']) - self.features.add_module('pool0', nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=padding)) - # print(in_size) - - nOutChannels = nChannels - # Each denseblock - for i in range(1, config['blocks']+1): - nChannels = nOutChannels - - drop_rate = config['dropout_%d' % i] if config['use_dropout'] else 0 - - block = _DenseBlock(N=config['layer_in_block_%d' % i], nChannels=nChannels, bottleneck=bottleneck, - growth_rate=growth_rate, drop_rate=drop_rate, kernel_size=config['conv_kernel_%d' % i], - activation=config['activation_%d' % i]) - - self.features.add_module('denseblock%d' % i, block) - nChannels = nChannels + config['layer_in_block_%d' % i] * growth_rate - nOutChannels = max(1, math.floor(nChannels * channel_reduction)) - - out_size, kernel_size, stride, padding = get_layer_params(in_size, sizes[i+1], config['pool_kernel_%d' % i]) - transition = _Transition( nChannels=nChannels, nOutChannels=nOutChannels, - drop_rate=drop_rate, last=(i == config['blocks']), - pool_size=in_size, # only used in last transition -> reduce to '1x1 image' - kernel_size=kernel_size, stride=stride, padding=padding, - activation=config['activation_%d' % i]) - in_size = out_size - - self.features.add_module('trans%d' % i, transition) - - # Linear layer - self.classifier = nn.Linear(nChannels, out_features) - - # Official init from torch repo. 
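A note on the size schedule set up near the top of this constructor before the initialisation loop that follows: with `blocks` transitions plus the stem convolution and pooling, every step shrinks the feature-map side length by the same factor size_reduction = (in_size / out_size) ** (1 / (blocks + 1)). A quick numeric check with hypothetical values (the real ones come from the config dict):

import math

# hypothetical values; the deleted constructor reads them from its config dict
in_size, blocks, last_image_size = 32, 3, 0.1

out_size = max(1, in_size * last_image_size)                      # 3.2
size_reduction = math.pow(in_size / out_size, 1 / (blocks + 1))   # ~1.78 shrink factor per step

sizes = [max(1, round(in_size / math.pow(size_reduction, i + 1)))
         for i in range(blocks + 2)]
print(sizes)  # [18, 10, 6, 3, 2]: targets for the stem conv, the stem pool, then each transition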
- for m in self.modules(): - if isinstance(m, nn.Conv2d): - self.matrix_init(m.weight, config['conv_init']) - elif isinstance(m, nn.BatchNorm2d): - self.matrix_init(m.weight, config['batchnorm_weight_init']) - self.matrix_init(m.bias, config['batchnorm_bias_init']) - elif isinstance(m, nn.Linear): - self.matrix_init(m.bias, config['linear_bias_init']) - - # logger.debug(print(self)) - - self.layers = nn.Sequential(self.features) - - def matrix_init(self, matrix, init_type): - if init_type == 'kaiming_normal': - nn.init.kaiming_normal_(matrix) - elif init_type == 'constant_0': - nn.init.constant_(matrix, 0) - elif init_type == 'constant_1': - nn.init.constant_(matrix, 1) - elif init_type == 'constant_05': - nn.init.constant_(matrix, 0.5) - elif init_type == 'random': - return - else: - raise ValueError('Init type ' + init_type + ' is not supported') - - - def forward(self, x): - out = self.features(x) - out = self.classifier(out) - if not self.training and self.final_activation is not None: - out = self.final_activation(out) - return out - - @staticmethod - def get_config_space( growth_rate_range=(5, 128), nr_blocks=(1, 5), kernel_range=(2, 7), - layer_range=(5, 50), activations=all_activations.keys(), - conv_init=('random', 'kaiming_normal', 'constant_0', 'constant_1', 'constant_05'), - batchnorm_weight_init=('random', 'constant_0', 'constant_1', 'constant_05'), - batchnorm_bias_init=('random', 'constant_0', 'constant_1', 'constant_05'), - linear_bias_init=('random', 'constant_0', 'constant_1', 'constant_05'), **kwargs): - - import ConfigSpace as CS - import ConfigSpace.hyperparameters as CSH - from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter - - cs = CS.ConfigurationSpace() - growth_rate_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'growth_rate', growth_rate_range) - first_conv_kernel_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'first_conv_kernel', kernel_range) - first_pool_kernel_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'first_pool_kernel', kernel_range) - conv_init_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'conv_init', conv_init) - batchnorm_weight_init_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'batchnorm_weight_init', batchnorm_weight_init) - batchnorm_bias_init_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'batchnorm_bias_init', batchnorm_bias_init) - linear_bias_init_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'linear_bias_init', linear_bias_init) - first_activation_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'first_activation', sorted(set(activations).intersection(all_activations))) - blocks_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'blocks', nr_blocks) - - cs.add_hyperparameter(growth_rate_hp) - cs.add_hyperparameter(first_conv_kernel_hp) - cs.add_hyperparameter(first_pool_kernel_hp) - cs.add_hyperparameter(conv_init_hp) - cs.add_hyperparameter(batchnorm_weight_init_hp) - cs.add_hyperparameter(batchnorm_bias_init_hp) - cs.add_hyperparameter(linear_bias_init_hp) - cs.add_hyperparameter(first_activation_hp) - cs.add_hyperparameter(blocks_hp) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'channel_reduction', [0.1, 0.9]) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'last_image_size', [0, 1]) - add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'bottleneck', [True, False]) - use_dropout = add_hyperparameter(cs, CSH.CategoricalHyperparameter, 
'use_dropout', [True, False]) - - if type(nr_blocks[0]) == int: - min_blocks = nr_blocks[0] - max_blocks = nr_blocks[1] - else: - min_blocks = nr_blocks[0][0] - max_blocks = nr_blocks[0][1] - - for i in range(1, max_blocks+1): - layer_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'layer_in_block_%d' % i, layer_range) - pool_kernel_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'pool_kernel_%d' % i, kernel_range) - activation_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'activation_%d' % i, sorted(set(activations).intersection(all_activations))) - cs.add_hyperparameter(layer_hp) - cs.add_hyperparameter(pool_kernel_hp) - cs.add_hyperparameter(activation_hp) - dropout = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'dropout_%d' % i, [0.0, 1.0]) - conv_kernel = add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'conv_kernel_%d' % i, [3, 5, 7]) - - - if i > min_blocks: - cs.add_condition(CS.GreaterThanCondition(layer_hp, blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(conv_kernel, blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(pool_kernel_hp, blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(activation_hp, blocks_hp, i-1)) - cs.add_condition(CS.AndConjunction(CS.EqualsCondition(dropout, use_dropout, True), CS.GreaterThanCondition(dropout, blocks_hp, i-1))) - else: - cs.add_condition(CS.EqualsCondition(dropout, use_dropout, True)) - - return cs diff --git a/autoPyTorch/components/networks/image/mobilenet.py b/autoPyTorch/components/networks/image/mobilenet.py deleted file mode 100644 index a2190b1a3..000000000 --- a/autoPyTorch/components/networks/image/mobilenet.py +++ /dev/null @@ -1,258 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F - -import math - -import ConfigSpace -from autoPyTorch.components.networks.base_net import BaseImageNet -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -from torch.autograd import Variable -from autoPyTorch.components.networks.base_net import BaseImageNet - -from .utils.mobilenet_utils import GenEfficientNet, _decode_arch_def, _resolve_bn_args, _round_channels, swish, sigmoid, hard_swish, hard_sigmoid, SelectAdaptivePool2d - -# TODO -# EXPANSION RATIO (currenty hardcoded) -# ACTIVATION? (currently swish) - -class Arch_Encoder(): - """ Encode block definition string - Encodes a list of config space (dicts) through a string notation of arguments for further usage with _decode_architecure and timm. - E.g. ir_r2_k3_s2_e1_i32_o16_se0.25_noskip - - leading string - block type ( - ir = InvertedResidual, ds = DepthwiseSep, dsa = DeptwhiseSep with pw act, cn = ConvBnAct) - r - number of repeat blocks, - k - kernel size, - s - strides (1-9), - e - expansion ratio, - c - output channels, - se - squeeze/excitation ratio - n - activation fn ('re', 'r6', 'hs', or 'sw') - Args: - block hyperpar dict as coming from MobileNet class - Returns: - Architecture encoded as string for further usage with _decode_architecure and timm. 
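The notation described above packs one block definition per underscore-separated token, e.g. ir_r2_k3_s2_e6_c24_se0.25. A toy parser for the numeric fields, only to make the encoding concrete (the deleted _decode_block_str further down handles activations, repeats and several more cases):

import re

def parse_block(block_str):
    # "ir_r2_k3_s2_e6_c24_se0.25" -> block type plus its numeric options
    tokens = block_str.split('_')
    block_type, opts = tokens[0], {}
    for tok in tokens[1:]:
        if tok == 'noskip':
            opts['noskip'] = True
            continue
        parts = re.split(r'(\d.*)', tok)
        if len(parts) < 2:       # non-numeric tokens (e.g. activations) are ignored in this toy version
            continue
        key, value = parts[0], parts[1]
        opts[key] = float(value) if '.' in value else int(value)
    return block_type, opts

print(parse_block('ir_r2_k3_s2_e6_c24_se0.25'))
# ('ir', {'r': 2, 'k': 3, 's': 2, 'e': 6, 'c': 24, 'se': 0.25})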
- """ - - def __init__(self, block_types, nr_sub_blocks, kernel_sizes, strides, output_filters, se_ratios, skip_connections, expansion_rates=0): - self.block_types = block_types - self.nr_sub_blocks = nr_sub_blocks - self.kernel_sizes = kernel_sizes - self.strides = strides - self.expansion_rates = expansion_rates - self.output_filters = output_filters - self.se_ratios = se_ratios - self.skip_connections = skip_connections - - self.arch_encoded = [[""] for ind in range(len(self.block_types))] - self._encode_architecture() - - def _encode_architecture(self): - encoding_functions = [self._get_encoded_blocks, self._get_encoded_nr_sub_bocks, self._get_encoded_kernel_sizes, self._get_encoded_strides, - self._get_encoded_expansion_rates , self._get_encoded_output_filters, self._get_encoded_se_ratios, self._get_encoded_skip_connections] - - for func in encoding_functions: - return_val = func() - self._add_specifications(return_val) - - def _add_specifications(self, arguments): - for ind, arg in enumerate(arguments): - if len(self.arch_encoded[ind][0])!=0 and arg!="" and not self.arch_encoded[ind][0].endswith("_") : - self.arch_encoded[ind][0] = self.arch_encoded[ind][0] + "_" - self.arch_encoded[ind][0] = self.arch_encoded[ind][0] + arg - - def _get_encoded_blocks(self): - block_type_dict = {"inverted_residual":"ir", "dwise_sep_conv":"ds", "conv_bn_act":"cn"} - block_type_list = self._dict_to_list(self.block_types) - return [block_type_dict[item] for item in block_type_list] - - def _get_encoded_nr_sub_bocks(self): - nr_sub_blocks_dict = dict([(i, "r"+str(i)) for i in range(10)]) - nr_sub_blocks_list = self._dict_to_list(self.nr_sub_blocks) - return [nr_sub_blocks_dict[item] for item in nr_sub_blocks_list] - - def _get_encoded_kernel_sizes(self): - kernel_sizes_dict = dict([(i, "k"+str(i)) for i in range(10)]) - kernel_sizes_list = self._dict_to_list(self.kernel_sizes) - return [kernel_sizes_dict[item] for item in kernel_sizes_list] - - def _get_encoded_strides(self): - strides_dict = dict([(i, "s"+str(i)) for i in range(10)]) - strides_list = self._dict_to_list(self.strides) - return [strides_dict[item] for item in strides_list] - - def _get_encoded_expansion_rates(self): - if self.expansion_rates == 0: - exp_list = ["e1","e6","e6","e6","e6","e6","e6"] - return exp_list[0:len(self.block_types)] - else: - expansion_rates_dict = dict([(i, "e"+str(i)) for i in range(10)]) - expansion_rates_list = self._dict_to_list(self.expansion_rates) - return [expansion_rates_dict[item] for item in expansion_rates_list] - - def _get_encoded_output_filters(self): - output_filters_dict = dict([(i, "c"+str(i)) for i in range(5000)]) - output_filters_list = self._dict_to_list(self.output_filters) - return [output_filters_dict[item] for item in output_filters_list] - - def _get_encoded_se_ratios(self): - se_ratios_dict = {0:"", 0.25:"se0.25"} - se_ratios_list = self._dict_to_list(self.se_ratios) - return [se_ratios_dict[item] for item in se_ratios_list] - - def _get_encoded_skip_connections(self): - skip_connections_dict = {True : "", False: "no_skip"} - skip_connections_list = self._dict_to_list(self.skip_connections) - return [skip_connections_dict[item] for item in skip_connections_list] - - def _dict_to_list(self, input_dict): - output_list = [] - dict_len = len(input_dict) - for ind in range(dict_len): - output_list.append(input_dict["Group_" + str(ind+1)]) - return output_list - - def get_encoded_architecture(self): - return self.arch_encoded - - -class MobileNet(BaseImageNet): - """ - Implements a search space 
as in MnasNet (https://arxiv.org/abs/1807.11626) using inverted residuals. - """ - def __init__(self, config, in_features, out_features, final_activation, **kwargs): - super(MobileNet, self).__init__(config, in_features, out_features, final_activation) - - # Initialize hyperpars for architecture - nn.Module.config = config - self.final_activation = final_activation - self.nr_main_blocks = config['nr_main_blocks'] - self.initial_filters = config['initial_filters'] - - - self.nr_sub_blocks = dict([ - ('Group_%d' % (i+1), config['nr_sub_blocks_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.op_types = dict([ - ('Group_%d' % (i+1), config['op_type_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.kernel_sizes = dict([ - ('Group_%d' % (i+1), config['kernel_size_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.strides = dict([ - ('Group_%d' % (i+1), config['stride_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.output_filters = dict([ - ('Group_%d' % (i+1), config['out_filters_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.skip_cons = dict([ - ('Group_%d' % (i+1), config['skip_con_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.se_ratios = dict([ - ('Group_%d' % (i+1), config['se_ratio_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - - ########## Create model - encoder = Arch_Encoder(block_types=self.op_types, - nr_sub_blocks=self.nr_sub_blocks, - kernel_sizes=self.kernel_sizes, - strides=self.strides, - expansion_rates=0, - output_filters=self.output_filters, - se_ratios=self.se_ratios, - skip_connections=self.skip_cons) - arch_enc = encoder.get_encoded_architecture() - - kwargs["bn_momentum"] = 0.01 - - self.model = GenEfficientNet(_decode_arch_def(arch_enc, depth_multiplier=1.0), - num_classes=out_features, - stem_size=self.initial_filters, - channel_multiplier=1.0, - num_features=_round_channels(1280, 1.0, 8, None), - bn_args=_resolve_bn_args(kwargs), - act_fn=swish, - drop_connect_rate=0.2, - drop_rate=0.2, - **kwargs) - - def _cfg(url='', **kwargs): - return {'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7), - 'crop_pct': 0.875, 'interpolation': 'bicubic', - 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225), - 'first_conv': 'conv_stem', 'classifier': 'classifier', **kwargs} - - self.model.default_cfg = _cfg(url='', input_size=in_features, pool_size=(10, 10), crop_pct=0.904, num_classes=out_features) - - def forward(self, x): - # make sure channels first - x = self.model(x) - if not self.training and self.final_activation is not None: - x = self.final_activation(x) - return x - - @staticmethod - def get_config_space( nr_main_blocks=[3, 7], initial_filters=([8, 32], True), nr_sub_blocks=([1, 4], False), - op_types = ["inverted_residual", "dwise_sep_conv"], kernel_sizes=[3, 5], strides=[1,2], - output_filters = [[12, 16, 20], - [18, 24, 30], - [24, 32, 40], - [48, 64, 80], - [72, 96, 120], - [120, 160, 200], - [240, 320, 400]], # the idea is to search for e.g. 
0.75, 1, 1.25* output_filters(mainblock number) - skip_connection = [True, False], se_ratios = [0, 0.25], **kwargs): - - import ConfigSpace as CS - import ConfigSpace.hyperparameters as CSH - - cs = CS.ConfigurationSpace() - - - main_blocks_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, "nr_main_blocks", nr_main_blocks) - initial_filters_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, "initial_filters", initial_filters) - cs.add_hyperparameter(main_blocks_hp) - cs.add_hyperparameter(initial_filters_hp) - - if type(nr_main_blocks[0]) == int: - min_blocks = nr_main_blocks[0] - max_blocks = nr_main_blocks[1] - else: - min_blocks = nr_main_blocks[0][0] - max_blocks = nr_main_blocks[0][1] - - for i in range(1, max_blocks + 1): - sub_blocks_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'nr_sub_blocks_%d' % i, nr_sub_blocks) - op_type_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'op_type_%d' % i, op_types) - kernel_size_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'kernel_size_%d' % i, kernel_sizes) - stride_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'stride_%d' % i, strides) - out_filters_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'out_filters_%d' % i, output_filters[i-1]) # take output_filters list i-1 as options - se_ratio_hp = get_hyperparameter(ConfigSpace.CategoricalHyperparameter, 'se_ratio_%d' % i, se_ratios) - cs.add_hyperparameter(sub_blocks_hp) - cs.add_hyperparameter(op_type_hp) - cs.add_hyperparameter(kernel_size_hp) - cs.add_hyperparameter(stride_hp) - cs.add_hyperparameter(out_filters_hp) - cs.add_hyperparameter(se_ratio_hp) - skip_con = cs.add_hyperparameter(CSH.CategoricalHyperparameter('skip_con_%d' % i, [True, False])) - - if i > min_blocks: - cs.add_condition(CS.GreaterThanCondition(sub_blocks_hp, main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(op_type_hp, main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(kernel_size_hp, main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(stride_hp, main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(out_filters_hp, main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(skip_con, main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(se_ratio_hp, main_blocks_hp, i-1)) - - return cs diff --git a/autoPyTorch/components/networks/image/resnet.py b/autoPyTorch/components/networks/image/resnet.py deleted file mode 100644 index c709f7a3d..000000000 --- a/autoPyTorch/components/networks/image/resnet.py +++ /dev/null @@ -1,294 +0,0 @@ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.autograd import Variable - -import ConfigSpace -from autoPyTorch.components.networks.base_net import BaseImageNet -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -from autoPyTorch.components.networks.image.utils.utils import initialize_weights -from autoPyTorch.components.networks.image.utils.shakeshakeblock import shake_shake, generate_alpha_beta -from autoPyTorch.components.networks.image.utils.shakedrop import shake_drop, generate_alpha_beta_single - -class SkipConnection(nn.Module): - def __init__(self, in_channels, out_channels, stride): - super(SkipConnection, self).__init__() - - self.s1 = nn.Sequential() - self.s1.add_module('Skip_1_AvgPool', - nn.AvgPool2d(1, stride=stride)) - self.s1.add_module('Skip_1_Conv', - nn.Conv2d(in_channels, - int(out_channels / 2), - 
kernel_size=1, - stride=1, - padding=0, - bias=False)) - - self.s2 = nn.Sequential() - self.s2.add_module('Skip_2_AvgPool', - nn.AvgPool2d(1, stride=stride)) - self.s2.add_module('Skip_2_Conv', - nn.Conv2d(in_channels, - int(out_channels / 2) if out_channels % 2 == 0 else int(out_channels / 2) + 1, - kernel_size=1, - stride=1, - padding=0, - bias=False)) - - self.batch_norm = nn.BatchNorm2d(out_channels) - - def forward(self, x): - out1 = F.relu(x, inplace=False) - out1 = self.s1(out1) - - out2 = F.pad(x[:, :, 1:, 1:], (0, 1, 0, 1)) - out2 = self.s2(out2) - - out = torch.cat([out1, out2], dim=1) - out = self.batch_norm(out) - - return out - - -class ResidualBranch(nn.Module): - def __init__(self, in_channels, out_channels, filter_size, stride, branch_index): - super(ResidualBranch, self).__init__() - - self.residual_branch = nn.Sequential() - - self.residual_branch.add_module('Branch_{}:ReLU_1'.format(branch_index), - nn.ReLU(inplace=False)) - self.residual_branch.add_module('Branch_{}:Conv_1'.format(branch_index), - nn.Conv2d(in_channels, - out_channels, - kernel_size=filter_size, - stride=stride, - padding=round(filter_size / 3), - bias=False)) - self.residual_branch.add_module('Branch_{}:BN_1'.format(branch_index), - nn.BatchNorm2d(out_channels)) - self.residual_branch.add_module('Branch_{}:ReLU_2'.format(branch_index), - nn.ReLU(inplace=False)) - self.residual_branch.add_module('Branch_{}:Conv_2'.format(branch_index), - nn.Conv2d(out_channels, - out_channels, - kernel_size=filter_size, - stride=1, - padding=round(filter_size / 3), - bias=False)) - self.residual_branch.add_module('Branch_{}:BN_2'.format(branch_index), - nn.BatchNorm2d(out_channels)) - - def forward(self, x): - return self.residual_branch(x) - - -class BasicBlock(nn.Module): - def __init__(self, n_input_plane, n_output_plane, filter_size, res_branches, stride, shake_config): - super(BasicBlock, self).__init__() - - self.shake_config = shake_config - self.branches = nn.ModuleList([ResidualBranch(n_input_plane, n_output_plane, filter_size, stride, branch + 1) for branch in range(res_branches)]) - - # Skip connection - self.skip = nn.Sequential() - if n_input_plane != n_output_plane or stride != 1: - self.skip.add_module('Skip_connection', - SkipConnection(n_input_plane, n_output_plane, stride)) - - - def forward(self, x): - if len(self.branches) == 1: - out = self.branches[0](x) - if self.config.apply_shakeDrop: - alpha, beta = generate_alpha_beta_single(out.size(), self.shake_config if self.training else (False, False, False), x.is_cuda) - out = shake_drop(out, alpha, beta, self.config.death_rate, self.training) - else: - if self.config.apply_shakeShake: - alpha, beta = generate_alpha_beta(len(self.branches), x.size(0), self.shake_config if self.training else (False, False, False), x.is_cuda) - branches = [self.branches[i](x) for i in range(len(self.branches))] - out = shake_shake(alpha, beta, *branches) - else: - out = sum([self.branches[i](x) for i in range(len(self.branches))]) - - return out + self.skip(x) - - -class ResidualGroup(nn.Module): - def __init__(self, block, n_input_plane, n_output_plane, n_blocks, filter_size, res_branches, stride, shake_config): - super(ResidualGroup, self).__init__() - self.group = nn.Sequential() - self.n_blocks = n_blocks - - # The first residual block in each group is responsible for the input downsampling - self.group.add_module('Block_1', - block(n_input_plane, - n_output_plane, - filter_size, - res_branches, - stride=stride, - shake_config=shake_config)) - - # The following 
residual block do not perform any downsampling (stride=1) - for block_index in range(2, n_blocks + 1): - block_name = 'Block_{}'.format(block_index) - self.group.add_module(block_name, - block(n_output_plane, - n_output_plane, - filter_size, - res_branches, - stride=1, - shake_config=shake_config)) - - def forward(self, x): - return self.group(x) - - -class ResNet(BaseImageNet): - def __init__(self, config, in_features, out_features, final_activation, **kwargs): - super(ResNet, self).__init__(config, in_features, out_features, final_activation) - - nn.Module.config = config - self.final_activation = final_activation - self.nr_main_blocks = config['nr_main_blocks'] - config.initial_filters = config['initial_filters'] - config.death_rate = config['death_rate'] - - config.forward_shake = True - config.backward_shake = True - config.shake_image = True - config.apply_shakeDrop = True - config.apply_shakeShake = True - - self.nr_residual_blocks = dict([ - ('Group_%d' % (i+1), config['nr_residual_blocks_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.widen_factors = dict([ - ('Group_%d' % (i+1), config['widen_factor_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.res_branches = dict([ - ('Group_%d' % (i+1), config['res_branches_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - self.filters_size = dict([ - ('Group_%d' % (i+1), 3) #config['filters_size_%i' % (i+1)]) - for i in range(self.nr_main_blocks)]) - - shake_config = (config.forward_shake, config.backward_shake, - config.shake_image) - - ########## - self.model = nn.Sequential() - - # depth = sum([config.nr_convs * self.nr_residual_blocks['Group_{}'.format(i)] + 2 for i in range(1, self.nr_main_blocks + 1)]) - # print(' | Multi-branch ResNet-' + str(depth) + ' CIFAR-10') - - block = BasicBlock - - im_size = max(self.ih, self.iw) - - self.model.add_module('Conv_0', - nn.Conv2d(self.channels, - config.initial_filters, - kernel_size=7 if im_size > 200 else 3, - stride=2 if im_size > 200 else 1, - padding=3 if im_size > 200 else 1, - bias=False)) - self.model.add_module('BN_0', - nn.BatchNorm2d(config.initial_filters)) - - if im_size > 200: - self.model.add_module('ReLU_0', nn.ReLU(inplace=True)) - self.model.add_module('Pool_0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) - - feature_maps_in = int(round(config.initial_filters * self.widen_factors['Group_1'])) - self.model.add_module('Group_1', - ResidualGroup(block, - config.initial_filters, - feature_maps_in, - self.nr_residual_blocks['Group_1'], - self.filters_size['Group_1'], - self.res_branches['Group_1'], - 1, #2 if im_size > 100 else 1, - shake_config)) - - # image_size, min_image_size = min(self.iw, self.ih), 5 - # division_steps = math.floor(math.log2(image_size) - math.log2(min_image_size) - 1e-5) - - for main_block_nr in range(2, self.nr_main_blocks + 1): - feature_maps_out = int(round(feature_maps_in * self.widen_factors['Group_{}'.format(main_block_nr)])) - self.model.add_module('Group_{}'.format(main_block_nr), - ResidualGroup(block, - feature_maps_in, - feature_maps_out, - self.nr_residual_blocks['Group_{}'.format(main_block_nr)], - self.filters_size['Group_{}'.format(main_block_nr)], - self.res_branches['Group_{}'.format(main_block_nr)], - 2, # if main_block_nr > self.nr_main_blocks - division_steps else 1, - shake_config)) - - #image_size = math.floor((image_size+1)/2.0) if main_block_nr > self.nr_main_blocks - division_steps else image_size - feature_maps_in = feature_maps_out - - self.feature_maps_out = feature_maps_in - 
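The feature_maps_in bookkeeping above grows multiplicatively: Group_1 widens the stem's initial_filters, and every later group widens the previous group's output by its own widen_factor. With purely illustrative numbers (initial_filters=16 and widen factors 2.0, 2.0, 1.5, not taken from any config) the channel progression would be:

initial_filters = 16
widen_factors = [2.0, 2.0, 1.5]    # hypothetical, one factor per main block

feature_maps = int(round(initial_filters * widen_factors[0]))   # 32 channels after Group_1
channels = [feature_maps]
for factor in widen_factors[1:]:
    feature_maps = int(round(feature_maps * factor))            # 64, then 96
    channels.append(feature_maps)
print(channels)  # [32, 64, 96]; the last entry is feature_maps_out feeding the linear layer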
self.model.add_module('ReLU_0', nn.ReLU(inplace=True)) - self.model.add_module('AveragePool', nn.AdaptiveAvgPool2d(1)) - self.fc = nn.Linear(self.feature_maps_out, out_features) - - self.apply(initialize_weights) - - self.layers = nn.Sequential(self.model) - - def forward(self, x): - x = self.model(x) - x = x.view(-1, self.feature_maps_out) - x = self.fc(x) - if not self.training and self.final_activation is not None: - x = self.final_activation(x) - return x - - @staticmethod - def get_config_space( nr_main_blocks=[1, 8], nr_residual_blocks=([1, 16], True), initial_filters=([8, 32], True), widen_factor=([0.5, 4], True), - res_branches=([1, 5], False), filters_size=[3, 3], **kwargs): - - import ConfigSpace as CS - import ConfigSpace.hyperparameters as CSH - - cs = CS.ConfigurationSpace() - - nr_main_blocks_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, "nr_main_blocks", nr_main_blocks) - cs.add_hyperparameter(nr_main_blocks_hp) - initial_filters_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, "initial_filters", initial_filters) - cs.add_hyperparameter(initial_filters_hp) - # add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'nr_convs', nr_convs, log=True) - death_rate_hp = get_hyperparameter(ConfigSpace.UniformFloatHyperparameter, "death_rate", ([0,1], False)) - cs.add_hyperparameter(death_rate_hp) - - if type(nr_main_blocks[0]) is int: - main_blocks_min = nr_main_blocks[0] - main_blocks_max = nr_main_blocks[1] - else: - main_blocks_min = nr_main_blocks[0][0] - main_blocks_max = nr_main_blocks[0][1] - - for i in range(1, main_blocks_max + 1): - blocks_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'nr_residual_blocks_%d' % i, nr_residual_blocks) - blocks = cs.add_hyperparameter(blocks_hp) - widen_hp = get_hyperparameter(ConfigSpace.UniformFloatHyperparameter, 'widen_factor_%d' % i, widen_factor) - widen = cs.add_hyperparameter(widen_hp) - branches_hp = get_hyperparameter(ConfigSpace.UniformIntegerHyperparameter, 'res_branches_%d' % i, res_branches) - branches = cs.add_hyperparameter(branches_hp) - # filters = add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'filters_size_%d' % i, filters_size, log=False) - - if i > main_blocks_min: - cs.add_condition(CS.GreaterThanCondition(blocks_hp, nr_main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(widen_hp, nr_main_blocks_hp, i-1)) - cs.add_condition(CS.GreaterThanCondition(branches_hp, nr_main_blocks_hp, i-1)) - # cs.add_condition(CS.GreaterThanCondition(filters, main_blocks, i-1)) - - return cs diff --git a/autoPyTorch/components/networks/image/resnet152.py b/autoPyTorch/components/networks/image/resnet152.py deleted file mode 100644 index 22dc6b476..000000000 --- a/autoPyTorch/components/networks/image/resnet152.py +++ /dev/null @@ -1,192 +0,0 @@ - -import torch.nn as nn -import torch.utils.model_zoo as model_zoo - - -__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', - 'resnet152'] - - -model_urls = { - 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', - 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', - 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', - 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', - 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', -} - - -def conv3x3(in_planes, out_planes, stride=1): - """3x3 convolution with padding""" - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, - 
padding=1, bias=False) - - -def conv1x1(in_planes, out_planes, stride=1): - """1x1 convolution""" - return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) - - -class BasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(BasicBlock, self).__init__() - self.conv1 = conv3x3(inplanes, planes, stride) - self.bn1 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.conv2 = conv3x3(planes, planes) - self.bn2 = nn.BatchNorm2d(planes) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - identity = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - out = self.relu(out) - - return out - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = conv1x1(inplanes, planes) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = conv3x3(planes, planes, stride) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = conv1x1(planes, planes * self.expansion) - self.bn3 = nn.BatchNorm2d(planes * self.expansion) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - identity = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - identity = self.downsample(x) - - out += identity - out = self.relu(out) - - return out - - -from autoPyTorch.components.networks.base_net import BaseImageNet -class ResNet(BaseImageNet): - - def __init__(self, block, layers, num_classes=1000, zero_init_residual=False): - self.inplanes = 64 - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) - self.fc = nn.Linear(512 * block.expansion, num_classes) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - # Zero-initialize the last BN in each residual branch, - # so that the residual branch starts with zeros, and each residual block behaves like an identity. 
- # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 - if zero_init_residual: - for m in self.modules(): - if isinstance(m, Bottleneck): - nn.init.constant_(m.bn3.weight, 0) - elif isinstance(m, BasicBlock): - nn.init.constant_(m.bn2.weight, 0) - - self.layers = nn.Sequential(self.conv1, self.bn1, self.relu, self.maxpool, self.layer1, self.layer2, self.layer3, self.layer4, self.avgpool) - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - conv1x1(self.inplanes, planes * block.expansion, stride), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - x = self.fc(x) - - return x - - -class ResNet152(ResNet): - def __init__(self, config, in_features, out_features, final_activation, **kwargs): - super(ResNet, self).__init__(config, in_features, out_features, final_activation) - super(ResNet152, self).__init__(Bottleneck, [3, 8, 36, 3], num_classes=out_features) - - - def forward(self, x): - x = super(ResNet152, self).forward(x) - - if not self.training and self.final_activation is not None: - x = self.final_activation(x) - return x - - -# def resnet152(pretrained=False, **kwargs): -# """Constructs a ResNet-152 model. -# Args: -# pretrained (bool): If True, returns a model pre-trained on ImageNet -# """ -# model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) -# if pretrained: -# model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) -# return model diff --git a/autoPyTorch/components/networks/image/utils/conv2d_helpers.py b/autoPyTorch/components/networks/image/utils/conv2d_helpers.py deleted file mode 100644 index 75801c374..000000000 --- a/autoPyTorch/components/networks/image/utils/conv2d_helpers.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2019 Ross Wightman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - - -import torch -import torch.nn as nn -import torch.nn.functional as F -import math - - -def _is_static_pad(kernel_size, stride=1, dilation=1, **_): - return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0 - - -def _get_padding(kernel_size, stride=1, dilation=1, **_): - padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 - return padding - - -def _calc_same_pad(i, k, s, d): - return max((math.ceil(i / s) - 1) * s + (k - 1) * d + 1 - i, 0) - - -def _split_channels(num_chan, num_groups): - split = [num_chan // num_groups for _ in range(num_groups)] - split[0] += num_chan - sum(split) - return split - - -class Conv2dSame(nn.Conv2d): - """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions - """ - def __init__(self, in_channels, out_channels, kernel_size, stride=1, - padding=0, dilation=1, groups=1, bias=True): - super(Conv2dSame, self).__init__( - in_channels, out_channels, kernel_size, stride, 0, dilation, - groups, bias) - - def forward(self, x): - ih, iw = x.size()[-2:] - kh, kw = self.weight.size()[-2:] - pad_h = _calc_same_pad(ih, kh, self.stride[0], self.dilation[0]) - pad_w = _calc_same_pad(iw, kw, self.stride[1], self.dilation[1]) - if pad_h > 0 or pad_w > 0: - x = F.pad(x, [pad_w//2, pad_w - pad_w//2, pad_h//2, pad_h - pad_h//2]) - return F.conv2d(x, self.weight, self.bias, self.stride, - self.padding, self.dilation, self.groups) - - -def conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): - padding = kwargs.pop('padding', '') - kwargs.setdefault('bias', False) - if isinstance(padding, str): - # for any string padding, the padding will be calculated for you, one of three ways - padding = padding.lower() - if padding == 'same': - # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact - if _is_static_pad(kernel_size, **kwargs): - # static case, no extra overhead - padding = _get_padding(kernel_size, **kwargs) - return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) - else: - # dynamic padding - return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs) - elif padding == 'valid': - # 'VALID' padding, same as padding=0 - return nn.Conv2d(in_chs, out_chs, kernel_size, padding=0, **kwargs) - else: - # Default to PyTorch style 'same'-ish symmetric padding - padding = _get_padding(kernel_size, **kwargs) - return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) - else: - # padding was specified as a number or pair - return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) - - -class MixedConv2d(nn.Module): - """ Mixed Grouped Convolution - Based on MDConv and GroupedConv in MixNet impl: - https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py - """ - - def __init__(self, in_channels, out_channels, kernel_size=3, - stride=1, padding='', dilated=False, depthwise=False, **kwargs): - super(MixedConv2d, self).__init__() - - kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size] - num_groups = len(kernel_size) - in_splits = _split_channels(in_channels, num_groups) - out_splits = _split_channels(out_channels, num_groups) - for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)): - d = 1 - # FIXME make compat with non-square kernel/dilations/strides - if stride == 1 and dilated: - d, k = (k - 1) // 2, 3 - conv_groups = out_ch if depthwise else 1 - # use add_module to keep key space clean - self.add_module( - str(idx), - 
conv2d_pad( - in_ch, out_ch, k, stride=stride, - padding=padding, dilation=d, groups=conv_groups, **kwargs) - ) - self.splits = in_splits - - def forward(self, x): - x_split = torch.split(x, self.splits, 1) - x_out = [c(x) for x, c in zip(x_split, self._modules.values())] - x = torch.cat(x_out, 1) - return x - - -# helper method -def select_conv2d(in_chs, out_chs, kernel_size, **kwargs): - assert 'groups' not in kwargs # only use 'depthwise' bool arg - if isinstance(kernel_size, list): - # We're going to use only lists for defining the MixedConv2d kernel groups, - # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. - return MixedConv2d(in_chs, out_chs, kernel_size, **kwargs) - else: - depthwise = kwargs.pop('depthwise', False) - groups = out_chs if depthwise else 1 - return conv2d_pad(in_chs, out_chs, kernel_size, groups=groups, **kwargs) diff --git a/autoPyTorch/components/networks/image/utils/mobilenet_utils.py b/autoPyTorch/components/networks/image/utils/mobilenet_utils.py deleted file mode 100644 index 4218554bb..000000000 --- a/autoPyTorch/components/networks/image/utils/mobilenet_utils.py +++ /dev/null @@ -1,753 +0,0 @@ -# Copyright 2019 Ross Wightman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -""" Generic EfficientNets -A generic class with building blocks to support a variety of models with efficient architectures: -* EfficientNet (B0-B7) -* MixNet (Small, Medium, and Large) -* MnasNet B1, A1 (SE), Small -* MobileNet V1, V2, and V3 -* FBNet-C (TODO A & B) -* ChamNet (TODO still guessing at architecture definition) -* Single-Path NAS Pixel1 -* And likely more... -TODO not all combinations and variations have been tested. Currently working on training hyper-params... -Hacked together by Ross Wightman -""" - -import math -import re -import logging -from copy import deepcopy - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .conv2d_helpers import select_conv2d - - -IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406) -IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225) - - -__all__ = ['GenEfficientNet'] - - -def _cfg(url='', **kwargs): - return { - 'url': url, 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': (7, 7), - 'crop_pct': 0.875, 'interpolation': 'bicubic', - 'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD, - 'first_conv': 'conv_stem', 'classifier': 'classifier', - **kwargs - } - - -_DEBUG = False - -# Default args for PyTorch BN impl -_BN_MOMENTUM_PT_DEFAULT = 0.01 -_BN_EPS_PT_DEFAULT = 1e-5 -_BN_ARGS_PT = dict(momentum=_BN_MOMENTUM_PT_DEFAULT, eps=_BN_EPS_PT_DEFAULT) - -# Defaults used for Google/Tensorflow training of mobile networks /w RMSprop as per -# papers and TF reference implementations. 
PT momentum equiv for TF decay is (1 - TF decay) -# NOTE: momentum varies btw .99 and .9997 depending on source -# .99 in official TF TPU impl -# .9997 (/w .999 in search space) for paper -# HERE CHANGED TO WORK WITH PYTORCH -_BN_MOMENTUM_TF_DEFAULT = 1 - 0.99 -_BN_EPS_TF_DEFAULT = 1e-3 -_BN_ARGS_TF = dict(momentum=_BN_MOMENTUM_TF_DEFAULT, eps=_BN_EPS_TF_DEFAULT) - - -def adaptive_pool_feat_mult(pool_type='avg'): - if pool_type == 'catavgmax': - return 2 - else: - return 1 - - -def adaptive_avgmax_pool2d(x, output_size=1): - x_avg = F.adaptive_avg_pool2d(x, output_size) - x_max = F.adaptive_max_pool2d(x, output_size) - return 0.5 * (x_avg + x_max) - - -def adaptive_catavgmax_pool2d(x, output_size=1): - x_avg = F.adaptive_avg_pool2d(x, output_size) - x_max = F.adaptive_max_pool2d(x, output_size) - return torch.cat((x_avg, x_max), 1) - - -def select_adaptive_pool2d(x, pool_type='avg', output_size=1): - """Selectable global pooling function with dynamic input kernel size - """ - if pool_type == 'avg': - x = F.adaptive_avg_pool2d(x, output_size) - elif pool_type == 'avgmax': - x = adaptive_avgmax_pool2d(x, output_size) - elif pool_type == 'catavgmax': - x = adaptive_catavgmax_pool2d(x, output_size) - elif pool_type == 'max': - x = F.adaptive_max_pool2d(x, output_size) - else: - assert False, 'Invalid pool type: %s' % pool_type - return x - - -class AdaptiveAvgMaxPool2d(nn.Module): - def __init__(self, output_size=1): - super(AdaptiveAvgMaxPool2d, self).__init__() - self.output_size = output_size - - def forward(self, x): - return adaptive_avgmax_pool2d(x, self.output_size) - - -class AdaptiveCatAvgMaxPool2d(nn.Module): - def __init__(self, output_size=1): - super(AdaptiveCatAvgMaxPool2d, self).__init__() - self.output_size = output_size - - def forward(self, x): - return adaptive_catavgmax_pool2d(x, self.output_size) - - -class SelectAdaptivePool2d(nn.Module): - """Selectable global pooling layer with dynamic input kernel size - """ - def __init__(self, output_size=1, pool_type='avg'): - super(SelectAdaptivePool2d, self).__init__() - self.output_size = output_size - self.pool_type = pool_type - if pool_type == 'avgmax': - self.pool = AdaptiveAvgMaxPool2d(output_size) - elif pool_type == 'catavgmax': - self.pool = AdaptiveCatAvgMaxPool2d(output_size) - elif pool_type == 'max': - self.pool = nn.AdaptiveMaxPool2d(output_size) - else: - if pool_type != 'avg': - assert False, 'Invalid pool type: %s' % pool_type - self.pool = nn.AdaptiveAvgPool2d(output_size) - - def forward(self, x): - return self.pool(x) - - def feat_mult(self): - return adaptive_pool_feat_mult(self.pool_type) - - def __repr__(self): - return self.__class__.__name__ + ' (' \ - + 'output_size=' + str(self.output_size) \ - + ', pool_type=' + self.pool_type + ')' - - -def _resolve_bn_args(kwargs): - bn_args = _BN_ARGS_TF.copy() if kwargs.pop('bn_tf', False) else _BN_ARGS_PT.copy() - bn_momentum = kwargs.pop('bn_momentum', None) - if bn_momentum is not None: - bn_args['momentum'] = bn_momentum - bn_eps = kwargs.pop('bn_eps', None) - if bn_eps is not None: - bn_args['eps'] = bn_eps - return bn_args - - -def _round_channels(channels, multiplier=1.0, divisor=8, channel_min=None): - """Round number of filters based on depth multiplier.""" - if not multiplier: - return channels - - channels *= multiplier - channel_min = channel_min or divisor - new_channels = max( - int(channels + divisor / 2) // divisor * divisor, - channel_min) - # Make sure that round down does not go down by more than 10%. 
- if new_channels < 0.9 * channels: - new_channels += divisor - return new_channels - - -def _parse_ksize(ss): - if ss.isdigit(): - return int(ss) - else: - return [int(k) for k in ss.split('.')] - - -def _decode_block_str(block_str, depth_multiplier=1.0): - """ Decode block definition string - Gets a list of block arg (dicts) through a string notation of arguments. - E.g. ir_r2_k3_s2_e1_i32_o16_se0.25_noskip - All args can exist in any order with the exception of the leading string which - is assumed to indicate the block type. - leading string - block type ( - ir = InvertedResidual, ds = DepthwiseSep, dsa = DeptwhiseSep with pw act, cn = ConvBnAct) - r - number of repeat blocks, - k - kernel size, - s - strides (1-9), - e - expansion ratio, - c - output channels, - se - squeeze/excitation ratio - n - activation fn ('re', 'r6', 'hs', or 'sw') - Args: - block_str: a string representation of block arguments. - Returns: - A list of block args (dicts) - Raises: - ValueError: if the string def not properly specified (TODO) - """ - assert isinstance(block_str, str) - ops = block_str.split('_') - block_type = ops[0] # take the block type off the front - ops = ops[1:] - options = {} - noskip = False - for op in ops: - # string options being checked on individual basis, combine if they grow - if op == 'noskip': - noskip = True - elif op.startswith('n'): - # activation fn - key = op[0] - v = op[1:] - if v == 're': - value = F.relu - elif v == 'r6': - value = F.relu6 - elif v == 'hs': - value = hard_swish - elif v == 'sw': - value = swish - else: - continue - options[key] = value - else: - # all numeric options - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value - - # if act_fn is None, the model default (passed to model init) will be used - act_fn = options['n'] if 'n' in options else None - exp_kernel_size = _parse_ksize(options['a']) if 'a' in options else 1 - pw_kernel_size = _parse_ksize(options['p']) if 'p' in options else 1 - - num_repeat = int(options['r']) - # each type of block has different valid arguments, fill accordingly - if block_type == 'ir': - block_args = dict( - block_type=block_type, - dw_kernel_size=_parse_ksize(options['k']), - exp_kernel_size=exp_kernel_size, - pw_kernel_size=pw_kernel_size, - out_chs=int(options['c']), - exp_ratio=float(options['e']), - se_ratio=float(options['se']) if 'se' in options else None, - stride=int(options['s']), - act_fn=act_fn, - noskip=noskip, - ) - elif block_type == 'ds' or block_type == 'dsa': - block_args = dict( - block_type=block_type, - dw_kernel_size=_parse_ksize(options['k']), - pw_kernel_size=pw_kernel_size, - out_chs=int(options['c']), - se_ratio=float(options['se']) if 'se' in options else None, - stride=int(options['s']), - act_fn=act_fn, - pw_act=block_type == 'dsa', - noskip=block_type == 'dsa' or noskip, - ) - elif block_type == 'cn': - block_args = dict( - block_type=block_type, - kernel_size=int(options['k']), - out_chs=int(options['c']), - stride=int(options['s']), - act_fn=act_fn, - ) - else: - assert False, 'Unknown block type (%s)' % block_type - - # return a list of block args expanded by num_repeat and - # scaled by depth_multiplier - num_repeat = int(math.ceil(num_repeat * depth_multiplier)) - return [deepcopy(block_args) for _ in range(num_repeat)] - - -def _decode_arch_def(arch_def, depth_multiplier=1.0): - arch_args = [] - for stack_idx, block_strings in enumerate(arch_def): - assert isinstance(block_strings, list) - stack_args = [] - for block_str in 
block_strings: - assert isinstance(block_str, str) - stack_args.extend(_decode_block_str(block_str, depth_multiplier)) - arch_args.append(stack_args) - return arch_args - - -def swish(x, inplace=False): - if inplace: - return x.mul_(x.sigmoid()) - else: - return x * x.sigmoid() - - -def sigmoid(x, inplace=False): - return x.sigmoid_() if inplace else x.sigmoid() - - -def hard_swish(x, inplace=False): - if inplace: - return x.mul_(F.relu6(x + 3.) / 6.) - else: - return x * F.relu6(x + 3.) / 6. - - -def hard_sigmoid(x, inplace=False): - if inplace: - return x.add_(3.).clamp_(0., 6.).div_(6.) - else: - return F.relu6(x + 3.) / 6. - - -class _BlockBuilder: - """ Build Trunk Blocks - This ended up being somewhat of a cross between - https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_models.py - and - https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/modeling/backbone/fbnet_builder.py - """ - def __init__(self, channel_multiplier=1.0, channel_divisor=8, channel_min=None, - pad_type='', act_fn=None, se_gate_fn=sigmoid, se_reduce_mid=False, - bn_args=_BN_ARGS_PT, drop_connect_rate=0., verbose=False): - self.channel_multiplier = channel_multiplier - self.channel_divisor = channel_divisor - self.channel_min = channel_min - self.pad_type = pad_type - self.act_fn = act_fn - self.se_gate_fn = se_gate_fn - self.se_reduce_mid = se_reduce_mid - self.bn_args = bn_args - self.drop_connect_rate = drop_connect_rate - self.verbose = verbose - - # updated during build - self.in_chs = None - self.block_idx = 0 - self.block_count = 0 - - def _round_channels(self, chs): - return _round_channels(chs, self.channel_multiplier, self.channel_divisor, self.channel_min) - - def _make_block(self, ba): - bt = ba.pop('block_type') - ba['in_chs'] = self.in_chs - ba['out_chs'] = self._round_channels(ba['out_chs']) - ba['bn_args'] = self.bn_args - ba['pad_type'] = self.pad_type - # block act fn overrides the model default - ba['act_fn'] = ba['act_fn'] if ba['act_fn'] is not None else self.act_fn - assert ba['act_fn'] is not None - if bt == 'ir': - ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count - ba['se_gate_fn'] = self.se_gate_fn - ba['se_reduce_mid'] = self.se_reduce_mid - if self.verbose: - logging.info(' InvertedResidual {}, Args: {}'.format(self.block_idx, str(ba))) - block = InvertedResidual(**ba) - elif bt == 'ds' or bt == 'dsa': - ba['drop_connect_rate'] = self.drop_connect_rate * self.block_idx / self.block_count - if self.verbose: - logging.info(' DepthwiseSeparable {}, Args: {}'.format(self.block_idx, str(ba))) - block = DepthwiseSeparableConv(**ba) - elif bt == 'cn': - if self.verbose: - logging.info(' ConvBnAct {}, Args: {}'.format(self.block_idx, str(ba))) - block = ConvBnAct(**ba) - else: - assert False, 'Uknkown block type (%s) while building model.' 
% bt - self.in_chs = ba['out_chs'] # update in_chs for arg of next block - - return block - - def _make_stack(self, stack_args): - blocks = [] - # each stack (stage) contains a list of block arguments - for i, ba in enumerate(stack_args): - if self.verbose: - logging.info(' Block: {}'.format(i)) - if i >= 1: - # only the first block in any stack can have a stride > 1 - ba['stride'] = 1 - block = self._make_block(ba) - blocks.append(block) - self.block_idx += 1 # incr global idx (across all stacks) - return nn.Sequential(*blocks) - - def __call__(self, in_chs, block_args): - """ Build the blocks - Args: - in_chs: Number of input-channels passed to first block - block_args: A list of lists, outer list defines stages, inner - list contains strings defining block configuration(s) - Return: - List of block stacks (each stack wrapped in nn.Sequential) - """ - if self.verbose: - logging.info('Building model trunk with %d stages...' % len(block_args)) - self.in_chs = in_chs - self.block_count = sum([len(x) for x in block_args]) - self.block_idx = 0 - blocks = [] - # outer list of block_args defines the stacks ('stages' by some conventions) - for stack_idx, stack in enumerate(block_args): - if self.verbose: - logging.info('Stack: {}'.format(stack_idx)) - assert isinstance(stack, list) - stack = self._make_stack(stack) - blocks.append(stack) - return blocks - - -def _initialize_weight_goog(m): - # weight init as per Tensorflow Official impl - # https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mnasnet_model.py - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels # fan-out - m.weight.data.normal_(0, math.sqrt(2.0 / n)) - if m.bias is not None: - m.bias.data.zero_() - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1.0) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - n = m.weight.size(0) # fan-out - init_range = 1.0 / math.sqrt(n) - m.weight.data.uniform_(-init_range, init_range) - m.bias.data.zero_() - - -def _initialize_weight_default(m): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1.0) - m.bias.data.zero_() - elif isinstance(m, nn.Linear): - nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='linear') - - -def drop_connect(inputs, training=False, drop_connect_rate=0.): - """Apply drop connect.""" - if not training: - return inputs - - keep_prob = 1 - drop_connect_rate - random_tensor = keep_prob + torch.rand( - (inputs.size()[0], 1, 1, 1), dtype=inputs.dtype, device=inputs.device) - random_tensor.floor_() # binarize - output = inputs.div(keep_prob) * random_tensor - return output - - -class ChannelShuffle(nn.Module): - # FIXME haven't used yet - def __init__(self, groups): - super(ChannelShuffle, self).__init__() - self.groups = groups - - def forward(self, x): - """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,w] -> [N,C,H,W]""" - N, C, H, W = x.size() - g = self.groups - assert C % g == 0, "Incompatible group size {} for input channel {}".format( - g, C - ) - return ( - x.view(N, g, int(C / g), H, W) - .permute(0, 2, 1, 3, 4) - .contiguous() - .view(N, C, H, W) - ) - - -class SqueezeExcite(nn.Module): - def __init__(self, in_chs, reduce_chs=None, act_fn=F.relu, gate_fn=sigmoid): - super(SqueezeExcite, self).__init__() - self.act_fn = act_fn - self.gate_fn = gate_fn - reduced_chs = reduce_chs or in_chs - self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True) - 
self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True) - - def forward(self, x): - # NOTE adaptiveavgpool can be used here, but seems to cause issues with NVIDIA AMP performance - x_se = x.view(x.size(0), x.size(1), -1).mean(-1).view(x.size(0), x.size(1), 1, 1) - x_se = self.conv_reduce(x_se) - x_se = self.act_fn(x_se, inplace=True) - x_se = self.conv_expand(x_se) - x = x * self.gate_fn(x_se) - return x - - -class ConvBnAct(nn.Module): - def __init__(self, in_chs, out_chs, kernel_size, - stride=1, pad_type='', act_fn=F.relu, bn_args=_BN_ARGS_PT): - super(ConvBnAct, self).__init__() - assert stride in [1, 2] - self.act_fn = act_fn - self.conv = select_conv2d(in_chs, out_chs, kernel_size, stride=stride, padding=pad_type) - self.bn1 = nn.BatchNorm2d(out_chs, **bn_args) - - def forward(self, x): - x = self.conv(x) - x = self.bn1(x) - x = self.act_fn(x, inplace=True) - return x - - -class DepthwiseSeparableConv(nn.Module): - """ DepthwiseSeparable block - Used for DS convs in MobileNet-V1 and in the place of IR blocks with an expansion - factor of 1.0. This is an alternative to having a IR with optional first pw conv. - """ - def __init__(self, in_chs, out_chs, dw_kernel_size=3, - stride=1, pad_type='', act_fn=F.relu, noskip=False, - pw_kernel_size=1, pw_act=False, - se_ratio=0., se_gate_fn=sigmoid, - bn_args=_BN_ARGS_PT, drop_connect_rate=0.): - super(DepthwiseSeparableConv, self).__init__() - assert stride in [1, 2] - self.has_se = se_ratio is not None and se_ratio > 0. - self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip - self.has_pw_act = pw_act # activation after point-wise conv - self.act_fn = act_fn - self.drop_connect_rate = drop_connect_rate - - self.conv_dw = select_conv2d( - in_chs, in_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True) - self.bn1 = nn.BatchNorm2d(in_chs, **bn_args) - - # Squeeze-and-excitation - if self.has_se: - self.se = SqueezeExcite( - in_chs, reduce_chs=max(1, int(in_chs * se_ratio)), act_fn=act_fn, gate_fn=se_gate_fn) - - self.conv_pw = select_conv2d(in_chs, out_chs, pw_kernel_size, padding=pad_type) - self.bn2 = nn.BatchNorm2d(out_chs, **bn_args) - - def forward(self, x): - residual = x - - x = self.conv_dw(x) - x = self.bn1(x) - x = self.act_fn(x, inplace=True) - - if self.has_se: - x = self.se(x) - - x = self.conv_pw(x) - x = self.bn2(x) - if self.has_pw_act: - x = self.act_fn(x, inplace=True) - - if self.has_residual: - if self.drop_connect_rate > 0.: - x = drop_connect(x, self.training, self.drop_connect_rate) - x += residual - return x - - -class InvertedResidual(nn.Module): - """ Inverted residual block w/ optional SE""" - - def __init__(self, in_chs, out_chs, dw_kernel_size=3, - stride=1, pad_type='', act_fn=F.relu, noskip=False, - exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, - se_ratio=0., se_reduce_mid=False, se_gate_fn=sigmoid, - shuffle_type=None, bn_args=_BN_ARGS_PT, drop_connect_rate=0.): - super(InvertedResidual, self).__init__() - mid_chs = int(in_chs * exp_ratio) - self.has_se = se_ratio is not None and se_ratio > 0. 
- self.has_residual = (in_chs == out_chs and stride == 1) and not noskip - self.act_fn = act_fn - self.drop_connect_rate = drop_connect_rate - - # Point-wise expansion - self.conv_pw = select_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type) - self.bn1 = nn.BatchNorm2d(mid_chs, **bn_args) - - self.shuffle_type = shuffle_type - if shuffle_type is not None and isinstance(exp_kernel_size, list): - self.shuffle = ChannelShuffle(len(exp_kernel_size)) - - # Depth-wise convolution - self.conv_dw = select_conv2d( - mid_chs, mid_chs, dw_kernel_size, stride=stride, padding=pad_type, depthwise=True) - self.bn2 = nn.BatchNorm2d(mid_chs, **bn_args) - - # Squeeze-and-excitation - if self.has_se: - se_base_chs = mid_chs if se_reduce_mid else in_chs - self.se = SqueezeExcite( - mid_chs, reduce_chs=max(1, int(se_base_chs * se_ratio)), act_fn=act_fn, gate_fn=se_gate_fn) - - # Point-wise linear projection - self.conv_pwl = select_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type) - self.bn3 = nn.BatchNorm2d(out_chs, **bn_args) - - def forward(self, x): - residual = x - - # Point-wise expansion - x = self.conv_pw(x) - x = self.bn1(x) - x = self.act_fn(x, inplace=True) - - # FIXME haven't tried this yet - # for channel shuffle when using groups with pointwise convs as per FBNet variants - if self.shuffle_type == "mid": - x = self.shuffle(x) - - # Depth-wise convolution - x = self.conv_dw(x) - x = self.bn2(x) - x = self.act_fn(x, inplace=True) - - # Squeeze-and-excitation - if self.has_se: - x = self.se(x) - - # Point-wise linear projection - x = self.conv_pwl(x) - x = self.bn3(x) - - if self.has_residual: - if self.drop_connect_rate > 0.: - x = drop_connect(x, self.training, self.drop_connect_rate) - x += residual - - # NOTE maskrcnn_benchmark building blocks have an SE module defined here for some variants - - return x - - -class GenEfficientNet(nn.Module): - """ Generic EfficientNet - An implementation of efficient network architectures, in many cases mobile optimized networks: - * MobileNet-V1 - * MobileNet-V2 - * MobileNet-V3 - * MnasNet A1, B1, and small - * FBNet A, B, and C - * ChamNet (arch details are murky) - * Single-Path NAS Pixel1 - * EfficientNet B0-B7 - * MixNet S, M, L - """ - - def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=32, num_features=1280, - channel_multiplier=1.0, channel_divisor=8, channel_min=None, - pad_type='', act_fn=F.relu, drop_rate=0., drop_connect_rate=0., - se_gate_fn=sigmoid, se_reduce_mid=False, bn_args=_BN_ARGS_PT, - global_pool='avg', head_conv='default', weight_init='goog'): - super(GenEfficientNet, self).__init__() - self.num_classes = num_classes - self.drop_rate = drop_rate - self.act_fn = act_fn - self.num_features = num_features - - stem_size = _round_channels(stem_size, channel_multiplier, channel_divisor, channel_min) - self.conv_stem = select_conv2d(in_chans, stem_size, 3, stride=2, padding=pad_type) - self.bn1 = nn.BatchNorm2d(stem_size, **bn_args) - in_chs = stem_size - - builder = _BlockBuilder( - channel_multiplier, channel_divisor, channel_min, - pad_type, act_fn, se_gate_fn, se_reduce_mid, - bn_args, drop_connect_rate, verbose=_DEBUG) - self.blocks = nn.Sequential(*builder(in_chs, block_args)) - in_chs = builder.in_chs - - if not head_conv or head_conv == 'none': - self.efficient_head = False - self.conv_head = None - assert in_chs == self.num_features - else: - self.efficient_head = head_conv == 'efficient' - self.conv_head = select_conv2d(in_chs, self.num_features, 1, padding=pad_type) - self.bn2 = None if 
self.efficient_head else nn.BatchNorm2d(self.num_features, **bn_args) - - self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - self.classifier = nn.Linear(self.num_features * self.global_pool.feat_mult(), self.num_classes) - - for m in self.modules(): - if weight_init == 'goog': - _initialize_weight_goog(m) - else: - _initialize_weight_default(m) - - def get_classifier(self): - return self.classifier - - def reset_classifier(self, num_classes, global_pool='avg'): - self.global_pool = SelectAdaptivePool2d(pool_type=global_pool) - self.num_classes = num_classes - del self.classifier - if num_classes: - self.classifier = nn.Linear( - self.num_features * self.global_pool.feat_mult(), num_classes) - else: - self.classifier = None - - def forward_features(self, x, pool=True): - x = self.conv_stem(x) - x = self.bn1(x) - x = self.act_fn(x, inplace=True) - x = self.blocks(x) - if self.efficient_head: - # efficient head, currently only mobilenet-v3 performs pool before last 1x1 conv - x = self.global_pool(x) # always need to pool here regardless of flag - x = self.conv_head(x) - # no BN - x = self.act_fn(x, inplace=True) - if pool: - # expect flattened output if pool is true, otherwise keep dim - x = x.view(x.size(0), -1) - else: - if self.conv_head is not None: - x = self.conv_head(x) - x = self.bn2(x) - x = self.act_fn(x, inplace=True) - if pool: - x = self.global_pool(x) - x = x.view(x.size(0), -1) - return x - - def forward(self, x): - x = self.forward_features(x) - if self.drop_rate > 0.: - x = F.dropout(x, p=self.drop_rate, training=self.training) - return self.classifier(x) - diff --git a/autoPyTorch/components/networks/image/utils/shakedrop.py b/autoPyTorch/components/networks/image/utils/shakedrop.py deleted file mode 100644 index 3cfa6d3f4..000000000 --- a/autoPyTorch/components/networks/image/utils/shakedrop.py +++ /dev/null @@ -1,60 +0,0 @@ -import torch -from torch.autograd import Variable, Function - - -class ShakeDrop(Function): - @staticmethod - def forward(ctx, x, alpha, beta, death_rate, is_train): - gate = (torch.rand(1) > death_rate).numpy() - ctx.gate = gate - ctx.save_for_backward(x, alpha, beta) - - if is_train: - if not gate: - y = alpha * x - else: - y = x - else: - y = x.mul(1 - (death_rate * 1.0)) - - return y - - @staticmethod - def backward(ctx, grad_output): - x, alpha, beta = ctx.saved_variables - grad_x1 = grad_alpha = grad_beta = None - - if ctx.needs_input_grad[0]: - if not ctx.gate: - grad_x = grad_output * beta - else: - grad_x = grad_output - - return grad_x, grad_alpha, grad_beta, None, None - -shake_drop = ShakeDrop.apply - - -def generate_alpha_beta_single(tensor_size, shake_config, is_cuda): - forward_shake, backward_shake, shake_image = shake_config - - if forward_shake and not shake_image: - alpha = torch.rand(tensor_size).mul(2).add(-1) - elif forward_shake and shake_image: - alpha = torch.rand(tensor_size[0]).view(tensor_size[0], 1, 1, 1) - alpha.mul_(2).add_(-1) # alpha from -1 to 1 - else: - alpha = torch.FloatTensor([0.5]) - - if backward_shake and not shake_image: - beta = torch.rand(tensor_size) - elif backward_shake and shake_image: - beta = torch.rand(tensor_size[0]).view(tensor_size[0], 1, 1, 1) - else: - beta = torch.FloatTensor([0.5]) - - if is_cuda: - alpha = alpha.cuda() - beta = beta.cuda() - - return Variable(alpha), Variable(beta) \ No newline at end of file diff --git a/autoPyTorch/components/networks/image/utils/shakeshakeblock.py b/autoPyTorch/components/networks/image/utils/shakeshakeblock.py deleted file mode 100644 index 
4ebc5085b..000000000 --- a/autoPyTorch/components/networks/image/utils/shakeshakeblock.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 - -import torch -from torch.autograd import Variable, Function - - -class ShakeShakeBlock(Function): - @staticmethod - def forward(ctx, alpha, beta, *args): - ctx.save_for_backward(beta) - - y = sum(alpha[i] * args[i] for i in range(len(args))) - return y - - @staticmethod - def backward(ctx, grad_output): - beta = ctx.saved_variables - grad_x = [beta[0][i] * grad_output for i in range(beta[0].shape[0])] - - return (None, None, *grad_x) - -shake_shake = ShakeShakeBlock.apply - - -def generate_alpha_beta(num_branches, batch_size, shake_config, is_cuda): - forward_shake, backward_shake, shake_image = shake_config - - if forward_shake and not shake_image: - alpha = torch.rand(num_branches) - elif forward_shake and shake_image: - alpha = torch.rand(num_branches, batch_size).view(num_branches, batch_size, 1, 1, 1) - else: - alpha = torch.ones(num_branches) - - if backward_shake and not shake_image: - beta = torch.rand(num_branches) - elif backward_shake and shake_image: - beta = torch.rand(num_branches, batch_size).view(num_branches, batch_size, 1, 1, 1) - else: - beta = torch.ones(num_branches) - - alpha = torch.nn.Softmax(0)(Variable(alpha)) - beta = torch.nn.Softmax(0)(Variable(beta)) - - if is_cuda: - alpha = alpha.cuda() - beta = beta.cuda() - - return alpha, beta \ No newline at end of file diff --git a/autoPyTorch/components/networks/image/utils/utils.py b/autoPyTorch/components/networks/image/utils/utils.py deleted file mode 100644 index c743b4eb0..000000000 --- a/autoPyTorch/components/networks/image/utils/utils.py +++ /dev/null @@ -1,44 +0,0 @@ -import torch.nn as nn -import math - -def initialize_weights(module): - if isinstance(module, nn.Conv2d): - n = module.kernel_size[0] * module.kernel_size[1] * module.out_channels - module.weight.data.normal_(0, math.sqrt(2. 
/ n)) - #nn.init.kaiming_normal(module.weight.data, mode='fan_out') - elif isinstance(module, nn.BatchNorm2d): - module.weight.data.fill_(1) - module.bias.data.zero_() - elif isinstance(module, nn.Linear): - module.bias.data.zero_() - -def get_layer_params(in_size, out_size, kernel_size): - kernel_size = int(kernel_size) - stride = int(max(1, math.ceil((in_size - kernel_size) / (out_size - 1)) if out_size > 1 else 1)) - cur_out_size = _get_out_size(in_size, kernel_size, stride, 0) - required_padding = (stride / 2) * (in_size - cur_out_size) - - cur_padding = int(math.ceil(required_padding)) - cur_out_size = _get_out_size(in_size, kernel_size, stride, cur_padding) - if cur_padding < kernel_size and cur_out_size <= in_size and cur_out_size >= 1: - return cur_out_size, kernel_size, stride, cur_padding - - cur_padding = int(math.floor(required_padding)) - cur_out_size = _get_out_size(in_size, kernel_size, stride, cur_padding) - if cur_padding < kernel_size and cur_out_size <= in_size and cur_out_size >= 1: - return cur_out_size, kernel_size, stride, cur_padding - - if stride > 1: - stride = int(stride - 1) - cur_padding = 0 - cur_out_size = int(_get_out_size(in_size, kernel_size, stride, cur_padding)) - if cur_padding < kernel_size and cur_out_size <= in_size and cur_out_size >= 1: - return cur_out_size, kernel_size, stride, cur_padding - - if (kernel_size % 2) == 0 and out_size == in_size: - return get_layer_params(in_size, out_size, kernel_size + 1) # an odd kernel can always keep the dimension (with stride 1) - - raise Exception('Could not find padding and stride to reduce ' + str(in_size) + ' to ' + str(out_size) + ' using kernel ' + str(kernel_size)) - -def _get_out_size(in_size, kernel_size, stride, padding): - return int(math.floor((in_size - kernel_size + 2 * padding) / stride + 1)) \ No newline at end of file diff --git a/autoPyTorch/components/networks/initialization.py b/autoPyTorch/components/networks/initialization.py deleted file mode 100644 index f00a08edc..000000000 --- a/autoPyTorch/components/networks/initialization.py +++ /dev/null @@ -1,70 +0,0 @@ -import torch -import ConfigSpace - -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -class SimpleInitializer(): - initialize_layers = ( - torch.nn.Conv1d, - torch.nn.Conv2d, - torch.nn.Conv3d, - torch.nn.Linear - ) - - def __init__(self, hyperparameter_config): - self.initialize_bias = hyperparameter_config["initialize_bias"] - - def apply(self, module, initialization_method, initialization_kwargs): - initialization_method_bias = initialization_method - initialization_kwargs_bias = initialization_kwargs - - if self.initialize_bias == "Zero": - initialization_method_bias = torch.nn.init.constant_ - initialization_kwargs_bias = {"val": 0} - - def perform_initialization(m): - if isinstance(m, self.initialize_layers): - if initialization_method is not None: - initialization_method(m.weight.data, **initialization_kwargs) - - if m.bias is not None and self.initialize_bias != "No" and initialization_method_bias is not None: - try: - initialization_method_bias(m.bias.data, **initialization_kwargs_bias) - except ValueError: - pass - module.apply(perform_initialization) - - @staticmethod - def get_hyperparameter_search_space( - initialize_bias=("Yes", "No", "Zero") - ): - cs = ConfigSpace.ConfigurationSpace() - add_hyperparameter(cs, ConfigSpace.CategoricalHyperparameter, "initialize_bias", initialize_bias) - return cs - - -class BaseInitialization(): - initialization_method = None - - def 
__init__(self, initializer, hyperparameter_config): - self.initializer = initializer - self.hyperparameter_config = hyperparameter_config - - def apply(self, module): - initialization_kwargs = self.hyperparameter_config if isinstance(self.hyperparameter_config, dict) else self.hyperparameter_config.get_dictionary() - self.initializer.apply(module, self.initialization_method, initialization_kwargs) - - @staticmethod - def get_hyperparameter_search_space(): - cs = ConfigSpace.ConfigurationSpace() - return cs - - -class SparseInitialization(BaseInitialization): - initialization_method = staticmethod(torch.nn.init.sparse_) - - @staticmethod - def get_hyperparameter_search_space(): - cs = ConfigSpace.ConfigurationSpace() - cs.add_hyperparameter(ConfigSpace.Constant("sparsity", 0.9)) - return cs \ No newline at end of file diff --git a/autoPyTorch/components/optimizer/optimizer.py b/autoPyTorch/components/optimizer/optimizer.py deleted file mode 100644 index 6f0a7bc59..000000000 --- a/autoPyTorch/components/optimizer/optimizer.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -File which contains the optimizers. -""" - -from autoPyTorch.utils.config_space_hyperparameter import get_hyperparameter, add_hyperparameter - -import torch.optim as optim - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH - - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -class AutoNetOptimizerBase(object): - def __new__(cls, params, config): - return cls._get_optimizer(cls, params, config) - - def _get_optimizer(self, params, config): - raise ValueError('Override the method _get_optimizer and do not call the base class implementation') - - @staticmethod - def get_config_space(*args, **kwargs): - return CS.ConfigurationSpace() - - -class AdamOptimizer(AutoNetOptimizerBase): - - def _get_optimizer(self, params, config): - return optim.Adam(params=params, lr=config['learning_rate'], weight_decay=config['weight_decay']) - - @staticmethod - def get_config_space( - learning_rate=((1e-4, 0.1), True), - weight_decay=(1e-5, 0.1) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'learning_rate', learning_rate) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'weight_decay', weight_decay) - return cs - - -class AdamWOptimizer(AutoNetOptimizerBase): - - def _get_optimizer(self, params, config): - return optim.AdamW(params=params, lr=config['learning_rate'], weight_decay=config['weight_decay']) - - @staticmethod - def get_config_space( - learning_rate=((1e-4, 0.1), True), - weight_decay=(1e-5, 0.1) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'learning_rate', learning_rate) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'weight_decay', weight_decay) - return cs - - -class SgdOptimizer(AutoNetOptimizerBase): - - def _get_optimizer(self, params, config): - return optim.SGD(params=params, lr=config['learning_rate'], momentum=config['momentum'], weight_decay=config['weight_decay']) - - @staticmethod - def get_config_space( - learning_rate=((1e-4, 0.1), True), - momentum=((0.1, 0.999), False), - weight_decay=(1e-5, 0.1) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'learning_rate', learning_rate) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'momentum', momentum) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'weight_decay', weight_decay) - 
return cs - - -class RMSpropOptimizer(AutoNetOptimizerBase): - - def _get_optimizer(self, params, config): - return optim.RMSprop(params=params, lr=config['learning_rate'], momentum=config['momentum'], weight_decay=config['weight_decay'], centered=False) - - @staticmethod - def get_config_space( - learning_rate=((1e-4, 0.1), True), - momentum=((0.1, 0.99), True), - weight_decay=(1e-5, 0.1), - alpha=(0.1,0.99) - ): - cs = CS.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'learning_rate', learning_rate) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'momentum', momentum) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'weight_decay', weight_decay) - add_hyperparameter(cs, CSH.UniformFloatHyperparameter, 'alpha', alpha) - return cs diff --git a/autoPyTorch/components/preprocessing/feature_preprocessing/__init__.py b/autoPyTorch/components/preprocessing/feature_preprocessing/__init__.py deleted file mode 100644 index a3966dd32..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from autoPyTorch.components.preprocessing.feature_preprocessing.truncated_svd import TruncatedSVD -from autoPyTorch.components.preprocessing.feature_preprocessing.fast_ica import FastICA -from autoPyTorch.components.preprocessing.feature_preprocessing.polynomial_features import PolynomialFeatures -from autoPyTorch.components.preprocessing.feature_preprocessing.kitchen_sinks import RandomKitchenSinks -from autoPyTorch.components.preprocessing.feature_preprocessing.kernel_pca import KernelPCA -from autoPyTorch.components.preprocessing.feature_preprocessing.nystroem import Nystroem -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase -from autoPyTorch.components.preprocessing.feature_preprocessing.power_transformer import PowerTransformer \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/feature_preprocessing/fast_ica.py b/autoPyTorch/components/preprocessing/feature_preprocessing/fast_ica.py deleted file mode 100644 index 0ed84eca7..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/fast_ica.py +++ /dev/null @@ -1,63 +0,0 @@ -import torch -import warnings - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -import ConfigSpace.conditions as CSC - -from autoPyTorch.utils.config_space_hyperparameter import get_hyperparameter -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - - -class FastICA(PreprocessorBase): - def __init__(self, hyperparameter_config): - self.algorithm = hyperparameter_config['algorithm'] - self.whiten = hyperparameter_config['whiten'] - self.fun = hyperparameter_config['fun'] - self.n_components = None - if (self.whiten): - self.n_components = hyperparameter_config['n_components'] - - def fit(self, X, Y): - import sklearn.decomposition - - self.preprocessor = sklearn.decomposition.FastICA( - n_components=self.n_components, algorithm=self.algorithm, - fun=self.fun, whiten=self.whiten - ) - - # Make the RuntimeWarning an Exception! 
- with warnings.catch_warnings(): - warnings.filterwarnings("error", message='array must not contain infs or NaNs') - try: - return self.preprocessor.fit(X) - except ValueError as e: - if 'array must not contain infs or NaNs' in e.args[0]: - raise ValueError("Bug in scikit-learn: https://github.com/scikit-learn/scikit-learn/pull/2738") - - - def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_hyperparameter_search_space( - dataset_info=None, - n_components=(10,2000), - algorithm=('parallel', 'deflation'), - whiten=(True, False), - fun=('logcosh', 'exp', 'cube'), - ): - cs = ConfigSpace.ConfigurationSpace() - - n_components_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, "n_components", n_components) - algorithm_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'algorithm', algorithm) - whiten_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'whiten', whiten) - fun_hp = get_hyperparameter(CSH.CategoricalHyperparameter, 'fun', fun) - - if True in whiten: - cs.add_hyperparameters([n_components_hp, algorithm_hp, whiten_hp, fun_hp]) - cs.add_condition(CSC.EqualsCondition(n_components_hp, whiten_hp, True)) - - return cs diff --git a/autoPyTorch/components/preprocessing/feature_preprocessing/kernel_pca.py b/autoPyTorch/components/preprocessing/feature_preprocessing/kernel_pca.py deleted file mode 100644 index 45eb7633d..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/kernel_pca.py +++ /dev/null @@ -1,79 +0,0 @@ -import warnings - -import numpy as np - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -import ConfigSpace.conditions as CSC - -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - - -class KernelPCA(PreprocessorBase): - def __init__(self, hyperparameter_config): - self.n_components = int(hyperparameter_config['n_components']) - self.kernel = hyperparameter_config['kernel'] - - self.degree = int(hyperparameter_config['degree']) if self.kernel == 'poly' else 3 - self.gamma = float(hyperparameter_config['gamma']) if self.kernel in ['poly', 'rbf'] else 0.25 - self.coef0 = float(hyperparameter_config['coef0']) if self.kernel in ['poly', 'sigmoid'] else 0.0 - - def fit(self, X, Y=None): - import scipy.sparse - import sklearn.decomposition - - self.preprocessor = sklearn.decomposition.KernelPCA( - n_components=self.n_components, kernel=self.kernel, - degree=self.degree, gamma=self.gamma, coef0=self.coef0, - remove_zero_eig=True) - - if scipy.sparse.issparse(X): - X = X.astype(np.float64) - with warnings.catch_warnings(): - warnings.filterwarnings("error") - self.preprocessor.fit(X) - - # Raise an informative error message, equation is based ~line 249 in - # kernel_pca.py in scikit-learn - if len(self.preprocessor.alphas_ / self.preprocessor.lambdas_) == 0: - raise ValueError('KernelPCA removed all features!') - return self - - def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() - with warnings.catch_warnings(): - warnings.filterwarnings("error") - X_new = self.preprocessor.transform(X) - - # TODO write a unittest for this case - if X_new.shape[1] == 0: - raise ValueError("KernelPCA removed all features!") - - return X_new - - @staticmethod - def get_hyperparameter_search_space( - dataset_info=None, - kernel=('poly', 'rbf', 'sigmoid', 'cosine'), - n_components=(10, 2000), - 
gamma=((3.0517578125e-05, 8), True), - degree=(2, 5), - coef0=(-1, 1) - ): - cs = ConfigSpace.ConfigurationSpace() - kernel_hp = add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'kernel', kernel) - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, "n_components", n_components) - - if "poly" in kernel: - degree_hp = add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'degree', degree) - cs.add_condition(CSC.EqualsCondition(degree_hp, kernel_hp, "poly")) - if set(["poly", "sigmoid"]) & set(kernel): - coef0_hp = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, "coef0", coef0) - cs.add_condition(CSC.InCondition(coef0_hp, kernel_hp, list(set(["poly", "sigmoid"]) & set(kernel)))) - if set(["poly", "rbf", "sigmoid"]) & set(kernel): - gamma_hp = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, "gamma", gamma) - cs.add_condition(CSC.InCondition(gamma_hp, kernel_hp, list(set(["poly", "rbf", "sigmoid"]) & set(kernel)))) - return cs - diff --git a/autoPyTorch/components/preprocessing/feature_preprocessing/kitchen_sinks.py b/autoPyTorch/components/preprocessing/feature_preprocessing/kitchen_sinks.py deleted file mode 100644 index b76f1ffac..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/kitchen_sinks.py +++ /dev/null @@ -1,34 +0,0 @@ -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -from autoPyTorch.utils.config_space_hyperparameter import get_hyperparameter -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - -class RandomKitchenSinks(PreprocessorBase): - - def __init__(self, hyperparameter_config): - self.gamma = float(hyperparameter_config['gamma']) - self.n_components = int(hyperparameter_config['n_components']) - - def fit(self, X, Y): - import sklearn.kernel_approximation - - self.preprocessor = sklearn.kernel_approximation.RBFSampler(self.gamma, self.n_components) - self.preprocessor.fit(X) - - def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_hyperparameter_search_space( - dataset_info=None, - n_components=((50, 10000), True), - gamma=((3.0517578125e-05, 8), True), - ): - n_components_hp = get_hyperparameter(CSH.UniformIntegerHyperparameter, "n_components", n_components) - gamma_hp = get_hyperparameter(CSH.UniformFloatHyperparameter, "gamma", gamma) - cs = ConfigSpace.ConfigurationSpace() - cs.add_hyperparameters([gamma_hp, n_components_hp]) - return cs \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/feature_preprocessing/nystroem.py b/autoPyTorch/components/preprocessing/feature_preprocessing/nystroem.py deleted file mode 100644 index ed1f657dd..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/nystroem.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -import ConfigSpace.conditions as CSC - -from autoPyTorch.utils.config_space_hyperparameter import get_hyperparameter, add_hyperparameter -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - - -class Nystroem(PreprocessorBase): - def __init__(self, hyperparameter_config): - self.kernel = hyperparameter_config['kernel'] - self.n_components = int(hyperparameter_config['n_components']) - self.gamma = float(hyperparameter_config['gamma']) if self.kernel in ["poly", "rbf", "sigmoid"] else 1.0 - self.degree = int(hyperparameter_config['degree']) if self.kernel == "poly" else 3 - self.coef0 
= float(hyperparameter_config['coef0']) if self.kernel in ["poly", "sigmoid"] else 1 - - def fit(self, X, Y=None): - import sklearn.kernel_approximation - - self.preprocessor = sklearn.kernel_approximation.Nystroem( - kernel=self.kernel, n_components=self.n_components, - gamma=self.gamma, degree=self.degree, coef0=self.coef0) - - self.preprocessor.fit(X.astype(np.float64)) - return self - - def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_hyperparameter_search_space( - dataset_info=None, - kernel=('poly', 'rbf', 'sigmoid', 'cosine'), - n_components=((50, 10000), True), - gamma=((3.0517578125e-05, 8), True), - degree=(2, 5), - coef0=(-1, 1) - ): - cs = ConfigSpace.ConfigurationSpace() - kernel_hp = add_hyperparameter(cs, CSH.CategoricalHyperparameter, 'kernel', kernel) - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, "n_components", n_components) - - if "poly" in kernel: - degree_hp = add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'degree', degree) - cs.add_condition(CSC.EqualsCondition(degree_hp, kernel_hp, "poly")) - if set(["poly", "sigmoid"]) & set(kernel): - coef0_hp = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, "coef0", coef0) - cs.add_condition(CSC.InCondition(coef0_hp, kernel_hp, list(set(["poly", "sigmoid"]) & set(kernel)))) - if set(["poly", "rbf", "sigmoid"]) & set(kernel): - gamma_hp = add_hyperparameter(cs, CSH.UniformFloatHyperparameter, "gamma", gamma) - cs.add_condition(CSC.InCondition(gamma_hp, kernel_hp, list(set(["poly", "rbf", "sigmoid"]) & set(kernel)))) - - return cs \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/feature_preprocessing/polynomial_features.py b/autoPyTorch/components/preprocessing/feature_preprocessing/polynomial_features.py deleted file mode 100644 index 31874c36b..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/polynomial_features.py +++ /dev/null @@ -1,43 +0,0 @@ -import torch - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -from autoPyTorch.utils.config_space_hyperparameter import get_hyperparameter, add_hyperparameter -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - - -class PolynomialFeatures(PreprocessorBase): - def __init__(self, hyperparameter_config): - self.degree = hyperparameter_config['degree'] - self.interaction_only = hyperparameter_config['interaction_only'] - self.include_bias = hyperparameter_config['include_bias'] - self.preprocessor = None - - def fit(self, X, Y): - import sklearn.preprocessing - - self.preprocessor = sklearn.preprocessing.PolynomialFeatures( - degree=self.degree, interaction_only=self.interaction_only, - include_bias=self.include_bias) - - self.preprocessor.fit(X, Y) - - def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_hyperparameter_search_space( - dataset_info=None, - degree=(2, 3), - interaction_only=(True, False), - include_bias=(True, False) - ): - cs = ConfigSpace.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, "degree", degree) - add_hyperparameter(cs, CSH.CategoricalHyperparameter, "interaction_only", [False, True]) - add_hyperparameter(cs, CSH.CategoricalHyperparameter, "include_bias", [True, False]) - - return cs \ No newline at end of file diff --git 
a/autoPyTorch/components/preprocessing/feature_preprocessing/power_transformer.py b/autoPyTorch/components/preprocessing/feature_preprocessing/power_transformer.py deleted file mode 100644 index 6b5a026fd..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/power_transformer.py +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np -import torch - -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - - - -class PowerTransformer(PreprocessorBase): - def __init__(self, hyperparameter_config): - self.preprocessor = None - self.method = hyperparameter_config["method"] if "method" in hyperparameter_config else "yeo-johnson" - self.standardize = hyperparameter_config["standardize"] - - def fit(self, X, Y): - import sklearn.preprocessing - - try: - self.preprocessor = sklearn.preprocessing.PowerTransformer(method=self.method, standardize=self.standardize, copy=False) - self.preprocessor.fit(X, Y) - except ValueError as exception: - print(exception) - print("Using yeo-johnson instead") - self.preprocessor = sklearn.preprocessing.PowerTransformer(standardize=self.standardize, copy=False) - self.preprocessor.fit(X, Y) - - def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_hyperparameter_search_space( - dataset_info=None, - standardize=(True, False), - method=("yeo-johnson", "box-cox"), - ): - cs = ConfigSpace.ConfigurationSpace() - add_hyperparameter(cs, CSH.CategoricalHyperparameter, "standardize", standardize) - if dataset_info is None or ( - (dataset_info.x_min_value is None or dataset_info.x_min_value > 0) and not any(dataset_info.categorical_features)): - add_hyperparameter(cs, CSH.CategoricalHyperparameter, "method", method) - return cs \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/feature_preprocessing/truncated_svd.py b/autoPyTorch/components/preprocessing/feature_preprocessing/truncated_svd.py deleted file mode 100644 index 1d3c42a25..000000000 --- a/autoPyTorch/components/preprocessing/feature_preprocessing/truncated_svd.py +++ /dev/null @@ -1,37 +0,0 @@ -import numpy as np -import torch - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter - -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - - - -class TruncatedSVD(PreprocessorBase): - def __init__(self, hyperparameter_config): - self.target_dim = hyperparameter_config['target_dim'] - self.preprocessor = None - - def fit(self, X, Y): - import sklearn.decomposition - - self.target_dim = int(self.target_dim) - target_dim = min(self.target_dim, X.shape[1] - 1) - self.preprocessor = sklearn.decomposition.TruncatedSVD(target_dim, algorithm='randomized') - self.preprocessor.fit(X, Y) - - def transform(self, X): - if self.preprocessor is None: - raise NotImplementedError() - return self.preprocessor.transform(X) - - @staticmethod - def get_hyperparameter_search_space( - dataset_info=None, - target_dim=(10, 256) - ): - cs = ConfigSpace.ConfigurationSpace() - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, "target_dim", target_dim) - return cs \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/image_preprocessing/archive.py 
b/autoPyTorch/components/preprocessing/image_preprocessing/archive.py deleted file mode 100644 index 6a12871a6..000000000 --- a/autoPyTorch/components/preprocessing/image_preprocessing/archive.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2019 Kakao Brain -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -def fa_reduced_cifar10(): - p = [[["Contrast", 0.8320659688593578, 0.49884310562180767], ["TranslateX", 0.41849883971249136, 0.394023086494538]], [["Color", 0.3500483749890918, 0.43355143929883955], ["Color", 0.5120716140300229, 0.7508299643325016]], [["Rotate", 0.9447932604389472, 0.29723465088990375], ["Sharpness", 0.1564936149799504, 0.47169309978091745]], [["Rotate", 0.5430015349185097, 0.6518626678905443], ["Color", 0.5694844928020679, 0.3494533005430269]], [["AutoContrast", 0.5558922032451064, 0.783136004977799], ["TranslateY", 0.683914191471972, 0.7597025305860181]], [["TranslateX", 0.03489224481658926, 0.021025488042663354], ["Equalize", 0.4788637403857401, 0.3535481281496117]], [["Sharpness", 0.6428916269794158, 0.22791511918580576], ["Contrast", 0.016014045073950323, 0.26811312269487575]], [["Rotate", 0.2972727228410451, 0.7654251516829896], ["AutoContrast", 0.16005809254943348, 0.5380523650108116]], [["Contrast", 0.5823671057717301, 0.7521166301398389], ["TranslateY", 0.9949449214751978, 0.9612671341689751]], [["Equalize", 0.8372126687702321, 0.6944127225621206], ["Rotate", 0.25393282929784755, 0.3261658365286546]], [["Invert", 0.8222011603194572, 0.6597915864008403], ["Posterize", 0.31858707654447327, 0.9541013715579584]], [["Sharpness", 0.41314621282107045, 0.9437344470879956], ["Cutout", 0.6610495837889337, 0.674411664255093]], [["Contrast", 0.780121736705407, 0.40826152397463156], ["Color", 0.344019192125256, 0.1942922781355767]], [["Rotate", 0.17153139555621344, 0.798745732456474], ["Invert", 0.6010555860501262, 0.320742172554767]], [["Invert", 0.26816063450777416, 0.27152062163148327], ["Equalize", 0.6786829200236982, 0.7469412443514213]], [["Contrast", 0.3920564414367518, 0.7493644582838497], ["TranslateY", 0.8941657805606704, 0.6580846856375955]], [["Equalize", 0.875509207399372, 0.9061130537645283], ["Cutout", 0.4940280679087308, 0.7896229623628276]], [["Contrast", 0.3331423298065147, 0.7170041362529597], ["ShearX", 0.7425484291842793, 0.5285117152426109]], [["Equalize", 0.97344237365026, 0.4745759720473106], ["TranslateY", 0.055863458430295276, 0.9625142022954672]], [["TranslateX", 0.6810614083109192, 0.7509937355495521], ["TranslateY", 0.3866463019475701, 0.5185481505576112]], [["Sharpness", 0.4751529944753671, 0.550464012488733], ["Cutout", 0.9472914750534814, 0.5584925992985023]], [["Contrast", 0.054606784909375095, 0.17257080196712182], ["Cutout", 0.6077026782754803, 0.7996504165944938]], [["ShearX", 0.328798428243695, 0.2769563264079157], ["Cutout", 0.9037632437023772, 0.4915809476763595]], [["Cutout", 0.6891202672363478, 0.9951490996172914], 
["Posterize", 0.06532762462628705, 0.4005246609075227]], [["TranslateY", 0.6908583592523334, 0.725612120376128], ["Rotate", 0.39907735501746666, 0.36505798032223147]], [["TranslateX", 0.10398364107399072, 0.5913918470536627], ["Rotate", 0.7169811539340365, 0.8283850670648724]], [["ShearY", 0.9526373530768361, 0.4482347365639251], ["Contrast", 0.4203947336351471, 0.41526799558953864]], [["Contrast", 0.24894431199700073, 0.09578870500994707], ["Solarize", 0.2273713345927395, 0.6214942914963707]], [["TranslateX", 0.06331228870032912, 0.8961907489444944], ["Cutout", 0.5110007859958743, 0.23704875994050723]], [["Cutout", 0.3769183548846172, 0.6560944580253987], ["TranslateY", 0.7201924599434143, 0.4132476526938319]], [["Invert", 0.6707431156338866, 0.11622795952464149], ["Posterize", 0.12075972752370845, 0.18024933294172307]], [["Color", 0.5010057264087142, 0.5277767327434318], ["Rotate", 0.9486115946366559, 0.31485546630220784]], [["ShearX", 0.31741302466630406, 0.1991215806270692], ["Invert", 0.3744727015523084, 0.6914113986757578]], [["Brightness", 0.40348479064392617, 0.8924182735724888], ["Brightness", 0.1973098763857779, 0.3939288933689655]], [["Color", 0.01208688664030888, 0.6055693000885217], ["Equalize", 0.433259451147881, 0.420711137966155]], [["Cutout", 0.2620018360076487, 0.11594468278143644], ["Rotate", 0.1310401567856766, 0.7244318146544101]], [["ShearX", 0.15249651845933576, 0.35277277071866986], ["Contrast", 0.28221794032094016, 0.42036586509397444]], [["Brightness", 0.8492912150468908, 0.26386920887886056], ["Solarize", 0.8764208056263386, 0.1258195122766067]], [["ShearX", 0.8537058239675831, 0.8415101816171269], ["AutoContrast", 0.23958568830416294, 0.9889049529564014]], [["Rotate", 0.6463207930684552, 0.8750192129056532], ["Contrast", 0.6865032211768652, 0.8564981333033417]], [["Equalize", 0.8877190311811044, 0.7370995897848609], ["TranslateX", 0.9979660314391368, 0.005683998913244781]], [["Color", 0.6420017551677819, 0.6225337265571229], ["Solarize", 0.8344504978566362, 0.8332856969941151]], [["ShearX", 0.7439332981992567, 0.9747608698582039], ["Equalize", 0.6259189804002959, 0.028017478098245174]], [["TranslateY", 0.39794770293366843, 0.8482966537902709], ["Rotate", 0.9312935630405351, 0.5300586925826072]], [["Cutout", 0.8904075572021911, 0.3522934742068766], ["Equalize", 0.6431186289473937, 0.9930577962126151]], [["Contrast", 0.9183553386089476, 0.44974266209396685], ["TranslateY", 0.8193684583123862, 0.9633741156526566]], [["ShearY", 0.616078299924283, 0.19219314358924766], ["Solarize", 0.1480945914138868, 0.05922109541654652]], [["Solarize", 0.25332455064128157, 0.18853037431947994], ["ShearY", 0.9518390093954243, 0.14603930044061142]], [["Color", 0.8094378664335412, 0.37029830225408433], ["Contrast", 0.29504113617467465, 0.065096365468442]], [["AutoContrast", 0.7075167558685455, 0.7084621693458267], ["Sharpness", 0.03555539453323875, 0.5651948313888351]], [["TranslateY", 0.5969982600930229, 0.9857264201029572], ["Rotate", 0.9898628564873607, 0.1985685534926911]], [["Invert", 0.14915939942810352, 0.6595839632446547], ["Posterize", 0.768535289994361, 0.5997358684618563]], [["Equalize", 0.9162691815967111, 0.3331035307653627], ["Color", 0.8169118187605557, 0.7653910258006366]], [["Rotate", 0.43489185299530897, 0.752215269135173], ["Brightness", 0.1569828560334806, 0.8002808712857853]], [["Invert", 0.931876215328345, 0.029428644395760872], ["Equalize", 0.6330036052674145, 0.7235531014288485]], [["ShearX", 0.5216138393704968, 0.849272958911589], ["AutoContrast", 
0.19572688655120263, 0.9786551568639575]], [["ShearX", 0.9899586208275011, 0.22580547500610293], ["Brightness", 0.9831311903178727, 0.5055159610855606]], [["Brightness", 0.29179117009211486, 0.48003584672937294], ["Solarize", 0.7544252317330058, 0.05806581735063043]], [["AutoContrast", 0.8919800329537786, 0.8511261613698553], ["Contrast", 0.49199446084551035, 0.7302297140181429]], [["Cutout", 0.7079723710644835, 0.032565015538375874], ["AutoContrast", 0.8259782090388609, 0.7860708789468442]], [["Posterize", 0.9980262659801914, 0.6725084224935673], ["ShearY", 0.6195568269664682, 0.5444170291816751]], [["Posterize", 0.8687351834713217, 0.9978004914422602], ["Equalize", 0.4532646848325955, 0.6486748015710573]], [["Contrast", 0.2713928776950594, 0.15255249557027806], ["ShearY", 0.9276834387970199, 0.5266542862333478]], [["AutoContrast", 0.5240786618055582, 0.9325642258930253], ["Cutout", 0.38448627892037357, 0.21219415055662394]], [["TranslateX", 0.4299517937295352, 0.20133751201386152], ["TranslateX", 0.6753468310276597, 0.6985621035400441]], [["Rotate", 0.4006472499103597, 0.6704748473357586], ["Equalize", 0.674161668148079, 0.6528530101705237]], [["Equalize", 0.9139902833674455, 0.9015103149680278], ["Sharpness", 0.7289667720691948, 0.7623606352376232]], [["Cutout", 0.5911267429414259, 0.5953141187177585], ["Rotate", 0.5219064817468504, 0.11085141355857986]], [["TranslateX", 0.3620095133946267, 0.26194039409492476], ["Rotate", 0.3929841359545597, 0.4913406720338047]], [["Invert", 0.5175298901458896, 0.001661410821811482], ["Invert", 0.004656581318332242, 0.8157622192213624]], [["AutoContrast", 0.013609693335051465, 0.9318651749409604], ["Invert", 0.8980844358979592, 0.2268511862780368]], [["ShearY", 0.7717126261142194, 0.09975547983707711], ["Equalize", 0.7808494401429572, 0.4141412091009955]], [["TranslateX", 0.5878675721341552, 0.29813268038163376], ["Posterize", 0.21257276051591356, 0.2837285296666412]], [["Brightness", 0.4268335108566488, 0.4723784991635417], ["Cutout", 0.9386262901570471, 0.6597686851494288]], [["ShearX", 0.8259423807590159, 0.6215304795389204], ["Invert", 0.6663365779667443, 0.7729669184580387]], [["ShearY", 0.4801338723951297, 0.5220145420100984], ["Solarize", 0.9165803796596582, 0.04299335502862134]], [["Color", 0.17621114853558817, 0.7092601754635434], ["ShearX", 0.9014406936728542, 0.6028711944367818]], [["Rotate", 0.13073284972300658, 0.9088831512880851], ["ShearX", 0.4228105332316806, 0.7985249783662675]], [["Brightness", 0.9182753692730031, 0.0063635477774044436], ["Color", 0.4279825602663798, 0.28727149118585327]], [["Equalize", 0.578218285372267, 0.9611758542158054], ["Contrast", 0.5471552264150691, 0.8819635504027596]], [["Brightness", 0.3208589067274543, 0.45324733565167497], ["Solarize", 0.5218455808633233, 0.5946097503647126]], [["Equalize", 0.3790381278653, 0.8796082535775276], ["Solarize", 0.4875526773149246, 0.5186585878052613]], [["ShearY", 0.12026461479557571, 0.1336953429068397], ["Posterize", 0.34373988646025766, 0.8557727670803785]], [["Cutout", 0.2396745247507467, 0.8123036135209865], ["Equalize", 0.05022807681008945, 0.6648492261984383]], [["Brightness", 0.35226676470748264, 0.5950011514888855], ["Rotate", 0.27555076067000894, 0.9170063321486026]], [["ShearX", 0.320224630647278, 0.9683584649071976], ["Invert", 0.6905585196648905, 0.5929115667894518]], [["Color", 0.9941395717559652, 0.7474441679798101], ["Sharpness", 0.7559998478658021, 0.6656052889626682]], [["ShearY", 0.4004220568345669, 0.5737646992826074], ["Equalize", 0.9983495213746147, 
0.8307907033362303]], [["Color", 0.13726809242038207, 0.9378850119950549], ["Equalize", 0.9853362454752445, 0.42670264496554156]], [["Invert", 0.13514636153298576, 0.13516363849081958], ["Sharpness", 0.2031189356693901, 0.6110226359872745]], [["TranslateX", 0.7360305209630797, 0.41849698571655614], ["Contrast", 0.8972161549144564, 0.7820296625565641]], [["Color", 0.02713118828682548, 0.717110684828096], ["TranslateY", 0.8118759006836348, 0.9120098002024992]], [["Sharpness", 0.2915428949403711, 0.7630303724396518], ["Solarize", 0.22030536162851078, 0.38654526772661757]], [["Equalize", 0.9949114839538582, 0.7193630656062793], ["AutoContrast", 0.00889496657931299, 0.2291400476524672]], [["Rotate", 0.7120948976490488, 0.7804359309791055], ["Cutout", 0.10445418104923654, 0.8022999156052766]], [["Equalize", 0.7941710117902707, 0.8648170634288153], ["Invert", 0.9235642581144047, 0.23810725859722381]], [["Cutout", 0.3669397998623156, 0.42612815083245004], ["Solarize", 0.5896322046441561, 0.40525016166956795]], [["Color", 0.8389858785714184, 0.4805764176488667], ["Rotate", 0.7483931487048825, 0.4731174601400677]], [["Sharpness", 0.19006538611394763, 0.9480745790240234], ["TranslateY", 0.13904429049439282, 0.04117685330615939]], [["TranslateY", 0.9958097661701637, 0.34853788612580905], ["Cutout", 0.2235829624082113, 0.3737887095480745]], [["ShearX", 0.635453761342424, 0.6063917273421382], ["Posterize", 0.8738297843709666, 0.4893042590265556]], [["Brightness", 0.7907245198402727, 0.7082189713070691], ["Color", 0.030313003541849737, 0.6927897798493439]], [["Cutout", 0.6965622481073525, 0.8103522907758203], ["ShearY", 0.6186794303078708, 0.28640671575703547]], [["ShearY", 0.43734910588450226, 0.32549342535621517], ["ShearX", 0.08154980987651872, 0.3286764923112455]], [["AutoContrast", 0.5262462005050853, 0.8175584582465848], ["Contrast", 0.8683217097363655, 0.548776281479276]], [["ShearY", 0.03957878500311707, 0.5102350637943197], ["Rotate", 0.13794708520303778, 0.38035687712954236]], [["Sharpness", 0.634288567312677, 0.6387948309075822], ["AutoContrast", 0.13437288694693272, 0.7150448869023095]], [["Contrast", 0.5198339640088544, 0.9409429390321714], ["Cutout", 0.09489154903321972, 0.6228488803821982]], [["Equalize", 0.8955909061806043, 0.7727336527163008], ["AutoContrast", 0.6459479564441762, 0.7065467781139214]], [["Invert", 0.07214420843537739, 0.15334721382249505], ["ShearX", 0.9242027778363903, 0.5809187849982554]], [["Equalize", 0.9144084379856188, 0.9457539278608998], ["Sharpness", 0.14337499858300173, 0.5978054365425495]], [["Posterize", 0.18894269796951202, 0.14676331276539045], ["Equalize", 0.846204299950047, 0.0720601838168885]], [["Contrast", 0.47354445405741163, 0.1793650330107468], ["Solarize", 0.9086106327264657, 0.7578807802091502]], [["AutoContrast", 0.11805466892967886, 0.6773620948318575], ["TranslateX", 0.584222568299264, 0.9475693349391936]], [["Brightness", 0.5833017701352768, 0.6892593824176294], ["AutoContrast", 0.9073141314561828, 0.5823085733964589]], [["TranslateY", 0.5711231614144834, 0.6436240447620021], ["Contrast", 0.21466964050052473, 0.8042843954486391]], [["Contrast", 0.22967904487976765, 0.2343103109298762], ["Invert", 0.5502897289159286, 0.386181060792375]], [["Invert", 0.7008423439928628, 0.4234003051405053], ["Rotate", 0.77270460187611, 0.6650852696828039]], [["Invert", 0.050618322309703534, 0.24277027926683614], ["TranslateX", 0.789703489736613, 0.5116446685339312]], [["Color", 0.363898083076868, 0.7870323584210503], ["ShearY", 0.009608425513626617, 
0.6188625018465327]], [["TranslateY", 0.9447601615216088, 0.8605867115798349], ["Equalize", 0.24139180127003634, 0.9587337957930782]], [["Equalize", 0.3968589440144503, 0.626206375426996], ["Solarize", 0.3215967960673186, 0.826785464835443]], [["TranslateX", 0.06947339047121326, 0.016705969558222122], ["Contrast", 0.6203392406528407, 0.6433525559906872]], [["Solarize", 0.2479835265518212, 0.6335009955617831], ["Sharpness", 0.6260191862978083, 0.18998095149428562]], [["Invert", 0.9818841924943431, 0.03252098144087934], ["TranslateY", 0.9740718042586802, 0.32038951753031475]], [["Solarize", 0.8795784664090814, 0.7014953994354041], ["AutoContrast", 0.8508018319577783, 0.09321935255338443]], [["Color", 0.8067046326105318, 0.13732893832354054], ["Contrast", 0.7358549680271418, 0.7880588355974301]], [["Posterize", 0.5005885536838065, 0.7152229305267599], ["ShearX", 0.6714249591308944, 0.7732232697859908]], [["TranslateY", 0.5657943483353953, 0.04622399873706862], ["AutoContrast", 0.2787442688649845, 0.567024378767143]], [["ShearY", 0.7589839214283295, 0.041071003934029404], ["Equalize", 0.3719852873722692, 0.43285778682687326]], [["Posterize", 0.8841266183653291, 0.42441306955476366], ["Cutout", 0.06578801759412933, 0.5961125797961526]], [["Rotate", 0.4057875004314082, 0.20241115848366442], ["AutoContrast", 0.19331542807918067, 0.7175484678480565]], [["Contrast", 0.20331327116693088, 0.17135387852218742], ["Cutout", 0.6282459410351067, 0.6690015305529187]], [["ShearX", 0.4309850328306535, 0.99321178125828], ["AutoContrast", 0.01809604030453338, 0.693838277506365]], [["Rotate", 0.24343531125298268, 0.5326412444169899], ["Sharpness", 0.8663989992597494, 0.7643990609130789]], [["Rotate", 0.9785019204622459, 0.8941922576710696], ["ShearY", 0.3823185048761075, 0.9258854046017292]], [["ShearY", 0.5502613342963388, 0.6193478797355644], ["Sharpness", 0.2212116534610532, 0.6648232390110979]], [["TranslateY", 0.43222920981513757, 0.5657636397633089], ["ShearY", 0.9153733286073634, 0.4868521171273169]], [["Posterize", 0.12246560519738336, 0.9132288825898972], ["Cutout", 0.6058471327881816, 0.6426901876150983]], [["Color", 0.3693970222695844, 0.038929141432555436], ["Equalize", 0.6228052875653781, 0.05064436511347281]], [["Color", 0.7172600331356893, 0.2824542634766688], ["Color", 0.425293116261649, 0.1796441283313972]], [["Cutout", 0.7539608428122959, 0.9896141728228921], ["Solarize", 0.17811081117364758, 0.9064195503634402]], [["AutoContrast", 0.6761242607012717, 0.6484842446399923], ["AutoContrast", 0.1978135076901828, 0.42166879492601317]], [["ShearY", 0.25901666379802524, 0.4770778270322449], ["Solarize", 0.7640963173407052, 0.7548463227094349]], [["TranslateY", 0.9222487731783499, 0.33658389819616463], ["Equalize", 0.9159112511468139, 0.8877136302394797]], [["TranslateX", 0.8994836977137054, 0.11036053676846591], ["Sharpness", 0.9040333410652747, 0.007266095214664592]], [["Invert", 0.627758632524958, 0.8075245097227242], ["Color", 0.7525387912148516, 0.05950239294733184]], [["TranslateX", 0.43505193292761857, 0.38108822876120796], ["TranslateY", 0.7432578052364004, 0.685678116134759]], [["Contrast", 0.9293507582470425, 0.052266842951356196], ["Posterize", 0.45187123977747456, 0.8228290399726782]], [["ShearX", 0.07240786542746291, 0.8945667925365756], ["Brightness", 0.5305443506561034, 0.12025274552427578]], [["Invert", 0.40157564448143335, 0.5364745514006678], ["Posterize", 0.3316124671813876, 0.43002413237035997]], [["ShearY", 0.7152314630009072, 0.1938339083417453], ["Invert", 0.14102478508140615, 
0.41047623580174253]], [["Equalize", 0.19862832613849246, 0.5058521685279254], ["Sharpness", 0.16481208629549782, 0.29126323102770557]], [["Equalize", 0.6951591703541872, 0.7294822018800076], ["ShearX", 0.8726656726111219, 0.3151484225786487]], [["Rotate", 0.17234370554263745, 0.9356543193000078], ["TranslateX", 0.4954374070084091, 0.05496727345849217]], [["Contrast", 0.347405480122842, 0.831553005022885], ["ShearX", 0.28946367213071134, 0.11905898704394013]], [["Rotate", 0.28096672507990683, 0.16181284050307398], ["Color", 0.6554918515385365, 0.8739728050797386]], [["Solarize", 0.05408073374114053, 0.5357087283758337], ["Posterize", 0.42457175211495335, 0.051807130609045515]], [["TranslateY", 0.6216669362331361, 0.9691341207381867], ["Rotate", 0.9833579358130944, 0.12227426932415297]], [["AutoContrast", 0.7572619475282892, 0.8062834082727393], ["Contrast", 0.1447865402875591, 0.40242646573228436]], [["Rotate", 0.7035658783466086, 0.9840285268256428], ["Contrast", 0.04613961510519471, 0.7666683217450163]], [["TranslateX", 0.4580462177951252, 0.6448678609474686], ["AutoContrast", 0.14845695613708987, 0.1581134188537895]], [["Color", 0.06795037145259564, 0.9115552821158709], ["TranslateY", 0.9972953449677655, 0.6791016521791214]], [["Cutout", 0.3586908443690823, 0.11578558293480945], ["Color", 0.49083981719164294, 0.6924851425917189]], [["Brightness", 0.7994717831637873, 0.7887316255321768], ["Posterize", 0.01280463502435425, 0.2799086732858721]], [["ShearY", 0.6733451536131859, 0.8122332639516706], ["AutoContrast", 0.20433889615637357, 0.29023346867819966]], [["TranslateY", 0.709913512385177, 0.6538196931503809], ["Invert", 0.06629795606579203, 0.40913219547548296]], [["Sharpness", 0.4704559834362948, 0.4235993305308414], ["Equalize", 0.7578132044306966, 0.9388824249397175]], [["AutoContrast", 0.5281702802395268, 0.8077253610116979], ["Equalize", 0.856446858814119, 0.0479755681647559]], [["Color", 0.8244145826797791, 0.038409264586238945], ["Equalize", 0.4933123249234237, 0.8251940933672189]], [["TranslateX", 0.23949314158035084, 0.13576027004706692], ["ShearX", 0.8547563771688399, 0.8309262160483606]], [["Cutout", 0.4655680937486001, 0.2819807000622825], ["Contrast", 0.8439552665937905, 0.4843617871587037]], [["TranslateX", 0.19142454476784831, 0.7516148119169537], ["AutoContrast", 0.8677128351329768, 0.34967990912346336]], [["Contrast", 0.2997868299880966, 0.919508054854469], ["AutoContrast", 0.3003418493384957, 0.812314984368542]], [["Invert", 0.1070424236198183, 0.614674386498809], ["TranslateX", 0.5010973510899923, 0.20828478805259465]], [["Contrast", 0.6775882415611454, 0.6938564815591685], ["Cutout", 0.4814634264207498, 0.3086844939744179]], [["TranslateY", 0.939427105020265, 0.02531043619423201], ["Contrast", 0.793754257944812, 0.6676072472565451]], [["Sharpness", 0.09833672397575444, 0.5937214638292085], ["Rotate", 0.32530675291753763, 0.08302275740932441]], [["Sharpness", 0.3096455511562728, 0.6726732004553959], ["TranslateY", 0.43268997648796537, 0.8755012330217743]], [["ShearY", 0.9290771880324833, 0.22114736271319912], ["Equalize", 0.5520199288501478, 0.34269650332060553]], [["AutoContrast", 0.39763980746649374, 0.4597414582725454], ["Contrast", 0.941507852412761, 0.24991270562477041]], [["Contrast", 0.19419400547588095, 0.9127524785329233], ["Invert", 0.40544905179551727, 0.770081532844878]], [["Invert", 0.30473757368608334, 0.23534811781828846], ["Cutout", 0.26090722356706686, 0.5478390909877727]], [["Posterize", 0.49434361308057373, 0.05018423270527428], ["Color", 
0.3041910676883317, 0.2603810415446437]], [["Invert", 0.5149061746764011, 0.9507449210221298], ["TranslateY", 0.4458076521892904, 0.8235358255774426]], [["Cutout", 0.7900006753351625, 0.905578861382507], ["Cutout", 0.6707153655762056, 0.8236715672258502]], [["Solarize", 0.8750534386579575, 0.10337670467100568], ["Posterize", 0.6102379615481381, 0.9264503915416868]], [["ShearY", 0.08448689377082852, 0.13981233725811626], ["TranslateX", 0.13979689669329498, 0.768774869872818]], [["TranslateY", 0.35752572266759985, 0.22827299847812488], ["Solarize", 0.3906957174236011, 0.5663314388307709]], [["ShearY", 0.29155240367061563, 0.8427516352971683], ["ShearX", 0.988825367441916, 0.9371258864857649]], [["Posterize", 0.3470780859769458, 0.5467686612321239], ["Rotate", 0.5758606274160093, 0.8843838082656007]], [["Cutout", 0.07825368363221841, 0.3230799425855425], ["Equalize", 0.2319163865298529, 0.42133965674727325]], [["Invert", 0.41972172597448654, 0.34618622513582953], ["ShearX", 0.33638469398198834, 0.9098575535928108]], [["Invert", 0.7322652233340448, 0.7747502957687412], ["Cutout", 0.9643121397298106, 0.7983335094634907]], [["TranslateY", 0.30039942808098496, 0.229018798182827], ["TranslateY", 0.27009499739380194, 0.6435577237846236]], [["Color", 0.38245274994070644, 0.7030758568461645], ["ShearX", 0.4429321461666281, 0.6963787864044149]], [["AutoContrast", 0.8432798685515605, 0.5775214369578088], ["Brightness", 0.7140899735355927, 0.8545854720117658]], [["Rotate", 0.14418935535613786, 0.5637968282213426], ["Color", 0.7115231912479835, 0.32584796564566776]], [["Sharpness", 0.4023501062807533, 0.4162097130412771], ["Brightness", 0.5536372686153666, 0.03004023273348777]], [["TranslateX", 0.7526053265574295, 0.5365938133399961], ["Cutout", 0.07914142706557492, 0.7544953091603148]], [["TranslateY", 0.6932934644882822, 0.5302211727137424], ["Invert", 0.5040606028391255, 0.6074863635108957]], [["Sharpness", 0.5013938602431629, 0.9572417724333157], ["TranslateY", 0.9160516359783026, 0.41798927975391675]], [["ShearY", 0.5130018836722556, 0.30209438428424185], ["Color", 0.15017170588500262, 0.20653495360587826]], [["TranslateX", 0.5293300090022314, 0.6407011888285266], ["Rotate", 0.4809817860439001, 0.3537850070371702]], [["Equalize", 0.42243081336551014, 0.13472721311046565], ["Posterize", 0.4700309639484068, 0.5197704360874883]], [["AutoContrast", 0.40674959899687235, 0.7312824868168921], ["TranslateX", 0.7397527975920833, 0.7068339877944815]], [["TranslateY", 0.5880995184787206, 0.41294111378078946], ["ShearX", 0.3181387627799316, 0.4810010147143413]], [["Color", 0.9898680233928507, 0.13241525577655167], ["Contrast", 0.9824932511238534, 0.5081145010853807]], [["Invert", 0.1591854062582687, 0.9760371953250404], ["Color", 0.9913399302056851, 0.8388709501056177]], [["Rotate", 0.6427451962231163, 0.9486793975292853], ["AutoContrast", 0.8501937877930463, 0.021326757974406196]], [["Contrast", 0.13611684531087598, 0.3050858709483848], ["Posterize", 0.06618644756084646, 0.8776928511951034]], [["TranslateX", 0.41021065663839407, 0.4965319749091702], ["Rotate", 0.07088831484595115, 0.4435516708223345]], [["Sharpness", 0.3151707977154323, 0.28275482520179296], ["Invert", 0.36980384682133804, 0.20813616084536624]], [["Cutout", 0.9979060206661017, 0.39712948644725854], ["Brightness", 0.42451052896163466, 0.942623075649937]], [["Equalize", 0.5300853308425644, 0.010183500830128867], ["AutoContrast", 0.06930788523716991, 0.5403125318991522]], [["Contrast", 0.010385458959237814, 0.2588311035539086], ["ShearY", 
0.9347048553928764, 0.10439028366854963]], [["ShearY", 0.9867649486508592, 0.8409258132716434], ["ShearX", 0.48031199530836444, 0.7703375364614137]], [["ShearY", 0.04835889473136512, 0.2671081675890492], ["Brightness", 0.7856432618509617, 0.8032169570159564]], [["Posterize", 0.11112884927351185, 0.7116956530752987], ["TranslateY", 0.7339151092128607, 0.3331241226029017]], [["Invert", 0.13527036207875454, 0.8425980515358883], ["Color", 0.7836395778298139, 0.5517059252678862]], [["Sharpness", 0.012541163521491816, 0.013197550692292892], ["Invert", 0.6295957932861318, 0.43276521236056054]], [["AutoContrast", 0.7681480991225756, 0.3634284648496289], ["Brightness", 0.09708289828517969, 0.45016725043529726]], [["Brightness", 0.5839450499487329, 0.47525965678316795], ["Posterize", 0.43096581990183735, 0.9332382960125196]], [["Contrast", 0.9725334964552795, 0.9142902966863341], ["Contrast", 0.12376116410622995, 0.4355916974126801]], [["TranslateX", 0.8572708473690132, 0.02544522678265526], ["Sharpness", 0.37902120723460364, 0.9606092969833118]], [["TranslateY", 0.8907359001296927, 0.8011363927236099], ["Color", 0.7693777154407178, 0.0936768686746503]], [["Equalize", 0.0002657688243309364, 0.08190798535970034], ["Rotate", 0.5215478065240905, 0.5773519995038368]], [["TranslateY", 0.3383007813932477, 0.5733428274739165], ["Sharpness", 0.2436110797174722, 0.4757790814590501]], [["Cutout", 0.0957402176213592, 0.8914395928996034], ["Cutout", 0.4959915628586883, 0.25890349461645246]], [["AutoContrast", 0.594787300189186, 0.9627455357634459], ["ShearY", 0.5136027621132064, 0.10419602450259002]], [["Solarize", 0.4684077211553732, 0.6592850629431414], ["Sharpness", 0.2382385935956325, 0.6589291408243176]], [["Cutout", 0.4478786947325877, 0.6893616643143388], ["TranslateX", 0.2761781720270474, 0.21750622627277727]], [["Sharpness", 0.39476077929016484, 0.930902796668923], ["Cutout", 0.9073012208742808, 0.9881122386614257]], [["TranslateY", 0.0933719180021565, 0.7206252503441172], ["ShearX", 0.5151400441789256, 0.6307540083648309]], [["AutoContrast", 0.7772689258806401, 0.8159317013156503], ["AutoContrast", 0.5932793713915097, 0.05262217353927168]], [["Equalize", 0.38017352056118914, 0.8084724050448412], ["ShearY", 0.7239725628380852, 0.4246314890359326]], [["Cutout", 0.741157483503503, 0.13244380646497977], ["Invert", 0.03395378056675935, 0.7140036618098844]], [["Rotate", 0.0662727247460636, 0.7099861732415447], ["Rotate", 0.3168532707508249, 0.3553167425022127]], [["AutoContrast", 0.7429303516734129, 0.07117444599776435], ["Posterize", 0.5379537435918104, 0.807221330263993]], [["TranslateY", 0.9788586874795164, 0.7967243851346594], ["Invert", 0.4479103376922362, 0.04260360776727545]], [["Cutout", 0.28318121763188997, 0.7748680701406292], ["AutoContrast", 0.9109258369403016, 0.17126397858002085]], [["Color", 0.30183727885272027, 0.46718354750112456], ["TranslateX", 0.9628952256033627, 0.10269543754135535]], [["AutoContrast", 0.6316709389784041, 0.84287698792044], ["Brightness", 0.5544761629904337, 0.025264772745200004]], [["Rotate", 0.08803313299532567, 0.306059720523696], ["Invert", 0.5222165872425064, 0.045935208620454304]], [["TranslateY", 0.21912346831923835, 0.48529224559004436], ["TranslateY", 0.15466734731903942, 0.8929485418495068]], [["ShearX", 0.17141022847016563, 0.8607600402165531], ["ShearX", 0.6890511341106859, 0.7540899265679949]], [["Invert", 0.9417455522972059, 0.9021733684991224], ["Solarize", 0.7693107057723746, 0.7268007946568782]], [["Posterize", 0.02376991543373752, 0.6768442864453844], 
["Rotate", 0.7736875065112697, 0.6706331753139825]], [["Contrast", 0.3623841610390669, 0.15023657344457686], ["Equalize", 0.32975472189318666, 0.05629246869510651]], [["Sharpness", 0.7874882420165824, 0.49535778020457066], ["Posterize", 0.09485578893387558, 0.6170768580482466]], [["Brightness", 0.7099280202949585, 0.021523012961427335], ["Posterize", 0.2076371467666719, 0.17168118578815206]], [["Color", 0.8546367645761538, 0.832011891505731], ["Equalize", 0.6429734783051777, 0.2618995960561532]], [["Rotate", 0.8780793721476224, 0.5920897827664297], ["ShearX", 0.5338303685064825, 0.8605424531336439]], [["Sharpness", 0.7504493806631884, 0.9723552387375258], ["Sharpness", 0.3206385634203266, 0.45127845905824693]], [["ShearX", 0.23794709526711355, 0.06257530645720066], ["Solarize", 0.9132374030587093, 0.6240819934824045]], [["Sharpness", 0.790583587969259, 0.28551171786655405], ["Contrast", 0.39872982844590554, 0.09644706751019538]], [["Equalize", 0.30681999237432944, 0.5645045018157916], ["Posterize", 0.525966242669736, 0.7360106111256014]], [["TranslateX", 0.4881014179825114, 0.6317220208872226], ["ShearX", 0.2935158995550958, 0.23104608987381758]], [["Rotate", 0.49977116738568395, 0.6610761068306319], ["TranslateY", 0.7396566602715687, 0.09386747830045217]], [["ShearY", 0.5909773790018789, 0.16229529902832718], ["Equalize", 0.06461394468918358, 0.6661349001143908]], [["TranslateX", 0.7218443721851834, 0.04435720302810153], ["Cutout", 0.986686540951642, 0.734771197038724]], [["ShearX", 0.5353800096911666, 0.8120139502148365], ["Equalize", 0.4613239578449774, 0.5159528929124512]], [["Color", 0.0871713897628631, 0.7708895183198486], ["Solarize", 0.5811386808912219, 0.35260648120785887]], [["Posterize", 0.3910857927477053, 0.4329219555775561], ["Color", 0.9115983668789468, 0.6043069944145293]], [["Posterize", 0.07493067637060635, 0.4258000066006725], ["AutoContrast", 0.4740957581389772, 0.49069587151651295]], [["Rotate", 0.34086200894268937, 0.9812149332288828], ["Solarize", 0.6801012471371733, 0.17271491146753837]], [["Color", 0.20542270872895207, 0.5532087457727624], ["Contrast", 0.2718692536563381, 0.20313287569510108]], [["Equalize", 0.05199827210980934, 0.0832859890912212], ["AutoContrast", 0.8092395764794107, 0.7778945136511004]], [["Sharpness", 0.1907689513066838, 0.7705754572256907], ["Color", 0.3911178658498049, 0.41791326933095485]], [["Solarize", 0.19611855804748257, 0.2407807485604081], ["AutoContrast", 0.5343964972940493, 0.9034209455548394]], [["Color", 0.43586520148538865, 0.4711164626521439], ["ShearY", 0.28635408186820555, 0.8417816793020271]], [["Cutout", 0.09818482420382535, 0.1649767430954796], ["Cutout", 0.34582392911178494, 0.3927982995799828]], [["ShearX", 0.001253882705272269, 0.48661629027584596], ["Solarize", 0.9229221435457137, 0.44374894836659073]], [["Contrast", 0.6829734655718668, 0.8201750485099037], ["Cutout", 0.7886756837648936, 0.8423285219631946]], [["TranslateY", 0.857017093561528, 0.3038537151773969], ["Invert", 0.12809228606383538, 0.23637166191748027]], [["Solarize", 0.9829027723424164, 0.9723093910674763], ["Color", 0.6346495302126811, 0.5405494753107188]], [["AutoContrast", 0.06868643520377715, 0.23758659417688077], ["AutoContrast", 0.6648225411500879, 0.5618315648260103]], [["Invert", 0.44202305603311676, 0.9945938909685547], ["Equalize", 0.7991650497684454, 0.16014142656347097]], [["AutoContrast", 0.8778631604769588, 0.03951977631894088], ["ShearY", 0.8495160088963707, 0.35771447321250416]], [["Color", 0.5365078341001592, 0.21102444169782308], 
["ShearX", 0.7168869678248874, 0.3904298719872734]], [["TranslateX", 0.6517203786101899, 0.6467598990650437], ["Invert", 0.26552491504364517, 0.1210812827294625]], [["Posterize", 0.35196021684368994, 0.8420648319941891], ["Invert", 0.7796829363930631, 0.9520895999240896]], [["Sharpness", 0.7391572148971984, 0.4853940393452846], ["TranslateX", 0.7641915295592839, 0.6351349057666782]], [["Posterize", 0.18485880221115913, 0.6117603277356728], ["Rotate", 0.6541660490605724, 0.5704041108375348]], [["TranslateY", 0.27517423188070533, 0.6610080904072458], ["Contrast", 0.6091250547289317, 0.7702443247557892]], [["Equalize", 0.3611798581067118, 0.6623615672642768], ["TranslateX", 0.9537265090885917, 0.06352772509358584]], [["ShearX", 0.09720029389103535, 0.7800423126320308], ["Invert", 0.30314352455858884, 0.8519925470889914]], [["Brightness", 0.06931529763458055, 0.57760829499712], ["Cutout", 0.637251974467394, 0.7184346129191052]], [["AutoContrast", 0.5026722100286064, 0.32025257156541886], ["Contrast", 0.9667478703047919, 0.14178519432669368]], [["Equalize", 0.5924463845816984, 0.7187610262181517], ["TranslateY", 0.7059479079159405, 0.06551471830655187]], [["Sharpness", 0.18161164512332928, 0.7576138481173385], ["Brightness", 0.19191138767695282, 0.7865880269424701]], [["Brightness", 0.36780861866078696, 0.0677855546737901], ["AutoContrast", 0.8491446654142264, 0.09217782099938121]], [["TranslateY", 0.06011399855120858, 0.8374487034710264], ["TranslateY", 0.8373922962070498, 0.1991295720254297]], [["Posterize", 0.702559916122481, 0.30257509683007755], ["Rotate", 0.249899495398891, 0.9370437251176267]], [["ShearX", 0.9237874098232075, 0.26241907483351146], ["Brightness", 0.7221766836146657, 0.6880749752986671]], [["Cutout", 0.37994098189193104, 0.7836874473657957], ["ShearX", 0.9212861960976824, 0.8140948561570449]], [["Posterize", 0.2584098274786417, 0.7990847652004848], ["Invert", 0.6357731737590063, 0.1066304859116326]], [["Sharpness", 0.4412790857539922, 0.9692465283229825], ["Color", 0.9857401617339051, 0.26755393929808713]], [["Equalize", 0.22348671644912665, 0.7370019910830038], ["Posterize", 0.5396106339575417, 0.5559536849843303]], [["Equalize", 0.8742967663495852, 0.2797122599926307], ["Rotate", 0.4697322053105951, 0.8769872942579476]], [["Sharpness", 0.44279911640509206, 0.07729581896071613], ["Cutout", 0.3589177366154631, 0.2704031551235969]], [["TranslateX", 0.614216412574085, 0.47929659784170453], ["Brightness", 0.6686234118438007, 0.05700784068205689]], [["ShearY", 0.17920614630857634, 0.4699685075827862], ["Color", 0.38251870810870003, 0.7262706923005887]], [["Solarize", 0.4951799001144561, 0.212775278026479], ["TranslateX", 0.8666105646463097, 0.6750496637519537]], [["Color", 0.8110864170849051, 0.5154263861958484], ["Sharpness", 0.2489044083898776, 0.3763372541462343]], [["Cutout", 0.04888193613483871, 0.06041664638981603], ["Color", 0.06438587718683708, 0.5797881428892969]], [["Rotate", 0.032427448352152166, 0.4445797818376559], ["Posterize", 0.4459357828482998, 0.5879865187630777]], [["ShearX", 0.1617179557693058, 0.050796802246318884], ["Cutout", 0.8142465452060423, 0.3836391305618707]], [["TranslateY", 0.1806857249209416, 0.36697730355422675], ["Rotate", 0.9897576550818276, 0.7483432452225264]], [["Brightness", 0.18278016458098223, 0.952352527690299], ["Cutout", 0.3269735224453044, 0.3924869905012752]], [["ShearX", 0.870832707718742, 0.3214743207190739], ["Cutout", 0.6805560681792573, 0.6984188155282459]], [["TranslateX", 0.4157118388833776, 0.3964216288135384], 
["TranslateX", 0.3253012682285006, 0.624835513104391]], [["Contrast", 0.7678168037628158, 0.31033802162621793], ["ShearX", 0.27022424855977134, 0.3773245605126201]], [["TranslateX", 0.37812621869017593, 0.7657993810740699], ["Rotate", 0.18081890120092914, 0.8893511219618171]], [["Posterize", 0.8735859716088367, 0.18243793043074286], ["TranslateX", 0.90435994250313, 0.24116383818819453]], [["Invert", 0.06666709253664793, 0.3881076083593933], ["TranslateX", 0.3783333964963522, 0.14411014979589543]], [["Equalize", 0.8741147867162096, 0.14203839235846816], ["TranslateX", 0.7801536758037405, 0.6952401607812743]], [["Cutout", 0.6095335117944475, 0.5679026063718094], ["Posterize", 0.06433868172233115, 0.07139559616012303]], [["TranslateY", 0.3020364047315408, 0.21459810361176246], ["Cutout", 0.7097677414888889, 0.2942144632587549]], [["Brightness", 0.8223662419048653, 0.195700694016108], ["Invert", 0.09345407040803999, 0.779843655582099]], [["TranslateY", 0.7353462929356228, 0.0468520680237382], ["Cutout", 0.36530918247940425, 0.3897292909049672]], [["Invert", 0.9676896451721213, 0.24473302189463453], ["Invert", 0.7369271521408992, 0.8193267003356975]], [["Sharpness", 0.8691871972054326, 0.4441713912682772], ["ShearY", 0.47385584832119887, 0.23521684584675429]], [["ShearY", 0.9266946026184021, 0.7611986713358834], ["TranslateX", 0.6195820760253926, 0.14661428669483678]], [["Sharpness", 0.08470870576026868, 0.3380219099907229], ["TranslateX", 0.3062343307496658, 0.7135777338095889]], [["Sharpness", 0.5246448204194909, 0.3193061215236702], ["ShearX", 0.8160637208508432, 0.9720697396582731]], [["Posterize", 0.5249259956549405, 0.3492042382504774], ["Invert", 0.8183138799547441, 0.11107271762524618]], [["TranslateY", 0.210869733350744, 0.7138905840721885], ["Sharpness", 0.7773226404450125, 0.8005353621959782]], [["Posterize", 0.33067522385556025, 0.32046239220630124], ["AutoContrast", 0.18918147708798405, 0.4646281070474484]], [["TranslateX", 0.929502026131094, 0.8029128121556285], ["Invert", 0.7319794306118105, 0.5421878712623392]], [["ShearX", 0.25645940834182723, 0.42754710760160963], ["ShearX", 0.44640695310173306, 0.8132185532296811]], [["Color", 0.018436846416536312, 0.8439313862001113], ["Sharpness", 0.3722867661453415, 0.5103570873163251]], [["TranslateX", 0.7285989086776543, 0.4809027697099264], ["TranslateY", 0.9740807004893643, 0.8241085438636939]], [["Posterize", 0.8721868989693397, 0.5700907310383815], ["Posterize", 0.4219074410577852, 0.8032643572845402]], [["Contrast", 0.9811380092558266, 0.8498397471632105], ["Sharpness", 0.8380884329421594, 0.18351306571903125]], [["TranslateY", 0.3878939366762001, 0.4699103438753077], ["Invert", 0.6055556353233807, 0.8774727658400134]], [["TranslateY", 0.052317005261018346, 0.39471450378745787], ["ShearX", 0.8612486845942395, 0.28834103278807466]], [["Color", 0.511993351208063, 0.07251427040525904], ["Solarize", 0.9898097047354855, 0.299761565689576]], [["Equalize", 0.2721248231619904, 0.6870975927455507], ["Cutout", 0.8787327242363994, 0.06228061428917098]], [["Invert", 0.8931880335225408, 0.49720931867378193], ["Posterize", 0.9619698792159256, 0.17859639696940088]], [["Posterize", 0.0061688075074411985, 0.08082938731035938], ["Brightness", 0.27745128028826993, 0.8638528796903816]], [["ShearY", 0.9140200609222026, 0.8240421430867707], ["Invert", 0.651734417415332, 0.08871906369930926]], [["Color", 0.45585010413511196, 0.44705070078574316], ["Color", 0.26394624901633146, 0.11242877788650807]], [["ShearY", 0.9200278466372522, 0.2995901331149652], 
["Cutout", 0.8445407215116278, 0.7410524214287446]], [["ShearY", 0.9950483746990132, 0.112964468262847], ["ShearY", 0.4118332303218585, 0.44839613407553636]], [["Contrast", 0.7905821952255192, 0.23360046159385106], ["Posterize", 0.8611787233956044, 0.8984260048943528]], [["TranslateY", 0.21448061359312853, 0.8228112806838331], ["Contrast", 0.8992297266152983, 0.9179231590570998]], [["Invert", 0.3924194798946006, 0.31830516468371495], ["Rotate", 0.8399556845248508, 0.3764892022932781]], [["Cutout", 0.7037916990046816, 0.9214620769502728], ["AutoContrast", 0.02913794613018239, 0.07808607528954048]], [["ShearY", 0.6041490474263381, 0.6094184590800105], ["Equalize", 0.2932954517354919, 0.5840888946081727]], [["ShearX", 0.6056801676269449, 0.6948580442549543], ["Cutout", 0.3028001021044615, 0.15117101733894078]], [["Brightness", 0.8011486803860253, 0.18864079729374195], ["Solarize", 0.014965327213230961, 0.8842620292527029]], [["Invert", 0.902244007904273, 0.5634673798052033], ["Equalize", 0.13422913507398349, 0.4110956745883727]], [["TranslateY", 0.9981773319103838, 0.09568550987216096], ["Color", 0.7627662124105109, 0.8494409737419493]], [["Cutout", 0.3013527640416782, 0.03377226729898486], ["ShearX", 0.5727964831614619, 0.8784196638222834]], [["TranslateX", 0.6050722426803684, 0.3650103962378708], ["TranslateX", 0.8392084589130886, 0.6479816470292911]], [["Rotate", 0.5032806606500023, 0.09276980118866307], ["TranslateY", 0.7800234515261191, 0.18896454379343308]], [["Invert", 0.9266027256244017, 0.8246111062199752], ["Contrast", 0.12112023357797697, 0.33870762271759436]], [["Brightness", 0.8688784756993134, 0.17263759696106606], ["ShearX", 0.5133700431071326, 0.6686811994542494]], [["Invert", 0.8347840440941976, 0.03774897445901726], ["Brightness", 0.24925057499276548, 0.04293631677355758]], [["Color", 0.5998145279485104, 0.4820093200092529], ["TranslateY", 0.6709586184077769, 0.07377334081382858]], [["AutoContrast", 0.7898846202957984, 0.325293526672498], ["Contrast", 0.5156435596826767, 0.2889223168660645]], [["ShearX", 0.08147389674998307, 0.7978924681113669], ["Contrast", 0.7270003309106291, 0.009571215234092656]], [["Sharpness", 0.417607614440786, 0.9532566433338661], ["Posterize", 0.7186586546796782, 0.6936509907073302]], [["ShearX", 0.9555300215926675, 0.1399385550263872], ["Color", 0.9981041061848231, 0.5037462398323248]], [["Equalize", 0.8003487831375474, 0.5413759363796945], ["ShearY", 0.0026607045117773565, 0.019262273030984933]], [["TranslateY", 0.04845391502469176, 0.10063445212118283], ["Cutout", 0.8273170186786745, 0.5045257728554577]], [["TranslateX", 0.9690985344978033, 0.505202991815533], ["TranslateY", 0.7255326592928096, 0.02103609500701631]], [["Solarize", 0.4030771176836736, 0.8424237871457034], ["Cutout", 0.28705805963928965, 0.9601617893682582]], [["Sharpness", 0.16865290353070606, 0.6899673563468826], ["Posterize", 0.3985430034869616, 0.6540651997730774]], [["ShearY", 0.21395578485362032, 0.09519358818949009], ["Solarize", 0.6692821708524135, 0.6462523623552485]], [["AutoContrast", 0.912360598054091, 0.029800239085051583], ["Invert", 0.04319256403746308, 0.7712501517098587]], [["ShearY", 0.9081969961839055, 0.4581560239984739], ["AutoContrast", 0.5313894814729159, 0.5508393335751848]], [["ShearY", 0.860528568424097, 0.8196987216301588], ["Posterize", 0.41134650331494205, 0.3686632018978778]], [["AutoContrast", 0.8753670810078598, 0.3679438326304749], ["Invert", 0.010444228965415858, 0.9581244779208277]], [["Equalize", 0.07071836206680682, 0.7173594756186462], 
["Brightness", 0.06111434312497388, 0.16175064669049277]], [["AutoContrast", 0.10522219073562122, 0.9768776621069855], ["TranslateY", 0.2744795945215529, 0.8577967957127298]], [["AutoContrast", 0.7628146493166175, 0.996157376418147], ["Contrast", 0.9255565598518469, 0.6826126662976868]], [["TranslateX", 0.017225816199011312, 0.2470332491402908], ["Solarize", 0.44048494909493807, 0.4492422515972162]], [["ShearY", 0.38885252627795064, 0.10272256704901939], ["Equalize", 0.686154959829183, 0.8973517148655337]], [["Rotate", 0.29628991573592967, 0.16639926575004715], ["ShearX", 0.9013782324726413, 0.0838318162771563]], [["Color", 0.04968391374688563, 0.6138600739645352], ["Invert", 0.11177127838716283, 0.10650198522261578]], [["Invert", 0.49655016367624016, 0.8603374164829688], ["ShearY", 0.40625439617553727, 0.4516437918820778]], [["TranslateX", 0.15015718916062992, 0.13867777502116208], ["Brightness", 0.3374464418810188, 0.7613355669536931]], [["Invert", 0.644644393321966, 0.19005804481199562], ["AutoContrast", 0.2293259789431853, 0.30335723256340186]], [["Solarize", 0.004968793254801596, 0.5370892072646645], ["Contrast", 0.9136902637865596, 0.9510587477779084]], [["Rotate", 0.38991518440867123, 0.24796987467455756], ["Sharpness", 0.9911180315669776, 0.5265657122981591]], [["Solarize", 0.3919646484436238, 0.6814994037194909], ["Sharpness", 0.4920838987787103, 0.023425724294012018]], [["TranslateX", 0.25107587874378867, 0.5414936560189212], ["Cutout", 0.7932919623814599, 0.9891303444820169]], [["Brightness", 0.07863012174272999, 0.045175652208389594], ["Solarize", 0.889609658064552, 0.8228793315963948]], [["Cutout", 0.20477096178169596, 0.6535063675027364], ["ShearX", 0.9216318577173639, 0.2908690977359947]], [["Contrast", 0.7035118947423187, 0.45982709058312454], ["Contrast", 0.7130268070749464, 0.8635123354235471]], [["Sharpness", 0.26319477541228997, 0.7451278726847078], ["Rotate", 0.8170499362173754, 0.13998593411788207]], [["Rotate", 0.8699365715164192, 0.8878057721750832], ["Equalize", 0.06682350555715044, 0.7164702080630689]], [["ShearY", 0.3137466057521987, 0.6747433496011368], ["Rotate", 0.42118828936218133, 0.980121180104441]], [["Solarize", 0.8470375049950615, 0.15287589264139223], ["Cutout", 0.14438435054693055, 0.24296463267973512]], [["TranslateY", 0.08822241792224905, 0.36163911974799356], ["TranslateY", 0.11729726813270003, 0.6230889726445291]], [["ShearX", 0.7720112337718541, 0.2773292905760122], ["Sharpness", 0.756290929398613, 0.27830353710507705]], [["Color", 0.33825031007968287, 0.4657590047522816], ["ShearY", 0.3566628994713067, 0.859750504071925]], [["TranslateY", 0.06830147433378053, 0.9348778582086664], ["TranslateX", 0.15509346516378553, 0.26320778885339435]], [["Posterize", 0.20266751150740858, 0.008351463842578233], ["Sharpness", 0.06506971109417259, 0.7294471760284555]], [["TranslateY", 0.6278911394418829, 0.8702181892620695], ["Invert", 0.9367073860264247, 0.9219230428944211]], [["Sharpness", 0.1553425337673321, 0.17601557714491345], ["Solarize", 0.7040449681338888, 0.08764313147327729]], [["Equalize", 0.6082233904624664, 0.4177428549911376], ["AutoContrast", 0.04987405274618151, 0.34516208204700916]], [["Brightness", 0.9616085936167699, 0.14561237331885468], ["Solarize", 0.8927707736296572, 0.31176907850205704]], [["Brightness", 0.6707778304730988, 0.9046457117525516], ["Brightness", 0.6801448953060988, 0.20015313057149042]], [["Color", 0.8292680845499386, 0.5181603879593888], ["Brightness", 0.08549161770369762, 0.6567870536463203]], [["ShearY", 0.267802208078051, 
0.8388133819588173], ["Sharpness", 0.13453409120796123, 0.10028351311149486]], [["Posterize", 0.775796593610272, 0.05359034561289766], ["Cutout", 0.5067360625733027, 0.054451986840317934]], [["TranslateX", 0.5845238647690084, 0.7507147553486293], ["Brightness", 0.2642051786121197, 0.2578358927056452]], [["Cutout", 0.10787517610922692, 0.8147986902794228], ["Contrast", 0.2190149206329539, 0.902210615462459]], [["TranslateX", 0.5663614214181296, 0.05309965916414028], ["ShearX", 0.9682797885154938, 0.41791929533938466]], [["ShearX", 0.2345325577621098, 0.383780128037189], ["TranslateX", 0.7298083748149163, 0.644325797667087]], [["Posterize", 0.5138725709682734, 0.7901809917259563], ["AutoContrast", 0.7966018627776853, 0.14529337543427345]], [["Invert", 0.5973031989249785, 0.417399314592829], ["Solarize", 0.9147539948653116, 0.8221272315548086]], [["Posterize", 0.601596043336383, 0.18969646160963938], ["Color", 0.7527275484079655, 0.431793831326888]], [["Equalize", 0.6731483454430538, 0.7866786558207602], ["TranslateX", 0.97574396899191, 0.5970255778044692]], [["Cutout", 0.15919495850169718, 0.8916094305850562], ["Invert", 0.8351348834751027, 0.4029937360314928]], [["Invert", 0.5894085405226027, 0.7283806854157764], ["Brightness", 0.3973976860470554, 0.949681121498567]], [["AutoContrast", 0.3707914135327408, 0.21192068592079616], ["ShearX", 0.28040127351140676, 0.6754553511344856]], [["Solarize", 0.07955132378694896, 0.15073572961927306], ["ShearY", 0.5735850168851625, 0.27147326850217746]], [["Equalize", 0.678653949549764, 0.8097796067861455], ["Contrast", 0.2283048527510083, 0.15507804874474185]], [["Equalize", 0.286013868374536, 0.186785848694501], ["Posterize", 0.16319021740810458, 0.1201304443285659]], [["Sharpness", 0.9601590830563757, 0.06267915026513238], ["AutoContrast", 0.3813920685124327, 0.294224403296912]], [["Brightness", 0.2703246632402241, 0.9168405377492277], ["ShearX", 0.6156009855831097, 0.4955986055846403]], [["Color", 0.9065504424987322, 0.03393612216080133], ["ShearY", 0.6768595880405884, 0.9981068127818191]], [["Equalize", 0.28812842368483904, 0.300387487349145], ["ShearY", 0.28812248704858345, 0.27105076231533964]], [["Brightness", 0.6864882730513477, 0.8205553299102412], ["Cutout", 0.45995236371265424, 0.5422030370297759]], [["Color", 0.34941404877084326, 0.25857961830158516], ["AutoContrast", 0.3451390878441899, 0.5000938249040454]], [["Invert", 0.8268247541815854, 0.6691380821226468], ["Cutout", 0.46489193601530476, 0.22620873109485895]], [["Rotate", 0.17879730528062376, 0.22670425330593935], ["Sharpness", 0.8692795688221834, 0.36586055020855723]], [["Brightness", 0.31203975139659634, 0.6934046293010939], ["Cutout", 0.31649437872271236, 0.08078625004157935]], [["Cutout", 0.3119482836150119, 0.6397160035509996], ["Contrast", 0.8311248624784223, 0.22897510169718616]], [["TranslateX", 0.7631157841429582, 0.6482890521284557], ["Brightness", 0.12681196272427664, 0.3669813784257344]], [["TranslateX", 0.06027722649179801, 0.3101104512201861], ["Sharpness", 0.5652076706249394, 0.05210008400968136]], [["AutoContrast", 0.39213552101583127, 0.5047021194355596], ["ShearY", 0.7164003055682187, 0.8063370761002899]], [["Solarize", 0.9574307011238342, 0.21472064809226854], ["AutoContrast", 0.8102612285047174, 0.716870148067014]], [["Rotate", 0.3592634277567387, 0.6452602893051465], ["AutoContrast", 0.27188430331411506, 0.06003099168464854]], [["Cutout", 0.9529536554825503, 0.5285505311027461], ["Solarize", 0.08478231903311029, 0.15986449762728216]], [["TranslateY", 
0.31176130458018936, 0.5642853506158253], ["Equalize", 0.008890883901317648, 0.5146121040955942]], [["Color", 0.40773645085566157, 0.7110398926612682], ["Color", 0.18233100156439364, 0.7830036002758337]], [["Posterize", 0.5793809197821732, 0.043748553135581236], ["Invert", 0.4479962016131668, 0.7349663010359488]], [["TranslateX", 0.1994882312299382, 0.05216859488899439], ["Rotate", 0.48288726352035416, 0.44713829026777585]], [["Posterize", 0.22122838185154603, 0.5034546841241283], ["TranslateX", 0.2538745835410222, 0.6129055170893385]], [["Color", 0.6786559960640814, 0.4529749369803212], ["Equalize", 0.30215879674415336, 0.8733394611096772]], [["Contrast", 0.47316062430673456, 0.46669538897311447], ["Invert", 0.6514906551984854, 0.3053339444067804]], [["Equalize", 0.6443202625334524, 0.8689731394616441], ["Color", 0.7549183794057628, 0.8889001426329578]], [["Solarize", 0.616709740662654, 0.7792180816399313], ["ShearX", 0.9659155537406062, 0.39436937531179495]], [["Equalize", 0.23694011299406226, 0.027711152164392128], ["TranslateY", 0.1677339686527083, 0.3482126536808231]], [["Solarize", 0.15234175951790285, 0.7893840414281341], ["TranslateX", 0.2396395768284183, 0.27727219214979715]], [["Contrast", 0.3792017455380605, 0.32323660409845334], ["Contrast", 0.1356037413846466, 0.9127772969992305]], [["ShearX", 0.02642732222284716, 0.9184662576502115], ["Equalize", 0.11504884472142995, 0.8957638893097964]], [["TranslateY", 0.3193812913345325, 0.8828100030493128], ["ShearY", 0.9374975727563528, 0.09909415611083694]], [["AutoContrast", 0.025840721736048122, 0.7941037581373024], ["TranslateY", 0.498518003323313, 0.5777122846572548]], [["ShearY", 0.6042199307830248, 0.44809668754508836], ["Cutout", 0.3243978207701482, 0.9379740926294765]], [["ShearY", 0.6858549297583574, 0.9993252035788924], ["Sharpness", 0.04682428732773203, 0.21698099707915652]], [["ShearY", 0.7737469436637263, 0.8810127181224531], ["ShearY", 0.8995655445246451, 0.4312416220354539]], [["TranslateY", 0.4953094136709374, 0.8144161580138571], ["Solarize", 0.26301211718928097, 0.518345311180405]], [["Brightness", 0.8820246486031275, 0.571075863786249], ["ShearX", 0.8586669146703955, 0.0060476383595142735]], [["Sharpness", 0.20519233710982254, 0.6144574759149729], ["Posterize", 0.07976625267460813, 0.7480145046726968]], [["ShearY", 0.374075419680195, 0.3386105402023202], ["ShearX", 0.8228083637082115, 0.5885174783155361]], [["Brightness", 0.3528780713814561, 0.6999884884306623], ["Sharpness", 0.3680348120526238, 0.16953358258959617]], [["Brightness", 0.24891223104442084, 0.7973853494920095], ["TranslateX", 0.004256803835524736, 0.0470216343108546]], [["Posterize", 0.1947344282646012, 0.7694802711054367], ["Cutout", 0.9594385534844785, 0.5469744140592429]], [["Invert", 0.19012504762806026, 0.7816140211434693], ["TranslateY", 0.17479746932338402, 0.024249345245078602]], [["Rotate", 0.9669262055946796, 0.510166180775991], ["TranslateX", 0.8990602034610352, 0.6657802719304693]], [["ShearY", 0.5453049050407278, 0.8476872739603525], ["Cutout", 0.14226529093962592, 0.15756960661106634]], [["Equalize", 0.5895291156113004, 0.6797218994447763], ["TranslateY", 0.3541442192192753, 0.05166001155849864]], [["Equalize", 0.39530681662726097, 0.8448335365081087], ["Brightness", 0.6785483272734143, 0.8805568647038574]], [["Cutout", 0.28633258271917905, 0.7750870268336066], ["Equalize", 0.7221097824537182, 0.5865506280531162]], [["Posterize", 0.9044429629421187, 0.4620266401793388], ["Invert", 0.1803008045494473, 0.8073190766288534]], [["Sharpness", 
0.7054649148075851, 0.3877207948962055], ["TranslateX", 0.49260224225927285, 0.8987462620731029]], [["Sharpness", 0.11196934729294483, 0.5953704422694938], ["Contrast", 0.13969334315069737, 0.19310569898434204]], [["Posterize", 0.5484346101051778, 0.7914140118600685], ["Brightness", 0.6428044691630473, 0.18811316670808076]], [["Invert", 0.22294834094984717, 0.05173157689962704], ["Cutout", 0.6091129168510456, 0.6280845506243643]], [["AutoContrast", 0.5726444076195267, 0.2799840903601295], ["Cutout", 0.3055752727786235, 0.591639807512993]], [["Brightness", 0.3707116723204462, 0.4049175910826627], ["Rotate", 0.4811601625588309, 0.2710760253723644]], [["ShearY", 0.627791719653608, 0.6877498291550205], ["TranslateX", 0.8751753308366824, 0.011164650018719358]], [["Posterize", 0.33832547954522263, 0.7087039872581657], ["Posterize", 0.6247474435007484, 0.7707784192114796]], [["Contrast", 0.17620186308493468, 0.9946224854942095], ["Solarize", 0.5431896088395964, 0.5867904203742308]], [["ShearX", 0.4667959516719652, 0.8938082224109446], ["TranslateY", 0.7311343008292865, 0.6829842246020277]], [["ShearX", 0.6130281467237769, 0.9924010909612302], ["Brightness", 0.41039241699696916, 0.9753218875311392]], [["TranslateY", 0.0747250386427123, 0.34602725521067534], ["Rotate", 0.5902597465515901, 0.361094672021087]], [["Invert", 0.05234890878959486, 0.36914978664919407], ["Sharpness", 0.42140532878231374, 0.19204058551048275]], [["ShearY", 0.11590485361909497, 0.6518540857972316], ["Invert", 0.6482444740361704, 0.48256237896163945]], [["Rotate", 0.4931329446923608, 0.037076242417301675], ["Contrast", 0.9097939772412852, 0.5619594905306389]], [["Posterize", 0.7311032479626216, 0.4796364593912915], ["Color", 0.13912123993932402, 0.03997286439663705]], [["AutoContrast", 0.6196602944085344, 0.2531430457527588], ["Rotate", 0.5583937060431972, 0.9893379795224023]], [["AutoContrast", 0.8847753125072959, 0.19123028952580057], ["TranslateY", 0.494361716097206, 0.14232297727461696]], [["Invert", 0.6212360716340707, 0.033898871473033165], ["AutoContrast", 0.30839896957008295, 0.23603569542166247]], [["Equalize", 0.8255583546605049, 0.613736933157845], ["AutoContrast", 0.6357166629525485, 0.7894617347709095]], [["Brightness", 0.33840706322846814, 0.07917167871493658], ["ShearY", 0.15693175752528676, 0.6282773652129153]], [["Cutout", 0.7550520024859294, 0.08982367300605598], ["ShearX", 0.5844942417320858, 0.36051195083380105]]] - return p - - -def fa_reduced_imagenet(): - p = [[["ShearY", 0.14143816458479197, 0.513124791615952], ["Sharpness", 0.9290316227291179, 0.9788406212603302]], [["Color", 0.21502874228385338, 0.3698477943880306], ["TranslateY", 0.49865058747734736, 0.4352676987103321]], [["Brightness", 0.6603452126485386, 0.6990174510500261], ["Cutout", 0.7742953773992511, 0.8362550883640804]], [["Posterize", 0.5188375788270497, 0.9863648925446865], ["TranslateY", 0.8365230108655313, 0.6000972236440252]], [["ShearY", 0.9714994964711299, 0.2563663552809896], ["Equalize", 0.8987567223581153, 0.1181761775609772]], [["Sharpness", 0.14346409304565366, 0.5342189791746006], ["Sharpness", 0.1219714162835897, 0.44746801278319975]], [["TranslateX", 0.08089260772173967, 0.028011721602479833], ["TranslateX", 0.34767877352421406, 0.45131294688688794]], [["Brightness", 0.9191164585327378, 0.5143232242627864], ["Color", 0.9235247849934283, 0.30604586249462173]], [["Contrast", 0.4584173187505879, 0.40314219914942756], ["Rotate", 0.550289356406774, 0.38419022293237126]], [["Posterize", 0.37046156420799325, 0.052693291117634544], 
["Cutout", 0.7597581409366909, 0.7535799791937421]], [["Color", 0.42583964114658746, 0.6776641859552079], ["ShearY", 0.2864805671096011, 0.07580175477739545]], [["Brightness", 0.5065952125552232, 0.5508640233704984], ["Brightness", 0.4760021616081475, 0.3544313318097987]], [["Posterize", 0.5169630851995185, 0.9466018906715961], ["Posterize", 0.5390336503396841, 0.1171015788193209]], [["Posterize", 0.41153170909576176, 0.7213063942615204], ["Rotate", 0.6232230424824348, 0.7291984098675746]], [["Color", 0.06704687234714028, 0.5278429246040438], ["Sharpness", 0.9146652195810183, 0.4581415618941407]], [["ShearX", 0.22404644446773492, 0.6508620171913467], ["Brightness", 0.06421961538672451, 0.06859528721039095]], [["Rotate", 0.29864103693134797, 0.5244313199644495], ["Sharpness", 0.4006161706584276, 0.5203708477368657]], [["AutoContrast", 0.5748186910788027, 0.8185482599354216], ["Posterize", 0.9571441684265188, 0.1921474117448481]], [["ShearY", 0.5214786760436251, 0.8375629059785009], ["Invert", 0.6872393349333636, 0.9307694335024579]], [["Contrast", 0.47219838080793364, 0.8228524484275648], ["TranslateY", 0.7435518856840543, 0.5888865560614439]], [["Posterize", 0.10773482839638836, 0.6597021018893648], ["Contrast", 0.5218466423129691, 0.562985661685268]], [["Rotate", 0.4401753067886466, 0.055198255925702475], ["Rotate", 0.3702153509335602, 0.5821574425474759]], [["TranslateY", 0.6714729117832363, 0.7145542887432927], ["Equalize", 0.0023263758097700205, 0.25837341854887885]], [["Cutout", 0.3159707561240235, 0.19539664199170742], ["TranslateY", 0.8702824829864558, 0.5832348977243467]], [["AutoContrast", 0.24800812729140026, 0.08017301277245716], ["Brightness", 0.5775505849482201, 0.4905904775616114]], [["Color", 0.4143517886294533, 0.8445937742921498], ["ShearY", 0.28688910858536587, 0.17539366839474402]], [["Brightness", 0.6341134194059947, 0.43683815933640435], ["Brightness", 0.3362277685899835, 0.4612826163288225]], [["Sharpness", 0.4504035748829761, 0.6698294470467474], ["Posterize", 0.9610055612671645, 0.21070714173174876]], [["Posterize", 0.19490421920029832, 0.7235798208354267], ["Rotate", 0.8675551331308305, 0.46335565746433094]], [["Color", 0.35097958351003306, 0.42199181561523186], ["Invert", 0.914112788087429, 0.44775583211984815]], [["Cutout", 0.223575616055454, 0.6328591417299063], ["TranslateY", 0.09269465212259387, 0.5101073959070608]], [["Rotate", 0.3315734525975911, 0.9983593458299167], ["Sharpness", 0.12245416662856974, 0.6258689139914664]], [["ShearY", 0.696116760180471, 0.6317805202283014], ["Color", 0.847501151593963, 0.4440116609830195]], [["Solarize", 0.24945891607225948, 0.7651150206105561], ["Cutout", 0.7229677092930331, 0.12674657348602494]], [["TranslateX", 0.43461945065713675, 0.06476571036747841], ["Color", 0.6139316940180952, 0.7376264330632316]], [["Invert", 0.1933003530637138, 0.4497819016184308], ["Invert", 0.18391634069983653, 0.3199769100951113]], [["Color", 0.20418296626476137, 0.36785101882029814], ["Posterize", 0.624658293920083, 0.8390081535735991]], [["Sharpness", 0.5864963540530814, 0.586672446690273], ["Posterize", 0.1980280647652339, 0.222114611452575]], [["Invert", 0.3543654961628104, 0.5146369635250309], ["Equalize", 0.40751271919434434, 0.4325310837291978]], [["ShearY", 0.22602859359451877, 0.13137880879778158], ["Posterize", 0.7475029061591305, 0.803900538461099]], [["Sharpness", 0.12426276165599924, 0.5965912716602046], ["Invert", 0.22603903038966913, 0.4346802001255868]], [["TranslateY", 0.010307035630661765, 0.16577665156754046], ["Posterize", 
0.4114319141395257, 0.829872913683949]], [["TranslateY", 0.9353069865746215, 0.5327821671247214], ["Color", 0.16990443486261103, 0.38794866007484197]], [["Cutout", 0.1028174322829021, 0.3955952903458266], ["ShearY", 0.4311995281335693, 0.48024695395374734]], [["Posterize", 0.1800334334284686, 0.0548749478418862], ["Brightness", 0.7545808536793187, 0.7699080551646432]], [["Color", 0.48695305373084197, 0.6674269768464615], ["ShearY", 0.4306032279086781, 0.06057690550239343]], [["Brightness", 0.4919399683825053, 0.677338905806407], ["Brightness", 0.24112708387760828, 0.42761103121157656]], [["Posterize", 0.4434818644882532, 0.9489450593207714], ["Posterize", 0.40957675116385955, 0.015664946759584186]], [["Posterize", 0.41307949855153797, 0.6843276552020272], ["Rotate", 0.8003545094091291, 0.7002300783416026]], [["Color", 0.7038570031770905, 0.4697612983649519], ["Sharpness", 0.9700016496081002, 0.25185103545948884]], [["AutoContrast", 0.714641656154856, 0.7962423001719023], ["Sharpness", 0.2410097684093468, 0.5919171048019731]], [["TranslateX", 0.8101567644494714, 0.7156447005337443], ["Solarize", 0.5634727831229329, 0.8875158446846]], [["Sharpness", 0.5335258857303261, 0.364743126378182], ["Color", 0.453280875871377, 0.5621962714743068]], [["Cutout", 0.7423678127672542, 0.7726370777867049], ["Invert", 0.2806161382641934, 0.6021111986900146]], [["TranslateY", 0.15190341320343761, 0.3860373175487939], ["Cutout", 0.9980805818665679, 0.05332384819400854]], [["Posterize", 0.36518675678786605, 0.2935819027397963], ["TranslateX", 0.26586180351840005, 0.303641300745208]], [["Brightness", 0.19994509744377761, 0.90813953707639], ["Equalize", 0.8447217761297836, 0.3449396603478335]], [["Sharpness", 0.9294773669936768, 0.999713346583839], ["Brightness", 0.1359744825665662, 0.1658489221872924]], [["TranslateX", 0.11456529257659381, 0.9063795878367734], ["Equalize", 0.017438134319894553, 0.15776887259743755]], [["ShearX", 0.9833726383270114, 0.5688194948373335], ["Equalize", 0.04975615490994345, 0.8078130016227757]], [["Brightness", 0.2654654830488695, 0.8989789725280538], ["TranslateX", 0.3681535065952329, 0.36433345713161036]], [["Rotate", 0.04956524209892327, 0.5371942433238247], ["ShearY", 0.0005527499145153714, 0.56082571605602]], [["Rotate", 0.7918337108932019, 0.5906896260060501], ["Posterize", 0.8223967034091191, 0.450216998388943]], [["Color", 0.43595106766978337, 0.5253013785221605], ["Sharpness", 0.9169421073531799, 0.8439997639348893]], [["TranslateY", 0.20052300197155504, 0.8202662448307549], ["Sharpness", 0.2875792108435686, 0.6997181624527842]], [["Color", 0.10568089980973616, 0.3349467065132249], ["Brightness", 0.13070947282207768, 0.5757725013960775]], [["AutoContrast", 0.3749999712869779, 0.6665578760607657], ["Brightness", 0.8101178402610292, 0.23271946112218125]], [["Color", 0.6473605933679651, 0.7903409763232029], ["ShearX", 0.588080941572581, 0.27223524148254086]], [["Cutout", 0.46293361616697304, 0.7107761001833921], ["AutoContrast", 0.3063766931658412, 0.8026114219854579]], [["Brightness", 0.7884854981520251, 0.5503669863113797], ["Brightness", 0.5832456158675261, 0.5840349298921661]], [["Solarize", 0.4157539625058916, 0.9161905834309929], ["Sharpness", 0.30628197221802017, 0.5386291658995193]], [["Sharpness", 0.03329610069672856, 0.17066672983670506], ["Invert", 0.9900547302690527, 0.6276238841220477]], [["Solarize", 0.551015648982762, 0.6937104775938737], ["Color", 0.8838491591064375, 0.31596634380795385]], [["AutoContrast", 0.16224182418148447, 0.6068227969351896], ["Sharpness", 
0.9599468096118623, 0.4885289719905087]], [["TranslateY", 0.06576432526133724, 0.6899544605400214], ["Posterize", 0.2177096480169678, 0.9949164789616582]], [["Solarize", 0.529820544480292, 0.7576047224165541], ["Sharpness", 0.027047878909321643, 0.45425231553970685]], [["Sharpness", 0.9102526010473146, 0.8311987141993857], ["Invert", 0.5191838751826638, 0.6906136644742229]], [["Solarize", 0.4762773516008588, 0.7703654263842423], ["Color", 0.8048437792602289, 0.4741523094238038]], [["Sharpness", 0.7095055508594206, 0.7047344238075169], ["Sharpness", 0.5059623654132546, 0.6127255499234886]], [["TranslateY", 0.02150725921966186, 0.3515764519224378], ["Posterize", 0.12482170119714735, 0.7829851754051393]], [["Color", 0.7983830079184816, 0.6964694521670339], ["Brightness", 0.3666527856286296, 0.16093151636495978]], [["AutoContrast", 0.6724982375829505, 0.536777706678488], ["Sharpness", 0.43091754837597646, 0.7363240924241439]], [["Brightness", 0.2889770401966227, 0.4556557902380539], ["Sharpness", 0.8805303296690755, 0.6262218017754902]], [["Sharpness", 0.5341939854581068, 0.6697109101429343], ["Rotate", 0.6806606655137529, 0.4896914517968317]], [["Sharpness", 0.5690509737059344, 0.32790632371915096], ["Posterize", 0.7951894258661069, 0.08377850335209162]], [["Color", 0.6124132978216081, 0.5756485920709012], ["Brightness", 0.33053544654445344, 0.23321841707002083]], [["TranslateX", 0.0654795026615917, 0.5227246924310244], ["ShearX", 0.2932320531132063, 0.6732066478183716]], [["Cutout", 0.6226071187083615, 0.01009274433736012], ["ShearX", 0.7176799968189801, 0.3758780240463811]], [["Rotate", 0.18172339508029314, 0.18099184896819184], ["ShearY", 0.7862658331645667, 0.295658135767252]], [["Contrast", 0.4156099177015862, 0.7015784500878446], ["Sharpness", 0.6454135310009, 0.32335858947955287]], [["Color", 0.6215885089922037, 0.6882673235388836], ["Brightness", 0.3539881732605379, 0.39486736455795496]], [["Invert", 0.8164816716866418, 0.7238192000817796], ["Sharpness", 0.3876355847343607, 0.9870077619731956]], [["Brightness", 0.1875628712629315, 0.5068115936257], ["Sharpness", 0.8732419122060423, 0.5028019258530066]], [["Sharpness", 0.6140734993408259, 0.6458239834366959], ["Rotate", 0.5250107862824867, 0.533419456933602]], [["Sharpness", 0.5710893143725344, 0.15551651073007305], ["ShearY", 0.6548487860151722, 0.021365083044319146]], [["Color", 0.7610250354649954, 0.9084452893074055], ["Brightness", 0.6934611792619156, 0.4108071412071374]], [["ShearY", 0.07512550098923898, 0.32923768385754293], ["ShearY", 0.2559588911696498, 0.7082337365398496]], [["Cutout", 0.5401319018926146, 0.004750568603408445], ["ShearX", 0.7473354415031975, 0.34472481968368773]], [["Rotate", 0.02284154583679092, 0.1353450082435801], ["ShearY", 0.8192458031684238, 0.2811653613473772]], [["Contrast", 0.21142896718139154, 0.7230739568811746], ["Sharpness", 0.6902690582665707, 0.13488436112901683]], [["Posterize", 0.21701219600958138, 0.5900695769640687], ["Rotate", 0.7541095031505971, 0.5341162375286219]], [["Posterize", 0.5772853064792737, 0.45808311743269936], ["Brightness", 0.14366050177823675, 0.4644871239446629]], [["Cutout", 0.8951718842805059, 0.4970074074310499], ["Equalize", 0.3863835903119882, 0.9986531042150006]], [["Equalize", 0.039411354473938925, 0.7475477254908457], ["Sharpness", 0.8741966378291861, 0.7304822679596362]], [["Solarize", 0.4908704265218634, 0.5160677350249471], ["Color", 0.24961813832742435, 0.09362352627360726]], [["Rotate", 7.870457075154214e-05, 0.8086950025500952], ["Solarize", 
0.10200484521793163, 0.12312889222989265]], [["Contrast", 0.8052564975559727, 0.3403813036543645], ["Solarize", 0.7690158533600184, 0.8234626822018851]], [["AutoContrast", 0.680362728854513, 0.9415320040873628], ["TranslateY", 0.5305871824686941, 0.8030609611614028]], [["Cutout", 0.1748050257378294, 0.06565343731910589], ["TranslateX", 0.1812738872339903, 0.6254461448344308]], [["Brightness", 0.4230502644722749, 0.3346463682905031], ["ShearX", 0.19107198973659312, 0.6715789128604919]], [["ShearX", 0.1706528684548394, 0.7816570201200446], ["TranslateX", 0.494545185948171, 0.4710810058360291]], [["TranslateX", 0.42356251508933324, 0.23865307292867322], ["TranslateX", 0.24407503619326745, 0.6013778508137331]], [["AutoContrast", 0.7719512185744232, 0.3107905373009763], ["ShearY", 0.49448082925617176, 0.5777951230577671]], [["Cutout", 0.13026983827940525, 0.30120438757485657], ["Brightness", 0.8857896834516185, 0.7731541459513939]], [["AutoContrast", 0.6422800349197934, 0.38637401090264556], ["TranslateX", 0.25085431400995084, 0.3170642592664873]], [["Sharpness", 0.22336654455367122, 0.4137774852324138], ["ShearY", 0.22446851054920894, 0.518341735882535]], [["Color", 0.2597579403253848, 0.7289643913060193], ["Sharpness", 0.5227416670468619, 0.9239943674030637]], [["Cutout", 0.6835337711563527, 0.24777620448593812], ["AutoContrast", 0.37260245353051846, 0.4840361183247263]], [["Posterize", 0.32756602788628375, 0.21185124493743707], ["ShearX", 0.25431504951763967, 0.19585996561416225]], [["AutoContrast", 0.07930627591849979, 0.5719381348340309], ["AutoContrast", 0.335512380071304, 0.4208050118308541]], [["Rotate", 0.2924360268257798, 0.5317629242879337], ["Sharpness", 0.4531050021499891, 0.4102650087199528]], [["Equalize", 0.5908862210984079, 0.468742362277498], ["Brightness", 0.08571766548550425, 0.5629320703375056]], [["Cutout", 0.52751122383816, 0.7287774744737556], ["Equalize", 0.28721628275296274, 0.8075179887475786]], [["AutoContrast", 0.24208377391366226, 0.34616549409607644], ["TranslateX", 0.17454707403766834, 0.5278055700078459]], [["Brightness", 0.5511881924749478, 0.999638675514418], ["Equalize", 0.14076197797220913, 0.2573030693317552]], [["ShearX", 0.668731433926434, 0.7564253049646743], ["Color", 0.63235486543845, 0.43954436063340785]], [["ShearX", 0.40511960873276237, 0.5710419512142979], ["Contrast", 0.9256769948746423, 0.7461350716211649]], [["Cutout", 0.9995917204023061, 0.22908419326246265], ["TranslateX", 0.5440902956629469, 0.9965570051216295]], [["Color", 0.22552987172228894, 0.4514558960849747], ["Sharpness", 0.638058150559443, 0.9987829481002615]], [["Contrast", 0.5362775837534763, 0.7052133185951871], ["ShearY", 0.220369845547023, 0.7593922994775721]], [["ShearX", 0.0317785822935219, 0.775536785253455], ["TranslateX", 0.7939510227015061, 0.5355620618496535]], [["Cutout", 0.46027969917602196, 0.31561199122527517], ["Color", 0.06154066467629451, 0.5384660000729091]], [["Sharpness", 0.7205483743301113, 0.552222392539886], ["Posterize", 0.5146496404711752, 0.9224333144307473]], [["ShearX", 0.00014547730356910538, 0.3553954298642108], ["TranslateY", 0.9625736029090676, 0.57403418640424]], [["Posterize", 0.9199917903297341, 0.6690259107633706], ["Posterize", 0.0932558110217602, 0.22279303372106138]], [["Invert", 0.25401453476874863, 0.3354329544078385], ["Posterize", 0.1832673201325652, 0.4304718799821412]], [["TranslateY", 0.02084122674367607, 0.12826181437197323], ["ShearY", 0.655862534043703, 0.3838330909470975]], [["Contrast", 0.35231797644104523, 0.3379356652070079], 
["Cutout", 0.19685599014304822, 0.1254328595280942]], [["Sharpness", 0.18795594984191433, 0.09488678946484895], ["ShearX", 0.33332876790679306, 0.633523782574133]], [["Cutout", 0.28267175940290246, 0.7901991550267817], ["Contrast", 0.021200195312951198, 0.4733128702798515]], [["ShearX", 0.966231043411256, 0.7700673327786812], ["TranslateX", 0.7102390777763321, 0.12161245817120675]], [["Cutout", 0.5183324259533826, 0.30766086003013055], ["Color", 0.48399078150128927, 0.4967477809069189]], [["Sharpness", 0.8160855187385873, 0.47937658961644], ["Posterize", 0.46360395447862535, 0.7685454058155061]], [["ShearX", 0.10173571421694395, 0.3987290690178754], ["TranslateY", 0.8939980277379345, 0.5669994143735713]], [["Posterize", 0.6768089584801844, 0.7113149244621721], ["Posterize", 0.054896856043358935, 0.3660837250743921]], [["AutoContrast", 0.5915576211896306, 0.33607718177676493], ["Contrast", 0.3809408206617828, 0.5712201773913784]], [["AutoContrast", 0.012321347472748323, 0.06379072432796573], ["Rotate", 0.0017964439160045656, 0.7598026295973337]], [["Contrast", 0.6007100085192627, 0.36171972473370206], ["Invert", 0.09553573684975913, 0.12218510774295901]], [["AutoContrast", 0.32848604643836266, 0.2619457656206414], ["Invert", 0.27082113532501784, 0.9967965642293485]], [["AutoContrast", 0.6156282120903395, 0.9422706516080884], ["Sharpness", 0.4215509247379262, 0.4063347716503587]], [["Solarize", 0.25059210436331264, 0.7215305521159305], ["Invert", 0.1654465185253614, 0.9605851884186778]], [["AutoContrast", 0.4464438610980994, 0.685334175815482], ["Cutout", 0.24358625461158645, 0.4699066834058694]], [["Rotate", 0.5931657741857909, 0.6813978655574067], ["AutoContrast", 0.9259100547738681, 0.4903201223870492]], [["Color", 0.8203976071280751, 0.9777824466585101], ["Posterize", 0.4620669369254169, 0.2738895968716055]], [["Contrast", 0.13754352055786848, 0.3369433962088463], ["Posterize", 0.48371187792441916, 0.025718004361451302]], [["Rotate", 0.5208233630704999, 0.1760188899913535], ["TranslateX", 0.49753461392937226, 0.4142935276250922]], [["Cutout", 0.5967418240931212, 0.8028675552639539], ["Cutout", 0.20021854152659121, 0.19426330549590076]], [["ShearY", 0.549583567386676, 0.6601326640171705], ["Cutout", 0.6111813470383047, 0.4141935587984994]], [["Brightness", 0.6354891977535064, 0.31591459747846745], ["AutoContrast", 0.7853952208711621, 0.6555861906702081]], [["AutoContrast", 0.7333725370546154, 0.9919410576081586], ["Cutout", 0.9984177877923588, 0.2938253683694291]], [["Color", 0.33219296307742263, 0.6378995578424113], ["AutoContrast", 0.15432820754183288, 0.7897899838932103]], [["Contrast", 0.5905289460222578, 0.8158577207653422], ["Cutout", 0.3980284381203051, 0.43030531250317217]], [["TranslateX", 0.452093693346745, 0.5251475931559115], ["Rotate", 0.991422504871258, 0.4556503729269001]], [["Color", 0.04560406292983776, 0.061574671308480766], ["Brightness", 0.05161079440128734, 0.6718398142425688]], [["Contrast", 0.02913302416506853, 0.14402056093217708], ["Rotate", 0.7306930378774588, 0.47088249057922094]], [["Solarize", 0.3283072384190169, 0.82680847744367], ["Invert", 0.21632614168418854, 0.8792241691482687]], [["Equalize", 0.4860808352478527, 0.9440534949023064], ["Cutout", 0.31395897639184694, 0.41805859306017523]], [["Rotate", 0.2816043232522335, 0.5451282807926706], ["Color", 0.7388520447173302, 0.7706503658143311]], [["Color", 0.9342776719536201, 0.9039981381514299], ["Rotate", 0.6646389177840164, 0.5147917008383647]], [["Cutout", 0.08929430082050335, 0.22416445996932374], 
["Posterize", 0.454485751267457, 0.500958345348237]], [["TranslateX", 0.14674201106374488, 0.7018633472428202], ["Sharpness", 0.6128796723832848, 0.743535235614809]], [["TranslateX", 0.5189900164469432, 0.6491132403587601], ["Contrast", 0.26309555778227806, 0.5976857969656114]], [["Solarize", 0.23569808291972655, 0.3315781686591778], ["ShearY", 0.07292078937544964, 0.7460326987587573]], [["ShearY", 0.7090542757477153, 0.5246437008439621], ["Sharpness", 0.9666919148538443, 0.4841687888767071]], [["Solarize", 0.3486952615189488, 0.7012877201721799], ["Invert", 0.1933387967311534, 0.9535472742828175]], [["AutoContrast", 0.5393460721514914, 0.6924005011697713], ["Cutout", 0.16988156769247176, 0.3667207571712882]], [["Rotate", 0.5815329514554719, 0.5390406879316949], ["AutoContrast", 0.7370538341589625, 0.7708822194197815]], [["Color", 0.8463701017918459, 0.9893491045831084], ["Invert", 0.06537367901579016, 0.5238468509941635]], [["Contrast", 0.8099771812443645, 0.39371603893945184], ["Posterize", 0.38273629875646487, 0.46493786058573966]], [["Color", 0.11164686537114032, 0.6771450570033168], ["Posterize", 0.27921361289661406, 0.7214300893597819]], [["Contrast", 0.5958265906571906, 0.5963959447666958], ["Sharpness", 0.2640889223630885, 0.3365870842641453]], [["Color", 0.255634146724125, 0.5610029792926452], ["ShearY", 0.7476893976084721, 0.36613194760395557]], [["ShearX", 0.2167581882130063, 0.022978065071245002], ["TranslateX", 0.1686864409720319, 0.4919575435512007]], [["Solarize", 0.10702753776284957, 0.3954707963684698], ["Contrast", 0.7256100635368403, 0.48845259655719686]], [["Sharpness", 0.6165615058519549, 0.2624079463213861], ["ShearX", 0.3804820351860919, 0.4738994677544202]], [["TranslateX", 0.18066394808448177, 0.8174509422318228], ["Solarize", 0.07964569396290502, 0.45495935736800974]], [["Sharpness", 0.2741884021129658, 0.9311045302358317], ["Cutout", 0.0009101326429323388, 0.5932102256756948]], [["Rotate", 0.8501796375826188, 0.5092564038282137], ["Brightness", 0.6520146983999912, 0.724091283316938]], [["Brightness", 0.10079744898900078, 0.7644088017429471], ["AutoContrast", 0.33540215138213575, 0.1487538541758792]], [["ShearY", 0.10632545944757177, 0.9565164562996977], ["Rotate", 0.275833816849538, 0.6200731548023757]], [["Color", 0.6749819274397422, 0.41042188598168844], ["AutoContrast", 0.22396590966461932, 0.5048018491863738]], [["Equalize", 0.5044277111650255, 0.2649182381110667], ["Brightness", 0.35715133289571355, 0.8653260893016869]], [["Cutout", 0.49083594426355326, 0.5602781291093129], ["Posterize", 0.721795488514384, 0.5525847430754974]], [["Sharpness", 0.5081835448947317, 0.7453323423804428], ["TranslateX", 0.11511932212234266, 0.4337766796030984]], [["Solarize", 0.3817050641766593, 0.6879004573473403], ["Invert", 0.0015041436267447528, 0.9793134066888262]], [["AutoContrast", 0.5107410439697935, 0.8276720355454423], ["Cutout", 0.2786270701864015, 0.43993387208414564]], [["Rotate", 0.6711202569428987, 0.6342930903972932], ["Posterize", 0.802820231163559, 0.42770002619222053]], [["Color", 0.9426854321337312, 0.9055431782458764], ["AutoContrast", 0.3556422423506799, 0.2773922428787449]], [["Contrast", 0.10318991257659992, 0.30841372533347416], ["Posterize", 0.4202264962677853, 0.05060395018085634]], [["Invert", 0.549305630337048, 0.886056156681853], ["Cutout", 0.9314157033373055, 0.3485836940307909]], [["ShearX", 0.5642891775895684, 0.16427372934801418], ["Invert", 0.228741164726475, 0.5066345406806475]], [["ShearY", 0.5813123201003086, 0.33474363490586106], ["Equalize", 
0.11803439432255824, 0.8583936440614798]], [["Sharpness", 0.1642809706111211, 0.6958675237301609], ["ShearY", 0.5989560762277414, 0.6194018060415276]], [["Rotate", 0.05092104774529638, 0.9358045394527796], ["Cutout", 0.6443254331615441, 0.28548414658857657]], [["Brightness", 0.6986036769232594, 0.9618046340942727], ["Sharpness", 0.5564490243465492, 0.6295231286085622]], [["Brightness", 0.42725649792574105, 0.17628028916784244], ["Equalize", 0.4425109360966546, 0.6392872650036018]], [["ShearY", 0.5758622795525444, 0.8773349286588288], ["ShearX", 0.038525646435423666, 0.8755366512394268]], [["Sharpness", 0.3704459924265827, 0.9236361456197351], ["Color", 0.6379842432311235, 0.4548767717224531]], [["Contrast", 0.1619523824549347, 0.4506528800882731], ["AutoContrast", 0.34513874426188385, 0.3580290330996726]], [["Contrast", 0.728699731513527, 0.6932238009822878], ["Brightness", 0.8602917375630352, 0.5341445123280423]], [["Equalize", 0.3574552353044203, 0.16814745124536548], ["Rotate", 0.24191717169379262, 0.3279497108179034]], [["ShearY", 0.8567478695576244, 0.37746117240238164], ["ShearX", 0.9654125389830487, 0.9283047610798827]], [["ShearY", 0.4339052480582405, 0.5394548246617406], ["Cutout", 0.5070570647967001, 0.7846286976687882]], [["AutoContrast", 0.021620100406875065, 0.44425839772845227], ["AutoContrast", 0.33978157614075183, 0.47716564815092244]], [["Contrast", 0.9727600659025666, 0.6651758819229426], ["Brightness", 0.9893133904996626, 0.39176397622636105]], [["Equalize", 0.283428620586305, 0.18727922861893637], ["Rotate", 0.3556063466797136, 0.3722839913107821]], [["ShearY", 0.7276172841941864, 0.4834188516302227], ["ShearX", 0.010783217950465884, 0.9756458772142235]], [["ShearY", 0.2901753295101581, 0.5684700238749064], ["Cutout", 0.655585564610337, 0.9490071307790201]], [["AutoContrast", 0.008507193981450278, 0.4881150103902877], ["AutoContrast", 0.6561989723231185, 0.3715071329838596]], [["Contrast", 0.7702505530948414, 0.6961371266519999], ["Brightness", 0.9953051630261895, 0.3861962467326121]], [["Equalize", 0.2805270012472756, 0.17715406116880994], ["Rotate", 0.3111256593947474, 0.15824352183820073]], [["Brightness", 0.9888680802094193, 0.4856236485253163], ["ShearX", 0.022370252047332284, 0.9284975906226682]], [["ShearY", 0.4065719044318099, 0.7468528006921563], ["AutoContrast", 0.19494427109708126, 0.8613186475174786]], [["AutoContrast", 0.023296727279367765, 0.9170949567425306], ["AutoContrast", 0.11663051100921168, 0.7908646792175343]], [["AutoContrast", 0.7335191671571732, 0.4958357308292425], ["Color", 0.7964964008349845, 0.4977687544324929]], [["ShearX", 0.19905221600021472, 0.3033081933150046], ["Equalize", 0.9383410219319321, 0.3224669877230161]], [["ShearX", 0.8265450331466404, 0.6509091423603757], ["Sharpness", 0.7134181178748723, 0.6472835976443643]], [["ShearY", 0.46962439525486044, 0.223433110541722], ["Rotate", 0.7749806946212373, 0.5337060376916906]], [["Posterize", 0.1652499695106796, 0.04860659068586126], ["Brightness", 0.6644577712782511, 0.4144528269429337]], [["TranslateY", 0.6220449565731829, 0.4917495676722932], ["Posterize", 0.6255000355409635, 0.8374266890984867]], [["AutoContrast", 0.4887160797052227, 0.7106426020530529], ["Sharpness", 0.7684218571497236, 0.43678474722954763]], [["Invert", 0.13178101535845366, 0.8301141976359813], ["Color", 0.002820877424219378, 0.49444413062487075]], [["TranslateX", 0.9920683666478188, 0.5862245842588877], ["Posterize", 0.5536357075855376, 0.5454300367281468]], [["Brightness", 0.8150181219663427, 0.1411060258870707], 
["Sharpness", 0.8548823004164599, 0.77008691072314]], [["Brightness", 0.9580478020413399, 0.7198667636628974], ["ShearY", 0.8431585033377366, 0.38750016565010803]], [["Solarize", 0.2331505347152334, 0.25754361489084787], ["TranslateY", 0.447431373734262, 0.5782399531772253]], [["TranslateY", 0.8904927998691309, 0.25872872455072315], ["AutoContrast", 0.7129888139716263, 0.7161603231650524]], [["ShearY", 0.6336216800247362, 0.5247508616674911], ["Cutout", 0.9167315119726633, 0.2060557387978919]], [["ShearX", 0.001661782345968199, 0.3682225725445044], ["Solarize", 0.12303352043754572, 0.5014989548584458]], [["Brightness", 0.9723625105116246, 0.6555444729681099], ["Contrast", 0.5539208721135375, 0.7819973409318487]], [["Equalize", 0.3262607499912611, 0.0006745572802121513], ["Contrast", 0.35341551623767103, 0.36814689398886347]], [["ShearY", 0.7478539900243613, 0.37322078030129185], ["TranslateX", 0.41558847793529247, 0.7394615158544118]], [["Invert", 0.13735541232529067, 0.5536403864332143], ["Cutout", 0.5109718190377135, 0.0447509485253679]], [["AutoContrast", 0.09403602327274725, 0.5909250807862687], ["ShearY", 0.53234060616395, 0.5316981359469398]], [["ShearX", 0.5651922367876323, 0.6794110241313183], ["Posterize", 0.7431624856363638, 0.7896861463783287]], [["Brightness", 0.30949179379286806, 0.7650569096019195], ["Sharpness", 0.5461629122105034, 0.6814369444005866]], [["Sharpness", 0.28459340191768434, 0.7802208350806028], ["Rotate", 0.15097973114238117, 0.5259683294104645]], [["ShearX", 0.6430803693700531, 0.9333735880102375], ["Contrast", 0.7522209520030653, 0.18831747966185058]], [["Contrast", 0.4219455937915647, 0.29949769435499646], ["Color", 0.6925322933509542, 0.8095523885795443]], [["ShearX", 0.23553236193043048, 0.17966207900468323], ["AutoContrast", 0.9039700567886262, 0.21983629944639108]], [["ShearX", 0.19256223146671514, 0.31200739880443584], ["Sharpness", 0.31962196883294713, 0.6828107668550425]], [["Cutout", 0.5947690279080912, 0.21728220253899178], ["Rotate", 0.6757188879871141, 0.489460599679474]], [["ShearY", 0.18365897125470526, 0.3988571115918058], ["Brightness", 0.7727489489504, 0.4790369956329955]], [["Contrast", 0.7090301084131432, 0.5178303607560537], ["ShearX", 0.16749258277688506, 0.33061773301592356]], [["ShearX", 0.3706690885419934, 0.38510677124319415], ["AutoContrast", 0.8288356276501032, 0.16556487668770264]], [["TranslateY", 0.16758043046445614, 0.30127092823893986], ["Brightness", 0.5194636577132354, 0.6225165310621702]], [["Cutout", 0.6087289363049726, 0.10439287037803044], ["Rotate", 0.7503452083033819, 0.7425316019981433]], [["ShearY", 0.24347189588329932, 0.5554979486672325], ["Brightness", 0.9468115239174161, 0.6132449358023568]], [["Brightness", 0.7144508395807994, 0.4610594769966929], ["ShearX", 0.16466683833092968, 0.3382903812375781]], [["Sharpness", 0.27743648684265465, 0.17200038071656915], ["Color", 0.47404262107546236, 0.7868991675614725]], [["Sharpness", 0.8603993513633618, 0.324604728411791], ["TranslateX", 0.3331597130403763, 0.9369586812977804]], [["Color", 0.1535813630595832, 0.4700116846558207], ["Color", 0.5435647971896318, 0.7639291483525243]], [["Brightness", 0.21486188101947656, 0.039347277341450576], ["Cutout", 0.7069526940684954, 0.39273934115015696]], [["ShearY", 0.7267130888840517, 0.6310800726389485], ["AutoContrast", 0.662163190824139, 0.31948540372237766]], [["ShearX", 0.5123132117185981, 0.1981015909438834], ["AutoContrast", 0.9009347363863067, 0.26790399126924036]], [["Brightness", 0.24245061453231648, 0.2673478678291436], 
["ShearX", 0.31707976089283946, 0.6800582845544948]], [["Cutout", 0.9257780138367764, 0.03972673526848819], ["Rotate", 0.6807858944518548, 0.46974332280612097]], [["ShearY", 0.1543443071262312, 0.6051682587030671], ["Brightness", 0.9758203119828304, 0.4941406868162414]], [["Contrast", 0.07578049236491124, 0.38953819133407647], ["ShearX", 0.20194918288164293, 0.4141510791947318]], [["Color", 0.27826402243792286, 0.43517491081531157], ["AutoContrast", 0.6159269026143263, 0.2021846783488046]], [["AutoContrast", 0.5039377966534692, 0.19241507605941105], ["Invert", 0.5563931144385394, 0.7069728937319112]], [["Sharpness", 0.19031632433810566, 0.26310171056096743], ["Color", 0.4724537593175573, 0.6715201448387876]], [["ShearY", 0.2280910467786642, 0.33340559088059313], ["ShearY", 0.8858560034869303, 0.2598627441471076]], [["ShearY", 0.07291814128021593, 0.5819462692986321], ["Cutout", 0.27605696060512147, 0.9693427371868695]], [["Posterize", 0.4249871586563321, 0.8256952014328607], ["Posterize", 0.005907466926447169, 0.8081353382152597]], [["Brightness", 0.9071305290601128, 0.4781196213717954], ["Posterize", 0.8996214311439275, 0.5540717376630279]], [["Brightness", 0.06560728936236392, 0.9920627849065685], ["TranslateX", 0.04530789794044952, 0.5318568944702607]], [["TranslateX", 0.6800263601084814, 0.4611536772507228], ["Rotate", 0.7245888375283157, 0.0914772551375381]], [["Sharpness", 0.879556061897963, 0.42272481462067535], ["TranslateX", 0.4600350422524085, 0.5742175429334919]], [["AutoContrast", 0.5005776243176145, 0.22597121331684505], ["Invert", 0.10763286370369299, 0.6841782704962373]], [["Sharpness", 0.7422908472000116, 0.6850324203882405], ["TranslateX", 0.3832914614128403, 0.34798646673324896]], [["ShearY", 0.31939465302679326, 0.8792088167639516], ["Brightness", 0.4093604352811235, 0.21055483197261338]], [["AutoContrast", 0.7447595860998638, 0.19280222555998586], ["TranslateY", 0.317754779431227, 0.9983454520593591]], [["Equalize", 0.27706973689750847, 0.6447455020660622], ["Contrast", 0.5626579126863761, 0.7920049962776781]], [["Rotate", 0.13064369451773816, 0.1495367590684905], ["Sharpness", 0.24893941981801215, 0.6295943894521504]], [["ShearX", 0.6856269993063254, 0.5167938584189854], ["Sharpness", 0.24835352574609537, 0.9990550493102627]], [["AutoContrast", 0.461654115871693, 0.43097388896245004], ["Cutout", 0.366359682416437, 0.08011826474215511]], [["AutoContrast", 0.993892672935951, 0.2403608711236933], ["ShearX", 0.6620817870694181, 0.1744814077869482]], [["ShearY", 0.6396747719986443, 0.15031017143644265], ["Brightness", 0.9451954879495629, 0.26490678840264714]], [["Color", 0.19311480787397262, 0.15712300697448575], ["Posterize", 0.05391448762015258, 0.6943963643155474]], [["Sharpness", 0.6199669674684085, 0.5412492335319072], ["Invert", 0.14086213450149815, 0.2611850277919339]], [["Posterize", 0.5533129268803405, 0.5332478159319912], ["ShearX", 0.48956244029096635, 0.09223930853562916]], [["ShearY", 0.05871590849449765, 0.19549715278943228], ["TranslateY", 0.7208521362741379, 0.36414003004659434]], [["ShearY", 0.7316263417917531, 0.0629747985768501], ["Contrast", 0.036359793501448245, 0.48658745414898386]], [["Rotate", 0.3301497610942963, 0.5686622043085637], ["ShearX", 0.40581487555676843, 0.5866127743850192]], [["ShearX", 0.6679039628249283, 0.5292270693200821], ["Sharpness", 0.25901391739310703, 0.9778360586541461]], [["AutoContrast", 0.27373222012596854, 0.14456771405730712], ["Contrast", 0.3877220783523938, 0.7965158941894336]], [["Solarize", 0.29440905483979096, 
0.06071633809388455], ["Equalize", 0.5246736285116214, 0.37575084834661976]], [["TranslateY", 0.2191269464520395, 0.7444942293988484], ["Posterize", 0.3840878524812771, 0.31812671711741247]], [["Solarize", 0.25159267140731356, 0.5833264622559661], ["Brightness", 0.07552262572348738, 0.33210648549288435]], [["AutoContrast", 0.9770099298399954, 0.46421915310428197], ["AutoContrast", 0.04707358934642503, 0.24922048012183493]], [["Cutout", 0.5379685806621965, 0.02038212605928355], ["Brightness", 0.5900728303717965, 0.28807872931416956]], [["Sharpness", 0.11596624872886108, 0.6086947716949325], ["AutoContrast", 0.34876470059667525, 0.22707897759730578]], [["Contrast", 0.276545513135698, 0.8822580384226156], ["Rotate", 0.04874027684061846, 0.6722214281612163]], [["ShearY", 0.595839851757025, 0.4389866852785822], ["Equalize", 0.5225492356128832, 0.2735290854063459]], [["Sharpness", 0.9918029636732927, 0.9919926583216121], ["Sharpness", 0.03672376137997366, 0.5563865980047012]], [["AutoContrast", 0.34169589759999847, 0.16419911552645738], ["Invert", 0.32995953043129234, 0.15073174739720568]], [["Posterize", 0.04600255098477292, 0.2632612790075844], ["TranslateY", 0.7852153329831825, 0.6990722310191976]], [["AutoContrast", 0.4414653815356372, 0.2657468780017082], ["Posterize", 0.30647061536763337, 0.3688222724948656]], [["Contrast", 0.4239361091421837, 0.6076562806342001], ["Cutout", 0.5780707784165284, 0.05361325256745192]], [["Sharpness", 0.7657895907855394, 0.9842407321667671], ["Sharpness", 0.5416352696151596, 0.6773681575200902]], [["AutoContrast", 0.13967381098331305, 0.10787258006315015], ["Posterize", 0.5019536507897069, 0.9881978222469807]], [["Brightness", 0.030528346448984903, 0.31562058762552847], ["TranslateY", 0.0843808140595676, 0.21019213305350526]], [["AutoContrast", 0.6934579165006736, 0.2530484168209199], ["Rotate", 0.0005751408130693636, 0.43790043943210005]], [["TranslateX", 0.611258547664328, 0.25465240215894935], ["Sharpness", 0.5001446909868196, 0.36102204109889413]], [["Contrast", 0.8995127327150193, 0.5493190695343996], ["Brightness", 0.242708780669213, 0.5461116653329015]], [["AutoContrast", 0.3751825351022747, 0.16845985803896962], ["Cutout", 0.25201103287363663, 0.0005893331783358435]], [["ShearX", 0.1518985779435941, 0.14768180777304504], ["Color", 0.85133530274324, 0.4006641163378305]], [["TranslateX", 0.5489668255504668, 0.4694591826554948], ["Rotate", 0.1917354490155893, 0.39993269385802177]], [["ShearY", 0.6689267479532809, 0.34304285013663577], ["Equalize", 0.24133154048883143, 0.279324043138247]], [["Contrast", 0.3412544002099494, 0.20217358823930232], ["Color", 0.8606984790510235, 0.14305503544676373]], [["Cutout", 0.21656155695311988, 0.5240101349572595], ["Brightness", 0.14109877717636352, 0.2016827341210295]], [["Sharpness", 0.24764371218833872, 0.19655480259925423], ["Posterize", 0.19460398862039913, 0.4975414350200679]], [["Brightness", 0.6071850094982323, 0.7270716448607151], ["Solarize", 0.111786402398499, 0.6325641684614275]], [["Contrast", 0.44772949532200856, 0.44267502710695955], ["AutoContrast", 0.360117506402693, 0.2623958228760273]], [["Sharpness", 0.8888131688583053, 0.936897400764746], ["Sharpness", 0.16080674198274894, 0.5681119841445879]], [["AutoContrast", 0.8004456226590612, 0.1788600469525269], ["Brightness", 0.24832285390647374, 0.02755350284841604]], [["ShearY", 0.06910320102646594, 0.26076407321544054], ["Contrast", 0.8633703022354964, 0.38968514704043056]], [["AutoContrast", 0.42306251382780613, 0.6883260271268138], ["Rotate", 
0.3938724346852023, 0.16740881249086037]], [["Contrast", 0.2725343884286728, 0.6468194318074759], ["Sharpness", 0.32238942646494745, 0.6721149242783824]], [["AutoContrast", 0.942093919956842, 0.14675331481712853], ["Posterize", 0.5406276708262192, 0.683901182218153]], [["Cutout", 0.5386811894643584, 0.04498833938429728], ["Posterize", 0.17007257321724775, 0.45761177118620633]], [["Contrast", 0.13599408935104654, 0.53282738083886], ["Solarize", 0.26941667995081114, 0.20958261079465895]], [["Color", 0.6600788518606634, 0.9522228302165842], ["Invert", 0.0542722262516899, 0.5152431169321683]], [["Contrast", 0.5328934819727553, 0.2376220512388278], ["Posterize", 0.04890422575781711, 0.3182233123739474]], [["AutoContrast", 0.9289628064340965, 0.2976678437448435], ["Color", 0.20936893798507963, 0.9649612821434217]], [["Cutout", 0.9019423698575457, 0.24002036989728096], ["Brightness", 0.48734445615892974, 0.047660899809176316]], [["Sharpness", 0.09347824275711591, 0.01358686275590612], ["Posterize", 0.9248539660538934, 0.4064232632650468]], [["Brightness", 0.46575675383704634, 0.6280194775484345], ["Invert", 0.17276207634499413, 0.21263495428839635]], [["Brightness", 0.7238014711679732, 0.6178946027258592], ["Equalize", 0.3815496086340364, 0.07301281068847276]], [["Contrast", 0.754557393588416, 0.895332753570098], ["Color", 0.32709957750707447, 0.8425486003491515]], [["Rotate", 0.43406698081696576, 0.28628263254953723], ["TranslateY", 0.43949548709125374, 0.15927082198238685]], [["Brightness", 0.0015838339831640708, 0.09341692553352654], ["AutoContrast", 0.9113966907329718, 0.8345900469751112]], [["ShearY", 0.46698796308585017, 0.6150701348176804], ["Invert", 0.14894062704815722, 0.2778388046184728]], [["Color", 0.30360499169455957, 0.995713092016834], ["Contrast", 0.2597016288524961, 0.8654420870658932]], [["Brightness", 0.9661642031891435, 0.7322006407169436], ["TranslateY", 0.4393502786333408, 0.33934762664274265]], [["Color", 0.9323638351992302, 0.912776309755293], ["Brightness", 0.1618274755371618, 0.23485741708056307]], [["Color", 0.2216470771158821, 0.3359240197334976], ["Sharpness", 0.6328691811471494, 0.6298393874452548]], [["Solarize", 0.4772769142265505, 0.7073470698713035], ["ShearY", 0.2656114148206966, 0.31343097010487253]], [["Solarize", 0.3839017339304234, 0.5985505779429036], ["Equalize", 0.002412059429196589, 0.06637506181196245]], [["Contrast", 0.12751196553017863, 0.46980311434237976], ["Sharpness", 0.3467487455865491, 0.4054907610444406]], [["AutoContrast", 0.9321813669127206, 0.31328471589533274], ["Rotate", 0.05801738717432747, 0.36035756254444273]], [["TranslateX", 0.52092390458353, 0.5261722561643886], ["Contrast", 0.17836804476171306, 0.39354333443158535]], [["Posterize", 0.5458100909925713, 0.49447244994482603], ["Brightness", 0.7372536822363605, 0.5303409097463796]], [["Solarize", 0.1913974941725724, 0.5582966653986761], ["Equalize", 0.020733669175727026, 0.9377467166472878]], [["Equalize", 0.16265732137763889, 0.5206282340874929], ["Sharpness", 0.2421533133595281, 0.506389065871883]], [["AutoContrast", 0.9787324801448523, 0.24815051941486466], ["Rotate", 0.2423487151245957, 0.6456493129745148]], [["TranslateX", 0.6809867726670327, 0.6949687002397612], ["Contrast", 0.16125673359747458, 0.7582679978218987]], [["Posterize", 0.8212000950994955, 0.5225012157831872], ["Brightness", 0.8824891856626245, 0.4499216779709508]], [["Solarize", 0.12061313332505218, 0.5319371283368052], ["Equalize", 0.04120865969945108, 0.8179402157299602]], [["Rotate", 0.11278256686005855, 
0.4022686554165438], ["ShearX", 0.2983451019112792, 0.42782525461812604]], [["ShearY", 0.8847385513289983, 0.5429227024179573], ["Rotate", 0.21316428726607445, 0.6712120087528564]], [["TranslateX", 0.46448081241068717, 0.4746090648963252], ["Brightness", 0.19973580961271142, 0.49252862676553605]], [["Posterize", 0.49664100539481526, 0.4460713166484651], ["Brightness", 0.6629559985581529, 0.35192346529003693]], [["Color", 0.22710733249173676, 0.37943185764616194], ["ShearX", 0.015809774971472595, 0.8472080190835669]], [["Contrast", 0.4187366322381491, 0.21621979869256666], ["AutoContrast", 0.7631045030367304, 0.44965231251615134]], [["Sharpness", 0.47240637876720515, 0.8080091811749525], ["Cutout", 0.2853425420104144, 0.6669811510150936]], [["Posterize", 0.7830320527127324, 0.2727062685529881], ["Solarize", 0.527834000867504, 0.20098218845222998]], [["Contrast", 0.366380535288225, 0.39766001659663075], ["Cutout", 0.8708808878088891, 0.20669525734273086]], [["ShearX", 0.6815427281122932, 0.6146858582671569], ["AutoContrast", 0.28330622372053493, 0.931352024154997]], [["AutoContrast", 0.8668174463154519, 0.39961453880632863], ["AutoContrast", 0.5718557712359253, 0.6337062930797239]], [["ShearY", 0.8923152519411871, 0.02480062504737446], ["Cutout", 0.14954159341231515, 0.1422219808492364]], [["Rotate", 0.3733718175355636, 0.3861928572224287], ["Sharpness", 0.5651126520194574, 0.6091103847442831]], [["Posterize", 0.8891714191922857, 0.29600154265251016], ["TranslateY", 0.7865351723963945, 0.5664998548985523]], [["TranslateX", 0.9298214806998273, 0.729856565052017], ["AutoContrast", 0.26349082482341846, 0.9638882609038888]], [["Sharpness", 0.8387378377527128, 0.42146721129032494], ["AutoContrast", 0.9860522000876452, 0.4200699464169384]], [["ShearY", 0.019609159303115145, 0.37197835936879514], ["Cutout", 0.22199340461754258, 0.015932573201085848]], [["Rotate", 0.43871085583928443, 0.3283504258860078], ["Sharpness", 0.6077702068037776, 0.6830305349618742]], [["Contrast", 0.6160211756538094, 0.32029451083389626], ["Cutout", 0.8037631428427006, 0.4025688837399259]], [["TranslateY", 0.051637820936985435, 0.6908417834391846], ["Sharpness", 0.7602756948473368, 0.4927111506643095]], [["Rotate", 0.4973618638052235, 0.45931479729281227], ["TranslateY", 0.04701789716427618, 0.9408779705948676]], [["Rotate", 0.5214194592768602, 0.8371249272013652], ["Solarize", 0.17734812472813338, 0.045020798970228315]], [["ShearX", 0.7457999920079351, 0.19025612553075893], ["Sharpness", 0.5994846101703786, 0.5665094068864229]], [["Contrast", 0.6172655452900769, 0.7811432139704904], ["Cutout", 0.09915620454670282, 0.3963692287596121]], [["TranslateX", 0.2650112299235817, 0.7377261946165307], ["AutoContrast", 0.5019539734059677, 0.26905046992024506]], [["Contrast", 0.6646299821370135, 0.41667784809592945], ["Cutout", 0.9698457154992128, 0.15429001887703997]], [["Sharpness", 0.9467079029475773, 0.44906457469098204], ["Cutout", 0.30036908747917396, 0.4766149689663106]], [["Equalize", 0.6667517691051055, 0.5014839828447363], ["Solarize", 0.4127890336820831, 0.9578274770236529]], [["Cutout", 0.6447384874120834, 0.2868806107728985], ["Cutout", 0.4800990488106021, 0.4757538246206956]], [["Solarize", 0.12560195032363236, 0.5557473475801568], ["Equalize", 0.019957161871490228, 0.5556797187823773]], [["Contrast", 0.12607637375759484, 0.4300633627435161], ["Sharpness", 0.3437273670109087, 0.40493203127714417]], [["AutoContrast", 0.884353334807183, 0.5880138314357569], ["Rotate", 0.9846032404597116, 0.3591877296622974]], 
[["TranslateX", 0.6862295865975581, 0.5307482119690076], ["Contrast", 0.19439251187251982, 0.3999195825722808]], [["Posterize", 0.4187641835025246, 0.5008988942651585], ["Brightness", 0.6665805605402482, 0.3853288204214253]], [["Posterize", 0.4507470690013903, 0.4232437206624681], ["TranslateX", 0.6054107416317659, 0.38123828040922203]], [["AutoContrast", 0.29562338573283276, 0.35608605102687474], ["TranslateX", 0.909954785390274, 0.20098894888066549]], [["Contrast", 0.6015278411777212, 0.6049140992035096], ["Cutout", 0.47178713636517855, 0.5333747244651914]], [["TranslateX", 0.490851976691112, 0.3829593925141144], ["Sharpness", 0.2716675173824095, 0.5131696240367152]], [["Posterize", 0.4190558294646337, 0.39316689077269873], ["Rotate", 0.5018526072725914, 0.295712490156129]], [["AutoContrast", 0.29624715560691617, 0.10937329832409388], ["Posterize", 0.8770505275992637, 0.43117765012206943]], [["Rotate", 0.6649970092751698, 0.47767131373391974], ["ShearX", 0.6257923540490786, 0.6643337040198358]], [["Sharpness", 0.5553620705849509, 0.8467799429696928], ["Cutout", 0.9006185811918932, 0.3537270716262]], [["ShearY", 0.0007619678283789788, 0.9494591850536303], ["Invert", 0.24267733654007673, 0.7851608409575828]], [["Contrast", 0.9730916198112872, 0.404670123321921], ["Sharpness", 0.5923587793251186, 0.7405792404430281]], [["Cutout", 0.07393909593373034, 0.44569630026328344], ["TranslateX", 0.2460593252211425, 0.4817527814541055]], [["Brightness", 0.31058654119340867, 0.7043749950260936], ["ShearX", 0.7632161538947713, 0.8043681264908555]], [["AutoContrast", 0.4352334371415373, 0.6377550087204297], ["Rotate", 0.2892714673415678, 0.49521052050510556]], [["Equalize", 0.509071051375276, 0.7352913414974414], ["ShearX", 0.5099959429711828, 0.7071566714593619]], [["Posterize", 0.9540506532512889, 0.8498853304461906], ["ShearY", 0.28199061357155397, 0.3161715627214629]], [["Posterize", 0.6740855359097433, 0.684004694936616], ["Posterize", 0.6816720350737863, 0.9654766942980918]], [["Solarize", 0.7149344531717328, 0.42212789795181643], ["Brightness", 0.686601460864528, 0.4263050070610551]], [["Cutout", 0.49577164991501, 0.08394890892056037], ["Rotate", 0.5810369852730606, 0.3320732965776973]], [["TranslateY", 0.1793755480490623, 0.6006520265468684], ["Brightness", 0.3769016576438939, 0.7190746300828186]], [["TranslateX", 0.7226363597757153, 0.3847027238123509], ["Brightness", 0.7641713191794035, 0.36234003077512544]], [["TranslateY", 0.1211227055347106, 0.6693523474608023], ["Brightness", 0.13011180247738063, 0.5126647617294864]], [["Equalize", 0.1501070550869129, 0.0038548909451806557], ["Posterize", 0.8266535939653881, 0.5502199643499207]], [["Sharpness", 0.550624117428359, 0.2023044586648523], ["Brightness", 0.06291556314780017, 0.7832635398703937]], [["Color", 0.3701578205508141, 0.9051537973590863], ["Contrast", 0.5763972727739397, 0.4905511239739898]], [["Rotate", 0.7678527224046323, 0.6723066265307555], ["Solarize", 0.31458533097383207, 0.38329324335154524]], [["Brightness", 0.292050127929522, 0.7047582807953063], ["ShearX", 0.040541891910333805, 0.06639328601282746]], [["TranslateY", 0.4293891393238555, 0.6608516902234284], ["Sharpness", 0.7794685477624004, 0.5168044063408147]], [["Color", 0.3682450402286552, 0.17274523597220048], ["ShearY", 0.3936056470397763, 0.5702597289866161]], [["Equalize", 0.43436990310624657, 0.9207072627823626], ["Contrast", 0.7608688260846083, 0.4759023148841439]], [["Brightness", 0.7926088966143935, 0.8270093925674497], ["ShearY", 0.4924174064969461, 
0.47424347505831244]], [["Contrast", 0.043917555279430476, 0.15861903591675125], ["ShearX", 0.30439480405505853, 0.1682659341098064]], [["TranslateY", 0.5598255583454538, 0.721352536005039], ["Posterize", 0.9700921973303752, 0.6882015184440126]], [["AutoContrast", 0.3620887415037668, 0.5958176322317132], ["TranslateX", 0.14213781552733287, 0.6230799786459947]], [["Color", 0.490366889723972, 0.9863152892045195], ["Color", 0.817792262022319, 0.6755656429452775]], [["Brightness", 0.7030707021937771, 0.254633187122679], ["Color", 0.13977318232688843, 0.16378180123959793]], [["AutoContrast", 0.2933247831326118, 0.6283663376211102], ["Sharpness", 0.85430478154147, 0.9753613184208796]], [["Rotate", 0.6674299955457268, 0.48571208708018976], ["Contrast", 0.47491370175907016, 0.6401079552479657]], [["Sharpness", 0.37589579644127863, 0.8475131989077025], ["TranslateY", 0.9985149867598191, 0.057815729375099975]], [["Equalize", 0.0017194373841596389, 0.7888361311461602], ["Contrast", 0.6779293670669408, 0.796851411454113]], [["TranslateY", 0.3296782119072306, 0.39765117357271834], ["Sharpness", 0.5890554357001884, 0.6318339473765834]], [["Posterize", 0.25423810893163856, 0.5400430289894207], ["Sharpness", 0.9273643918988342, 0.6480913470982622]], [["Cutout", 0.850219975768305, 0.4169812455601289], ["Solarize", 0.5418755745870089, 0.5679666650495466]], [["Brightness", 0.008881361977310959, 0.9282562314720516], ["TranslateY", 0.7736066471553994, 0.20041167606029642]], [["Brightness", 0.05382537581401925, 0.6405265501035952], ["Contrast", 0.30484329473639593, 0.5449338155734242]], [["Color", 0.613257119787967, 0.4541503912724138], ["Brightness", 0.9061572524724674, 0.4030159294447347]], [["Brightness", 0.02739111568942537, 0.006028056532326534], ["ShearX", 0.17276751958646486, 0.05967365780621859]], [["TranslateY", 0.4376298213047888, 0.7691816164456199], ["Sharpness", 0.8162292718857824, 0.6054926462265117]], [["Color", 0.37963069679121214, 0.5946919433483344], ["Posterize", 0.08485417284005387, 0.5663580913231766]], [["Equalize", 0.49785780226818316, 0.9999137109183761], ["Sharpness", 0.7685879484682496, 0.6260846154212211]], [["AutoContrast", 0.4190931409670763, 0.2374852525139795], ["Posterize", 0.8797422264608563, 0.3184738541692057]], [["Rotate", 0.7307269024632872, 0.41523609600701106], ["ShearX", 0.6166685870692289, 0.647133807748274]], [["Sharpness", 0.5633713231039904, 0.8276694754755876], ["Cutout", 0.8329340776895764, 0.42656043027424073]], [["ShearY", 0.14934828370884312, 0.8622510773680372], ["Invert", 0.25925989086863277, 0.8813283584888576]], [["Contrast", 0.9457071292265932, 0.43228655518614034], ["Sharpness", 0.8485316947644338, 0.7590298998732413]], [["AutoContrast", 0.8386103589399184, 0.5859583131318076], ["Solarize", 0.466758711343543, 0.9956215363818983]], [["Rotate", 0.9387133710926467, 0.19180564509396503], ["Rotate", 0.5558247609706255, 0.04321698692007105]], [["ShearX", 0.3608716600695567, 0.15206159451532864], ["TranslateX", 0.47295292905710146, 0.5290760596129888]], [["TranslateX", 0.8357685981547495, 0.5991305115727084], ["Posterize", 0.5362929404188211, 0.34398525441943373]], [["ShearY", 0.6751984031632811, 0.6066293622133011], ["Contrast", 0.4122723990263818, 0.4062467515095566]], [["Color", 0.7515349936021702, 0.5122124665429213], ["Contrast", 0.03190514292904123, 0.22903520154660545]], [["Contrast", 0.5448962625054385, 0.38655673938910545], ["AutoContrast", 0.4867400684894492, 0.3433111101096984]], [["Rotate", 0.0008372434310827959, 0.28599951781141714], ["Equalize", 
0.37113686925530087, 0.5243929348114981]], [["Color", 0.720054993488857, 0.2010177651701808], ["TranslateX", 0.23036196506059398, 0.11152764304368781]], [["Cutout", 0.859134208332423, 0.6727345740185254], ["ShearY", 0.02159833505865088, 0.46390076266538544]], [["Sharpness", 0.3428232157391428, 0.4067874527486514], ["Brightness", 0.5409415136577347, 0.3698432231874003]], [["Solarize", 0.27303978936454776, 0.9832186173589548], ["ShearY", 0.08831127213044043, 0.4681870331149774]], [["TranslateY", 0.2909309268736869, 0.4059460811623174], ["Sharpness", 0.6425125139803729, 0.20275737203293587]], [["Contrast", 0.32167626214661627, 0.28636162794046977], ["Invert", 0.4712405253509603, 0.7934644799163176]], [["Color", 0.867993060896951, 0.96574321666213], ["Color", 0.02233897320328512, 0.44478933557303063]], [["AutoContrast", 0.1841254751814967, 0.2779992148017741], ["Color", 0.3586283093530607, 0.3696246850445087]], [["Posterize", 0.2052935984046965, 0.16796913860308244], ["ShearX", 0.4807226832843722, 0.11296747254563266]], [["Cutout", 0.2016411266364791, 0.2765295444084803], ["Brightness", 0.3054112810424313, 0.695924264931216]], [["Rotate", 0.8405872184910479, 0.5434142541450815], ["Cutout", 0.4493615138203356, 0.893453735250007]], [["Contrast", 0.8433310507685494, 0.4915423577963278], ["ShearX", 0.22567799557913246, 0.20129892537008834]], [["Contrast", 0.045954277103674224, 0.5043900167190442], ["Cutout", 0.5552992473054611, 0.14436447810888237]], [["AutoContrast", 0.7719296115130478, 0.4440417544621306], ["Sharpness", 0.13992809206158283, 0.7988278670709781]], [["Color", 0.7838574233513952, 0.5971351401625151], ["TranslateY", 0.13562290583925385, 0.2253039635819158]], [["Cutout", 0.24870301109385806, 0.6937886690381568], ["TranslateY", 0.4033400068952813, 0.06253378991880915]], [["TranslateX", 0.0036059390486775644, 0.5234723884081843], ["Solarize", 0.42724862530733526, 0.8697702564187633]], [["Equalize", 0.5446026737834311, 0.9367992979112202], ["ShearY", 0.5943478903735789, 0.42345889214100046]], [["ShearX", 0.18611885697957506, 0.7320849092947314], ["ShearX", 0.3796416430900566, 0.03817761920009881]], [["Posterize", 0.37636778506979124, 0.26807924785236537], ["Brightness", 0.4317372554383255, 0.5473346211870932]], [["Brightness", 0.8100436240916665, 0.3817612088285007], ["Brightness", 0.4193974619003253, 0.9685902764026623]], [["Contrast", 0.701776402197012, 0.6612786008858009], ["Color", 0.19882787177960912, 0.17275597188875483]], [["Color", 0.9538303302832989, 0.48362384535228686], ["ShearY", 0.2179980837345602, 0.37027290936457313]], [["TranslateY", 0.6068028691503798, 0.3919346523454841], ["Cutout", 0.8228303342563138, 0.18372280287814613]], [["Equalize", 0.016416758802906828, 0.642838949194916], ["Cutout", 0.5761717838655257, 0.7600661153497648]], [["Color", 0.9417761826818639, 0.9916074035986558], ["Equalize", 0.2524209308597042, 0.6373703468715077]], [["Brightness", 0.75512589439513, 0.6155072321007569], ["Contrast", 0.32413476940254515, 0.4194739830159837]], [["Sharpness", 0.3339450765586968, 0.9973297539194967], ["AutoContrast", 0.6523930242124429, 0.1053482471037186]], [["ShearX", 0.2961391955838801, 0.9870036064904368], ["ShearY", 0.18705025965909403, 0.4550895821154484]], [["TranslateY", 0.36956447983807883, 0.36371471767143543], ["Sharpness", 0.6860051967688487, 0.2850190720087796]], [["Cutout", 0.13017742151902967, 0.47316674150067195], ["Invert", 0.28923829959551883, 0.9295585654924601]], [["Contrast", 0.7302368472279086, 0.7178974949876642], ["TranslateY", 
0.12589674152030433, 0.7485392909494947]], [["Color", 0.6474693117772619, 0.5518269515590674], ["Contrast", 0.24643004970708016, 0.3435581358079418]], [["Contrast", 0.5650327855750835, 0.4843031798040887], ["Brightness", 0.3526684005761239, 0.3005305004600969]], [["Rotate", 0.09822284968122225, 0.13172798244520356], ["Equalize", 0.38135066977857157, 0.5135129123554154]], [["Contrast", 0.5902590645585712, 0.2196062383730596], ["ShearY", 0.14188379126120954, 0.1582612142182743]], [["Cutout", 0.8529913814417812, 0.89734031211874], ["Color", 0.07293767043078672, 0.32577659205278897]], [["Equalize", 0.21401668971453247, 0.040015259500028266], ["ShearY", 0.5126400895338797, 0.4726484828276388]], [["Brightness", 0.8269430025954498, 0.9678362841865166], ["ShearY", 0.17142069814830432, 0.4726727848289514]], [["Brightness", 0.699707089334018, 0.2795501395789335], ["ShearX", 0.5308818178242845, 0.10581814221896294]], [["Equalize", 0.32519644258946145, 0.15763390340309183], ["TranslateX", 0.6149090364414208, 0.7454832565718259]], [["AutoContrast", 0.5404508567155423, 0.7472387762067986], ["Equalize", 0.05649876539221024, 0.5628180219887216]]] - return p diff --git a/autoPyTorch/components/preprocessing/image_preprocessing/augmentation_transforms.py b/autoPyTorch/components/preprocessing/image_preprocessing/augmentation_transforms.py deleted file mode 100644 index fb716c905..000000000 --- a/autoPyTorch/components/preprocessing/image_preprocessing/augmentation_transforms.py +++ /dev/null @@ -1,439 +0,0 @@ -# Copyright 2018 The TensorFlow Authors All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Transforms used in the Augmentation Policies.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import random -import numpy as np -# pylint:disable=g-multiple-import -from PIL import ImageOps, ImageEnhance, ImageFilter, Image -# pylint:enable=g-multiple-import - - -IMAGE_SIZE = 32 -# What is the dataset mean and std of the images on the training set -MEANS = [0.49139968, 0.48215841, 0.44653091] -STDS = [0.24703223, 0.24348513, 0.26158784] -PARAMETER_MAX = 10 # What is the max 'level' a transform could be predicted - - -def random_flip(x): - """Flip the input x horizontally with 50% probability.""" - if np.random.rand(1)[0] > 0.5: - return np.fliplr(x) - return x - - -def zero_pad_and_crop(img, amount=4): - """Zero pad by `amount` zero pixels on each side then take a random crop. - Args: - img: numpy image that will be zero padded and cropped. - amount: amount of zeros to pad `img` with horizontally and verically. - Returns: - The cropped zero padded img. The returned numpy array will be of the same - shape as `img`. 
- """ - padded_img = np.zeros((img.shape[0] + amount * 2, img.shape[1] + amount * 2, - img.shape[2])) - padded_img[amount:img.shape[0] + amount, amount: - img.shape[1] + amount, :] = img - top = np.random.randint(low=0, high=2 * amount) - left = np.random.randint(low=0, high=2 * amount) - new_img = padded_img[top:top + img.shape[0], left:left + img.shape[1], :] - return new_img - - -def create_cutout_mask(img_height, img_width, num_channels, size): - """Creates a zero mask used for cutout of shape `img_height` x `img_width`. - Args: - img_height: Height of image cutout mask will be applied to. - img_width: Width of image cutout mask will be applied to. - num_channels: Number of channels in the image. - size: Size of the zeros mask. - Returns: - A mask of shape `img_height` x `img_width` with all ones except for a - square of zeros of shape `size` x `size`. This mask is meant to be - elementwise multiplied with the original image. Additionally returns - the `upper_coord` and `lower_coord` which specify where the cutout mask - will be applied. - """ - if size>1: - print("SIZE AND CHANNELS", size, num_channels) - print("IMAGE HEIGTH AND WIDTH", img_height, img_width) - assert img_height == img_width - - # Sample center where cutout mask will be applied - height_loc = np.random.randint(low=0, high=img_height) - width_loc = np.random.randint(low=0, high=img_width) - print("HEIGHT LOC AND WIDTH LOC HEIGTH AND WIDTH", height_loc, width_loc) - - # Determine upper right and lower left corners of patch - upper_coord = (max(0, height_loc - size // 2), max(0, width_loc - size // 2)) - lower_coord = (min(img_height, height_loc + size // 2), - min(img_width, width_loc + size // 2)) - print("UPPER AND LOWER COORD", upper_coord, lower_coord) - mask_height = lower_coord[0] - upper_coord[0] - mask_width = lower_coord[1] - upper_coord[1] - print("MASK HEIGTH AND WIDTH", mask_height, mask_width) - assert mask_height > 0 - assert mask_width > 0 - - mask = np.ones((img_height, img_width, num_channels)) - zeros = np.zeros((mask_height, mask_width, num_channels)) - mask[upper_coord[0]:lower_coord[0], upper_coord[1]:lower_coord[1], :] = ( - zeros) - - else: - height_loc = np.random.randint(low=0, high=img_height) - width_loc = np.random.randint(low=0, high=img_width) - upper_coord = (height_loc,width_loc) - lower_coord = upper_coord - mask = np.ones((img_height, img_width, num_channels)) - mask[height_loc, width_loc] = 0 - - return mask, upper_coord, lower_coord - - -def cutout_numpy(img, size=16): - """Apply cutout with mask of shape `size` x `size` to `img`. - The cutout operation is from the paper https://arxiv.org/abs/1708.04552. - This operation applies a `size`x`size` mask of zeros to a random location - within `img`. - Args: - img: Numpy image that cutout will be applied to. - size: Height/width of the cutout mask that will be - Returns: - A numpy tensor that is the result of applying the cutout mask to `img`. - """ - img_height, img_width, num_channels = (img.shape[0], img.shape[1], - img.shape[2]) - assert len(img.shape) == 3 - mask, _, _ = create_cutout_mask(img_height, img_width, num_channels, size) - return img * mask - - -def float_parameter(level, maxval): - """Helper function to scale `val` between 0 and maxval . - Args: - level: Level of the operation that will be between [0, `PARAMETER_MAX`]. - maxval: Maximum value that the operation can have. This will be scaled - to level/PARAMETER_MAX. - Returns: - A float that results from scaling `maxval` according to `level`. 
- """ - return float(level) * maxval / PARAMETER_MAX - - -def int_parameter(level, maxval): - """Helper function to scale `val` between 0 and maxval . - Args: - level: Level of the operation that will be between [0, `PARAMETER_MAX`]. - maxval: Maximum value that the operation can have. This will be scaled - to level/PARAMETER_MAX. - Returns: - An int that results from scaling `maxval` according to `level`. - """ - return int(level * maxval / PARAMETER_MAX) - - -def pil_wrap(img): - """Convert the `img` numpy tensor to a PIL Image.""" - return Image.fromarray( - np.uint8((img * STDS + MEANS) * 255.0)).convert('RGBA') - - -def pil_unwrap(pil_img): - """Converts the PIL img to a numpy array.""" - pic_array = (np.array(pil_img.getdata()).reshape((32, 32, 4)) / 255.0) - i1, i2 = np.where(pic_array[:, :, 3] == 0) - pic_array = (pic_array[:, :, :3] - MEANS) / STDS - pic_array[i1, i2] = [0, 0, 0] - return pic_array - - -def apply_policy(policy, img): - """Apply the `policy` to the numpy `img`. - Args: - policy: A list of tuples with the form (name, probability, level) where - `name` is the name of the augmentation operation to apply, `probability` - is the probability of applying the operation and `level` is what strength - the operation to apply. - img: Numpy image that will have `policy` applied to it. - Returns: - The result of applying `policy` to `img`. - """ - pil_img = img # pil_wrap(img) - - for xform in policy: - assert len(xform) == 3 - name, probability, level = xform - xform_fn = NAME_TO_TRANSFORM[name].pil_transformer(probability, level) - pil_img = xform_fn(pil_img) - return pil_img #pil_unwrap(pil_img) - - -class TransformFunction(object): - """Wraps the Transform function for pretty printing options.""" - - def __init__(self, func, name): - self.f = func - self.name = name - - def __repr__(self): - return '<' + self.name + '>' - - def __call__(self, pil_img): - return self.f(pil_img) - - -class TransformT(object): - """Each instance of this class represents a specific transform.""" - - def __init__(self, name, xform_fn): - self.name = name - self.xform = xform_fn - - def pil_transformer(self, probability, level): - - def return_function(im): - if random.random() < probability: - im = self.xform(im, level) - return im - - name = self.name + '({:.1f},{})'.format(probability, level) - return TransformFunction(return_function, name) - - def do_transform(self, image, level): - f = self.pil_transformer(PARAMETER_MAX, level) - return pil_unwrap(f(pil_wrap(image))) - - -################## Transform Functions ################## -identity = TransformT('identity', lambda pil_img, level: pil_img) -flip_lr = TransformT( - 'FlipLR', - lambda pil_img, level: pil_img.transpose(Image.FLIP_LEFT_RIGHT)) -flip_ud = TransformT( - 'FlipUD', - lambda pil_img, level: pil_img.transpose(Image.FLIP_TOP_BOTTOM)) -# pylint:disable=g-long-lambda -auto_contrast = TransformT( - 'AutoContrast', - lambda pil_img, level: ImageOps.autocontrast( - pil_img.convert('RGB')).convert('RGBA')) -equalize = TransformT( - 'Equalize', - lambda pil_img, level: ImageOps.equalize( - pil_img.convert('RGB')).convert('RGBA')) -invert = TransformT( - 'Invert', - lambda pil_img, level: ImageOps.invert( - pil_img.convert('RGB')).convert('RGBA')) -# pylint:enable=g-long-lambda -blur = TransformT( - 'Blur', lambda pil_img, level: pil_img.filter(ImageFilter.BLUR)) -smooth = TransformT( - 'Smooth', - lambda pil_img, level: pil_img.filter(ImageFilter.SMOOTH)) - - -def _rotate_impl(pil_img, level): - """Rotates `pil_img` from -30 to 30 
degrees depending on `level`.""" - degrees = int_parameter(level, 30) - if random.random() > 0.5: - degrees = -degrees - return pil_img.rotate(degrees) - - -rotate = TransformT('Rotate', _rotate_impl) - - -def _posterize_impl(pil_img, level): - """Applies PIL Posterize to `pil_img`.""" - level = int_parameter(level, 4) - return ImageOps.posterize(pil_img.convert('RGB'), 4 - level).convert('RGBA') - - -posterize = TransformT('Posterize', _posterize_impl) - - -def _shear_x_impl(pil_img, level): - """Applies PIL ShearX to `pil_img`. - The ShearX operation shears the image along the horizontal axis with `level` - magnitude. - Args: - pil_img: Image in PIL object. - level: Strength of the operation specified as an Integer from - [0, `PARAMETER_MAX`]. - Returns: - A PIL Image that has had ShearX applied to it. - """ - level = float_parameter(level, 0.3) - if random.random() > 0.5: - level = -level - return pil_img.transform(pil_img.size, Image.AFFINE, (1, level, 0, 0, 1, 0)) - - -shear_x = TransformT('ShearX', _shear_x_impl) - - -def _shear_y_impl(pil_img, level): - """Applies PIL ShearY to `pil_img`. - The ShearY operation shears the image along the vertical axis with `level` - magnitude. - Args: - pil_img: Image in PIL object. - level: Strength of the operation specified as an Integer from - [0, `PARAMETER_MAX`]. - Returns: - A PIL Image that has had ShearX applied to it. - """ - level = float_parameter(level, 0.3) - if random.random() > 0.5: - level = -level - return pil_img.transform(pil_img.size, Image.AFFINE, (1, 0, 0, level, 1, 0)) - - -shear_y = TransformT('ShearY', _shear_y_impl) - - -def _translate_x_impl(pil_img, level): - """Applies PIL TranslateX to `pil_img`. - Translate the image in the horizontal direction by `level` - number of pixels. - Args: - pil_img: Image in PIL object. - level: Strength of the operation specified as an Integer from - [0, `PARAMETER_MAX`]. - Returns: - A PIL Image that has had TranslateX applied to it. - """ - level = int_parameter(level, 10) - if random.random() > 0.5: - level = -level - return pil_img.transform(pil_img.size, Image.AFFINE, (1, 0, level, 0, 1, 0)) - - -translate_x = TransformT('TranslateX', _translate_x_impl) - - -def _translate_y_impl(pil_img, level): - """Applies PIL TranslateY to `pil_img`. - Translate the image in the vertical direction by `level` - number of pixels. - Args: - pil_img: Image in PIL object. - level: Strength of the operation specified as an Integer from - [0, `PARAMETER_MAX`]. - Returns: - A PIL Image that has had TranslateY applied to it. - """ - level = int_parameter(level, 10) - if random.random() > 0.5: - level = -level - return pil_img.transform(pil_img.size, Image.AFFINE, (1, 0, 0, 0, 1, level)) - - -translate_y = TransformT('TranslateY', _translate_y_impl) - - -def _crop_impl(pil_img, level, interpolation=Image.BILINEAR): - """Applies a crop to `pil_img` with the size depending on the `level`.""" - cropped = pil_img.crop((level, level, IMAGE_SIZE - level, IMAGE_SIZE - level)) - resized = cropped.resize((IMAGE_SIZE, IMAGE_SIZE), interpolation) - return resized - - -crop_bilinear = TransformT('CropBilinear', _crop_impl) - - -def _solarize_impl(pil_img, level): - """Applies PIL Solarize to `pil_img`. - Translate the image in the vertical direction by `level` - number of pixels. - Args: - pil_img: Image in PIL object. - level: Strength of the operation specified as an Integer from - [0, `PARAMETER_MAX`]. - Returns: - A PIL Image that has had Solarize applied to it. 
- """ - level = int_parameter(level, 256) - return ImageOps.solarize(pil_img.convert('RGB'), 256 - level).convert('RGBA') - - -solarize = TransformT('Solarize', _solarize_impl) - - -def _cutout_pil_impl(pil_img, level): - """Apply cutout to pil_img at the specified level.""" - size = int_parameter(level, 20) - if size <= 0: - return pil_img - img_height, img_width, num_channels = (32, 32, 3) - _, upper_coord, lower_coord = ( - create_cutout_mask(img_height, img_width, num_channels, size)) - pixels = pil_img.load() # create the pixel map - for i in range(upper_coord[0], lower_coord[0]): # for every col: - for j in range(upper_coord[1], lower_coord[1]): # For every row - pixels[i, j] = (125, 122, 113, 0) # set the colour accordingly - return pil_img - -cutout = TransformT('Cutout', _cutout_pil_impl) - - -def _enhancer_impl(enhancer): - """Sets level to be between 0.1 and 1.8 for ImageEnhance transforms of PIL.""" - def impl(pil_img, level): - v = float_parameter(level, 1.8) + .1 # going to 0 just destroys it - return enhancer(pil_img).enhance(v) - return impl - - -color = TransformT('Color', _enhancer_impl(ImageEnhance.Color)) -contrast = TransformT('Contrast', _enhancer_impl(ImageEnhance.Contrast)) -brightness = TransformT('Brightness', _enhancer_impl( - ImageEnhance.Brightness)) -sharpness = TransformT('Sharpness', _enhancer_impl(ImageEnhance.Sharpness)) - -ALL_TRANSFORMS = [ - flip_lr, - flip_ud, - auto_contrast, - equalize, - invert, - rotate, - posterize, - crop_bilinear, - solarize, - color, - contrast, - brightness, - sharpness, - shear_x, - shear_y, - translate_x, - translate_y, - cutout, - blur, - smooth -] - -NAME_TO_TRANSFORM = {t.name: t for t in ALL_TRANSFORMS} -TRANSFORM_NAMES = NAME_TO_TRANSFORM.keys() diff --git a/autoPyTorch/components/preprocessing/image_preprocessing/operations.py b/autoPyTorch/components/preprocessing/image_preprocessing/operations.py deleted file mode 100644 index 0a1d72add..000000000 --- a/autoPyTorch/components/preprocessing/image_preprocessing/operations.py +++ /dev/null @@ -1,283 +0,0 @@ -import numpy as np -import math -import random -import os - -from PIL import Image, ImageOps, ImageEnhance - -class Operation(object): - """ - Base class of all operations. - """ - def __init__(self, prob, magnitude): - self.prob = prob - self.magnitude = magnitude - - def __str__(self): - return self.__class__.__name__ - - def __call__(self, image): - raise NotImplementedError("Need to instantiate a subclass of this class!") - -class Equalize(Operation): - """ - Equalize the image histogram. - """ - def __init__(self, prob, magnitude): - super(Equalize, self).__init__(prob, None) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - return ImageOps.equalize(image) - -class Invert(Operation): - """ - Invert the pixels of the image. - """ - def __init__(self, prob, magnitude): - super(Invert, self).__init__(prob, None) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - return ImageOps.invert(image) - -class AutoContrast(Operation): - """ - Maximize the image contrast, by making the darkest pixel black and - the lightest pixel white. - """ - def __init__(self, prob, magnitude): - super(AutoContrast, self).__init__(prob, None) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - return ImageOps.autocontrast(image) - -class Posterize(Operation): - """ - Reduce the number of bits for each pixel magnitude bits. 
- """ - def __init__(self, prob, magnitude): - super(Posterize, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(4, 8, 10) - bits = int(round(magnitude_range[self.magnitude])) - return ImageOps.posterize(image, bits) - -class Solarize(Operation): - """ - Invert all pixels above a threshold value of magnitude. - """ - def __init__(self, prob, magnitude): - super(Solarize, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(0, 256, 10) - threshold = magnitude_range[self.magnitude] - return ImageOps.solarize(image, threshold) - -class Contrast(Operation): - """ - Control the contrast of the image. - A magnitude=0 gives a gray image, - whereas magnitude=1 gives the original image. - """ - def __init__(self, prob, magnitude): - super(Contrast, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(0.1, 1.9, 10) - factor = magnitude_range[self.magnitude] - enhancer = ImageEnhance.Contrast(image) - return enhancer.enhance(factor) - -class Color(Operation): - """ - Adjust the color balance of the image, - in a manner similar to the controls on a colour TV set. - A magnitude=0 gives a black & white image, - whereas magnitude=1 gives the original image. - """ - def __init__(self, prob, magnitude): - super(Color, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(0.1, 1.9, 10) - factor = magnitude_range[self.magnitude] - enhancer = ImageEnhance.Color(image) - return enhancer.enhance(factor) - -class Brightness(Operation): - """ - Adjust the brightness of the image. - A magnitude=0 gives a black image, - whereas magnitude=1 gives the original image. - """ - def __init__(self, prob, magnitude): - super(Brightness, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(0.1, 1.9, 10) - factor = magnitude_range[self.magnitude] - enhancer = ImageEnhance.Brightness(image) - return enhancer.enhance(factor) - -class Sharpness(Operation): - """ - Adjust the sharpness of the image. - A magnitude=0 gives a blurred image, - whereas magnitude=1 gives the original image. - """ - def __init__(self, prob, magnitude): - super(Sharpness, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(0.1, 1.9, 10) - factor = magnitude_range[self.magnitude] - enhancer = ImageEnhance.Sharpness(image) - return enhancer.enhance(factor) - -class Rotate(Operation): - """ - Rotate the image magnitude degrees. - """ - def __init(self, prob, magnitude): - super(Rotate, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(-30, 30, 10) - degrees = magnitude_range[self.magnitude] - return image.rotate(degrees, expand=False, resample=Image.BICUBIC) - -class TranslateX(Operation): - """ - Translate the image in the horizontal axis - direction by magnitude number of pixels. 
- """ - def __init__(self, prob, magnitude): - super(TranslateX, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(-15, 15, 10) - pixels = magnitude_range[self.magnitude] - return image.transform(image.size, Image.AFFINE, (1, 0, pixels, 0, 1, 0)) - -class TranslateY(Operation): - """ - Translate the image in the vertical axis - direction by magnitude number of pixels. - """ - def __init__(self, prob, magnitude): - super(TranslateY, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(-15, 15, 10) - pixels = magnitude_range[self.magnitude] - return image.transform(image.size, Image.AFFINE, (1, 0, 0, 0, 1, pixels)) - - -class ShearX(Operation): - """ - Shear image along horizontal axis with rate magnitude. - """ - def __init__(self, prob, magnitude): - super(ShearX, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(-0.3, 0.3, 10) - rate = magnitude_range[self.magnitude] - - w, h = image.size - - phi = math.tan(abs(rate)) - shift_in_pixels = phi * h - matrix_offset = shift_in_pixels - if rate <= 0: - matrix_offset = 0 - phi = -1 * phi - - transform_matrix = (1, phi, -matrix_offset, 0, 1, 0) - - image = image.transform((int(round(w + shift_in_pixels)), h), - Image.AFFINE, - transform_matrix) - - if rate <= 0: - image = image.crop((0, 0, w, h)) - else: - image = image.crop((abs(shift_in_pixels), 0, w + abs(shift_in_pixels), h)) - - return image - -class ShearY(Operation): - """ - Shear image along vertical axis with rate magnitude. - """ - def __init__(self, prob, magnitude): - super(ShearY, self).__init__(prob, magnitude) - - def __call__(self, image): - if random.uniform(0, 1) > self.prob: - return image - else: - magnitude_range = np.linspace(-0.3, 0.3, 10) - rate = magnitude_range[self.magnitude] - - w, h = image.size - - phi = math.tan(abs(rate)) - shift_in_pixels = phi * h - matrix_offset = shift_in_pixels - if rate <= 0: - matrix_offset = 0 - phi = -1 * phi - - transform_matrix = (1, 0, 0, phi, 1, -matrix_offset) - - image = image.transform((w, int(round(h + shift_in_pixels))), - Image.AFFINE, - transform_matrix) - - if rate <= 0: - image = image.crop((0, 0, w, h)) - else: - image = image.crop((0, abs(shift_in_pixels), w, h + abs(shift_in_pixels))) - - return image diff --git a/autoPyTorch/components/preprocessing/image_preprocessing/transforms.py b/autoPyTorch/components/preprocessing/image_preprocessing/transforms.py deleted file mode 100644 index 11effac21..000000000 --- a/autoPyTorch/components/preprocessing/image_preprocessing/transforms.py +++ /dev/null @@ -1,177 +0,0 @@ -from __future__ import absolute_import - -from torchvision.transforms import * -from .augmentation_transforms import * - -import random -import math -import torch -import numpy as np - -from .operations import * - - -class RandomErasing(object): - """ - Class that performs Random Erasing in Random Erasing Data Augmentation by Zhong et al. - - Args: - probability: The probability that the operation will be performed. 
- sl: min erasing area - sh: max erasing area - r1: min aspect ratio - mean: erasing value - """ - - def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.4914, 0.4822, 0.4465]): - self.probability = probability - self.sl = sl - self.sh = sh - self.r1 = r1 - self.mean = mean - - def __call__(self, img): - if random.uniform(0, 1) > self.probability: - return img - - for attempt in range(100): - area = img.size()[1] * img.size()[2] - - target_area = random.uniform(self.sl, self.sh) * area - aspect_ratio = random.uniform(self.r1, 1/self.r1) - - h = int(round(math.sqrt(target_area * aspect_ratio))) - w = int(round(math.sqrt(target_area / aspect_ratio))) - - if w < img.size()[2] and h < img.size()[1]: - x1 = random.randint(0, img.size()[1] - h) - y1 = random.randint(0, img.size()[2] - w) - if img.size()[0] == 3: - img[0, x1:x1+h, y1:y1+w] = self.mean[0] - img[1, x1:x1+h, y1:y1+w] = self.mean[1] - img[2, x1:x1+h, y1:y1+w] = self.mean[2] - else: - img[0, x1:x1+h, y1:y1+w] = self.mean[1] - return img - - return img - - -class Cutout(object): - """ - Randomly mask out one or more patches from an image. - Args: - n_holes (int): Number of patches to cut out of each image. - length (int): The length (in pixels) of each square patch. - """ - def __init__(self, n_holes, length, probability): - self.n_holes = n_holes - self.length = length - self.probability = probability - - def __call__(self, img): - """ - Args: - img (Tensor): Tensor image of size (C, H, W). - Returns: - Tensor: Image with n_holes of dimension length x length cut out of it. - """ - if random.uniform(0, 1) > self.probability: - return img - - h = img.size(1) - w = img.size(2) - - mask = np.ones((h, w), np.float32) - - for n in range(self.n_holes): - y = np.random.randint(h) - x = np.random.randint(w) - - y1 = int(np.clip(y - self.length / 2, 0, h)) - y2 = int(np.clip(y + self.length / 2, 0, h)) - x1 = int(np.clip(x - self.length / 2, 0, w)) - x2 = int(np.clip(x + self.length / 2, 0, w)) - - mask[y1: y2, x1: x2] = 0. - - mask = torch.from_numpy(mask) - mask = mask.expand_as(img) - img = img * mask - - return img - - -class AutoAugment(object): - - def __init__(self): - pass - - def __call__(self, img): - """ - Args: - img (Tensor): Tensor image of size (C, H, W). 
- """ - - # - # ImageNet policies proposed in https://arxiv.org/abs/1805.09501 - # - policies = [ - [('Posterize', 0.4, 8), ('Rotate', 0.6,9)], - [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], - [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], - [('Posterize', 0.6, 7), ('Posterize', 0.6, 3)], - [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], - [('Equalize', 0.4, 4), ('Rotate', 0.8, 8)], - [('Solarize', 0.6, 3), ('Equalize', 0.6, 7)], - [('Posterize', 0.8, 5), ('Equalize', 1.0, 2)], - [('Rotate', 0.2, 3), ('Solarize', 0.6, 8)], - [('Equalize', 0.6, 8), ('Posterize', 0.4, 6)], - [('Rotate', 0.8, 8), ('Color', 0.4, 0)], - [('Rotate', 0.4, 9), ('Equalize', 0.6, 2)], - [('Equalize', 0.0, 7), ('Equalize', 0.8, 8)], - [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], - [('Color', 0.6, 4), ('Contrast', 1.0, 8)], - [('Rotate', 0.8, 8), ('Color', 1.0, 2)], - [('Color', 0.8, 8), ('Solarize', 0.8, 7)], - [('Sharpness', 0.4, 7), ('Invert', 0.6, 8)], - [('ShearX', 0.6, 5), ('Equalize', 1.0, 9)], - [('Color', 0.4, 0), ('Equalize', 0.6, 3)], - [('Equalize', 0.4, 7), ('Solarize', 0.2, 4)], - [('Solarize', 0.6, 5), ('AutoContrast', 0.6, 5)], - [('Invert', 0.6, 4), ('Equalize', 1.0, 8)], - [('Color', 0.6, 4), ('Contrast', 1.0, 8)], - [('Equalize', 0.8, 8), ('Equalize', 0.6, 3)], - ] - - policy = random.choice(policies) - - img = apply_policy(policy, img) - - return img.convert('RGB') - - -class FastAutoAugment(object): - - # - # ImageNet policies proposed in https://arxiv.org/abs/1905.00397 - # - - - def __init__(self): - - from .archive import fa_reduced_cifar10 - - self.policies = fa_reduced_cifar10() - - def __call__(self, img): - """ - Args: - img (Tensor): Tensor image of size (C, H, W). - """ - - policy = random.choice(self.policies) - - img = apply_policy(policy, img) - - return img.convert('RGB') diff --git a/autoPyTorch/components/preprocessing/loss_weight_strategies.py b/autoPyTorch/components/preprocessing/loss_weight_strategies.py deleted file mode 100644 index cddce005a..000000000 --- a/autoPyTorch/components/preprocessing/loss_weight_strategies.py +++ /dev/null @@ -1,34 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -import numpy as np - - -class LossWeightStrategyWeighted(): - def __call__(self, pipeline_config, X, Y): - - counts = np.sum(Y, axis=0) - total_weight = Y.shape[0] - - if len(Y.shape) > 1: - weight_per_class = total_weight / Y.shape[1] - weights = (np.ones(Y.shape[1]) * weight_per_class) / np.maximum(counts, 1) - else: - classes, counts = np.unique(Y, axis=0, return_counts=True) - classes, counts = classes[::-1], counts[::-1] - weight_per_class = total_weight / classes.shape[0] - weights = (np.ones(classes.shape[0]) * weight_per_class) / counts - - return weights - -class LossWeightStrategyWeightedBinary(): - def __call__(self, pipeline_config, X, Y): - - counts_one = np.sum(Y, axis=0) - counts_zero = counts_one + (-Y.shape[0]) - weights = counts_zero / np.maximum(counts_one, 1) - - return weights - diff --git a/autoPyTorch/components/preprocessing/preprocessor_base.py b/autoPyTorch/components/preprocessing/preprocessor_base.py deleted file mode 100644 index 30725f043..000000000 --- a/autoPyTorch/components/preprocessing/preprocessor_base.py +++ /dev/null @@ -1,34 +0,0 @@ -import ConfigSpace - -class PreprocessorBase(): - def __init__(self, hyperparameter_config): - pass - - - def fit(self, X, Y): - """Fit preprocessor with X and Y. 
- - Arguments: - X {tensor} -- feature matrix - Y {tensor} -- labels - """ - pass - - def transform(self, X, **kwargs): - """Preprocess X - - Arguments: - X {tensor} -- feature matrix - - Returns: - X -- preprocessed X - """ - - return X - - @staticmethod - def get_hyperparameter_search_space(dataset_info=None): - return ConfigSpace.ConfigurationSpace() - - - \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/resampling/__init__.py b/autoPyTorch/components/preprocessing/resampling/__init__.py deleted file mode 100644 index a678487a1..000000000 --- a/autoPyTorch/components/preprocessing/resampling/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from autoPyTorch.components.preprocessing.resampling.random import (RandomOverSamplingWithReplacement, - RandomUnderSamplingWithReplacement) -from autoPyTorch.components.preprocessing.resampling.smote import SMOTE -from autoPyTorch.components.preprocessing.resampling.target_size_strategies import (TargetSizeStrategyAverageSample, - TargetSizeStrategyDownsample, - TargetSizeStrategyMedianSample, - TargetSizeStrategyUpsample) \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/resampling/random.py b/autoPyTorch/components/preprocessing/resampling/random.py deleted file mode 100644 index 2e28089e9..000000000 --- a/autoPyTorch/components/preprocessing/resampling/random.py +++ /dev/null @@ -1,14 +0,0 @@ -from autoPyTorch.components.preprocessing.resampling_base import ResamplingMethodBase - -class RandomOverSamplingWithReplacement(ResamplingMethodBase): - def resample(self, X, y, target_size_strategy, seed): - from imblearn.over_sampling import RandomOverSampler as imblearn_RandomOverSampler - resampler = imblearn_RandomOverSampler(sampling_strategy=target_size_strategy, random_state=seed) - return resampler.fit_resample(X, y) - - -class RandomUnderSamplingWithReplacement(ResamplingMethodBase): - def resample(self, X, y, target_size_strategy, seed): - from imblearn.under_sampling import RandomUnderSampler as imblearn_RandomUnderSampler - resampler = imblearn_RandomUnderSampler(sampling_strategy=target_size_strategy, random_state=seed) - return resampler.fit_resample(X, y) \ No newline at end of file diff --git a/autoPyTorch/components/preprocessing/resampling/smote.py b/autoPyTorch/components/preprocessing/resampling/smote.py deleted file mode 100644 index 8e05c7c22..000000000 --- a/autoPyTorch/components/preprocessing/resampling/smote.py +++ /dev/null @@ -1,23 +0,0 @@ -from autoPyTorch.components.preprocessing.resampling_base import ResamplingMethodBase -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter, get_hyperparameter -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -class SMOTE(ResamplingMethodBase): - def __init__(self, hyperparameter_config): - self.k_neighbors = hyperparameter_config["k_neighbors"] - - def resample(self, X, y, target_size_strategy, seed): - from imblearn.over_sampling import SMOTE as imblearn_SMOTE - k_neighbors = self.k_neighbors - resampler = imblearn_SMOTE(sampling_strategy=target_size_strategy, k_neighbors=k_neighbors, random_state=seed) - return resampler.fit_resample(X, y) - - @staticmethod - def get_hyperparameter_search_space( - k_neighbors=(3, 7) - ): - k_neighbors = get_hyperparameter(CSH.UniformIntegerHyperparameter, "k_neighbors", k_neighbors) - cs = ConfigSpace.ConfigurationSpace() - cs.add_hyperparameter(k_neighbors) - return cs \ No newline at end of file diff --git 
a/autoPyTorch/components/preprocessing/resampling/target_size_strategies.py b/autoPyTorch/components/preprocessing/resampling/target_size_strategies.py deleted file mode 100644 index 5a7d3fc5e..000000000 --- a/autoPyTorch/components/preprocessing/resampling/target_size_strategies.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np -from autoPyTorch.components.preprocessing.resampling_base import TargetSizeStrategyBase - -class TargetSizeStrategyUpsample(TargetSizeStrategyBase): - def get_target_size(self, targets, counts): - return int(np.max(counts)) - -class TargetSizeStrategyDownsample(TargetSizeStrategyBase): - def get_target_size(self, targets, counts): - return int(np.min(counts)) - -class TargetSizeStrategyAverageSample(TargetSizeStrategyBase): - def get_target_size(self, targets, counts): - return int(np.average(counts)) - -class TargetSizeStrategyMedianSample(TargetSizeStrategyBase): - def get_target_size(self, targets, counts): - return int(np.median(counts)) diff --git a/autoPyTorch/components/preprocessing/resampling_base.py b/autoPyTorch/components/preprocessing/resampling_base.py deleted file mode 100644 index 2d4779d58..000000000 --- a/autoPyTorch/components/preprocessing/resampling_base.py +++ /dev/null @@ -1,49 +0,0 @@ -import numpy as np -import ConfigSpace - -class TargetSizeStrategyBase(): - def over_sample_strategy(self, y): - result = dict() - targets, counts = np.unique(y, return_counts=True) - target_size = self.get_target_size(targets, counts) - for target, count in zip(targets, counts): - if target_size > count: - result[target] = target_size - return result - - def under_sample_strategy(self, y): - result = dict() - targets, counts = np.unique(y, return_counts=True) - target_size = self.get_target_size(targets, counts) - for target, count in zip(targets, counts): - if target_size < count: - result[target] = target_size - return result - - def get_target_size(self, targets, counts): - raise NotImplementedError() - - -class ResamplingMethodBase(): - def __init__(self, hyperparameter_config): - pass - - def resample(self, X, y, target_size_strategy, seed): - """Fit preprocessor with X and y. - - Arguments: - X {tensor} -- feature matrix - y {tensor} -- labels - target_size_strategy {dict} -- determine target size for each label - """ - raise NotImplementedError() - - @staticmethod - def get_hyperparameter_search_space(): - cs = ConfigSpace.ConfigurationSpace() - return cs - - -class ResamplingMethodNone(ResamplingMethodBase): - def resample(self, X, y, target_size_strategy, seed): - return X, y \ No newline at end of file diff --git a/autoPyTorch/components/regularization/mixup.py b/autoPyTorch/components/regularization/mixup.py deleted file mode 100644 index 884667d2a..000000000 --- a/autoPyTorch/components/regularization/mixup.py +++ /dev/null @@ -1,32 +0,0 @@ -from autoPyTorch.components.training.base_training import BaseBatchLossComputationTechnique -from autoPyTorch.utils.config_space_hyperparameter import add_hyperparameter -import numpy as np -from torch.autograd import Variable -import ConfigSpace -import torch - -class Mixup(BaseBatchLossComputationTechnique): - def set_up(self, pipeline_config, hyperparameter_config, logger): - super(Mixup, self).set_up(pipeline_config, hyperparameter_config, logger) - self.alpha = hyperparameter_config["alpha"] - - def prepare_data(self, x, y): - - lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0. else 1. 
- batch_size = x.size()[0] - index = torch.randperm(batch_size).cuda() if x.is_cuda else torch.randperm(batch_size) - - mixed_x = lam * x + (1 - lam) * x[index, :] - y_a, y_b = y, y[index] - return mixed_x, { 'y_a': y_a, 'y_b': y_b, 'lam' : lam } - - def criterion(self, y_a, y_b, lam): - return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b) - - @staticmethod - def get_hyperparameter_search_space( - alpha=(0, 1) - ): - cs = ConfigSpace.ConfigurationSpace() - add_hyperparameter(cs, ConfigSpace.hyperparameters.UniformFloatHyperparameter, "alpha", alpha) - return cs \ No newline at end of file diff --git a/autoPyTorch/components/regularization/shake.py b/autoPyTorch/components/regularization/shake.py deleted file mode 100644 index 45f74431a..000000000 --- a/autoPyTorch/components/regularization/shake.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Functions for shake-shake and shake-drop regularization. -""" - -import torch -import random -from torch.autograd import Function - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class ShakeShakeFunction(Function): - @staticmethod - def forward(ctx, x1, x2, alpha, beta): - ctx.save_for_backward(x1, x2, alpha, beta) - - y = x1 * alpha + x2 * (1 - alpha) - return y - - @staticmethod - def backward(ctx, grad_output): - x1, x2, alpha, beta = ctx.saved_variables - grad_x1 = grad_x2 = grad_alpha = grad_beta = None - - if ctx.needs_input_grad[0]: - grad_x1 = grad_output * beta - if ctx.needs_input_grad[1]: - grad_x2 = grad_output * (1 - beta) - - return grad_x1, grad_x2, grad_alpha, grad_beta -shake_shake = ShakeShakeFunction.apply - - -class ShakeDropFunction(Function): - @staticmethod - def forward(ctx, x, alpha, beta, bl): - ctx.save_for_backward(x, alpha, beta, bl) - - y = (bl + alpha - bl * alpha ) * x - return y - - @staticmethod - def backward(ctx, grad_output): - x, alpha, beta, bl = ctx.saved_variables - grad_x = grad_alpha = grad_beta = grad_bl = None - - if ctx.needs_input_grad[0]: - grad_x = grad_output * (bl + beta - bl * beta) - - return grad_x, grad_alpha, grad_beta, grad_bl -shake_drop = ShakeDropFunction.apply - -def shake_get_alpha_beta(is_training, is_cuda): - if is_training: - result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) - return result if not is_cuda else (result[0].cuda(), result[1].cuda()) - - # TODO implement other update methods - alpha = torch.rand(1) - beta = torch.rand(1) - - if is_cuda: - alpha = alpha.cuda() - beta = beta.cuda() - - return alpha, beta - -def shake_drop_get_bl(block_index, min_prob_no_shake, num_blocks, is_training, is_cuda): - pl = 1 - ((block_index + 1)/ num_blocks) * (1 - min_prob_no_shake) - - if not is_training: - bl = torch.tensor(1.0) if random.random() <= pl else torch.tensor(0.0) - if is_training: - bl = torch.tensor(pl) - - if is_cuda: - bl = bl.cuda() - - return bl diff --git a/autoPyTorch/components/training/base_training.py b/autoPyTorch/components/training/base_training.py deleted file mode 100644 index 40b9ddc1c..000000000 --- a/autoPyTorch/components/training/base_training.py +++ /dev/null @@ -1,159 +0,0 @@ -import ConfigSpace - -class BaseTrainingTechnique(): - def __init__(self, training_components=None): - """Initialize the training technique. Should be called in a fit Method of a Pipeline node. 
- - Keyword Arguments: - training_components {dict} -- Maps a names to a training components necessary for this training technique (default: {None}) - """ - self.training_components = training_components or dict() - - # VIRTUAL - def set_up(self, trainer, pipeline_config): - """Set up the training component - - Arguments: - trainer {Trainer} -- The trainer object used for training. - pipeline_config {dict} -- Configuration of the Pipeline. - logger {Logger} -- Logger. - """ - - pass - - # VIRTUAL - def on_epoch_start(self, trainer, log, epoch): - """Function that gets called before the train_batches method of each epoch in training. - - Arguments: - trainer {Trainer} -- The trainer object used for training. - log {dict} -- The log of the current epoch. - epoch {int} -- The current epoch of training. - """ - - pass - - # VIRTUAL - def on_epoch_end(self, trainer, log, epoch): - """Function that gets called after the train_batches method of each epoch in training. - Is able to stop training by returning True. - - Arguments: - trainer {Trainer} -- The trainer object used for training. - log {dict} -- The log of the current epoch. - epoch {int} -- The current epoch of training. - - Returns: - bool -- If training should be stopped. - """ - - return False - - # VIRTUAL - def on_batch_start(self, trainer, epoch, step, num_steps): - """Function that gets called in the train_batches method of training. - Is able to cancel the current epoch by returning True. - - Arguments: - batch_loss {tensor} -- The batch loss of the current batch. - trainer {Trainer} -- The trainer object used for training - - Returns: - bool -- If the current epoch should be canceled. - """ - - return False - - # VIRTUAL - def on_batch_end(self, batch_loss, trainer, epoch, step, num_steps): - """Function that gets called in the train_batches method of training. - Is able to cancel the current epoch by returning True. - - Arguments: - batch_loss {tensor} -- The batch loss of the current batch. - trainer {Trainer} -- The trainer object used for training - - Returns: - bool -- If the current epoch should be canceled. - """ - - return False - - # VIRTUAL - def select_log(self, logs, trainer): - """Select one log from the list of all epoch logs. - - Arguments: - logs {list} -- A list of log. For each epoch of training there is one entry. - trainer {Trainer} -- The trainer object used for training - - Returns: - log -- The selected log. Return None if undecided. - """ - - return False - - # VIRTUAL - def requires_eval_each_epoch(self): - """ Specify if the training technique needs the network to be evaluated on a snapshot after training. - - Return: - bool -- If the training technique needs the network to be evaluated on a snapshot after training - """ - - return False - - - # VIRTUAL - @staticmethod - def get_pipeline_config_options(): - """Return a list of ConfigOption used for this training technique. - - Returns: - list -- A list of ConfigOptions. - """ - - return [] - - -class BaseBatchLossComputationTechnique(): - - # VIRTUAL - def set_up(self, pipeline_config, hyperparameter_config, logger): - """Initialize the batch loss computation technique. - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline. - hyperparameter_config {dict} -- The hyperparameter config sampled by BOHB. - logger {Logger} -- Logger. - """ - self.logger = logger - - # VIRTUAL - def prepare_data(self, X_batch, y_batch): - """Method that gets called, before batch is but into network. 
- - Arguments: - X_batch {tensor} -- The features of the batch. - y_batch {tensor} -- The targets of the batch. - """ - - return X_batch, {'y_batch' : y_batch} - - # VIRTUAL - def criterion(self, y_batch): - return lambda criterion, pred: criterion(pred, y_batch) - - # VIRTUAL - @staticmethod - def get_hyperparameter_search_space(**pipeline_config): - """Get the hyperparameter config space for this technique. - - Returns: - ConfigurationSpace -- The hyperparameter config space for this technique - """ - - return ConfigSpace.ConfigurationSpace() - - - \ No newline at end of file diff --git a/autoPyTorch/components/training/budget_types.py b/autoPyTorch/components/training/budget_types.py deleted file mode 100644 index 5e21c680e..000000000 --- a/autoPyTorch/components/training/budget_types.py +++ /dev/null @@ -1,79 +0,0 @@ -from autoPyTorch.components.training.base_training import BaseTrainingTechnique -import time - -class BudgetTypeTime(BaseTrainingTechnique): - default_min_budget = 120 - default_max_budget = 6000 - compensate = 10 # will be modified by cv - - # OVERRIDE - def set_up(self, trainer, pipeline_config, **kwargs): - super(BudgetTypeTime, self).set_up(trainer, pipeline_config) - self.end_time = trainer.budget - self.compensate + trainer.fit_start_time - self.start_time = time.time() - - if self.start_time >= self.end_time: - raise Exception("Budget exhausted before training started") - - # OVERRIDE - def on_batch_end(self, **kwargs): - return time.time() >= self.end_time - - # OVERRIDE - def on_epoch_end(self, trainer, **kwargs): - elapsed = time.time() - trainer.fit_start_time - trainer.model.budget_trained = elapsed - trainer.logger.debug("Budget used: " + str(elapsed) + "/" + str(trainer.budget - self.compensate)) - - if time.time() >= self.end_time: - trainer.logger.debug("Budget exhausted!") - return True - return False - -class BudgetTypeEpochs(BaseTrainingTechnique): - default_min_budget = 5 - default_max_budget = 150 - - # OVERRIDE - def set_up(self, trainer, pipeline_config, **kwargs): - super(BudgetTypeEpochs, self).set_up(trainer, pipeline_config) - self.target = trainer.budget - - # OVERRIDE - def on_epoch_end(self, trainer, epoch, **kwargs): - trainer.model.budget_trained = epoch - trainer.logger.debug("Budget used: " + str(epoch) + "/" + str(self.target)) - - if epoch >= self.target: - trainer.logger.debug("Budget exhausted!") - return True - return False - -class BudgetTypeTrainingTime(BaseTrainingTechnique): - default_min_budget = 120 - default_max_budget = 6000 - - # OVERRIDE - def set_up(self, trainer, pipeline_config, **kwargs): - super(BudgetTypeTrainingTime, self).set_up(trainer, pipeline_config) - self.end_time = trainer.budget + time.time() - self.start_time = time.time() - - if self.start_time >= self.end_time: - raise Exception("Budget exhausted before training started") - - # OVERRIDE - def on_batch_end(self, **kwargs): - return time.time() >= self.end_time - - # OVERRIDE - def on_epoch_end(self, trainer, **kwargs): - elapsed = time.time() - self.start_time - trainer.model.budget_trained = elapsed - trainer.logger.debug("Budget used: " + str(elapsed) + - "/" + str(self.end_time - self.start_time)) - - if time.time() >= self.end_time: - trainer.logger.debug("Budget exhausted!") - return True - return False diff --git a/autoPyTorch/components/training/early_stopping.py b/autoPyTorch/components/training/early_stopping.py deleted file mode 100644 index aa3e40759..000000000 --- a/autoPyTorch/components/training/early_stopping.py +++ /dev/null @@ -1,81 +0,0 
@@ -from autoPyTorch.components.training.base_training import BaseTrainingTechnique -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool - -class EarlyStopping(BaseTrainingTechnique): - """ Stop training when there is no improvement on the validation set for a specified number of epochs. - Is able to take a snapshot of the parameters, where the performance of the validation set is best. - There is no further split of the data. Therefore the validation performance reported to BOHB will become an optimistic estimator. - """ - - # OVERRIDE - def set_up(self, trainer, pipeline_config, **kwargs): - super(EarlyStopping, self).set_up(trainer, pipeline_config) - self.reset_parameters = pipeline_config["early_stopping_reset_parameters"] - self.patience = pipeline_config["early_stopping_patience"] - self.loss_transform = trainer.metrics[0].loss_transform - - # does not work with e.g. cosine anealing with warm restarts - if hasattr(trainer, "lr_scheduler") and not trainer.lr_scheduler.allows_early_stopping: - self.patience = float("inf") - - # initialize current best performance to +/- infinity - if trainer.model.current_best_epoch_performance is None: - trainer.model.current_best_epoch_performance = float("inf") - - trainer.logger.debug("Using Early stopping with patience: " + str(self.patience)) - trainer.logger.debug("Reset Parameters to parameters with best validation performance: " + str(self.reset_parameters)) - - # OVERRIDE - def on_epoch_end(self, trainer, log, **kwargs): - if "val_" + trainer.metrics[0] not in log: - if self.patience < float("inf"): - trainer.logger.debug("No Early stopping because no validation set performance available") - return False - if self.reset_parameters and (not hasattr(trainer, "lr_scheduler") or not trainer.lr_scheduler.snapshot_before_restart): - log["best_parameters"] = False - current_performance = self.loss_transform(log["val_" + trainer.metrics[0]]) - - # new best performance - if current_performance < trainer.model.current_best_epoch_performance: - trainer.model.num_epochs_no_progress = 0 - trainer.model.current_best_epoch_performance = current_performance - trainer.logger.debug("New best performance!") - - if self.reset_parameters and (not hasattr(trainer, "lr_scheduler") or not trainer.lr_scheduler.snapshot_before_restart): - trainer.logger.debug("Early stopping takes snapshot of current parameters") - log["best_parameters"] = True - trainer.model.snapshot() - - # do early stopping - elif trainer.model.num_epochs_no_progress > self.patience: - trainer.logger.debug("Early stopping patience exhausted. 
Stopping Early!") - trainer.model.stopped_early = True - return True - - # no improvement - else: - trainer.logger.debug("No improvement") - trainer.model.num_epochs_no_progress += 1 - return False - - # OVERRIDE - def select_log(self, logs, trainer, **kwargs): - # select the log where a snapshot has been taken - if self.reset_parameters and (not hasattr(trainer, "lr_scheduler") or not trainer.lr_scheduler.snapshot_before_restart): - trainer.logger.debug("Using logs of parameters with best validation performance") - logs = [log for log in logs if log["best_parameters"]] or logs - logs = logs[-1] - return logs - return False - - def requires_eval_each_epoch(self): - return self.reset_parameters or self.patience < float("inf") - - # OVERRIDE - @staticmethod - def get_pipeline_config_options(): - options = [ - ConfigOption("early_stopping_patience", default=float("inf"), type=float), - ConfigOption("early_stopping_reset_parameters", default=False, type=to_bool) - ] - return options \ No newline at end of file diff --git a/autoPyTorch/components/training/image/base_training.py b/autoPyTorch/components/training/image/base_training.py deleted file mode 100644 index 637a6396f..000000000 --- a/autoPyTorch/components/training/image/base_training.py +++ /dev/null @@ -1,172 +0,0 @@ -import ConfigSpace -from torch.autograd import Variable - -class BaseTrainingTechnique(): - def __init__(self, training_components=None): - """Initialize the training technique. Should be called in a fit Method of a Pipeline node. - - Keyword Arguments: - training_components {dict} -- Maps a names to a training components necessary for this training technique (default: {None}) - """ - - self.training_components = training_components or dict() - - # VIRTUAL - def set_up(self, training_components, pipeline_config, logger): - """Set up the training component - - Arguments: - training_components {dict} -- All training components of training. - pipeline_config {dict} -- Configuration of the Pipeline. - logger {Logger} -- Logger. - """ - - self.logger = logger - - # VIRTUAL - def before_train_batches(self, training_components, log, epoch): - """Function that gets called before the train_batches method of each epoch in training. - - Arguments: - training_components {dict} -- All training components used in training. - log {dict} -- The log of the current epoch. - epoch {int} -- The current epoch of training. - """ - - pass - - # VIRTUAL - def after_train_batches(self, training_components, log, epoch): - """Function that gets called after the train_batches method of each epoch in training. - Is able to stop training by returning True. - - Arguments: - training_components {dict} -- All training components used in training. - log {dict} -- The log of the current epoch. - epoch {int} -- The current epoch of training. - - Returns: - bool -- If training should be stopped. - """ - - return False - - # VIRTUAL - def during_train_batches(self, batch_loss, training_components): - """Function that gets called in the train_batches method of training. - Is able to cancel the current epoch by returning True. - - Arguments: - batch_loss {tensor} -- The batch loss of the current batch. - training_components {dict} -- All training components used in training. - - Returns: - bool -- If the current epoch should be canceled. - """ - - return False - - # VIRTUAL - def select_log(self, logs, training_components): - """Select one log from the list of all epoch logs. - - Arguments: - logs {list} -- A list of log. 
For each epoch of training there is one entry. - training_components {dict} -- All training components used in training. - - Returns: - log -- The selected log. Return None if undecided. - """ - - return False - - # VIRTUAL - def needs_eval_on_valid_each_epoch(self): - """Specify if the training technique needs the network to be evaluated on the validation set. - - Returns: - bool -- If the network should be evaluated on the validation set. - """ - - return False - - # VIRTUAL - def needs_eval_on_train_each_epoch(self): - """Specify if the training technique needs the network to be evaluated on the training set. - - Returns: - bool -- If the network should be evaluated on the training set. - """ - - - return False - - # VIRTUAL - @staticmethod - def get_pipeline_config_options(): - """Return a list of ConfigOption used for this training technique. - - Returns: - list -- A list of ConfigOptions. - """ - - return [] - - -class BaseBatchLossComputationTechnique(): - - # VIRTUAL - def set_up(self, pipeline_config, hyperparameter_config, logger): - """Initialize the batch loss computation technique. - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline. - hyperparameter_config {dict} -- The hyperparameter config sampled by BOHB. - logger {Logger} -- Logger. - """ - self.logger = logger - - # VIRTUAL - def prepare_data(self, X_batch, y_batch): - """Method that gets called, before batch is but into network. - - Arguments: - X_batch {tensor} -- The features of the batch. - y_batch {tensor} -- The targets of the batch. - """ - - return X_batch, {'y_batch' : y_batch} - - # VIRTUAL - def criterion(self, y_batch): - return lambda criterion, pred: criterion(pred, y_batch) - - # VIRTUAL - def evaluate(self, metric, y_pred, y_batch): - return metric(y_pred, y_batch) - - - # VIRTUAL - @staticmethod - def get_pipeline_config_options(): - """A list of ConfigOptions used for this technique. - - Returns: - list -- A list of ConfigOptions for this technique. - """ - - return [] - - # VIRTUAL - @staticmethod - def get_hyperparameter_search_space(**pipeline_config): - """Get the hyperparameter config space for this technique. 
- - Returns: - ConfigurationSpace -- The hyperparameter config space for this technique - """ - - return ConfigSpace.ConfigurationSpace() - - - \ No newline at end of file diff --git a/autoPyTorch/components/training/image/budget_types.py b/autoPyTorch/components/training/image/budget_types.py deleted file mode 100644 index ee7becba6..000000000 --- a/autoPyTorch/components/training/image/budget_types.py +++ /dev/null @@ -1,50 +0,0 @@ -from autoPyTorch.components.training.image.base_training import BaseTrainingTechnique -import time - -class BudgetTypeTime(BaseTrainingTechnique): - default_min_budget = 120 - default_max_budget = 6000 - compensate = 10 # will be modified by cv - - # OVERRIDE - def set_up(self, training_components, pipeline_config, logger): - super(BudgetTypeTime, self).set_up(training_components, pipeline_config, logger) - self.end_time = training_components["budget"] - self.compensate + training_components["fit_start_time"] - self.start_time = time.time() - - if self.start_time >= self.end_time: - raise Exception("Budget exhausted before training started") - - # OVERRIDE - def during_train_batches(self, batch_loss, training_components): - return time.time() >= self.end_time - - # OVERRIDE - def after_train_batches(self, training_components, log, epoch): - elapsed = time.time() - self.start_time - training_components["network"].budget_trained = elapsed - self.logger.debug("Budget used: " + str(elapsed) + "/" + str(self.end_time - self.start_time)) - - if time.time() >= self.end_time: - self.logger.debug("Budget exhausted!") - return True - return False - -class BudgetTypeEpochs(BaseTrainingTechnique): - default_min_budget = 5 - default_max_budget = 150 - - # OVERRIDE - def set_up(self, training_components, pipeline_config, logger): - super(BudgetTypeEpochs, self).set_up(training_components, pipeline_config, logger) - self.target = training_components["budget"] - - # OVERRIDE - def after_train_batches(self, training_components, log, epoch): - training_components["network"].budget_trained = epoch - self.logger.debug("Budget used: " + str(epoch) + "/" + str(self.target)) - - if epoch >= self.target: - self.logger.debug("Budget exhausted!") - return True - return False diff --git a/autoPyTorch/components/training/image/checkpoints/load_specific.py b/autoPyTorch/components/training/image/checkpoints/load_specific.py deleted file mode 100644 index 493d71119..000000000 --- a/autoPyTorch/components/training/image/checkpoints/load_specific.py +++ /dev/null @@ -1,58 +0,0 @@ - -import os -import math -import torch -import torch.nn as nn - - -import logging - - -def load_model(model, checkpoint): - - if checkpoint is None: - return model - - pretrained_state = checkpoint['state'] - model_state = model.state_dict() - - pretrained_state = { k:v for k,v in pretrained_state.items() if k in model_state and v.size() == model_state[k].size() } - logging.getLogger('autonet').debug('=> Resuming model using ' + str(len(pretrained_state.keys())) + '/' + str(len(model_state.keys())) + ' parameters') - model_state.update(pretrained_state) - model.load_state_dict(model_state) - - return model - -# def load_optimizer(optimizer, checkpoint, device): - -# if checkpoint is None: -# return optimizer - -# opti_state = optimizer.state_dict() -# pretrained_state = checkpoint['optimizer'] - -# logging.getLogger('autonet').debug(str(len(pretrained_state['state']))) -# logging.getLogger('autonet').debug(str(len(opti_state['param_groups'][0]['params']))) -# 
logging.getLogger('autonet').debug(str(len(pretrained_state['param_groups'][0]['params']))) -# logging.getLogger('autonet').debug(str(set(pretrained_state['param_groups'][0]['params']).intersection(set(opti_state['param_groups'][0]['params'])))) - - -# pretrained_state = {k: pretrained_state[k] for state in opti_state.items() for k, v in enumerate(state) if state in pretrained_state and k in pretrained_state[state] and v.size() == opti_state[state][k].size()} -# logging.getLogger('autonet').debug('=> Resuming optimizer using ' + str(len(pretrained_state.keys())) + '/' + str(len(opti_state.keys()))) -# opti_state.update(pretrained_state) -# optimizer.load_state_dict(opti_state) - -# for state in optimizer.state.values(): -# for k, v in state.items(): -# if isinstance(v, torch.Tensor): -# state[k] = v.to(device) -# return optimizer - -# def load_scheduler(scheduler, checkpoint): - -# if checkpoint is None: -# return scheduler - -# loaded_scheduler = checkpoint['scheduler'] -# loaded_scheduler.optimizer = scheduler.optimizer -# return loaded_scheduler \ No newline at end of file diff --git a/autoPyTorch/components/training/image/checkpoints/save_load.py b/autoPyTorch/components/training/image/checkpoints/save_load.py deleted file mode 100644 index 014662f1c..000000000 --- a/autoPyTorch/components/training/image/checkpoints/save_load.py +++ /dev/null @@ -1,39 +0,0 @@ -import torch -import os - -import logging - - -def get_checkpoint_name(config_id, budget): - return 'checkpoint_' + str(config_id) + '_Budget_' + str(int(budget)) + '.pt' - -def get_checkpoint_dir(working_directory): - return os.path.join(working_directory, 'checkpoints') - -def save_checkpoint(path, config_id, budget, model, optimizer, scheduler): - - name = get_checkpoint_name(config_id, budget) - os.makedirs(path, exist_ok=True) - - path = os.path.join(path, name) - - torch.save({ - 'state': model.state_dict(), - }, open(path, 'wb')) - - logging.getLogger('autonet').debug('=> Model {} saved to {}'.format(str(type(model)), path)) - return path - - -def load_checkpoint(path, config_id, budget): - name = get_checkpoint_name(config_id, budget) - - path = os.path.join(path, name) - if not os.path.exists(path): - return None - - logging.getLogger('autonet').debug('=> Loading checkpoint ' + path) - checkpoint = torch.load(path) - return checkpoint - - diff --git a/autoPyTorch/components/training/image/early_stopping.py b/autoPyTorch/components/training/image/early_stopping.py deleted file mode 100644 index 51f5f7678..000000000 --- a/autoPyTorch/components/training/image/early_stopping.py +++ /dev/null @@ -1,84 +0,0 @@ -from autoPyTorch.components.training.image.base_training import BaseTrainingTechnique -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool - -class EarlyStopping(BaseTrainingTechnique): - """ Stop training when there is no improvement on the validation set for a specified number of epochs. - Is able to take a snapshot of the parameters, where the performance of the validation set is best. - There is no further split of the data. Therefore the validation performance reported to BOHB will become an optimistic estimator. - """ - - # OVERRIDE - def set_up(self, training_components, pipeline_config, logger): - super(EarlyStopping, self).set_up(training_components, pipeline_config, logger) - self.reset_parameters = pipeline_config["early_stopping_reset_parameters"] - self.minimize = pipeline_config["minimize"] - self.patience = pipeline_config["early_stopping_patience"] - - # does not work with e.g. 
cosine anealing with warm restarts - if "lr_scheduler" in training_components and not training_components["lr_scheduler"].allows_early_stopping: - self.patience = float("inf") - - # initialize current best performance to +/- infinity - if training_components["network"].current_best_epoch_performance is None: - training_components["network"].current_best_epoch_performance = float("inf") - if not self.minimize: - training_components["network"].current_best_epoch_performance = -float("inf") - - self.logger.debug("Using Early stopping with patience: " + str(self.patience)) - self.logger.debug("Reset Parameters to parameters with best validation performance: " + str(self.reset_parameters)) - - # OVERRIDE - def after_train_batches(self, training_components, log, epoch): - if "val_" + training_components["train_metric_name"] not in log: - if self.patience < float("inf"): - self.logger.debug("No Early stopping because no validation set performance available") - return False - if self.reset_parameters and ("lr_scheduler" not in training_components or not training_components["lr_scheduler"].snapshot_before_restart): - log["best_parameters"] = False - current_performance = log["val_" + training_components["train_metric_name"]] - - # new best performance - if ((self.minimize and current_performance < training_components["network"].current_best_epoch_performance) or - (not self.minimize and current_performance > training_components["network"].current_best_epoch_performance)): - training_components["network"].num_epochs_no_progress = 0 - training_components["network"].current_best_epoch_performance = current_performance - self.logger.debug("New best performance!") - - if self.reset_parameters and ("lr_scheduler" not in training_components or not training_components["lr_scheduler"].snapshot_before_restart): - self.logger.debug("Early stopping takes snapshot of current parameters") - log["best_parameters"] = True - training_components["network"].snapshot() - - # do early stopping - elif training_components["network"].num_epochs_no_progress > self.patience: - self.logger.debug("Early stopping patience exhausted. 
Stopping Early!") - training_components["network"].stopped_early = True - return True - - # no improvement - else: - self.logger.debug("No improvement") - training_components["network"].num_epochs_no_progress += 1 - return False - - # OVERRIDE - def select_log(self, logs, training_components): - # select the log where a snapshot has been taken - if self.reset_parameters and ("lr_scheduler" not in training_components or not training_components["lr_scheduler"].snapshot_before_restart): - self.logger.debug("Using logs of parameters with best validation performance") - logs = [log for log in logs if log["best_parameters"]] or logs - logs = logs[-1] - return logs - return False - - def needs_eval_on_valid_each_epoch(self): - return self.reset_parameters or self.patience < float("inf") - - # OVERRIDE - @staticmethod - def get_pipeline_config_options(): - options = [ - ConfigOption("early_stopping_patience", default=float("inf"), type=float), - ConfigOption("early_stopping_reset_parameters", default=False, type=to_bool) - ] - return options diff --git a/autoPyTorch/components/training/image/lr_scheduling.py b/autoPyTorch/components/training/image/lr_scheduling.py deleted file mode 100644 index e207a2665..000000000 --- a/autoPyTorch/components/training/image/lr_scheduling.py +++ /dev/null @@ -1,39 +0,0 @@ -from autoPyTorch.components.training.image.base_training import BaseTrainingTechnique - -class LrScheduling(BaseTrainingTechnique): - """Schedule the learning rate with given learning rate scheduler. - The learning rate scheduler is usually set in a LrSchedulerSelector pipeline node. - """ - - # OVERRIDE - def after_train_batches(self, training_components, log, epoch): - - # do one step of lr scheduling - if callable(getattr(training_components["lr_scheduler"], "get_lr", None)): - log['lr'] = training_components["lr_scheduler"].get_lr()[0] - try: - training_components["lr_scheduler"].step(epoch=(epoch + 1), metrics=log['loss']) - except: - training_components["lr_scheduler"].step(epoch=(epoch + 1)) - self.logger.debug("Perform learning rate scheduling") - - # check if lr scheduler has converged, if possible - if not training_components["lr_scheduler"].snapshot_before_restart: - return False - training_components["lr_scheduler"].get_lr() - log["lr_scheduler_converged"] = False - if training_components["lr_scheduler"].restarted_at == (epoch + 1): - self.logger.debug("Learning rate scheduler converged. Taking Snapshot of models parameters.") - training_components["network"].snapshot() - log["lr_scheduler_converged"] = True - return False - - def select_log(self, logs, training_components): - - # select the log where the lr scheduler has converged, if possible. 
- if training_components["lr_scheduler"].snapshot_before_restart: - self.logger.debug("Using logs where lr scheduler converged") - logs = [log for log in logs if log["lr_scheduler_converged"]] or logs - logs = logs[-1] - return logs - return False diff --git a/autoPyTorch/components/training/image/mixup.py b/autoPyTorch/components/training/image/mixup.py deleted file mode 100644 index 7fb6d3309..000000000 --- a/autoPyTorch/components/training/image/mixup.py +++ /dev/null @@ -1,32 +0,0 @@ -from autoPyTorch.components.training.image.base_training import BaseBatchLossComputationTechnique -import numpy as np -from torch.autograd import Variable -import ConfigSpace -import torch - -class Mixup(BaseBatchLossComputationTechnique): - def set_up(self, pipeline_config, hyperparameter_config, logger): - super(Mixup, self).set_up(pipeline_config, hyperparameter_config, logger) - self.alpha = hyperparameter_config["alpha"] - - def prepare_data(self, x, y): - - lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0. else 1. - batch_size = x.size()[0] - index = torch.randperm(batch_size).cuda() if x.is_cuda else torch.randperm(batch_size) - - mixed_x = lam * x + (1 - lam) * x[index, :] - y_a, y_b = y, y[index] - return mixed_x, { 'y_a': y_a, 'y_b': y_b, 'lam' : lam } - - def criterion(self, y_a, y_b, lam): - return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b) - - def evaluate(self, metric, y_pred, y_a, y_b, lam): - return lam * metric(y_pred, y_a) + (1 - lam) * metric(y_pred, y_b) - - @staticmethod - def get_hyperparameter_search_space(**pipeline_config): - cs = ConfigSpace.ConfigurationSpace() - cs.add_hyperparameter(ConfigSpace.hyperparameters.UniformFloatHyperparameter("alpha", lower=0, upper=1, default_value=1)) - return cs diff --git a/autoPyTorch/components/training/image/trainer.py b/autoPyTorch/components/training/image/trainer.py deleted file mode 100644 index 49a2a466f..000000000 --- a/autoPyTorch/components/training/image/trainer.py +++ /dev/null @@ -1,252 +0,0 @@ -import time -import os -import torch -import torch.nn as nn - -import random -from torch.autograd import Variable -from .checkpoints.save_load import save_checkpoint - -# from util.transforms import mixup_data, mixup_criterion -# from checkpoints import save_checkpoint - -class Trainer(object): - def __init__(self, loss_computation, model, criterion, budget, optimizer, scheduler, budget_type, device, images_to_plot=0, checkpoint_path=None, config_id=None): - self.checkpoint_path = checkpoint_path - self.config_id = config_id - - self.scheduler = scheduler - # if self.scheduler and not hasattr(self.scheduler, 'cumulative_time'): - # self.scheduler.cumulative_time = 0 - self.optimizer = optimizer - self.device = device - - self.budget = budget - self.loss_computation = loss_computation - - self.images_plot_count = images_to_plot - - self.budget_type = budget_type - self.cumulative_time = 0 - - self.train_loss_sum = 0 - self.train_iterations = 0 - - self.latest_checkpoint = None - - try: - if torch.cuda.device_count() > 1: - model = nn.DataParallel(model) - self.model = model.to(self.device) - except: - print("CUDA unavailable, continue using CPU.") - self.model = model.to("cpu") - - try: - self.criterion = criterion.to(self.device) - except: - print("No criterion specified.") - self.criterion = None - - def train(self, epoch, train_loader, metrics): - ''' - Trains the model for a single epoch - ''' - - # train_size = int(0.9 * len(train_loader.dataset.train_data) / 
self.config.batch_size) - loss_sum = 0.0 - N = 0 - - # print('\33[1m==> Training epoch # {}\033[0m'.format(str(epoch))) - - - classified = [] - misclassified = [] - - self.model.train() - - budget_exceeded = False - metric_results = [0] * len(metrics) - start_time = time.time() - for step, (data, targets) in enumerate(train_loader): - # import matplotlib.pyplot as plt - # img = plt.imshow(data.numpy()[0,1,:]) - # plt.show() - - # images += list(data.numpy()) - # print('Data:', data.size(), ' - Label:', targets.size()) - - data = data.to(self.device) - targets = targets.to(self.device) - - data, criterion_kwargs = self.loss_computation.prepare_data(data, targets) - batch_size = data.size(0) - - outputs = self.model(data) - loss_func = self.loss_computation.criterion(**criterion_kwargs) - loss = loss_func(self.criterion, outputs) - - self.optimizer.zero_grad() - loss.backward() - self.optimizer.step() - - # print('Train:', ' '.join(str(outputs).split('\n')[0:2])) - - if self.images_plot_count > 0: - with torch.no_grad(): - _, pred = outputs.topk(1, 1, True, True) - pred = pred.t() - correct = pred.eq(targets.view(1, -1).expand_as(pred)).cpu().numpy()[0] - data = data.cpu().numpy() - classified += list(data[correct.astype(bool)]) - misclassified += list(data[(1-correct).astype(bool)]) - if len(classified) > self.images_plot_count: - classified = random.sample(classified, self.images_plot_count) - if len(misclassified) > self.images_plot_count: - misclassified = random.sample(misclassified, self.images_plot_count) - - # self.scheduler.cumulative_time += delta_time - # self.scheduler.last_step = self.scheduler.cumulative_time - delta_time - 1e-10 - - tmp = time.time() - - with torch.no_grad(): - for i, metric in enumerate(metrics): - metric_results[i] += self.loss_computation.evaluate(metric, outputs, **criterion_kwargs) * batch_size - - loss_sum += loss.item() * batch_size - N += batch_size - - #print('Update', (metric_results[0] / N), 'loss', (loss_sum / N), 'lr', self.optimizer.param_groups[0]['lr']) - - if self.budget_type == 'time' and self.cumulative_time + (time.time() - start_time) >= self.budget: - # print(' * Stopping at Epoch: [%d][%d/%d] for a budget of %.3f s' % (epoch, step + 1, train_size, self.config.budget)) - budget_exceeded = True - break - - if N==0: # Fixes a bug during initialization - N=1 - - if self.images_plot_count > 0: - import tensorboard_logger as tl - tl.log_images('Train_Classified/Image', classified, step=epoch) - tl.log_images('Train_Misclassified/Image', misclassified, step=epoch) - - if self.checkpoint_path and self.scheduler.snapshot_before_restart and self.scheduler.needs_checkpoint(): - self.latest_checkpoint = save_checkpoint(self.checkpoint_path, self.config_id, self.budget, self.model, self.optimizer, self.scheduler) - - try: - self.scheduler.step(epoch=epoch) - except: - self.scheduler.step(metrics=loss_sum / N, epoch=epoch) - - self.cumulative_time += (time.time() - start_time) - #print('LR', self.optimizer.param_groups[0]['lr'], 'Update', (metric_results[0] / N), 'loss', (loss_sum / N)) - - return [res / N for res in metric_results], loss_sum / N, budget_exceeded - - - def evaluate(self, test_loader, metrics, epoch=0): - - N = 0 - metric_results = [0] * len(metrics) - - classified = [] - misclassified = [] - - self.model.eval() - - with torch.no_grad(): - for step, (data, targets) in enumerate(test_loader): - - # import matplotlib.pyplot as plt - # img = plt.imshow(data.numpy()[0,1,:]) - # plt.show() - - try: - data = data.to(self.device) - targets = 
targets.to(self.device) - except: - data = data.to("cpu") - targets = targets.to("cpu") - - batch_size = data.size(0) - - outputs = self.model(data) - - if self.images_plot_count > 0: - _, pred = outputs.topk(1, 1, True, True) - pred = pred.t() - correct = pred.eq(targets.view(1, -1).expand_as(pred)).cpu().numpy()[0] - data = data.cpu().numpy() - classified += list(data[correct.astype(bool)]) - misclassified += list(data[(1-correct).astype(bool)]) - if len(classified) > self.images_plot_count: - classified = random.sample(classified, self.images_plot_count) - if len(misclassified) > self.images_plot_count: - misclassified = random.sample(misclassified, self.images_plot_count) - - # print('Valid:', ' '.join(str(outputs).split('\n')[0:2])) - # print('Shape:', outputs.shape, 'Sums', str(outputs.cpu().numpy().sum(1)).replace('\n', '')) - - for i, metric in enumerate(metrics): - metric_results[i] += metric(outputs.data, targets.data) * batch_size - - N += batch_size - - if self.images_plot_count > 0: - import tensorboard_logger as tl - tl.log_images('Valid_Classified/Image', classified, step=epoch) - tl.log_images('Valid_Misclassified/Image', misclassified, step=epoch) - - self.model.train() - - return [res / N for res in metric_results] - - - def class_to_probability_mapping(self, test_loader): - - N = 0 - - import numpy as np - import torch.nn as nn - - probs = None; - class_to_index = dict() - target_count = [] - - self.model.eval() - - with torch.no_grad(): - for i, (data, targets) in enumerate(test_loader): - - data = data.to(self.device) - targets = targets.to(self.device) - - batch_size = data.size(0) - - outputs = self.model(data) - - for i, output in enumerate(outputs): - target = targets[i].cpu().item() - np_output = output.cpu().numpy() - if target not in class_to_index: - if probs is None: - probs = np.array([np_output]) - else: - probs = np.vstack((probs, np_output)) - class_to_index[target] = probs.shape[0] - 1 - target_count.append(0) - else: - probs[class_to_index[target]] = probs[class_to_index[target]] + np_output - - target_count[class_to_index[target]] += 1 - - N += batch_size - - probs = probs / np.array(target_count)[:, None] #np.max(probs, axis=1)[:, None] - probs = torch.from_numpy(probs) - # probs = nn.Softmax(1)(probs) - - self.model.train() - return probs, class_to_index diff --git a/autoPyTorch/components/training/lr_scheduling.py b/autoPyTorch/components/training/lr_scheduling.py deleted file mode 100644 index ec218835c..000000000 --- a/autoPyTorch/components/training/lr_scheduling.py +++ /dev/null @@ -1,64 +0,0 @@ -from autoPyTorch.components.training.base_training import BaseTrainingTechnique -import time - -class LrScheduling(BaseTrainingTechnique): - """Schedule the learning rate with given learning rate scheduler. - The learning rate scheduler is usually set in a LrSchedulerSelector pipeline node. 
- """ - def __init__(self, training_components, lr_step_after_batch, lr_step_with_time, allow_snapshot): - super(LrScheduling, self).__init__(training_components=training_components) - self.lr_step_after_batch = lr_step_after_batch - self.lr_step_with_time = lr_step_with_time - self.allow_snapshot = allow_snapshot - - # OVERRIDE - def on_batch_end(self, batch_loss, trainer, epoch, step, num_steps, **kwargs): - if not self.lr_step_after_batch: - return - - if self.lr_step_with_time: - self.perform_scheduling(trainer, time.time() - trainer.fit_start_time, batch_loss) - else: - self.perform_scheduling(trainer, (epoch - 1) + ((step + 1) / num_steps), batch_loss) - - # OVERRIDE - def on_epoch_end(self, trainer, epoch, log, **kwargs): - log["lr_scheduler_converged"] = False - if callable(getattr(trainer.lr_scheduler, "get_lr", None)): - log['lr'] = trainer.lr_scheduler.get_lr()[0] - - if self.lr_step_after_batch: - return - - if self.lr_step_with_time: - log["lr_scheduler_converged"] = self.perform_scheduling(trainer, time.time() - trainer.fit_start_time, log['loss']) - else: - log["lr_scheduler_converged"] = self.perform_scheduling(trainer, epoch, log['loss']) - return False - - def perform_scheduling(self, trainer, epoch, metric, **kwargs): - try: - trainer.lr_scheduler.step(epoch=epoch, metrics=metric) - except: - trainer.lr_scheduler.step(epoch=epoch) - trainer.logger.debug("Perform learning rate scheduling") - - # check if lr scheduler has converged, if possible - if not trainer.lr_scheduler.snapshot_before_restart: - return False - trainer.lr_scheduler.get_lr() - if trainer.lr_scheduler.restarted_at == epoch: - if self.allow_snapshot: - trainer.logger.debug("Learning rate scheduler converged. Taking Snapshot of models parameters.") - trainer.model.snapshot() - return True - return False - - def select_log(self, logs, trainer, **kwargs): - # select the log where the lr scheduler has converged, if possible. 
- if trainer.lr_scheduler.snapshot_before_restart: - trainer.logger.debug("Using logs where lr scheduler converged") - logs = [log for log in logs if log["lr_scheduler_converged"]] or logs - logs = logs[-1] - return logs - return False diff --git a/autoPyTorch/components/training/trainer.py b/autoPyTorch/components/training/trainer.py deleted file mode 100644 index 1f8cbf413..000000000 --- a/autoPyTorch/components/training/trainer.py +++ /dev/null @@ -1,165 +0,0 @@ -import time -import os -import torch -import numpy as np - -from torch.autograd import Variable -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper - -# from util.transforms import mixup_data, mixup_criterion -# from checkpoints import save_checkpoint - -class Trainer(object): - def __init__(self, metrics, log_functions, loss_computation, model, criterion, - budget, optimizer, training_techniques, logger, device, full_eval_each_epoch): - - self.criterion = criterion - self.optimizer = optimizer - self.metrics = metrics - self.log_functions = log_functions - self.model = model - self.device = device - - for t in training_techniques: - for key, value in t.training_components.items(): - setattr(self, key, value) - self.training_techniques = training_techniques - - self.budget = budget - self.loss_computation = loss_computation - - self.logger = logger - self.fit_start_time = None - - self.eval_valid_each_epoch = full_eval_each_epoch or any(t.requires_eval_each_epoch() for t in self.training_techniques) - self.eval_valid_on_snapshot = not self.eval_valid_each_epoch - - self.eval_additional_logs_each_epoch = full_eval_each_epoch and self.log_functions - self.eval_additional_logs_on_snapshot = not full_eval_each_epoch and self.log_functions - - self.to(device) - - def prepare(self, pipeline_config, hyperparameter_config, fit_start_time): - self.fit_start_time = fit_start_time - self.loss_computation.set_up( - pipeline_config=pipeline_config, - hyperparameter_config=ConfigWrapper(hyperparameter_config["batch_loss_computation_technique"], hyperparameter_config), - logger=self.logger) - for t in self.training_techniques: - t.set_up(trainer=self, pipeline_config=pipeline_config) - - def to(self, device): - self.device = device - self.model = self.model.to(device) - self.criterion = self.criterion.to(device) - - @staticmethod - def get_device(pipeline_config): - if not torch.cuda.is_available(): - pipeline_config["cuda"] = False - return torch.device('cuda:0' if pipeline_config['cuda'] else 'cpu') - - def on_epoch_start(self, log, epoch): - for t in self.training_techniques: - t.on_epoch_start(trainer=self, log=log, epoch=epoch) - - def on_epoch_end(self, log, epoch): - return any([t.on_epoch_end(trainer=self, log=log, epoch=epoch) for t in self.training_techniques]) - - def final_eval(self, opt_metric_name, logs, train_loader, valid_loader, best_over_epochs, refit): - # select log - if best_over_epochs: - final_log = min(logs, key=lambda log: self.metrics[0].loss_transform(log[opt_metric_name])) - else: - final_log = None - for t in self.training_techniques: - log = t.select_log(trainer=self, logs=logs) - if log: - final_log = log - final_log = final_log or logs[-1] - - # validation on snapshot - if self.eval_additional_logs_on_snapshot or self.eval_valid_on_snapshot or refit: - self.model.load_snapshot() - valid_metric_results = None - if valid_loader is not None and self.eval_valid_on_snapshot: - valid_metric_results = self.evaluate(valid_loader) - - for i, metric in enumerate(self.metrics): - if valid_metric_results: - 
final_log['val_' + metric.name] = valid_metric_results[i] - if self.eval_additional_logs_on_snapshot and not refit: - for additional_log in self.log_functions: - final_log[additional_log.name] = additional_log(self.model, None) - return final_log - - def train(self, epoch, train_loader): - ''' - Trains the model for a single epoch - ''' - - loss_sum = 0.0 - N = 0 - self.model.train() - outputs_data = list() - targets_data = list() - - for step, (data, targets) in enumerate(train_loader): - - # prepare - data = data.to(self.device) - targets = targets.to(self.device) - - data, criterion_kwargs = self.loss_computation.prepare_data(data, targets) - data = Variable(data) - batch_size = data.size(0) - - for t in self.training_techniques: - t.on_batch_start(trainer=self, epoch=epoch, step=step, num_steps=len(train_loader)) - - # training - self.optimizer.zero_grad() - outputs = self.model(data) - loss_func = self.loss_computation.criterion(**criterion_kwargs) - loss = loss_func(self.criterion, outputs) - loss.backward() - self.optimizer.step() - - # save for metric evaluation - if self.model.final_activation is not None: - outputs = self.model.final_activation(outputs) - outputs_data.append(outputs.data.cpu().detach().numpy()) - targets_data.append(targets.data.cpu().detach().numpy()) - - loss_sum += loss.item() * batch_size - N += batch_size - - if any([t.on_batch_end(batch_loss=loss.item(), trainer=self, epoch=epoch, step=step, num_steps=len(train_loader)) - for t in self.training_techniques]): - return self.compute_metrics(outputs_data, targets_data), loss_sum / N, True - return self.compute_metrics(outputs_data, targets_data), loss_sum / N, False - - - def evaluate(self, test_loader): - self.model.eval() - - outputs_data = list() - targets_data = list() - - with torch.no_grad(): - for _, (data, targets) in enumerate(test_loader): - - data = data.to(self.device) - data = Variable(data) - outputs = self.model(data) - - outputs_data.append(outputs.data.cpu().detach().numpy()) - targets_data.append(targets.data.cpu().detach().numpy()) - - self.model.train() - return self.compute_metrics(outputs_data, targets_data) - - def compute_metrics(self, outputs_data, targets_data): - outputs_data = np.vstack(outputs_data) - targets_data = np.vstack(targets_data) - return [metric(outputs_data, targets_data) for metric in self.metrics] diff --git a/autoPyTorch/components/baselines/__init__.py b/autoPyTorch/configs/__init__.py similarity index 100% rename from autoPyTorch/components/baselines/__init__.py rename to autoPyTorch/configs/__init__.py diff --git a/autoPyTorch/configs/default_pipeline_options.json b/autoPyTorch/configs/default_pipeline_options.json new file mode 100644 index 000000000..700b4d892 --- /dev/null +++ b/autoPyTorch/configs/default_pipeline_options.json @@ -0,0 +1,12 @@ +{ + "device": "cpu", + "budget_type": "epochs", + "min_epochs": 5, + "epochs": 50, + "runtime": 3600, + "torch_num_threads": 1, + "early_stopping": 20, + "use_tensorboard_logger": "True", + "use_pynisher": "False", + "metrics_during_training": "True" +} diff --git a/autoPyTorch/constants.py b/autoPyTorch/constants.py new file mode 100644 index 000000000..652a546b9 --- /dev/null +++ b/autoPyTorch/constants.py @@ -0,0 +1,56 @@ +TABULAR_CLASSIFICATION = 1 +IMAGE_CLASSIFICATION = 2 +TABULAR_REGRESSION = 3 +IMAGE_REGRESSION = 4 +TIMESERIES_CLASSIFICATION = 5 +TIMESERIES_REGRESSION = 6 + +REGRESSION_TASKS = [TABULAR_REGRESSION, IMAGE_REGRESSION, TIMESERIES_REGRESSION] +CLASSIFICATION_TASKS = [TABULAR_CLASSIFICATION, 
IMAGE_CLASSIFICATION, TIMESERIES_CLASSIFICATION] + +TABULAR_TASKS = [TABULAR_CLASSIFICATION, TABULAR_REGRESSION] +IMAGE_TASKS = [IMAGE_CLASSIFICATION, IMAGE_REGRESSION] +TASK_TYPES = REGRESSION_TASKS + CLASSIFICATION_TASKS + +TASK_TYPES_TO_STRING = \ + {TABULAR_CLASSIFICATION: 'tabular_classification', + IMAGE_CLASSIFICATION: 'image_classification', + TABULAR_REGRESSION: 'tabular_regression', + IMAGE_REGRESSION: 'image_regression', + TIMESERIES_CLASSIFICATION: 'time_series_classification', + TIMESERIES_REGRESSION: 'time_series_regression'} + +STRING_TO_TASK_TYPES = \ + {'tabular_classification': TABULAR_CLASSIFICATION, + 'image_classification': IMAGE_CLASSIFICATION, + 'tabular_regression': TABULAR_REGRESSION, + 'image_regression': IMAGE_REGRESSION, + 'time_series_classification': TIMESERIES_CLASSIFICATION, + 'time_series_regression': TIMESERIES_REGRESSION} + +# Output types have been defined as in scikit-learn type_of_target +# (https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html) +BINARY = 10 +CONTINUOUSMULTIOUTPUT = 11 +MULTICLASS = 12 +CONTINUOUS = 13 +MULTICLASSMULTIOUTPUT = 14 + +OUTPUT_TYPES = [BINARY, CONTINUOUSMULTIOUTPUT, MULTICLASS, CONTINUOUS] + +OUTPUT_TYPES_TO_STRING = \ + {BINARY: 'binary', + CONTINUOUSMULTIOUTPUT: 'continuous-multioutput', + MULTICLASS: 'multiclass', + CONTINUOUS: 'continuous', + MULTICLASSMULTIOUTPUT: 'multiclass-multioutput'} + +STRING_TO_OUTPUT_TYPES = \ + {'binary': BINARY, + 'continuous-multioutput': CONTINUOUSMULTIOUTPUT, + 'multiclass': MULTICLASS, + 'continuous': CONTINUOUS, + 'multiclass-multioutput': MULTICLASSMULTIOUTPUT} + +CLASSIFICATION_OUTPUTS = [BINARY, MULTICLASS, MULTICLASSMULTIOUTPUT] +REGRESSION_OUTPUTS = [CONTINUOUS, CONTINUOUSMULTIOUTPUT] diff --git a/autoPyTorch/core/api.py b/autoPyTorch/core/api.py deleted file mode 100644 index fe680a37d..000000000 --- a/autoPyTorch/core/api.py +++ /dev/null @@ -1,345 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -import numpy as np -import scipy.sparse -import torch -import torch.nn as nn -import copy -import os -import json - -from autoPyTorch.pipeline.base.pipeline import Pipeline - -from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding -from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation -from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector -from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm -from autoPyTorch.pipeline.nodes.create_dataset_info import CreateDatasetInfo -from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector -from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo - - -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser - -class AutoNet(): - """Find an optimal neural network given a ML-task using BOHB""" - preset_folder_name = None - - def __init__(self, config_preset="medium_cs", pipeline=None, **autonet_config): - """Superclass for all AutoNet variations, that specifies the API of AutoNet. - - Keyword Arguments: - pipeline {Pipeline} -- Define your own Autonet Pipeline (default: {None}) - **autonet_config -- Configure AutoNet for your needs. You can also configure AutoNet in fit(). Call print_help() for more info. 
- """ - self.pipeline = pipeline or self.get_default_pipeline() - self.base_config = autonet_config - self.autonet_config = None - self.fit_result = None - self.dataset_info = None - - if config_preset is not None: - parser = self.get_autonet_config_file_parser() - c = parser.read(os.path.join(os.path.dirname(__file__), "presets", - self.preset_folder_name, config_preset + ".txt")) - c.update(self.base_config) - self.base_config = c - - def update_autonet_config(self, **autonet_config): - """Update the configuration of AutoNet""" - self.base_config.update(autonet_config) - - def get_autonet_config_file_parser(self): - return ConfigFileParser(self.pipeline.get_pipeline_config_options()) - - def print_help(self): - """Print the kwargs to configure the current AutoNet Pipeline""" - config_file_parser = self.get_autonet_config_file_parser() - print("Configure AutoNet with the following keyword arguments.") - print("Pass these arguments to either the constructor or fit().") - print() - config_file_parser.print_help(self.base_config) - - def get_current_autonet_config(self): - """Return the current AutoNet configuration - - Returns: - dict -- The Configuration of AutoNet - """ - - if (self.autonet_config is not None): - return self.autonet_config - return self.pipeline.get_pipeline_config(**self.base_config) - - def get_hyperparameter_search_space(self, X_train=None, Y_train=None, X_valid=None, Y_valid=None, **autonet_config): - """Return hyperparameter search space of Auto-PyTorch. Does depend on the dataset and the configuration! - You can either pass the dataset and the configuration or use dataset and configuration of last fit call. - - Keyword Arguments: - X_train {array} -- Training data. ConfigSpace depends on Training data. - Y_train {array} -- Targets of training data. - X_valid {array} -- Validation data. Will be ignored if cv_splits > 1. (default: {None}) - Y_valid {array} -- Validation data. Will be ignored if cv_splits > 1. (default: {None}) - autonet_config{dict} -- if not given and fit already called, config of last fit will be used - - Returns: - ConfigurationSpace -- The configuration space that should be optimized. - """ - X_train, Y_train, X_valid, Y_valid = self.check_data_array_types(X_train, Y_train, X_valid, Y_valid) - dataset_info = self.dataset_info - pipeline_config = dict(self.base_config, **autonet_config) if autonet_config else \ - self.get_current_autonet_config() - if X_train is not None and Y_train is not None: - dataset_info_node = self.pipeline[CreateDatasetInfo.get_name()] - dataset_info = dataset_info_node.fit(pipeline_config=pipeline_config, - X_train=X_train, - Y_train=Y_train, - X_valid=X_valid, - Y_valid=Y_valid)["dataset_info"] - - return self.pipeline.get_hyperparameter_search_space(dataset_info=dataset_info, **pipeline_config) - - @classmethod - def get_default_pipeline(cls): - """Build a pipeline for AutoNet. Should be implemented by child classes. - - Returns: - Pipeline -- The Pipeline for AutoNet - """ - - # build the pipeline - pipeline = Pipeline() - - cls._apply_default_pipeline_settings(pipeline) - return pipeline - - @staticmethod - def _apply_default_pipeline_settings(pipeline): - """Apply some settings the pipeline. Should be implemented by child classes.""" - pass - - def fit(self, X_train, Y_train, X_valid=None, Y_valid=None, refit=True, **autonet_config): - """Fit AutoNet to training data. - - Arguments: - X_train {array} -- Training data. - Y_train {array} -- Targets of training data. 
- - Keyword Arguments: - X_valid {array} -- Validation data. Will be ignored if cv_splits > 1. (default: {None}) - Y_valid {array} -- Validation data. Will be ignored if cv_splits > 1. (default: {None}) - refit {bool} -- Whether final architecture should be trained again after search. (default: {True}) - - Returns: - optimized_hyperparameter_config -- The best found hyperparameter config. - **autonet_config -- Configure AutoNet for your needs. You can also configure AutoNet in the constructor(). Call print_help() for more info. - """ - X_train, Y_train, X_valid, Y_valid = self.check_data_array_types(X_train, Y_train, X_valid, Y_valid) - self.autonet_config = self.pipeline.get_pipeline_config(**dict(self.base_config, **autonet_config)) - - self.fit_result = self.pipeline.fit_pipeline(pipeline_config=self.autonet_config, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid) - try: - self.dataset_info = self.pipeline[CreateDatasetInfo.get_name()].fit_output["dataset_info"] - except: - self.dataset_info = None - self.pipeline.clean() - - if "optimized_hyperparameter_config" not in self.fit_result.keys() or not self.fit_result["optimized_hyperparameter_config"]: # MODIFY - raise RuntimeError("No models fit during training, please retry with a larger max_runtime.") - - if (refit): - self.refit(X_train, Y_train, X_valid, Y_valid) - return self.fit_result - - def refit(self, X_train, Y_train, X_valid=None, Y_valid=None, hyperparameter_config=None, autonet_config=None, budget=None, rescore=False): - """Refit AutoNet to given hyperparameters. This will skip hyperparameter search. - - Arguments: - X_train {array} -- Training data. - Y_train {array} -- Targets of training data. - - Keyword Arguments: - X_valid {array} -- Validation data. (default: {None}) - Y_valid {array} -- Validation targets (default: {None}) - hyperparameter_config {dict} -- The hyperparameter config that specifies architecture and hyperparameters (default: {None}) - autonet_config -- Configure AutoNet for your needs. Call print_help() for more info. - budget -- The budget used for the refit. - rescore -- Use the same validation procedure as in fit (e.g. with cv). - - Raises: - ValueError -- No hyperparameter config available - """ - X_train, Y_train, X_valid, Y_valid = self.check_data_array_types(X_train, Y_train, X_valid, Y_valid) - if (autonet_config is None): - autonet_config = self.autonet_config - if (autonet_config is None): - autonet_config = self.base_config - if (hyperparameter_config is None and self.fit_result): - hyperparameter_config = self.fit_result["optimized_hyperparameter_config"] - if (budget is None and self.fit_result): - budget = self.fit_result["budget"] - if (budget is None): - budget = self.autonet_config["max_budget"] - if (autonet_config is None or hyperparameter_config is None): - raise ValueError("You have to specify a hyperparameter and autonet config in order to be able to refit") - - assert len(hyperparameter_config) > 0, "You have to specify a non-empty hyperparameter config for refit." 
- - refit_data = {'hyperparameter_config': hyperparameter_config, - 'budget': budget, - 'rescore': rescore} - - autonet_config = copy.deepcopy(autonet_config) - autonet_config['cv_splits'] = 1 - autonet_config['increase_number_of_trained_datasets'] = False #if training multiple datasets else ignored - - return self.pipeline.fit_pipeline(pipeline_config=autonet_config, refit=refit_data, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid) - - def predict(self, X, return_probabilities=False): - """Predict the targets for a data matrix X. - - Arguments: - X {array} -- The data matrix. - - Keyword Arguments: - return_probabilities {bool} -- Whether to return a tuple, where the second entry is the true network output (default: {False}) - - Returns: - result -- The predicted targets. - """ - - # run predict pipeline - X, = self.check_data_array_types(X) - autonet_config = self.get_current_autonet_config() - - Y_pred = self.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)['Y'] - - # reverse one hot encoding - if OneHotEncoding.get_name() in self.pipeline: - OHE = self.pipeline[OneHotEncoding.get_name()] - result = OHE.reverse_transform_y(Y_pred, OHE.fit_output['y_one_hot_encoder']) - return result if not return_probabilities else (result, Y_pred) - else: - result = dict() - result['Y'] = Y_pred - return result if not return_probabilities else (result, Y_pred) - - def score(self, X_test, Y_test, return_loss_value=False): - """Calculate the sore on test data using the specified optimize_metric - - Arguments: - X_test {array} -- The test data matrix. - Y_test {array} -- The test targets. - - Returns: - score -- The score for the test data. - """ - - # Update config if needed - X_test, Y_test = self.check_data_array_types(X_test, Y_test) - autonet_config = self.get_current_autonet_config() - - res = self.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X_test) - if 'score' in res: - # in case of default dataset like CIFAR10 - the pipeline will compute the score of the according pytorch test set - return res['score'] - Y_pred = res['Y'] - # run predict pipeline - #self.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X_test) - #Y_pred = self.pipeline[OptimizationAlgorithm.get_name()].predict_output['Y'] - - - # one hot encode Y - try: - OHE = self.pipeline[OneHotEncoding.get_name()] - Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder']) - except: - print("No one-hot encodig possible. Continuing without.") - pass - - metric = self.pipeline[MetricSelector.get_name()].fit_output['optimize_metric'] - - if return_loss_value: - return metric.get_loss_value(Y_pred, Y_test) - return metric(torch.from_numpy(Y_pred.astype(np.float32)), torch.from_numpy(Y_test.astype(np.float32))) - - def get_pytorch_model(self): - """Returns a pytorch sequential model of the current incumbent configuration. Not possible for all models. - - Arguments: - - Returns: - model -- PyTorch sequential model of the current incumbent configuration - """ - try: - if NetworkSelector.get_name() in self.pipeline: - return self.pipeline[NetworkSelector.get_name()].fit_output["network"].layers - else: - return self.pipeline[NetworkSelectorDatasetInfo.get_name()].fit_output["network"].layers - except: - print("Can not get PyTorch Sequential model for incumbent config. 
Returning Auto-PyTorch model") - if NetworkSelector.get_name() in self.pipeline: - return self.pipeline[NetworkSelector.get_name()].fit_output["network"] - else: - return self.pipeline[NetworkSelectorDatasetInfo.get_name()].fit_output["network"] - - def initialize_from_checkpoint(self, hyperparameter_config, checkpoint, in_features, out_features, final_activation=None): - """ - - Arguments: - config_file: json with output as from .fit method - in_features: array-like object, channels first - out_features: int, number of classes - final_activation: - - Returns: - PyTorch Sequential model - - """ - # load state dict - state_dict = torch.load(checkpoint, map_location=torch.device('cpu'))["state"] - - # read config file - if type(hyperparameter_config)==dict: - config = hyperparameter_config - else: - with open(hyperparameter_config, 'r') as file: - config = json.load(file)[1] - - # get model - network_type = config['NetworkSelectorDatasetInfo:network'] - network_type = self.pipeline[NetworkSelectorDatasetInfo.get_name()].networks[network_type] - model = network_type(config=config, - in_features=in_features, - out_features=out_features, - final_activation=final_activation) - - # Apply state dict - pretrained_state = state_dict - model_state = model.state_dict() - - pretrained_state = { k:v for k,v in pretrained_state.items() if k in model_state and v.size() == model_state[k].size() } - model_state.update(pretrained_state) - model.load_state_dict(model_state) - - # Add to pipeline - self.pipeline[NetworkSelectorDatasetInfo.get_name()].fit_output["network"] = model - - return model - - def check_data_array_types(self, *arrays): - result = [] - for array in arrays: - if array is None or scipy.sparse.issparse(array): - result.append(array) - continue - - result.append(np.asanyarray(array)) - if not result[-1].shape: - raise RuntimeError("Given data-array is of unexpected type %s. Please pass numpy arrays instead." 
% type(array)) - return result diff --git a/autoPyTorch/core/autonet_classes/__init__.py b/autoPyTorch/core/autonet_classes/__init__.py deleted file mode 100644 index 96e55e5e5..000000000 --- a/autoPyTorch/core/autonet_classes/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from autoPyTorch.core.autonet_classes.autonet_feature_classification import AutoNetClassification -from autoPyTorch.core.autonet_classes.autonet_feature_regression import AutoNetRegression -from autoPyTorch.core.autonet_classes.autonet_feature_multilabel import AutoNetMultilabel -from autoPyTorch.core.autonet_classes.autonet_image_classification import AutoNetImageClassification -from autoPyTorch.core.autonet_classes.autonet_image_classification_multiple_datasets import AutoNetImageClassificationMultipleDatasets diff --git a/autoPyTorch/core/autonet_classes/autonet_feature_classification.py b/autoPyTorch/core/autonet_classes/autonet_feature_classification.py deleted file mode 100644 index a6183e1f9..000000000 --- a/autoPyTorch/core/autonet_classes/autonet_feature_classification.py +++ /dev/null @@ -1,68 +0,0 @@ -from autoPyTorch.core.autonet_classes.autonet_feature_data import AutoNetFeatureData - -class AutoNetClassification(AutoNetFeatureData): - preset_folder_name = "feature_classification" - - # OVERRIDE - @staticmethod - def _apply_default_pipeline_settings(pipeline): - from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector - from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector - from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector - from autoPyTorch.pipeline.nodes.train_node import TrainNode - from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector - from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation - from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding - from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector - from autoPyTorch.components.preprocessing.resampling import RandomOverSamplingWithReplacement, RandomUnderSamplingWithReplacement, SMOTE, \ - TargetSizeStrategyAverageSample, TargetSizeStrategyDownsample, TargetSizeStrategyMedianSample, TargetSizeStrategyUpsample - - import torch.nn as nn - from sklearn.model_selection import StratifiedKFold - from autoPyTorch.components.metrics import accuracy, auc_metric, pac_metric, balanced_accuracy, cross_entropy - from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeighted - - AutoNetFeatureData._apply_default_pipeline_settings(pipeline) - - - net_selector = pipeline[NetworkSelector.get_name()] - net_selector.add_final_activation('softmax', nn.Softmax(1)) - - loss_selector = pipeline[LossModuleSelector.get_name()] - loss_selector.add_loss_module('cross_entropy', nn.CrossEntropyLoss, None, True) - loss_selector.add_loss_module('cross_entropy_weighted', nn.CrossEntropyLoss, LossWeightStrategyWeighted(), True) - - metric_selector = pipeline[MetricSelector.get_name()] - metric_selector.add_metric('accuracy', accuracy, loss_transform=True, - requires_target_class_labels=True) - metric_selector.add_metric('auc_metric', auc_metric, loss_transform=True, - requires_target_class_labels=False) - metric_selector.add_metric('pac_metric', pac_metric, loss_transform=True, - requires_target_class_labels=False) - metric_selector.add_metric('balanced_accuracy', balanced_accuracy, loss_transform=True, - requires_target_class_labels=True) - 
metric_selector.add_metric('cross_entropy', cross_entropy, loss_transform=True, - requires_target_class_labels=False) - - resample_selector = pipeline[ResamplingStrategySelector.get_name()] - resample_selector.add_over_sampling_method('random', RandomOverSamplingWithReplacement) - resample_selector.add_over_sampling_method('smote', SMOTE) - resample_selector.add_under_sampling_method('random', RandomUnderSamplingWithReplacement) - resample_selector.add_target_size_strategy('upsample', TargetSizeStrategyUpsample) - resample_selector.add_target_size_strategy('downsample', TargetSizeStrategyDownsample) - resample_selector.add_target_size_strategy('average', TargetSizeStrategyAverageSample) - resample_selector.add_target_size_strategy('median', TargetSizeStrategyMedianSample) - - train_node = pipeline[TrainNode.get_name()] - train_node.default_minimize_value = False - - cv = pipeline[CrossValidation.get_name()] - cv.add_cross_validator("stratified_k_fold", StratifiedKFold, flatten) - - one_hot_encoding_node = pipeline[OneHotEncoding.get_name()] - one_hot_encoding_node.encode_Y = True - - return pipeline - -def flatten(x): - return x.reshape((-1, )) diff --git a/autoPyTorch/core/autonet_classes/autonet_feature_data.py b/autoPyTorch/core/autonet_classes/autonet_feature_data.py deleted file mode 100644 index e46ead2b6..000000000 --- a/autoPyTorch/core/autonet_classes/autonet_feature_data.py +++ /dev/null @@ -1,175 +0,0 @@ - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -from autoPyTorch.core.api import AutoNet - -class AutoNetFeatureData(AutoNet): - - @classmethod - def get_default_ensemble_pipeline(cls): - """Construct a default pipeline, include nodes for Ensemble. - - Returns: - Pipeline -- The constructed default pipeline - """ - from autoPyTorch.pipeline.base.pipeline import Pipeline - from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \ - CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \ - EmbeddingSelector, NetworkSelector, OptimizerSelector, LearningrateSchedulerSelector, LogFunctionsSelector, MetricSelector, \ - LossModuleSelector, TrainNode, CreateDataLoader, CreateDatasetInfo, EnableComputePredictionsForEnsemble, SavePredictionsForEnsemble, \ - BuildEnsemble, EnsembleServer, InitializationSelector, BaselineTrainer - - # build the pipeline - pipeline = Pipeline([ - AutoNetSettings(), - CreateDatasetInfo(), - EnsembleServer(), - OptimizationAlgorithm([ - CrossValidation([ - Imputation(), - BaselineTrainer(), - NormalizationStrategySelector(), - OneHotEncoding(), - PreprocessorSelector(), - ResamplingStrategySelector(), - EmbeddingSelector(), - NetworkSelector(), - InitializationSelector(), - OptimizerSelector(), - LearningrateSchedulerSelector(), - LogFunctionsSelector(), - MetricSelector(), - EnableComputePredictionsForEnsemble(), - LossModuleSelector(), - CreateDataLoader(), - TrainNode(), - SavePredictionsForEnsemble() - ]) - ]), - BuildEnsemble() - ]) - - cls._apply_default_pipeline_settings(pipeline) - return pipeline - - @classmethod - def get_default_pipeline(cls): - """Construct a default pipeline, do not include nodes for Ensemble. 
- - Returns: - Pipeline -- The constructed default pipeline - """ - from autoPyTorch.pipeline.base.pipeline import Pipeline - from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \ - CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \ - EmbeddingSelector, NetworkSelector, OptimizerSelector, LearningrateSchedulerSelector, LogFunctionsSelector, MetricSelector, \ - LossModuleSelector, TrainNode, CreateDataLoader, CreateDatasetInfo, InitializationSelector - - # build the pipeline - pipeline = Pipeline([ - AutoNetSettings(), - CreateDatasetInfo(), - OptimizationAlgorithm([ - CrossValidation([ - Imputation(), - NormalizationStrategySelector(), - OneHotEncoding(), - PreprocessorSelector(), - ResamplingStrategySelector(), - EmbeddingSelector(), - NetworkSelector(), - InitializationSelector(), - OptimizerSelector(), - LearningrateSchedulerSelector(), - LogFunctionsSelector(), - MetricSelector(), - LossModuleSelector(), - CreateDataLoader(), - TrainNode() - ]) - ]), - ]) - - cls._apply_default_pipeline_settings(pipeline) - return pipeline - - - @staticmethod - def _apply_default_pipeline_settings(pipeline): - """Add the components to the pipeline - - Arguments: - pipeline {pipeline} -- The pipelines to add the components to - """ - from autoPyTorch.pipeline.nodes import NormalizationStrategySelector, PreprocessorSelector, EmbeddingSelector, NetworkSelector, \ - OptimizerSelector, LearningrateSchedulerSelector, TrainNode, CrossValidation, InitializationSelector - - from autoPyTorch.components.networks.feature import MlpNet, ResNet, ShapedMlpNet, ShapedResNet - from autoPyTorch.components.networks.initialization import SimpleInitializer, SparseInitialization - - from autoPyTorch.components.optimizer.optimizer import AdamOptimizer, AdamWOptimizer, SgdOptimizer, RMSpropOptimizer - from autoPyTorch.components.lr_scheduler.lr_schedulers import SchedulerCosineAnnealingWithRestartsLR, SchedulerNone, \ - SchedulerCyclicLR, SchedulerExponentialLR, SchedulerReduceLROnPlateau, SchedulerReduceLROnPlateau, SchedulerStepLR, \ - SchedulerAdaptiveLR, SchedulerAlternatingCosineLR, SchedulerCosineAnnealingLR - from autoPyTorch.components.networks.feature import LearnedEntityEmbedding - - from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler - from sklearn.model_selection import KFold - - from autoPyTorch.components.preprocessing.feature_preprocessing import \ - TruncatedSVD, FastICA, RandomKitchenSinks, KernelPCA, Nystroem, PowerTransformer - - from autoPyTorch.components.training.early_stopping import EarlyStopping - from autoPyTorch.components.regularization.mixup import Mixup - - pre_selector = pipeline[PreprocessorSelector.get_name()] - pre_selector.add_preprocessor('truncated_svd', TruncatedSVD) - pre_selector.add_preprocessor('power_transformer', PowerTransformer) - pre_selector.add_preprocessor('fast_ica', FastICA) - pre_selector.add_preprocessor('kitchen_sinks', RandomKitchenSinks) - pre_selector.add_preprocessor('kernel_pca', KernelPCA) - pre_selector.add_preprocessor('nystroem', Nystroem) - - norm_selector = pipeline[NormalizationStrategySelector.get_name()] - norm_selector.add_normalization_strategy('minmax', MinMaxScaler) - norm_selector.add_normalization_strategy('standardize', StandardScaler) - norm_selector.add_normalization_strategy('maxabs', MaxAbsScaler) - - emb_selector = pipeline[EmbeddingSelector.get_name()] - emb_selector.add_embedding_module('learned', 
LearnedEntityEmbedding) - - net_selector = pipeline[NetworkSelector.get_name()] - net_selector.add_network('mlpnet', MlpNet) - net_selector.add_network('shapedmlpnet', ShapedMlpNet) - net_selector.add_network('resnet', ResNet) - net_selector.add_network('shapedresnet', ShapedResNet) - - init_selector = pipeline[InitializationSelector.get_name()] - init_selector.add_initialization_method("sparse", SparseInitialization) - init_selector.add_initializer("simple_initializer", SimpleInitializer) - - opt_selector = pipeline[OptimizerSelector.get_name()] - opt_selector.add_optimizer('adam', AdamOptimizer) - opt_selector.add_optimizer('adamw', AdamWOptimizer) - opt_selector.add_optimizer('sgd', SgdOptimizer) - opt_selector.add_optimizer('rmsprop', RMSpropOptimizer) - - lr_selector = pipeline[LearningrateSchedulerSelector.get_name()] - lr_selector.add_lr_scheduler('cosine_annealing', SchedulerCosineAnnealingLR) - lr_selector.add_lr_scheduler('cosine_annealing_with_restarts', SchedulerCosineAnnealingWithRestartsLR) - lr_selector.add_lr_scheduler('cyclic', SchedulerCyclicLR) - lr_selector.add_lr_scheduler('exponential', SchedulerExponentialLR) - lr_selector.add_lr_scheduler('step', SchedulerStepLR) - lr_selector.add_lr_scheduler('adapt', SchedulerAdaptiveLR) - lr_selector.add_lr_scheduler('plateau', SchedulerReduceLROnPlateau) - lr_selector.add_lr_scheduler('alternating_cosine', SchedulerAlternatingCosineLR) - lr_selector.add_lr_scheduler('none', SchedulerNone) - - train_node = pipeline[TrainNode.get_name()] - train_node.add_training_technique("early_stopping", EarlyStopping) - train_node.add_batch_loss_computation_technique("mixup", Mixup) - - cv = pipeline[CrossValidation.get_name()] - cv.add_cross_validator("k_fold", KFold) diff --git a/autoPyTorch/core/autonet_classes/autonet_feature_multilabel.py b/autoPyTorch/core/autonet_classes/autonet_feature_multilabel.py deleted file mode 100644 index 5b70e6915..000000000 --- a/autoPyTorch/core/autonet_classes/autonet_feature_multilabel.py +++ /dev/null @@ -1,40 +0,0 @@ -from autoPyTorch.core.autonet_classes.autonet_feature_data import AutoNetFeatureData - -class AutoNetMultilabel(AutoNetFeatureData): - preset_folder_name = "feature_multilabel" - - # OVERRIDE - @staticmethod - def _apply_default_pipeline_settings(pipeline): - from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector - from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector - from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector - from autoPyTorch.pipeline.nodes.train_node import TrainNode - from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation - - import torch.nn as nn - from autoPyTorch.components.metrics import multilabel_accuracy, auc_metric, pac_metric - from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeightedBinary - - AutoNetFeatureData._apply_default_pipeline_settings(pipeline) - - net_selector = pipeline[NetworkSelector.get_name()] - net_selector.add_final_activation('sigmoid', nn.Sigmoid()) - - loss_selector = pipeline[LossModuleSelector.get_name()] - loss_selector.add_loss_module('bce_with_logits', nn.BCEWithLogitsLoss, None, False) - loss_selector.add_loss_module('bce_with_logits_weighted', nn.BCEWithLogitsLoss, LossWeightStrategyWeightedBinary(), False) - - metric_selector = pipeline[MetricSelector.get_name()] - metric_selector.add_metric('multilabel_accuracy', multilabel_accuracy, - loss_transform=True, requires_target_class_labels=True) - 
metric_selector.add_metric('auc_metric', auc_metric, loss_transform=True, - requires_target_class_labels=False) - metric_selector.add_metric('pac_metric', pac_metric, loss_transform=True, - requires_target_class_labels=False) - - train_node = pipeline[TrainNode.get_name()] - train_node.default_minimize_value = False - - cv = pipeline[CrossValidation.get_name()] - cv.use_stratified_cv_split_default = False diff --git a/autoPyTorch/core/autonet_classes/autonet_feature_regression.py b/autoPyTorch/core/autonet_classes/autonet_feature_regression.py deleted file mode 100644 index 281d62d34..000000000 --- a/autoPyTorch/core/autonet_classes/autonet_feature_regression.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -import torch -import torch.nn as nn -import copy -from autoPyTorch.core.autonet_classes.autonet_feature_data import AutoNetFeatureData - -class AutoNetRegression(AutoNetFeatureData): - preset_folder_name = "feature_regression" - - # OVERRIDE - @staticmethod - def _apply_default_pipeline_settings(pipeline): - from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector - from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector - from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector - from autoPyTorch.pipeline.nodes.train_node import TrainNode - from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation - - import torch.nn as nn - from autoPyTorch.components.metrics.standard_metrics import mae, rmse - - AutoNetFeatureData._apply_default_pipeline_settings(pipeline) - - net_selector = pipeline[NetworkSelector.get_name()] - net_selector.add_final_activation('none', nn.Sequential()) - - loss_selector = pipeline[LossModuleSelector.get_name()] - loss_selector.add_loss_module('l1_loss', nn.L1Loss) - - metric_selector = pipeline[MetricSelector.get_name()] - metric_selector.add_metric('mean_abs_error', mae, loss_transform=False, requires_target_class_labels=False) - metric_selector.add_metric('rmse', rmse, loss_transform=False, requires_target_class_labels=False) - - train_node = pipeline[TrainNode.get_name()] - train_node.default_minimize_value = True - - cv = pipeline[CrossValidation.get_name()] - cv.use_stratified_cv_split_default = False diff --git a/autoPyTorch/core/autonet_classes/autonet_image_classification.py b/autoPyTorch/core/autonet_classes/autonet_image_classification.py deleted file mode 100644 index d9173aba2..000000000 --- a/autoPyTorch/core/autonet_classes/autonet_image_classification.py +++ /dev/null @@ -1,43 +0,0 @@ -from autoPyTorch.core.autonet_classes.autonet_image_data import AutoNetImageData - - -class AutoNetImageClassification(AutoNetImageData): - preset_folder_name = "image_classification" - - @staticmethod - def _apply_default_pipeline_settings(pipeline): - import torch.nn as nn - from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector - from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode - from autoPyTorch.pipeline.nodes.image.cross_validation_indices import CrossValidationIndices - from autoPyTorch.pipeline.nodes.image.loss_module_selector_indices import LossModuleSelectorIndices - from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo - from autoPyTorch.components.metrics import accuracy, auc_metric, pac_metric, balanced_accuracy, cross_entropy - from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeighted - - AutoNetImageData._apply_default_pipeline_settings(pipeline) - - 
net_selector = pipeline[NetworkSelectorDatasetInfo.get_name()] - net_selector.add_final_activation('softmax', nn.Softmax(1)) - - loss_selector = pipeline[LossModuleSelectorIndices.get_name()] - loss_selector.add_loss_module('cross_entropy', nn.CrossEntropyLoss, None, True) - loss_selector.add_loss_module('cross_entropy_weighted', nn.CrossEntropyLoss, LossWeightStrategyWeighted(), True) - - metric_selector = pipeline[MetricSelector.get_name()] - metric_selector.add_metric('accuracy', accuracy, loss_transform=True, - requires_target_class_labels=False) - metric_selector.add_metric('auc_metric', auc_metric, loss_transform=True, - requires_target_class_labels=False) - metric_selector.add_metric('pac_metric', pac_metric, loss_transform=True, - requires_target_class_labels=False) - metric_selector.add_metric('balanced_accuracy', balanced_accuracy, loss_transform=True, - requires_target_class_labels=True) - metric_selector.add_metric('cross_entropy', cross_entropy, loss_transform=True, - requires_target_class_labels=False) - - train_node = pipeline[SimpleTrainNode.get_name()] - train_node.default_minimize_value = False - - cv = pipeline[CrossValidationIndices.get_name()] - cv.use_stratified_cv_split_default = True diff --git a/autoPyTorch/core/autonet_classes/autonet_image_classification_multiple_datasets.py b/autoPyTorch/core/autonet_classes/autonet_image_classification_multiple_datasets.py deleted file mode 100644 index 95f6a9859..000000000 --- a/autoPyTorch/core/autonet_classes/autonet_image_classification_multiple_datasets.py +++ /dev/null @@ -1,56 +0,0 @@ -from autoPyTorch.core.autonet_classes.autonet_image_classification import AutoNetImageClassification - - -class AutoNetImageClassificationMultipleDatasets(AutoNetImageClassification): - preset_folder_name = "image_classification_multiple_datasets" - - @classmethod - def get_default_pipeline(cls): - from autoPyTorch.pipeline.base.pipeline import Pipeline - from autoPyTorch.pipeline.nodes.image.optimization_algorithm_no_timelimit import OptimizationAlgorithmNoTimeLimit - from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector - from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector - from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector - - from autoPyTorch.pipeline.nodes.image.simple_scheduler_selector import SimpleLearningrateSchedulerSelector - from autoPyTorch.pipeline.nodes.image.cross_validation_indices import CrossValidationIndices - from autoPyTorch.pipeline.nodes.image.autonet_settings_no_shuffle import AutoNetSettingsNoShuffle - from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo - from autoPyTorch.pipeline.nodes.image.loss_module_selector_indices import LossModuleSelectorIndices - from autoPyTorch.pipeline.nodes.image.image_augmentation import ImageAugmentation - from autoPyTorch.pipeline.nodes.image.create_image_dataloader import CreateImageDataLoader - from autoPyTorch.pipeline.nodes.image.create_dataset_info import CreateDatasetInfo - from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode - from autoPyTorch.pipeline.nodes.image.multiple_datasets import MultipleDatasets - from autoPyTorch.pipeline.nodes.image.image_dataset_reader import ImageDatasetReader - - # build the pipeline - pipeline = Pipeline([ - AutoNetSettingsNoShuffle(), - OptimizationAlgorithmNoTimeLimit([ - - MultipleDatasets([ - - ImageDatasetReader(), - CreateDatasetInfo(), - CrossValidationIndices([ - - 
NetworkSelectorDatasetInfo(), - OptimizerSelector(), - SimpleLearningrateSchedulerSelector(), - - LogFunctionsSelector(), - MetricSelector(), - - LossModuleSelectorIndices(), - - ImageAugmentation(), - CreateImageDataLoader(), - SimpleTrainNode() - ]) - ]) - ]) - ]) - - cls._apply_default_pipeline_settings(pipeline) - return pipeline diff --git a/autoPyTorch/core/autonet_classes/autonet_image_data.py b/autoPyTorch/core/autonet_classes/autonet_image_data.py deleted file mode 100644 index c6a3cd078..000000000 --- a/autoPyTorch/core/autonet_classes/autonet_image_data.py +++ /dev/null @@ -1,130 +0,0 @@ -import numpy as np -import torch -from autoPyTorch.core.api import AutoNet - - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -class AutoNetImageData(AutoNet): - - @classmethod - def get_default_pipeline(cls): - from autoPyTorch.pipeline.base.pipeline import Pipeline - from autoPyTorch.pipeline.nodes.image.optimization_algorithm_no_timelimit import OptimizationAlgorithmNoTimeLimit - from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding - from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector - from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector - from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector - - from autoPyTorch.pipeline.nodes.image.simple_scheduler_selector import SimpleLearningrateSchedulerSelector - from autoPyTorch.pipeline.nodes.image.cross_validation_indices import CrossValidationIndices - from autoPyTorch.pipeline.nodes.image.autonet_settings_no_shuffle import AutoNetSettingsNoShuffle - from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo - from autoPyTorch.pipeline.nodes.image.loss_module_selector_indices import LossModuleSelectorIndices - from autoPyTorch.pipeline.nodes.image.image_augmentation import ImageAugmentation - from autoPyTorch.pipeline.nodes.image.create_image_dataloader import CreateImageDataLoader - from autoPyTorch.pipeline.nodes.image.create_dataset_info import CreateDatasetInfo - from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode - from autoPyTorch.pipeline.nodes.image.image_dataset_reader import ImageDatasetReader - from autoPyTorch.pipeline.nodes.image.single_dataset import SingleDataset - - # build the pipeline - pipeline = Pipeline([ - AutoNetSettingsNoShuffle(), - OptimizationAlgorithmNoTimeLimit([ - - SingleDataset([ - - ImageDatasetReader(), - CreateDatasetInfo(), - CrossValidationIndices([ - - NetworkSelectorDatasetInfo(), - OptimizerSelector(), - SimpleLearningrateSchedulerSelector(), - - LogFunctionsSelector(), - MetricSelector(), - - LossModuleSelectorIndices(), - - ImageAugmentation(), - CreateImageDataLoader(), - SimpleTrainNode() - ]) - ]) - ]) - ]) - - - cls._apply_default_pipeline_settings(pipeline) - return pipeline - - @staticmethod - def _apply_default_pipeline_settings(pipeline): - from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector - from autoPyTorch.pipeline.nodes.image.simple_scheduler_selector import SimpleLearningrateSchedulerSelector - - from autoPyTorch.pipeline.nodes.image.network_selector_datasetinfo import NetworkSelectorDatasetInfo - from autoPyTorch.pipeline.nodes.image.simple_train_node import SimpleTrainNode - from autoPyTorch.pipeline.nodes.image.create_image_dataloader import CreateImageDataLoader - from autoPyTorch.pipeline.nodes.image.image_augmentation import ImageAugmentation - 
- from autoPyTorch.components.networks.image import DenseNet, ResNet, MobileNet - from autoPyTorch.components.networks.image.densenet_flexible import DenseNetFlexible - from autoPyTorch.components.networks.image.resnet152 import ResNet152 - from autoPyTorch.components.networks.image.darts.model import DARTSImageNet - - from autoPyTorch.components.optimizer.optimizer import AdamOptimizer, AdamWOptimizer, SgdOptimizer, RMSpropOptimizer - from autoPyTorch.components.lr_scheduler.lr_schedulers import SchedulerCosineAnnealingWithRestartsLR, SchedulerNone, \ - SchedulerCyclicLR, SchedulerExponentialLR, SchedulerReduceLROnPlateau, SchedulerReduceLROnPlateau, SchedulerStepLR, \ - SchedulerAlternatingCosineLR, SchedulerAdaptiveLR, SchedulerExponentialLR, SchedulerCosineAnnealingLR - - from autoPyTorch.components.training.image.early_stopping import EarlyStopping - from autoPyTorch.components.training.image.mixup import Mixup - - net_selector = pipeline[NetworkSelectorDatasetInfo.get_name()] - net_selector.add_network('densenet', DenseNet) - net_selector.add_network('densenet_flexible', DenseNetFlexible) - net_selector.add_network('resnet', ResNet) - net_selector.add_network('resnet152', ResNet152) - net_selector.add_network('darts', DARTSImageNet) - net_selector.add_network('mobilenet', MobileNet) - net_selector._apply_search_space_update('resnet:nr_main_blocks', [2, 4], log=False) - net_selector._apply_search_space_update('resnet:widen_factor_1', [0.5, 8], log=True) - - opt_selector = pipeline[OptimizerSelector.get_name()] - opt_selector.add_optimizer('adam', AdamOptimizer) - opt_selector.add_optimizer('adamw', AdamWOptimizer) - opt_selector.add_optimizer('sgd', SgdOptimizer) - opt_selector.add_optimizer('rmsprop', RMSpropOptimizer) - - lr_selector = pipeline[SimpleLearningrateSchedulerSelector.get_name()] - lr_selector.add_lr_scheduler('cosine_annealing', SchedulerCosineAnnealingLR) - lr_selector.add_lr_scheduler('cosine_annealing_with_restarts', SchedulerCosineAnnealingWithRestartsLR) - lr_selector.add_lr_scheduler('cyclic', SchedulerCyclicLR) - lr_selector.add_lr_scheduler('step', SchedulerStepLR) - lr_selector.add_lr_scheduler('adapt', SchedulerAdaptiveLR) - lr_selector.add_lr_scheduler('plateau', SchedulerReduceLROnPlateau) - lr_selector.add_lr_scheduler('alternating_cosine',SchedulerAlternatingCosineLR) - lr_selector.add_lr_scheduler('exponential', SchedulerExponentialLR) - lr_selector.add_lr_scheduler('none', SchedulerNone) - - train_node = pipeline[SimpleTrainNode.get_name()] - #train_node.add_training_technique("early_stopping", EarlyStopping) - train_node.add_batch_loss_computation_technique("mixup", Mixup) - - data_node = pipeline[CreateImageDataLoader.get_name()] - - data_node._apply_search_space_update('batch_size', [32, 160], log=True) - - augment_node = pipeline[ImageAugmentation.get_name()] - augment_node._apply_search_space_update('augment', [False, True]) - augment_node._apply_search_space_update('autoaugment', [False, True]) - augment_node._apply_search_space_update('fastautoaugment', [False, True]) - augment_node._apply_search_space_update('length', [2,6]) - augment_node._apply_search_space_update('cutout', [False, True]) - augment_node._apply_search_space_update('cutout_holes', [1, 50]) diff --git a/autoPyTorch/core/ensemble.py b/autoPyTorch/core/ensemble.py deleted file mode 100644 index 4132cae8d..000000000 --- a/autoPyTorch/core/ensemble.py +++ /dev/null @@ -1,182 +0,0 @@ -import os -import torch -import logging -import numpy as np -from autoPyTorch.core.api import AutoNet 
-from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding -from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector -from autoPyTorch.pipeline.nodes.ensemble import EnableComputePredictionsForEnsemble, SavePredictionsForEnsemble, BuildEnsemble, EnsembleServer -from autoPyTorch.pipeline.nodes.create_dataset_info import CreateDatasetInfo -from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector -from autoPyTorch.pipeline.nodes import BaselineTrainer - -from IPython import embed - -class AutoNetEnsemble(AutoNet): - """Build an ensemble of several neural networks that were evaluated during the architecure search""" - - # OVERRIDE - def __init__(self, autonet, config_preset="medium_cs", **autonet_config): - if isinstance(autonet, AutoNet): - self.pipeline = autonet.pipeline - self.autonet_type = type(autonet) - self.base_config = autonet.base_config - self.autonet_config = autonet.autonet_config - self.fit_result = autonet.fit_result - elif issubclass(autonet, AutoNet): - self.pipeline = autonet.get_default_ensemble_pipeline() - self.autonet_type = autonet - self.base_config = dict() - self.autonet_config = None - self.fit_result = None - else: - raise("Invalid autonet argument") - - assert EnableComputePredictionsForEnsemble in self.pipeline - assert SavePredictionsForEnsemble in self.pipeline - assert EnsembleServer in self.pipeline - assert BuildEnsemble in self.pipeline - - self.base_config.update(autonet_config) - self.trained_autonets = None - self.dataset_info = None - - if config_preset is not None: - parser = self.get_autonet_config_file_parser() - c = parser.read(os.path.join(os.path.dirname(__file__), "presets", - autonet.preset_folder_name, config_preset + ".txt")) - c.update(self.base_config) - self.base_config = c - - # OVERRIDE - def fit(self, X_train, Y_train, X_valid=None, Y_valid=None, refit=True, **autonet_config): - X_train, Y_train, X_valid, Y_valid = self.check_data_array_types(X_train, Y_train, X_valid, Y_valid) - self.autonet_config = self.pipeline.get_pipeline_config(**dict(self.base_config, **autonet_config)) - - self.autonet_config["save_models"] = True - - self.fit_result = self.pipeline.fit_pipeline(pipeline_config=self.autonet_config, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid) - self.dataset_info = self.pipeline[CreateDatasetInfo.get_name()].fit_output["dataset_info"] - self.pipeline.clean() - if refit: - self.refit(X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid) - return self.fit_result - - # OVERRIDE - def refit(self, X_train, Y_train, X_valid=None, Y_valid=None, ensemble_configs=None, ensemble=None, autonet_config=None): - X_train, Y_train, X_valid, Y_valid = self.check_data_array_types(X_train, Y_train, X_valid, Y_valid) - # The ensemble API does mot contain the fit_output from cross_val subpipeline nodes. 
Fit a single pipeline here for preprocessing - if (autonet_config is None): - autonet_config = self.autonet_config - if (autonet_config is None): - autonet_config = self.base_config - if (ensemble_configs is None and self.fit_result and "ensemble_configs" in self.fit_result.keys()): - ensemble_configs = self.fit_result["ensemble_configs"] - if (ensemble is None and self.fit_result): - ensemble = self.fit_result["ensemble"] - if (autonet_config is None or ensemble_configs is None or ensemble is None): - raise ValueError("You have to specify ensemble and autonet config in order to be able to refit") - - identifiers = ensemble.get_selected_model_identifiers() - self.trained_autonets = dict() - - autonet_config["save_models"] = False - - for identifier in identifiers: - config_id = tuple(identifier[:3]) - budget = identifier[3] - hyperparameter_config = ensemble_configs[config_id] - autonet = self.autonet_type(pipeline=self.pipeline) - autonet.refit(X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - hyperparameter_config=hyperparameter_config, autonet_config=autonet_config, budget=budget) - self.trained_autonets[tuple(identifier)] = autonet - self.trained_autonet = autonet - break - - # OVERRIDE - def predict(self, X, return_probabilities=False, return_metric=False): - # run predict pipeline - X, = self.check_data_array_types(X) - prediction = None - autonet_config = self.get_current_autonet_config() - - identifiers_with_budget, weights = self.fit_result["ensemble"].identifiers_, self.fit_result["ensemble"].weights_ - - baseline_id2model = BaselineTrainer.identifiers_ens - - - model_dirs = [os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + ".torch") for ident in identifiers_with_budget] - - # get data preprocessing pipeline - for ident, weight in zip(identifiers_with_budget, weights): - - if weight==0: - continue - - - if ident[0]>=0: - model_dir = os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + ".torch") - logging.info("==> Inferring model model " + model_dir + ", adding preds with weight " + str(weight)) - model = torch.load(model_dir) - - autonet_config["model"] = model - current_prediction = self.trained_autonet.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)['Y'] - prediction = current_prediction if prediction is None else prediction + weight * current_prediction - - OHE = self.trained_autonet.pipeline[OneHotEncoding.get_name()] - metric = self.trained_autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric'] - - else: - model_dir = os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + ".pkl") - info_dir = os.path.join(self.autonet_config["result_logger_dir"], "models", str(ident) + "_info.pkl") - - logging.info("==> Inferring model model " + model_dir + ", adding preds with weight " + str(weight)) - - baseline_model = baseline_id2model[ident[0]]() - baseline_model.load(model_dir, info_dir) - - current_prediction = baseline_model.predict(X_test=X, predict_proba=True) - prediction = current_prediction if prediction is None else prediction + weight * current_prediction - - # reverse one hot encoding - result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder']) - if not return_probabilities and not return_metric: - return result - result = [result] - if return_probabilities: - result.append(prediction) - if return_metric: - result.append(metric) - return tuple(result) - - - """ - models_with_weights = 
self.fit_result["ensemble"].get_models_with_weights(self.trained_autonets) - autonet_config = self.autonet_config or self.base_config - for weight, autonet in models_with_weights: - current_prediction = autonet.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)["Y"] - prediction = current_prediction if prediction is None else prediction + weight * current_prediction - OHE = autonet.pipeline[OneHotEncoding.get_name()] - metric = autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric'] - - # reverse one hot encoding - result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder']) - if not return_probabilities and not return_metric: - return result - result = [result] - if return_probabilities: - result.append(prediction) - if return_metric: - result.append(metric) - return tuple(result) - """ - - # OVERRIDE - def score(self, X_test, Y_test): - # run predict pipeline - X_test, Y_test = self.check_data_array_types(X_test, Y_test) - _, Y_pred, metric = self.predict(X_test, return_probabilities=True, return_metric=True) - Y_test, _ = self.pipeline[OneHotEncoding.get_name()].complete_y_tranformation(Y_test) - return metric(Y_pred, Y_test) diff --git a/autoPyTorch/core/hpbandster_extensions/bohb_ext.py b/autoPyTorch/core/hpbandster_extensions/bohb_ext.py deleted file mode 100644 index eb31a3154..000000000 --- a/autoPyTorch/core/hpbandster_extensions/bohb_ext.py +++ /dev/null @@ -1,17 +0,0 @@ - -from hpbandster.optimizers.bohb import BOHB -from autoPyTorch.core.hpbandster_extensions.run_with_time import run_with_time - -class BOHBExt(BOHB): - def run_until(self, runtime=1, n_iterations=float("inf"), min_n_workers=1, iteration_kwargs = {},): - """ - Parameters: - ----------- - runtime: int - time for this run in seconds - n_iterations: - the number of hyperband iterations to run - min_n_workers: int - minimum number of workers before starting the run - """ - return run_with_time(self, runtime, n_iterations, min_n_workers, iteration_kwargs) \ No newline at end of file diff --git a/autoPyTorch/core/hpbandster_extensions/greedy_portfolio.json b/autoPyTorch/core/hpbandster_extensions/greedy_portfolio.json deleted file mode 100644 index dba3a5e46..000000000 --- a/autoPyTorch/core/hpbandster_extensions/greedy_portfolio.json +++ /dev/null @@ -1 +0,0 @@ -[{"CreateDataLoader:batch_size": 60, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 290, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.00020060142055000713, "OptimizerSelector:adam:weight_decay": 
0.0018320003468984575, "TrainNode:mixup:alpha": 0.8448753109694546, "NetworkSelector:shapedmlpnet:max_dropout": 0.023271935735825866}, {"CreateDataLoader:batch_size": 240, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 41, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.011526647986073339, "OptimizerSelector:adam:weight_decay": 0.031290291410446765, "NetworkSelector:shapedresnet:max_dropout": 0.7662454727603789, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.30409463597128383}, {"CreateDataLoader:batch_size": 165, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 438, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.07331878780908542, "OptimizerSelector:sgd:momentum": 0.44665514022476815, "OptimizerSelector:sgd:weight_decay": 0.006911333726469374}, {"CreateDataLoader:batch_size": 299, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", 
"NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 279, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0007471732018616978, "OptimizerSelector:adam:weight_decay": 0.0005438753720314742}, {"CreateDataLoader:batch_size": 183, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 354, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.001780112494729604, "OptimizerSelector:adam:weight_decay": 0.004224029178574147, "NetworkSelector:shapedresnet:max_dropout": 0.27204101593048097, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.4412292309825137}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", 
"NetworkSelector:shapedmlpnet:max_units": 201, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.015232504956212976, "OptimizerSelector:adam:weight_decay": 9.906036909600088e-05}, {"CreateDataLoader:batch_size": 159, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 966, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.0007790465032701478, "OptimizerSelector:adam:weight_decay": 0.0016722444122252624, "PreprocessorSelector:truncated_svd:target_dim": 151}, {"CreateDataLoader:batch_size": 442, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 467, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0008298747674239372, "OptimizerSelector:adam:weight_decay": 0.0067071038164946365, "PreprocessorSelector:truncated_svd:target_dim": 115}, {"CreateDataLoader:batch_size": 140, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", 
"OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 423, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0006164392898567234, "OptimizerSelector:adam:weight_decay": 0.006605449457495538, "PreprocessorSelector:truncated_svd:target_dim": 240}, {"CreateDataLoader:batch_size": 48, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 529, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.020107910011636462, "OptimizerSelector:sgd:momentum": 0.5818716367708677, "OptimizerSelector:sgd:weight_decay": 0.003995594064278902}, {"CreateDataLoader:batch_size": 168, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 349, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, 
"NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0019867054473724295, "OptimizerSelector:adam:weight_decay": 0.0067889732830148704, "NetworkSelector:shapedresnet:max_dropout": 0.8992826006547855}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 278, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.001178107244651597, "OptimizerSelector:adam:weight_decay": 0.010815452216436712}, {"CreateDataLoader:batch_size": 163, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 171, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.002654293880282279, "OptimizerSelector:adam:weight_decay": 0.010374059713414468, "NetworkSelector:shapedresnet:max_dropout": 0.6341848343636569}, {"CreateDataLoader:batch_size": 150, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", 
"ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 314, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.003106362796390374, "OptimizerSelector:adam:weight_decay": 0.010492136888557045, "NetworkSelector:shapedresnet:max_dropout": 0.7133813761319248, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.2808341606307928}, {"CreateDataLoader:batch_size": 151, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 313, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.0013791902792817097, "OptimizerSelector:adam:weight_decay": 0.0016536079820230513, "PreprocessorSelector:truncated_svd:target_dim": 147}, {"CreateDataLoader:batch_size": 42, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 86, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, 
"NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0021530021937535334, "OptimizerSelector:adam:weight_decay": 0.008386657635007597, "PreprocessorSelector:truncated_svd:target_dim": 151, "NetworkSelector:shapedresnet:max_dropout": 0.6296079567189131, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.034431265307095615}] diff --git a/autoPyTorch/core/hpbandster_extensions/hyperband_ext.py b/autoPyTorch/core/hpbandster_extensions/hyperband_ext.py deleted file mode 100644 index 06636c286..000000000 --- a/autoPyTorch/core/hpbandster_extensions/hyperband_ext.py +++ /dev/null @@ -1,17 +0,0 @@ - -from hpbandster.optimizers.hyperband import HyperBand -from autoPyTorch.core.hpbandster_extensions.run_with_time import run_with_time - -class HyperBandExt(HyperBand): - def run_until(self, runtime=1, n_iterations=float("inf"), min_n_workers=1, iteration_kwargs = {},): - """ - Parameters: - ----------- - runtime: int - time for this run in seconds - n_iterations: - the number of hyperband iterations to run - min_n_workers: int - minimum number of workers before starting the run - """ - return run_with_time(self, runtime, n_iterations, min_n_workers, iteration_kwargs) \ No newline at end of file diff --git a/autoPyTorch/core/hpbandster_extensions/portfolio_bohb_ext.py b/autoPyTorch/core/hpbandster_extensions/portfolio_bohb_ext.py deleted file mode 100644 index c43795548..000000000 --- a/autoPyTorch/core/hpbandster_extensions/portfolio_bohb_ext.py +++ /dev/null @@ -1,120 +0,0 @@ -import os -import time -import math -import copy -import json -import logging -import numpy as np - -import ConfigSpace as CS -from hpbandster.core.master import Master -from hpbandster.optimizers.iterations import SuccessiveHalving -from hpbandster.optimizers.config_generators.bohb import BOHB as BOHB_CG - -from autoPyTorch.core.hpbandster_extensions.run_with_time import run_with_time - -def get_portfolio(portfolio_type): - dirname = os.path.dirname(os.path.abspath(__file__)) - portfolio_file = os.path.join(dirname, portfolio_type+"_portfolio.json") - - with open(portfolio_file, "r") as f: - portfolio_configs = json.load(f) - return portfolio_configs - - -class PortfolioBOHB_CG(BOHB_CG): - def __init__(self, initial_configs=None, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.initial_configs = initial_configs - - def get_config(self, budget): - - # return a portfolio member first - if len(self.initial_configs) > 0 and True: - c = self.initial_configs.pop(0) - return (c, {'portfolio_member': True}) - - return (super().get_config(budget)) - - def new_result(self, job): - # notify ensemble script or something - super().new_result(job) - - -class PortfolioBOHB(Master): - def __init__(self, configspace = None, - eta=3, min_budget=0.01, max_budget=1, - min_points_in_model = None, top_n_percent=15, - num_samples = 64, random_fraction=1/3, bandwidth_factor=3, - min_bandwidth=1e-3, - portfolio_type="greedy", - **kwargs ): - - if configspace is None: - raise ValueError("You have to provide a valid CofigSpace object") - - portfolio_configs = get_portfolio(portfolio_type=portfolio_type) - - cg = PortfolioBOHB_CG(initial_configs=portfolio_configs, - configspace = configspace, - min_points_in_model = min_points_in_model, - top_n_percent=top_n_percent, - num_samples = num_samples, - random_fraction=random_fraction, - bandwidth_factor=bandwidth_factor, - min_bandwidth = min_bandwidth) - - 
super().__init__(config_generator=cg, **kwargs) - - # Hyperband related stuff - self.eta = eta - self.min_budget = min_budget - self.max_budget = max_budget - - # precompute some HB stuff - self.max_SH_iter = -int(np.log(min_budget/max_budget)/np.log(eta)) + 1 - self.budgets = max_budget * np.power(eta, -np.linspace(self.max_SH_iter-1, 0, self.max_SH_iter)) - - self.config.update({ - 'eta' : eta, - 'min_budget' : min_budget, - 'max_budget' : max_budget, - 'budgets' : self.budgets, - 'max_SH_iter': self.max_SH_iter, - 'min_points_in_model' : min_points_in_model, - 'top_n_percent' : top_n_percent, - 'num_samples' : num_samples, - 'random_fraction' : random_fraction, - 'bandwidth_factor' : bandwidth_factor, - 'min_bandwidth': min_bandwidth}) - - def get_next_iteration(self, iteration, iteration_kwargs={}): - - # number of 'SH runs' - s = self.max_SH_iter - 1 - (iteration%self.max_SH_iter) - # number of configurations in that bracket - n0 = int(np.floor((self.max_SH_iter)/(s+1)) * self.eta**s) - ns = [max(int(n0*(self.eta**(-i))), 1) for i in range(s+1)] - - return(SuccessiveHalving(HPB_iter=iteration, num_configs=ns, budgets=self.budgets[(-s-1):], config_sampler=self.config_generator.get_config, **iteration_kwargs)) - - def load_portfolio_configs(self): - with open(self.portfolio_dir, "r") as f: - configs = json.load(f) - return configs - - -class PortfolioBOHBExt(PortfolioBOHB): - def run_until(self, runtime=1, n_iterations=float("inf"), min_n_workers=1, iteration_kwargs = {},): - """ - Parameters: - ----------- - runtime: int - time for this run in seconds - n_iterations: - the number of hyperband iterations to run - min_n_workers: int - minimum number of workers before starting the run - """ - return run_with_time(self, runtime, n_iterations, min_n_workers, iteration_kwargs) diff --git a/autoPyTorch/core/hpbandster_extensions/run_with_time.py b/autoPyTorch/core/hpbandster_extensions/run_with_time.py deleted file mode 100644 index 68e8a1e38..000000000 --- a/autoPyTorch/core/hpbandster_extensions/run_with_time.py +++ /dev/null @@ -1,94 +0,0 @@ -from hpbandster.core.result import Result -from hpbandster.core.dispatcher import Job -import copy -import time - -def run_with_time(self, runtime=1, n_iterations=float("inf"), min_n_workers=1, iteration_kwargs = {},): - """ - custom run method of Master in hpbandster submodule - - Parameters: - ----------- - runtime: int - time for this run in seconds - n_iterations: int - maximum number of iterations - min_n_workers: int - minimum number of workers before starting the run - """ - - self.wait_for_workers(min_n_workers) - - iteration_kwargs.update({'result_logger': self.result_logger}) - - if self.time_ref is None: - self.time_ref = time.time() - self.config['time_ref'] = self.time_ref - - self.logger.info('HBMASTER: starting run at %s'%(str(self.time_ref))) - - self.thread_cond.acquire() - - start_time = time.time() - - while True: - - self._queue_wait() - - # Check if timelimit is reached - if (runtime < time.time() - start_time): - self.logger.info('HBMASTER: Timelimit reached: wait for remaining %i jobs'%self.num_running_jobs) - break - - next_run = None - # find a new run to schedule - for i in self.active_iterations(): - next_run = self.iterations[i].get_next_run() - if not next_run is None: break - - if next_run is not None: - self.logger.debug('HBMASTER: schedule new run for iteration %i'%i) - self._submit_job(*next_run) - continue - elif n_iterations > 0: - next_HPB_iter = len(self.iterations) + (self.iterations[0].HPB_iter if 
len(self.iterations) > 0 else 0) - self.iterations.append(self.get_next_iteration(next_HPB_iter, iteration_kwargs)) - n_iterations -= 1 - continue - - # at this point there is no imediate run that can be scheduled, - # so wait for some job to finish if there are active iterations - if self.active_iterations(): - self.thread_cond.wait() - else: - break - - # clean up / cancel remaining iteration runs - next_run = True - n_canceled = 0 - while next_run is not None: - next_run = None - for i in self.active_iterations(): - next_run = self.iterations[i].get_next_run() - if not next_run is None: - config_id, config, budget = next_run - job = Job(config_id, config=config, budget=budget, working_directory=self.working_directory) - self.iterations[job.id[0]].register_result(job) # register dummy job - will be interpreted as canceled job - n_canceled += 1 - break - - self.logger.debug('HBMASTER: Canceled %i remaining runs'%n_canceled) - - # wait for remaining jobs - while self.num_running_jobs > 0: - self.thread_cond.wait(60) - self.logger.debug('HBMASTER: Job finished: wait for remaining %i jobs'%self.num_running_jobs) - - self.thread_cond.release() - - for i in self.warmstart_iteration: - i.fix_timestamps(self.time_ref) - - ws_data = [i.data for i in self.warmstart_iteration] - - return Result([copy.deepcopy(i.data) for i in self.iterations] + ws_data, self.config) diff --git a/autoPyTorch/core/hpbandster_extensions/simple_portfolio.json b/autoPyTorch/core/hpbandster_extensions/simple_portfolio.json deleted file mode 100644 index 3a208b3c5..000000000 --- a/autoPyTorch/core/hpbandster_extensions/simple_portfolio.json +++ /dev/null @@ -1 +0,0 @@ -[{"CreateDataLoader:batch_size": 439, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 582, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.00046235883321873317, "OptimizerSelector:adam:weight_decay": 0.023542340482179413, "PreprocessorSelector:truncated_svd:target_dim": 235, "TrainNode:mixup:alpha": 0.629997950828498}, {"CreateDataLoader:batch_size": 472, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", 
"ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 122, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0054858638199374215, "OptimizerSelector:adam:weight_decay": 0.0002523445285068334, "TrainNode:mixup:alpha": 0.09189163910542086, "NetworkSelector:shapedmlpnet:max_dropout": 0.6250336673744067}, {"CreateDataLoader:batch_size": 26, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 582, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.00016321317055670558, "OptimizerSelector:sgd:momentum": 0.2371909843510024, "OptimizerSelector:sgd:weight_decay": 0.0016674225312644055, "NetworkSelector:shapedmlpnet:max_dropout": 0.4026568521749192}, {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 468, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.005718443234994137, 
"OptimizerSelector:adam:weight_decay": 0.09695987887448426, "PreprocessorSelector:truncated_svd:target_dim": 67, "TrainNode:mixup:alpha": 0.8071051956187791, "NetworkSelector:shapedresnet:max_dropout": 0.8635418545594287, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7471216427371846}, {"CreateDataLoader:batch_size": 445, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 129, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.025638328755952233, "OptimizerSelector:sgd:momentum": 0.7264236460120405, "OptimizerSelector:sgd:weight_decay": 0.07662347599376629}, {"CreateDataLoader:batch_size": 94, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 205, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.009428064332106297, "OptimizerSelector:adam:weight_decay": 0.012311364446470033}, {"CreateDataLoader:batch_size": 18, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", 
"NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 175, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.0007165031492459263, "OptimizerSelector:sgd:momentum": 0.3038581892141401, "OptimizerSelector:sgd:weight_decay": 0.07419042939598824, "PreprocessorSelector:truncated_svd:target_dim": 13, "NetworkSelector:shapedmlpnet:max_dropout": 0.6598718282675555}, {"CreateDataLoader:batch_size": 409, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 319, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.00020713774276681103, "OptimizerSelector:adam:weight_decay": 0.07018171677772647, "PreprocessorSelector:truncated_svd:target_dim": 159}, {"CreateDataLoader:batch_size": 16, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 204, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", 
"NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.05537370682126812, "OptimizerSelector:adam:weight_decay": 0.08619610547506606, "TrainNode:mixup:alpha": 0.4728114030085989, "NetworkSelector:shapedmlpnet:max_dropout": 0.10614249566987}, {"CreateDataLoader:batch_size": 267, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 400, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.024020267100099302, "OptimizerSelector:sgd:momentum": 0.1934421787669936, "OptimizerSelector:sgd:weight_decay": 0.09729992785693502, "NetworkSelector:shapedresnet:max_dropout": 0.43218371350888096, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.9425111433832147}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 201, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.015232504956212976, "OptimizerSelector:adam:weight_decay": 9.906036909600088e-05}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", 
"NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 103, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 4, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.005280283815656177, "OptimizerSelector:adam:weight_decay": 0.03490526984610669}, {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 501, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0019988351675271627, "OptimizerSelector:sgd:momentum": 0.5605511303582469, "OptimizerSelector:sgd:weight_decay": 0.09388925887219764, "PreprocessorSelector:truncated_svd:target_dim": 17, "NetworkSelector:shapedresnet:max_dropout": 0.027287223517468817, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5220512472525107}, {"CreateDataLoader:batch_size": 118, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 828, 
"NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.0002608774566125682, "OptimizerSelector:sgd:momentum": 0.6715942049447821, "OptimizerSelector:sgd:weight_decay": 0.09219127657746905, "NetworkSelector:shapedmlpnet:max_dropout": 0.35329155557035585}, {"CreateDataLoader:batch_size": 59, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 727, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0014779537824625817, "OptimizerSelector:adam:weight_decay": 0.07760673100144247, "PreprocessorSelector:truncated_svd:target_dim": 172, "NetworkSelector:shapedmlpnet:max_dropout": 0.0050535765165276025}, {"CreateDataLoader:batch_size": 34, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 238, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.005257584544960766, "OptimizerSelector:adam:weight_decay": 0.010494915267590934, "PreprocessorSelector:truncated_svd:target_dim": 197, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5543854385339433}, {"CreateDataLoader:batch_size": 41, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": 
"cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 306, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 4, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.0006685935047527848, "OptimizerSelector:sgd:momentum": 0.19715266916993127, "OptimizerSelector:sgd:weight_decay": 0.0191352924535996, "TrainNode:mixup:alpha": 0.8540597949343145}, {"CreateDataLoader:batch_size": 49, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 171, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.00032886793022793, "OptimizerSelector:adam:weight_decay": 0.013722203605391487, "TrainNode:mixup:alpha": 0.7538761884612464, "NetworkSelector:shapedmlpnet:max_dropout": 0.9230245355464833}, {"CreateDataLoader:batch_size": 69, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 209, "NetworkSelector:shapedresnet:num_groups": 4, 
"NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0005072626778940518, "OptimizerSelector:adam:weight_decay": 0.046165592068353294, "PreprocessorSelector:truncated_svd:target_dim": 189, "NetworkSelector:shapedresnet:max_dropout": 0.42844247595107343, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.8646861447635343}, {"CreateDataLoader:batch_size": 183, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 354, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.001780112494729604, "OptimizerSelector:adam:weight_decay": 0.004224029178574147, "NetworkSelector:shapedresnet:max_dropout": 0.27204101593048097, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.4412292309825137}, {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0011631297606181654, "OptimizerSelector:sgd:momentum": 0.9758171524776871, "OptimizerSelector:sgd:weight_decay": 0.012285985223647503, 
"NetworkSelector:shapedresnet:max_dropout": 0.3092463846587796, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.0007643854781880233}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 278, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.001178107244651597, "OptimizerSelector:adam:weight_decay": 0.010815452216436712}, {"CreateDataLoader:batch_size": 248, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 217, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.07077000523767582, "OptimizerSelector:sgd:momentum": 0.9180259111403218, "OptimizerSelector:sgd:weight_decay": 0.03926776334776571, "NetworkSelector:shapedresnet:max_dropout": 0.13891546732868326, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.275228444744125}, {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": 
"none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0011631297606181654, "OptimizerSelector:sgd:momentum": 0.9758171524776871, "OptimizerSelector:sgd:weight_decay": 0.012285985223647503, "NetworkSelector:shapedresnet:max_dropout": 0.3092463846587796, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.0007643854781880233}, {"CreateDataLoader:batch_size": 255, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 41, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.011526647986073339, "OptimizerSelector:adam:weight_decay": 0.031290291410446765, "NetworkSelector:shapedresnet:max_dropout": 0.7662454727603789, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.30409463597128383}, {"CreateDataLoader:batch_size": 168, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", 
"NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 349, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0019867054473724295, "OptimizerSelector:adam:weight_decay": 0.0067889732830148704, "NetworkSelector:shapedresnet:max_dropout": 0.8992826006547855}, {"CreateDataLoader:batch_size": 449, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 344, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.02812568059684062, "OptimizerSelector:adam:weight_decay": 0.03873372197869346}, {"CreateDataLoader:batch_size": 103, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 558, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.0006633676361506362, "OptimizerSelector:sgd:momentum": 0.1944912551047625, "OptimizerSelector:sgd:weight_decay": 0.03905005927739427, "PreprocessorSelector:truncated_svd:target_dim": 81, "TrainNode:mixup:alpha": 0.5133003494679066, "NetworkSelector:shapedmlpnet:max_dropout": 0.348256681750747}, {"CreateDataLoader:batch_size": 81, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", 
"InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 148, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.02448198397255897, "OptimizerSelector:adam:weight_decay": 0.006169551510309072, "PreprocessorSelector:truncated_svd:target_dim": 153, "NetworkSelector:shapedresnet:max_dropout": 0.0169198147537577, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.6057800516973848}, {"CreateDataLoader:batch_size": 16, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 192, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0019217317096942039, "OptimizerSelector:adam:weight_decay": 0.004015630395502329, "NetworkSelector:shapedresnet:max_dropout": 0.9588615460196043}, {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", 
"ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 55, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.028323492962412076, "OptimizerSelector:adam:weight_decay": 0.0907653881635229, "NetworkSelector:shapedresnet:max_dropout": 0.2796211794498489}, {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 234, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.02558034833374044, "OptimizerSelector:adam:weight_decay": 0.07419625049953248}, {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 796, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.06584106160121614, "OptimizerSelector:sgd:momentum": 0.9044497880344563, "OptimizerSelector:sgd:weight_decay": 0.05979401888132041}, {"CreateDataLoader:batch_size": 60, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": 
"default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 290, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.00020060142055000713, "OptimizerSelector:adam:weight_decay": 0.0018320003468984575, "TrainNode:mixup:alpha": 0.8448753109694546, "NetworkSelector:shapedmlpnet:max_dropout": 0.023271935735825866}, {"CreateDataLoader:batch_size": 106, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 249, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0001324440461008619, "OptimizerSelector:sgd:momentum": 0.25798942154240156, "OptimizerSelector:sgd:weight_decay": 0.05755785974958902, "TrainNode:mixup:alpha": 0.9797697328616091, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5785197821401383}, {"CreateDataLoader:batch_size": 67, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", 
"TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 93, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0018096039818770826, "OptimizerSelector:adam:weight_decay": 0.06852509784467198, "PreprocessorSelector:truncated_svd:target_dim": 175, "TrainNode:mixup:alpha": 0.41730480236712697, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.1981014890848788}, {"CreateDataLoader:batch_size": 163, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 171, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.002654293880282279, "OptimizerSelector:adam:weight_decay": 0.010374059713414468, "NetworkSelector:shapedresnet:max_dropout": 0.6341848343636569}, {"CreateDataLoader:batch_size": 442, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 467, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, 
"NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0008298747674239372, "OptimizerSelector:adam:weight_decay": 0.0067071038164946365, "PreprocessorSelector:truncated_svd:target_dim": 115}, {"CreateDataLoader:batch_size": 26, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 268, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.007648194557734052, "OptimizerSelector:adam:weight_decay": 0.05069462826843516, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.22914240499187666}, {"CreateDataLoader:batch_size": 22, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 204, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.0048685372142690305, "OptimizerSelector:sgd:momentum": 0.47074095479004696, "OptimizerSelector:sgd:weight_decay": 0.031998307053765024, "TrainNode:mixup:alpha": 0.29617671840307525}, {"CreateDataLoader:batch_size": 159, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", 
"LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 259, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0007790465032701478, "OptimizerSelector:adam:weight_decay": 0.0016722444122252624, "TrainNode:mixup:alpha": 0.3186468647803825}, {"CreateDataLoader:batch_size": 20, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 211, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.01073543535661538, "OptimizerSelector:sgd:momentum": 0.80194124039067, "OptimizerSelector:sgd:weight_decay": 0.013915417149259158, "TrainNode:mixup:alpha": 0.25294755547434644, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.35185897985172915}, {"CreateDataLoader:batch_size": 431, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", 
"LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 378, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0015297724620340046, "OptimizerSelector:adam:weight_decay": 0.09632662021878678, "NetworkSelector:shapedmlpnet:max_dropout": 0.947970211217681}, {"CreateDataLoader:batch_size": 40, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 225, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 1, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.021855108489323143, "OptimizerSelector:sgd:momentum": 0.13561732398817308, "OptimizerSelector:sgd:weight_decay": 0.04805588965631032, "TrainNode:mixup:alpha": 0.24204501586252747}, {"CreateDataLoader:batch_size": 66, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 55, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.00046360426150906695, "OptimizerSelector:sgd:momentum": 0.5668972901050637, "OptimizerSelector:sgd:weight_decay": 0.07853174986187968, "TrainNode:mixup:alpha": 0.3243624597261865, "NetworkSelector:shapedresnet:max_dropout": 0.8447096076020696, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5384405925945437}, 
{"CreateDataLoader:batch_size": 165, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 438, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.07331878780908542, "OptimizerSelector:sgd:momentum": 0.44665514022476815, "OptimizerSelector:sgd:weight_decay": 0.006911333726469374}, {"CreateDataLoader:batch_size": 44, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 502, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.001167120004492945, "OptimizerSelector:adam:weight_decay": 0.002631594543964365, "TrainNode:mixup:alpha": 0.8623462528380441, "NetworkSelector:shapedresnet:max_dropout": 0.6909421420661686, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.18863680095531088}, {"CreateDataLoader:batch_size": 109, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": 
"none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 176, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.005061758091295525, "OptimizerSelector:sgd:momentum": 0.28276061796023044, "OptimizerSelector:sgd:weight_decay": 0.025240051199586642, "TrainNode:mixup:alpha": 0.1954294811093188, "NetworkSelector:shapedresnet:max_dropout": 0.9700199890883123, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.8468288014900353}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 430, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.0007200270296740166, "OptimizerSelector:sgd:momentum": 0.1087271293207601, "OptimizerSelector:sgd:weight_decay": 0.057549428928153595, "NetworkSelector:shapedresnet:max_dropout": 0.16497226789895247, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7272354411432771}, {"CreateDataLoader:batch_size": 315, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, 
"LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 329, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.001464587050130148, "OptimizerSelector:adam:weight_decay": 0.0357316386544697, "TrainNode:mixup:alpha": 0.833294144042645, "NetworkSelector:shapedresnet:max_dropout": 0.568481348902715, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7896255606380758}, {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 468, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.004820484133908542, "OptimizerSelector:sgd:momentum": 0.11676391314627578, "OptimizerSelector:sgd:weight_decay": 0.08006526094133483, "PreprocessorSelector:truncated_svd:target_dim": 67, "NetworkSelector:shapedresnet:max_dropout": 0.8635418545594287, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7471216427371846}, {"CreateDataLoader:batch_size": 106, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 240, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": 
false, "OptimizerSelector:adam:learning_rate": 0.00013123925883518525, "OptimizerSelector:adam:weight_decay": 0.009372347244778492, "TrainNode:mixup:alpha": 0.9797697328616091}, {"CreateDataLoader:batch_size": 407, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 373, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.022629871228371002, "OptimizerSelector:adam:weight_decay": 0.05512356196551538, "PreprocessorSelector:truncated_svd:target_dim": 60, "NetworkSelector:shapedresnet:max_dropout": 0.7314944078365041}, {"CreateDataLoader:batch_size": 78, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 152, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0008132243995896875, "OptimizerSelector:adam:weight_decay": 0.055797977989254906, "NetworkSelector:shapedresnet:max_dropout": 0.5812041125598189, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7993887486819252}, {"CreateDataLoader:batch_size": 95, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": 
"cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 347, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.016333124910018332, "OptimizerSelector:adam:weight_decay": 0.02363972732021642, "PreprocessorSelector:truncated_svd:target_dim": 58, "NetworkSelector:shapedmlpnet:max_dropout": 0.5739636616341546}, {"CreateDataLoader:batch_size": 48, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 529, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.020107910011636462, "OptimizerSelector:sgd:momentum": 0.5818716367708677, "OptimizerSelector:sgd:weight_decay": 0.003995594064278902}, {"CreateDataLoader:batch_size": 74, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 127, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, 
"NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0009288613009879144, "OptimizerSelector:adam:weight_decay": 0.05852099102743707, "NetworkSelector:shapedresnet:max_dropout": 0.738882230193691, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.22384249668015016}, {"CreateDataLoader:batch_size": 378, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 182, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.027192706625040735, "OptimizerSelector:sgd:momentum": 0.5163359154582704, "OptimizerSelector:sgd:weight_decay": 0.035972655344725255, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.9048748716739771}, {"CreateDataLoader:batch_size": 299, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 279, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0007471732018616978, "OptimizerSelector:adam:weight_decay": 0.0005438753720314742}, {"CreateDataLoader:batch_size": 67, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": 
"cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 93, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0018096039818770826, "OptimizerSelector:adam:weight_decay": 0.06852509784467198, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.1981014890848788}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 146, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.018451403915605256, "OptimizerSelector:adam:weight_decay": 0.008623815271764488, "PreprocessorSelector:truncated_svd:target_dim": 61, "NetworkSelector:shapedresnet:max_dropout": 0.9196284212476861, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.9978529392794797}, {"CreateDataLoader:batch_size": 198, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", 
"TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 223, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.04889335143655566, "OptimizerSelector:sgd:momentum": 0.3151448209863167, "OptimizerSelector:sgd:weight_decay": 0.012085510759079881, "NetworkSelector:shapedresnet:max_dropout": 0.5431664302380945, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.2867116496064306}, {"CreateDataLoader:batch_size": 36, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 70, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.011437807197899053, "OptimizerSelector:adam:weight_decay": 0.007952907310089416, "PreprocessorSelector:truncated_svd:target_dim": 215, "NetworkSelector:shapedresnet:max_dropout": 0.23999752152289966}, {"CreateDataLoader:batch_size": 153, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 106, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": 
"funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.06414577410502037, "OptimizerSelector:adam:weight_decay": 0.0037776734771791986, "TrainNode:mixup:alpha": 0.034689171105874175, "NetworkSelector:shapedresnet:max_dropout": 0.5037798612915632, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.1164738743582362}, {"CreateDataLoader:batch_size": 164, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 133, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.01312669555979778, "OptimizerSelector:sgd:momentum": 0.5297501458607897, "OptimizerSelector:sgd:weight_decay": 0.012629748273898378, "TrainNode:mixup:alpha": 0.15983365034409336, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.40420190577514725}, {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.00036762513669437864, "OptimizerSelector:adam:weight_decay": 0.068578832348263, "NetworkSelector:shapedresnet:max_dropout": 0.3092463846587796, 
"NetworkSelector:shapedresnet:max_shake_drop_probability": 0.0007643854781880233}, {"CreateDataLoader:batch_size": 102, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 172, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00012837520247311956, "OptimizerSelector:adam:weight_decay": 0.03031980431097341, "PreprocessorSelector:truncated_svd:target_dim": 241}, {"CreateDataLoader:batch_size": 140, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 423, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0006164392898567234, "OptimizerSelector:adam:weight_decay": 0.006605449457495538, "PreprocessorSelector:truncated_svd:target_dim": 240}, {"CreateDataLoader:batch_size": 82, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", 
"ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 85, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0463101572182028, "OptimizerSelector:adam:weight_decay": 0.0994741777534842, "NetworkSelector:shapedresnet:max_dropout": 0.4657049040319358}, {"CreateDataLoader:batch_size": 23, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 86, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.015135885395358036, "OptimizerSelector:sgd:momentum": 0.5415201275133706, "OptimizerSelector:sgd:weight_decay": 0.026874865791117496, "NetworkSelector:shapedmlpnet:max_dropout": 0.2658665766728675}, {"CreateDataLoader:batch_size": 363, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 259, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.05095877324797267, "OptimizerSelector:sgd:momentum": 0.9763924378541492, 
"OptimizerSelector:sgd:weight_decay": 0.02373229569054879, "PreprocessorSelector:truncated_svd:target_dim": 137, "NetworkSelector:shapedmlpnet:max_dropout": 0.5248331308558654}, {"CreateDataLoader:batch_size": 317, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 164, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00022844556850020554, "OptimizerSelector:adam:weight_decay": 0.07132734627442727, "PreprocessorSelector:truncated_svd:target_dim": 131, "NetworkSelector:shapedresnet:max_dropout": 0.02541912674409519, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.10789142699330445}, {"CreateDataLoader:batch_size": 376, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 353, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0039747002839877815, "OptimizerSelector:adam:weight_decay": 0.06779163262928257, "TrainNode:mixup:alpha": 0.26494532980241914, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.6484969372160396}, {"CreateDataLoader:batch_size": 150, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": 
"cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 314, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.003106362796390374, "OptimizerSelector:adam:weight_decay": 0.010492136888557045, "NetworkSelector:shapedresnet:max_dropout": 0.7133813761319248, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.2808341606307928}, {"CreateDataLoader:batch_size": 22, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 105, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.009366967912007734, "OptimizerSelector:sgd:momentum": 0.5835907376255355, "OptimizerSelector:sgd:weight_decay": 0.0021904146194185402, "TrainNode:mixup:alpha": 0.4947481980402333, "NetworkSelector:shapedmlpnet:max_dropout": 0.5619559976528329}, {"CreateDataLoader:batch_size": 205, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, 
"LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 68, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.015585895002050975, "OptimizerSelector:adam:weight_decay": 0.0006863335859090819, "PreprocessorSelector:truncated_svd:target_dim": 205, "TrainNode:mixup:alpha": 0.2017625998412078, "NetworkSelector:shapedresnet:max_dropout": 0.933900654090867}, {"CreateDataLoader:batch_size": 159, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 966, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.0007790465032701478, "OptimizerSelector:adam:weight_decay": 0.0016722444122252624, "PreprocessorSelector:truncated_svd:target_dim": 151}, {"CreateDataLoader:batch_size": 255, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 435, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0001972606689184097, "OptimizerSelector:adam:weight_decay": 0.042116551424255175, "PreprocessorSelector:truncated_svd:target_dim": 87, "NetworkSelector:shapedmlpnet:max_dropout": 0.8346256718973729}, {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": 
"default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 125, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.005618969089441542, "OptimizerSelector:adam:weight_decay": 0.013433598503872749, "PreprocessorSelector:truncated_svd:target_dim": 209, "TrainNode:mixup:alpha": 0.26339747282612025, "NetworkSelector:shapedresnet:max_dropout": 0.4054128659393603}, {"CreateDataLoader:batch_size": 25, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 165, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0008796239831156432, "OptimizerSelector:sgd:momentum": 0.6635431863443884, "OptimizerSelector:sgd:weight_decay": 0.08773727386473432, "NetworkSelector:shapedresnet:max_dropout": 0.6562293317171314, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.13184097502750214}, {"CreateDataLoader:batch_size": 30, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", 
"PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 48, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.02373903815702463, "OptimizerSelector:adam:weight_decay": 0.06957946378504122, "NetworkSelector:shapedresnet:max_dropout": 0.4086372250132624}, {"CreateDataLoader:batch_size": 49, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 67, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00032886793022793, "OptimizerSelector:adam:weight_decay": 0.013722203605391487, "PreprocessorSelector:truncated_svd:target_dim": 196}, {"CreateDataLoader:batch_size": 70, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 67, "NetworkSelector:shapedresnet:num_groups": 4, 
"NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00030282134081965027, "OptimizerSelector:adam:weight_decay": 0.08189550276227578, "PreprocessorSelector:truncated_svd:target_dim": 168, "TrainNode:mixup:alpha": 0.17076026218006835, "NetworkSelector:shapedresnet:max_dropout": 0.8172469352734951}, {"CreateDataLoader:batch_size": 191, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 435, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.00014688942127411792, "OptimizerSelector:sgd:momentum": 0.6986457141740332, "OptimizerSelector:sgd:weight_decay": 0.08154337881750333, "PreprocessorSelector:truncated_svd:target_dim": 190, "TrainNode:mixup:alpha": 0.7622639661557122}, {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 158, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.030988368382148756, "OptimizerSelector:sgd:momentum": 0.6835896232041176, "OptimizerSelector:sgd:weight_decay": 0.06737310540562418, "PreprocessorSelector:truncated_svd:target_dim": 152, "TrainNode:mixup:alpha": 0.27411977516685215, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.35242963021640883}, 
{"CreateDataLoader:batch_size": 371, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 95, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.006190610434988198, "OptimizerSelector:adam:weight_decay": 0.08467974277540907, "PreprocessorSelector:truncated_svd:target_dim": 206, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.8076239088577185}, {"CreateDataLoader:batch_size": 400, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 155, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0017839041331735258, "OptimizerSelector:adam:weight_decay": 0.045518323556858406, "PreprocessorSelector:truncated_svd:target_dim": 121}, {"CreateDataLoader:batch_size": 67, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", 
"ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 147, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 1, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.00041054120168588934, "OptimizerSelector:sgd:momentum": 0.8894275753154599, "OptimizerSelector:sgd:weight_decay": 0.002748485443860637, "TrainNode:mixup:alpha": 0.41730480236712697, "NetworkSelector:shapedmlpnet:max_dropout": 0.5586898284457517}, {"CreateDataLoader:batch_size": 42, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 86, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0021530021937535334, "OptimizerSelector:adam:weight_decay": 0.008386657635007597, "NetworkSelector:shapedresnet:max_dropout": 0.6296079567189131, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.034431265307095615}, {"CreateDataLoader:batch_size": 35, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": 
"funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0009817976291636144, "OptimizerSelector:adam:weight_decay": 0.024124099975400798, "NetworkSelector:shapedresnet:max_dropout": 0.4021687882782366}, {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 72, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0008342696363487985, "OptimizerSelector:adam:weight_decay": 0.09061881600977395, "TrainNode:mixup:alpha": 0.2065367770580181, "NetworkSelector:shapedresnet:max_dropout": 0.35788353506890797, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.24266089214992287}, {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 103, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.06584106160121614, "OptimizerSelector:sgd:momentum": 0.9044497880344563, "OptimizerSelector:sgd:weight_decay": 0.05979401888132041, "PreprocessorSelector:truncated_svd:target_dim": 237}, {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", 
"InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 284, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.006693660906585942, "OptimizerSelector:sgd:momentum": 0.15137752883636504, "OptimizerSelector:sgd:weight_decay": 0.09055643503146099, "NetworkSelector:shapedresnet:max_dropout": 0.47165130569082925}, {"CreateDataLoader:batch_size": 82, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 252, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0008973419851764589, "OptimizerSelector:adam:weight_decay": 0.054917410540895634, "PreprocessorSelector:truncated_svd:target_dim": 244, "NetworkSelector:shapedresnet:max_dropout": 0.35461937430747353, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.09049558353514076}, {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", 
"PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 424, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.010207979744757036, "OptimizerSelector:sgd:momentum": 0.8261931972212537, "OptimizerSelector:sgd:weight_decay": 0.089750853247459, "TrainNode:mixup:alpha": 0.5578253037149105, "NetworkSelector:shapedresnet:max_dropout": 0.6919839045242188}, {"CreateDataLoader:batch_size": 194, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 485, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0003134819964586975, "OptimizerSelector:adam:weight_decay": 0.007912897171989085, "PreprocessorSelector:truncated_svd:target_dim": 237, "NetworkSelector:shapedresnet:max_dropout": 0.13907682103099803}, {"CreateDataLoader:batch_size": 50, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", 
"NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 167, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.03176506773195294, "OptimizerSelector:sgd:momentum": 0.5121854756782205, "OptimizerSelector:sgd:weight_decay": 0.05419340001639463}, {"CreateDataLoader:batch_size": 52, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 436, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0007127194739504882, "OptimizerSelector:adam:weight_decay": 0.03834228244278504}] \ No newline at end of file diff --git a/autoPyTorch/core/hpbandster_extensions/simple_portfolio_dict.json b/autoPyTorch/core/hpbandster_extensions/simple_portfolio_dict.json deleted file mode 100644 index 0b2d59653..000000000 --- a/autoPyTorch/core/hpbandster_extensions/simple_portfolio_dict.json +++ /dev/null @@ -1 +0,0 @@ -{"75212": {"CreateDataLoader:batch_size": 439, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 582, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.00046235883321873317, "OptimizerSelector:adam:weight_decay": 0.023542340482179413, 
"PreprocessorSelector:truncated_svd:target_dim": 235, "TrainNode:mixup:alpha": 0.629997950828498}, "167185": {"CreateDataLoader:batch_size": 472, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 122, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0054858638199374215, "OptimizerSelector:adam:weight_decay": 0.0002523445285068334, "TrainNode:mixup:alpha": 0.09189163910542086, "NetworkSelector:shapedmlpnet:max_dropout": 0.6250336673744067}, "75178": {"CreateDataLoader:batch_size": 26, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 582, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.00016321317055670558, "OptimizerSelector:sgd:momentum": 0.2371909843510024, "OptimizerSelector:sgd:weight_decay": 0.0016674225312644055, "NetworkSelector:shapedmlpnet:max_dropout": 0.4026568521749192}, "75239": {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", 
"TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 468, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.005718443234994137, "OptimizerSelector:adam:weight_decay": 0.09695987887448426, "PreprocessorSelector:truncated_svd:target_dim": 67, "TrainNode:mixup:alpha": 0.8071051956187791, "NetworkSelector:shapedresnet:max_dropout": 0.8635418545594287, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7471216427371846}, "168337": {"CreateDataLoader:batch_size": 445, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 129, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.025638328755952233, "OptimizerSelector:sgd:momentum": 0.7264236460120405, "OptimizerSelector:sgd:weight_decay": 0.07662347599376629}, "75171": {"CreateDataLoader:batch_size": 94, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 205, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", 
"NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.009428064332106297, "OptimizerSelector:adam:weight_decay": 0.012311364446470033}, "167181": {"CreateDataLoader:batch_size": 18, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 175, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.0007165031492459263, "OptimizerSelector:sgd:momentum": 0.3038581892141401, "OptimizerSelector:sgd:weight_decay": 0.07419042939598824, "PreprocessorSelector:truncated_svd:target_dim": 13, "NetworkSelector:shapedmlpnet:max_dropout": 0.6598718282675555}, "167201": {"CreateDataLoader:batch_size": 409, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 319, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.00020713774276681103, "OptimizerSelector:adam:weight_decay": 0.07018171677772647, "PreprocessorSelector:truncated_svd:target_dim": 159}, "189887": {"CreateDataLoader:batch_size": 16, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": 
"shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 204, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.05537370682126812, "OptimizerSelector:adam:weight_decay": 0.08619610547506606, "TrainNode:mixup:alpha": 0.4728114030085989, "NetworkSelector:shapedmlpnet:max_dropout": 0.10614249566987}, "146593": {"CreateDataLoader:batch_size": 267, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 400, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.024020267100099302, "OptimizerSelector:sgd:momentum": 0.1934421787669936, "OptimizerSelector:sgd:weight_decay": 0.09729992785693502, "NetworkSelector:shapedresnet:max_dropout": 0.43218371350888096, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.9425111433832147}, "3044": {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", 
"NetworkSelector:shapedmlpnet:max_units": 201, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.015232504956212976, "OptimizerSelector:adam:weight_decay": 9.906036909600088e-05}, "75196": {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 103, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 4, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.005280283815656177, "OptimizerSelector:adam:weight_decay": 0.03490526984610669}, "3048": {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 501, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0019988351675271627, "OptimizerSelector:sgd:momentum": 0.5605511303582469, "OptimizerSelector:sgd:weight_decay": 0.09388925887219764, "PreprocessorSelector:truncated_svd:target_dim": 17, "NetworkSelector:shapedresnet:max_dropout": 0.027287223517468817, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5220512472525107}, "126029": {"CreateDataLoader:batch_size": 118, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": 
"cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 828, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.0002608774566125682, "OptimizerSelector:sgd:momentum": 0.6715942049447821, "OptimizerSelector:sgd:weight_decay": 0.09219127657746905, "NetworkSelector:shapedmlpnet:max_dropout": 0.35329155557035585}, "189841": {"CreateDataLoader:batch_size": 59, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 727, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0014779537824625817, "OptimizerSelector:adam:weight_decay": 0.07760673100144247, "PreprocessorSelector:truncated_svd:target_dim": 172, "NetworkSelector:shapedmlpnet:max_dropout": 0.0050535765165276025}, "75235": {"CreateDataLoader:batch_size": 34, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 238, "NetworkSelector:shapedresnet:num_groups": 5, 
"NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.005257584544960766, "OptimizerSelector:adam:weight_decay": 0.010494915267590934, "PreprocessorSelector:truncated_svd:target_dim": 197, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5543854385339433}, "189882": {"CreateDataLoader:batch_size": 41, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 306, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 4, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.0006685935047527848, "OptimizerSelector:sgd:momentum": 0.19715266916993127, "OptimizerSelector:sgd:weight_decay": 0.0191352924535996, "TrainNode:mixup:alpha": 0.8540597949343145}, "2336": {"CreateDataLoader:batch_size": 49, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 171, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.00032886793022793, "OptimizerSelector:adam:weight_decay": 0.013722203605391487, "TrainNode:mixup:alpha": 0.7538761884612464, "NetworkSelector:shapedmlpnet:max_dropout": 0.9230245355464833}, "168798": {"CreateDataLoader:batch_size": 69, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", 
"NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 209, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0005072626778940518, "OptimizerSelector:adam:weight_decay": 0.046165592068353294, "PreprocessorSelector:truncated_svd:target_dim": 189, "NetworkSelector:shapedresnet:max_dropout": 0.42844247595107343, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.8646861447635343}, "189874": {"CreateDataLoader:batch_size": 183, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 354, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.001780112494729604, "OptimizerSelector:adam:weight_decay": 0.004224029178574147, "NetworkSelector:shapedresnet:max_dropout": 0.27204101593048097, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.4412292309825137}, "75250": {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", 
"TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0011631297606181654, "OptimizerSelector:sgd:momentum": 0.9758171524776871, "OptimizerSelector:sgd:weight_decay": 0.012285985223647503, "NetworkSelector:shapedresnet:max_dropout": 0.3092463846587796, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.0007643854781880233}, "75142": {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 278, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.001178107244651597, "OptimizerSelector:adam:weight_decay": 0.010815452216436712}, "75213": {"CreateDataLoader:batch_size": 248, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 217, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.07077000523767582, "OptimizerSelector:sgd:momentum": 
0.9180259111403218, "OptimizerSelector:sgd:weight_decay": 0.03926776334776571, "NetworkSelector:shapedresnet:max_dropout": 0.13891546732868326, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.275228444744125}, "189908": {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0011631297606181654, "OptimizerSelector:sgd:momentum": 0.9758171524776871, "OptimizerSelector:sgd:weight_decay": 0.012285985223647503, "NetworkSelector:shapedresnet:max_dropout": 0.3092463846587796, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.0007643854781880233}, "146679": {"CreateDataLoader:batch_size": 255, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 41, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.011526647986073339, "OptimizerSelector:adam:weight_decay": 0.031290291410446765, "NetworkSelector:shapedresnet:max_dropout": 0.7662454727603789, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.30409463597128383}, "75161": {"CreateDataLoader:batch_size": 168, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", 
"InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 349, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0019867054473724295, "OptimizerSelector:adam:weight_decay": 0.0067889732830148704, "NetworkSelector:shapedresnet:max_dropout": 0.8992826006547855}, "189881": {"CreateDataLoader:batch_size": 449, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 344, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.02812568059684062, "OptimizerSelector:adam:weight_decay": 0.03873372197869346}, "146602": {"CreateDataLoader:batch_size": 103, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", 
"LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 558, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.0006633676361506362, "OptimizerSelector:sgd:momentum": 0.1944912551047625, "OptimizerSelector:sgd:weight_decay": 0.03905005927739427, "PreprocessorSelector:truncated_svd:target_dim": 81, "TrainNode:mixup:alpha": 0.5133003494679066, "NetworkSelector:shapedmlpnet:max_dropout": 0.348256681750747}, "166882": {"CreateDataLoader:batch_size": 81, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 148, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.02448198397255897, "OptimizerSelector:adam:weight_decay": 0.006169551510309072, "PreprocessorSelector:truncated_svd:target_dim": 153, "NetworkSelector:shapedresnet:max_dropout": 0.0169198147537577, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.6057800516973848}, "260": {"CreateDataLoader:batch_size": 16, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 192, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": 
false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0019217317096942039, "OptimizerSelector:adam:weight_decay": 0.004015630395502329, "NetworkSelector:shapedresnet:max_dropout": 0.9588615460196043}, "3055": {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 55, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.028323492962412076, "OptimizerSelector:adam:weight_decay": 0.0907653881635229, "NetworkSelector:shapedresnet:max_dropout": 0.2796211794498489}, "189862": {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 234, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.02558034833374044, "OptimizerSelector:adam:weight_decay": 0.07419625049953248}, "3945": {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", 
"ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 796, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.06584106160121614, "OptimizerSelector:sgd:momentum": 0.9044497880344563, "OptimizerSelector:sgd:weight_decay": 0.05979401888132041}, "253": {"CreateDataLoader:batch_size": 60, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 290, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.00020060142055000713, "OptimizerSelector:adam:weight_decay": 0.0018320003468984575, "TrainNode:mixup:alpha": 0.8448753109694546, "NetworkSelector:shapedmlpnet:max_dropout": 0.023271935735825866}, "167085": {"CreateDataLoader:batch_size": 106, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 249, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0001324440461008619, "OptimizerSelector:sgd:momentum": 0.25798942154240156, "OptimizerSelector:sgd:weight_decay": 0.05755785974958902, 
"TrainNode:mixup:alpha": 0.9797697328616091, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5785197821401383}, "75108": {"CreateDataLoader:batch_size": 67, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 93, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0018096039818770826, "OptimizerSelector:adam:weight_decay": 0.06852509784467198, "PreprocessorSelector:truncated_svd:target_dim": 175, "TrainNode:mixup:alpha": 0.41730480236712697, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.1981014890848788}, "189829": {"CreateDataLoader:batch_size": 163, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 171, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.002654293880282279, "OptimizerSelector:adam:weight_decay": 0.010374059713414468, "NetworkSelector:shapedresnet:max_dropout": 0.6341848343636569}, "2356": {"CreateDataLoader:batch_size": 442, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": 
"shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 467, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0008298747674239372, "OptimizerSelector:adam:weight_decay": 0.0067071038164946365, "PreprocessorSelector:truncated_svd:target_dim": 115}, "271": {"CreateDataLoader:batch_size": 26, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 268, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.007648194557734052, "OptimizerSelector:adam:weight_decay": 0.05069462826843516, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.22914240499187666}, "166872": {"CreateDataLoader:batch_size": 22, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", 
"NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 204, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.0048685372142690305, "OptimizerSelector:sgd:momentum": 0.47074095479004696, "OptimizerSelector:sgd:weight_decay": 0.031998307053765024, "TrainNode:mixup:alpha": 0.29617671840307525}, "167083": {"CreateDataLoader:batch_size": 159, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 259, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0007790465032701478, "OptimizerSelector:adam:weight_decay": 0.0016722444122252624, "TrainNode:mixup:alpha": 0.3186468647803825}, "167149": {"CreateDataLoader:batch_size": 20, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 211, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.01073543535661538, "OptimizerSelector:sgd:momentum": 0.80194124039067, "OptimizerSelector:sgd:weight_decay": 0.013915417149259158, "TrainNode:mixup:alpha": 0.25294755547434644, 
"NetworkSelector:shapedresnet:max_shake_drop_probability": 0.35185897985172915}, "166958": {"CreateDataLoader:batch_size": 431, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 378, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0015297724620340046, "OptimizerSelector:adam:weight_decay": 0.09632662021878678, "NetworkSelector:shapedmlpnet:max_dropout": 0.947970211217681}, "167104": {"CreateDataLoader:batch_size": 40, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 225, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 1, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.021855108489323143, "OptimizerSelector:sgd:momentum": 0.13561732398817308, "OptimizerSelector:sgd:weight_decay": 0.04805588965631032, "TrainNode:mixup:alpha": 0.24204501586252747}, "189354": {"CreateDataLoader:batch_size": 66, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 
50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 55, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.00046360426150906695, "OptimizerSelector:sgd:momentum": 0.5668972901050637, "OptimizerSelector:sgd:weight_decay": 0.07853174986187968, "TrainNode:mixup:alpha": 0.3243624597261865, "NetworkSelector:shapedresnet:max_dropout": 0.8447096076020696, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.5384405925945437}, "75223": {"CreateDataLoader:batch_size": 165, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 438, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.07331878780908542, "OptimizerSelector:sgd:momentum": 0.44665514022476815, "OptimizerSelector:sgd:weight_decay": 0.006911333726469374}, "167096": {"CreateDataLoader:batch_size": 44, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 502, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, 
"NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.001167120004492945, "OptimizerSelector:adam:weight_decay": 0.002631594543964365, "TrainNode:mixup:alpha": 0.8623462528380441, "NetworkSelector:shapedresnet:max_dropout": 0.6909421420661686, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.18863680095531088}, "189866": {"CreateDataLoader:batch_size": 109, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 176, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.005061758091295525, "OptimizerSelector:sgd:momentum": 0.28276061796023044, "OptimizerSelector:sgd:weight_decay": 0.025240051199586642, "TrainNode:mixup:alpha": 0.1954294811093188, "NetworkSelector:shapedresnet:max_dropout": 0.9700199890883123, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.8468288014900353}, "126025": {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 430, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.0007200270296740166, "OptimizerSelector:sgd:momentum": 0.1087271293207601, "OptimizerSelector:sgd:weight_decay": 0.057549428928153595, "NetworkSelector:shapedresnet:max_dropout": 0.16497226789895247, 
"NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7272354411432771}, "75173": {"CreateDataLoader:batch_size": 315, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 329, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.001464587050130148, "OptimizerSelector:adam:weight_decay": 0.0357316386544697, "TrainNode:mixup:alpha": 0.833294144042645, "NetworkSelector:shapedresnet:max_dropout": 0.568481348902715, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7896255606380758}, "189828": {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 468, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.004820484133908542, "OptimizerSelector:sgd:momentum": 0.11676391314627578, "OptimizerSelector:sgd:weight_decay": 0.08006526094133483, "PreprocessorSelector:truncated_svd:target_dim": 67, "NetworkSelector:shapedresnet:max_dropout": 0.8635418545594287, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7471216427371846}, "189786": {"CreateDataLoader:batch_size": 106, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", 
"LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 240, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.00013123925883518525, "OptimizerSelector:adam:weight_decay": 0.009372347244778492, "TrainNode:mixup:alpha": 0.9797697328616091}, "75225": {"CreateDataLoader:batch_size": 407, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 373, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.022629871228371002, "OptimizerSelector:adam:weight_decay": 0.05512356196551538, "PreprocessorSelector:truncated_svd:target_dim": 60, "NetworkSelector:shapedresnet:max_dropout": 0.7314944078365041}, "75166": {"CreateDataLoader:batch_size": 78, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, 
"NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 152, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0008132243995896875, "OptimizerSelector:adam:weight_decay": 0.055797977989254906, "NetworkSelector:shapedresnet:max_dropout": 0.5812041125598189, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.7993887486819252}, "189861": {"CreateDataLoader:batch_size": 95, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 347, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.016333124910018332, "OptimizerSelector:adam:weight_decay": 0.02363972732021642, "PreprocessorSelector:truncated_svd:target_dim": 58, "NetworkSelector:shapedmlpnet:max_dropout": 0.5739636616341546}, "75210": {"CreateDataLoader:batch_size": 48, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 529, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 3, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.020107910011636462, "OptimizerSelector:sgd:momentum": 0.5818716367708677, "OptimizerSelector:sgd:weight_decay": 0.003995594064278902}, "256": {"CreateDataLoader:batch_size": 74, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", 
"LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 127, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0009288613009879144, "OptimizerSelector:adam:weight_decay": 0.05852099102743707, "NetworkSelector:shapedresnet:max_dropout": 0.738882230193691, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.22384249668015016}, "168330": {"CreateDataLoader:batch_size": 378, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 182, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.027192706625040735, "OptimizerSelector:sgd:momentum": 0.5163359154582704, "OptimizerSelector:sgd:weight_decay": 0.035972655344725255, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.9048748716739771}, "189909": {"CreateDataLoader:batch_size": 299, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", 
"ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 279, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0007471732018616978, "OptimizerSelector:adam:weight_decay": 0.0005438753720314742}, "166959": {"CreateDataLoader:batch_size": 67, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 93, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0018096039818770826, "OptimizerSelector:adam:weight_decay": 0.06852509784467198, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.1981014890848788}, "167090": {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 146, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, 
"NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.018451403915605256, "OptimizerSelector:adam:weight_decay": 0.008623815271764488, "PreprocessorSelector:truncated_svd:target_dim": 61, "NetworkSelector:shapedresnet:max_dropout": 0.9196284212476861, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.9978529392794797}, "190158": {"CreateDataLoader:batch_size": 198, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 223, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.04889335143655566, "OptimizerSelector:sgd:momentum": 0.3151448209863167, "OptimizerSelector:sgd:weight_decay": 0.012085510759079881, "NetworkSelector:shapedresnet:max_dropout": 0.5431664302380945, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.2867116496064306}, "75116": {"CreateDataLoader:batch_size": 36, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 70, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.011437807197899053, "OptimizerSelector:adam:weight_decay": 0.007952907310089416, "PreprocessorSelector:truncated_svd:target_dim": 215, "NetworkSelector:shapedresnet:max_dropout": 0.23999752152289966}, "75185": 
{"CreateDataLoader:batch_size": 153, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 106, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.06414577410502037, "OptimizerSelector:adam:weight_decay": 0.0037776734771791986, "TrainNode:mixup:alpha": 0.034689171105874175, "NetworkSelector:shapedresnet:max_dropout": 0.5037798612915632, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.1164738743582362}, "168332": {"CreateDataLoader:batch_size": 164, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 133, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.01312669555979778, "OptimizerSelector:sgd:momentum": 0.5297501458607897, "OptimizerSelector:sgd:weight_decay": 0.012629748273898378, "TrainNode:mixup:alpha": 0.15983365034409336, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.40420190577514725}, "2342": {"CreateDataLoader:batch_size": 62, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", 
"NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.00036762513669437864, "OptimizerSelector:adam:weight_decay": 0.068578832348263, "NetworkSelector:shapedresnet:max_dropout": 0.3092463846587796, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.0007643854781880233}, "146212": {"CreateDataLoader:batch_size": 102, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 172, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00012837520247311956, "OptimizerSelector:adam:weight_decay": 0.03031980431097341, "PreprocessorSelector:truncated_svd:target_dim": 241}, "75134": {"CreateDataLoader:batch_size": 140, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, 
"LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 423, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0006164392898567234, "OptimizerSelector:adam:weight_decay": 0.006605449457495538, "PreprocessorSelector:truncated_svd:target_dim": 240}, "75199": {"CreateDataLoader:batch_size": 82, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 85, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0463101572182028, "OptimizerSelector:adam:weight_decay": 0.0994741777534842, "NetworkSelector:shapedresnet:max_dropout": 0.4657049040319358}, "167184": {"CreateDataLoader:batch_size": 23, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 86, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.015135885395358036, "OptimizerSelector:sgd:momentum": 0.5415201275133706, "OptimizerSelector:sgd:weight_decay": 0.026874865791117496, "NetworkSelector:shapedmlpnet:max_dropout": 0.2658665766728675}, "189865": {"CreateDataLoader:batch_size": 363, 
"Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 259, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.05095877324797267, "OptimizerSelector:sgd:momentum": 0.9763924378541492, "OptimizerSelector:sgd:weight_decay": 0.02373229569054879, "PreprocessorSelector:truncated_svd:target_dim": 137, "NetworkSelector:shapedmlpnet:max_dropout": 0.5248331308558654}, "168335": {"CreateDataLoader:batch_size": 317, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 164, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00022844556850020554, "OptimizerSelector:adam:weight_decay": 0.07132734627442727, "PreprocessorSelector:truncated_svd:target_dim": 131, "NetworkSelector:shapedresnet:max_dropout": 0.02541912674409519, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.10789142699330445}, "189899": {"CreateDataLoader:batch_size": 376, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", 
"ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 353, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0039747002839877815, "OptimizerSelector:adam:weight_decay": 0.06779163262928257, "TrainNode:mixup:alpha": 0.26494532980241914, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.6484969372160396}, "75131": {"CreateDataLoader:batch_size": 150, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 314, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.003106362796390374, "OptimizerSelector:adam:weight_decay": 0.010492136888557045, "NetworkSelector:shapedresnet:max_dropout": 0.7133813761319248, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.2808341606307928}, "166866": {"CreateDataLoader:batch_size": 22, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 105, 
"NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.009366967912007734, "OptimizerSelector:sgd:momentum": 0.5835907376255355, "OptimizerSelector:sgd:weight_decay": 0.0021904146194185402, "TrainNode:mixup:alpha": 0.4947481980402333, "NetworkSelector:shapedmlpnet:max_dropout": 0.5619559976528329}, "189906": {"CreateDataLoader:batch_size": 205, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 68, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.015585895002050975, "OptimizerSelector:adam:weight_decay": 0.0006863335859090819, "PreprocessorSelector:truncated_svd:target_dim": 205, "TrainNode:mixup:alpha": 0.2017625998412078, "NetworkSelector:shapedresnet:max_dropout": 0.933900654090867}, "168795": {"CreateDataLoader:batch_size": 159, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 966, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 5, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:adam:learning_rate": 0.0007790465032701478, "OptimizerSelector:adam:weight_decay": 0.0016722444122252624, "PreprocessorSelector:truncated_svd:target_dim": 151}, "211723": {"CreateDataLoader:batch_size": 255, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", 
"LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 435, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 6, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:adam:learning_rate": 0.0001972606689184097, "OptimizerSelector:adam:weight_decay": 0.042116551424255175, "PreprocessorSelector:truncated_svd:target_dim": 87, "NetworkSelector:shapedmlpnet:max_dropout": 0.8346256718973729}, "167168": {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 125, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.005618969089441542, "OptimizerSelector:adam:weight_decay": 0.013433598503872749, "PreprocessorSelector:truncated_svd:target_dim": 209, "TrainNode:mixup:alpha": 0.26339747282612025, "NetworkSelector:shapedresnet:max_dropout": 0.4054128659393603}, "168338": {"CreateDataLoader:batch_size": 25, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", 
"LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 165, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.0008796239831156432, "OptimizerSelector:sgd:momentum": 0.6635431863443884, "OptimizerSelector:sgd:weight_decay": 0.08773727386473432, "NetworkSelector:shapedresnet:max_dropout": 0.6562293317171314, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.13184097502750214}, "3053": {"CreateDataLoader:batch_size": 30, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 48, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.02373903815702463, "OptimizerSelector:adam:weight_decay": 0.06957946378504122, "NetworkSelector:shapedresnet:max_dropout": 0.4086372250132624}, "190154": {"CreateDataLoader:batch_size": 49, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 67, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, 
"NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00032886793022793, "OptimizerSelector:adam:weight_decay": 0.013722203605391487, "PreprocessorSelector:truncated_svd:target_dim": 196}, "126026": {"CreateDataLoader:batch_size": 70, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 67, "NetworkSelector:shapedresnet:num_groups": 4, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.00030282134081965027, "OptimizerSelector:adam:weight_decay": 0.08189550276227578, "PreprocessorSelector:truncated_svd:target_dim": 168, "TrainNode:mixup:alpha": 0.17076026218006835, "NetworkSelector:shapedresnet:max_dropout": 0.8172469352734951}, "75192": {"CreateDataLoader:batch_size": 191, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 435, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 2, "NetworkSelector:shapedmlpnet:use_dropout": false, "OptimizerSelector:sgd:learning_rate": 0.00014688942127411792, "OptimizerSelector:sgd:momentum": 0.6986457141740332, "OptimizerSelector:sgd:weight_decay": 0.08154337881750333, "PreprocessorSelector:truncated_svd:target_dim": 190, "TrainNode:mixup:alpha": 0.7622639661557122}, "167205": {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", 
"LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 158, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.030988368382148756, "OptimizerSelector:sgd:momentum": 0.6835896232041176, "OptimizerSelector:sgd:weight_decay": 0.06737310540562418, "PreprocessorSelector:truncated_svd:target_dim": 152, "TrainNode:mixup:alpha": 0.27411977516685215, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.35242963021640883}, "167161": {"CreateDataLoader:batch_size": 371, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 95, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.006190610434988198, "OptimizerSelector:adam:weight_decay": 0.08467974277540907, "PreprocessorSelector:truncated_svd:target_dim": 206, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.8076239088577185}, "2120": {"CreateDataLoader:batch_size": 400, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", 
"ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 155, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0017839041331735258, "OptimizerSelector:adam:weight_decay": 0.045518323556858406, "PreprocessorSelector:truncated_svd:target_dim": 121}, "189873": {"CreateDataLoader:batch_size": 67, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedmlpnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedmlpnet:activation": "relu", "NetworkSelector:shapedmlpnet:max_units": 147, "NetworkSelector:shapedmlpnet:mlp_shape": "funnel", "NetworkSelector:shapedmlpnet:num_layers": 1, "NetworkSelector:shapedmlpnet:use_dropout": true, "OptimizerSelector:sgd:learning_rate": 0.00041054120168588934, "OptimizerSelector:sgd:momentum": 0.8894275753154599, "OptimizerSelector:sgd:weight_decay": 0.002748485443860637, "TrainNode:mixup:alpha": 0.41730480236712697, "NetworkSelector:shapedmlpnet:max_dropout": 0.5586898284457517}, "167101": {"CreateDataLoader:batch_size": 42, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 86, "NetworkSelector:shapedresnet:num_groups": 3, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, 
"NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0021530021937535334, "OptimizerSelector:adam:weight_decay": 0.008386657635007597, "NetworkSelector:shapedresnet:max_dropout": 0.6296079567189131, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.034431265307095615}, "75112": {"CreateDataLoader:batch_size": 35, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 42, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0009817976291636144, "OptimizerSelector:adam:weight_decay": 0.024124099975400798, "NetworkSelector:shapedresnet:max_dropout": 0.4021687882782366}, "189875": {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 72, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0008342696363487985, "OptimizerSelector:adam:weight_decay": 0.09061881600977395, "TrainNode:mixup:alpha": 0.2065367770580181, "NetworkSelector:shapedresnet:max_dropout": 0.35788353506890797, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.24266089214992287}, "34539": {"CreateDataLoader:batch_size": 24, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", 
"InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 103, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:sgd:learning_rate": 0.06584106160121614, "OptimizerSelector:sgd:momentum": 0.9044497880344563, "OptimizerSelector:sgd:weight_decay": 0.05979401888132041, "PreprocessorSelector:truncated_svd:target_dim": 237}, "189872": {"CreateDataLoader:batch_size": 17, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 284, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.006693660906585942, "OptimizerSelector:sgd:momentum": 0.15137752883636504, "OptimizerSelector:sgd:weight_decay": 0.09055643503146099, "NetworkSelector:shapedresnet:max_dropout": 0.47165130569082925}, "75154": {"CreateDataLoader:batch_size": 82, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", 
"ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 1, "NetworkSelector:shapedresnet:max_units": 252, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": true, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0008973419851764589, "OptimizerSelector:adam:weight_decay": 0.054917410540895634, "PreprocessorSelector:truncated_svd:target_dim": 244, "NetworkSelector:shapedresnet:max_dropout": 0.35461937430747353, "NetworkSelector:shapedresnet:max_shake_drop_probability": 0.09049558353514076}, "75187": {"CreateDataLoader:batch_size": 21, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "mixup", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 2, "NetworkSelector:shapedresnet:max_units": 424, "NetworkSelector:shapedresnet:num_groups": 5, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.010207979744757036, "OptimizerSelector:sgd:momentum": 0.8261931972212537, "OptimizerSelector:sgd:weight_decay": 0.089750853247459, "TrainNode:mixup:alpha": 0.5578253037149105, "NetworkSelector:shapedresnet:max_dropout": 0.6919839045242188}, "75120": {"CreateDataLoader:batch_size": 194, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "truncated_svd", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", 
"NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 485, "NetworkSelector:shapedresnet:num_groups": 1, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": true, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": true, "OptimizerSelector:adam:learning_rate": 0.0003134819964586975, "OptimizerSelector:adam:weight_decay": 0.007912897171989085, "PreprocessorSelector:truncated_svd:target_dim": 237, "NetworkSelector:shapedresnet:max_dropout": 0.13907682103099803}, "189900": {"CreateDataLoader:batch_size": 50, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "sgd", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 167, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:sgd:learning_rate": 0.03176506773195294, "OptimizerSelector:sgd:momentum": 0.5121854756782205, "OptimizerSelector:sgd:weight_decay": 0.05419340001639463}, "3047": {"CreateDataLoader:batch_size": 52, "Imputation:strategy": "mean", "InitializationSelector:initialization_method": "default", "InitializationSelector:initializer:initialize_bias": "Yes", "LearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", "LossModuleSelector:loss_module": "cross_entropy_weighted", "NetworkSelector:network": "shapedresnet", "NormalizationStrategySelector:normalization_strategy": "standardize", "OptimizerSelector:optimizer": "adam", "PreprocessorSelector:preprocessor": "none", "ResamplingStrategySelector:over_sampling_method": "none", "ResamplingStrategySelector:target_size_strategy": "none", "ResamplingStrategySelector:under_sampling_method": "none", "TrainNode:batch_loss_computation_technique": "standard", "LearningrateSchedulerSelector:cosine_annealing:T_max": 50, "LearningrateSchedulerSelector:cosine_annealing:eta_min": 1e-08, "NetworkSelector:shapedresnet:activation": "relu", "NetworkSelector:shapedresnet:blocks_per_group": 3, "NetworkSelector:shapedresnet:max_units": 436, "NetworkSelector:shapedresnet:num_groups": 2, "NetworkSelector:shapedresnet:resnet_shape": "funnel", "NetworkSelector:shapedresnet:use_dropout": false, "NetworkSelector:shapedresnet:use_shake_drop": false, "NetworkSelector:shapedresnet:use_shake_shake": false, "OptimizerSelector:adam:learning_rate": 0.0007127194739504882, "OptimizerSelector:adam:weight_decay": 0.03834228244278504}} \ No newline at end of file diff --git 
a/autoPyTorch/core/presets/feature_classification/medium_cs.txt b/autoPyTorch/core/presets/feature_classification/medium_cs.txt deleted file mode 100644 index ec440db95..000000000 --- a/autoPyTorch/core/presets/feature_classification/medium_cs.txt +++ /dev/null @@ -1,6 +0,0 @@ -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -networks=[shapedresnet] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd,power_transformer] -target_size_strategies=[none,upsample,median] diff --git a/autoPyTorch/core/presets/feature_classification/tiny_cs.txt b/autoPyTorch/core/presets/feature_classification/tiny_cs.txt deleted file mode 100644 index 4056dec55..000000000 --- a/autoPyTorch/core/presets/feature_classification/tiny_cs.txt +++ /dev/null @@ -1,14 +0,0 @@ -embeddings=[none] -lr_scheduler=[cosine_annealing] -networks=[shapedresnet] -preprocessors=[truncated_svd] -target_size_strategies=[none] -over_sampling_methods=[none] -under_sampling_methods=[none] -batch_loss_computation_techniques=[standard] -imputation_strategies=[median] -initialization_methods=[default] -loss_modules=[cross_entropy_weighted] -normalization_strategies=[standardize] -optimizer=[sgd] -hyperparameter_search_space_updates=autoPyTorch/core/presets/tiny_cs_updates.txt \ No newline at end of file diff --git a/autoPyTorch/core/presets/feature_multilabel/medium_cs.txt b/autoPyTorch/core/presets/feature_multilabel/medium_cs.txt deleted file mode 100644 index bb8e1248e..000000000 --- a/autoPyTorch/core/presets/feature_multilabel/medium_cs.txt +++ /dev/null @@ -1,4 +0,0 @@ -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -networks=[shapedresnet] -preprocessors=[none,truncated_svd,power_transformer] diff --git a/autoPyTorch/core/presets/feature_multilabel/tiny_cs.txt b/autoPyTorch/core/presets/feature_multilabel/tiny_cs.txt deleted file mode 100644 index 24c71a2d5..000000000 --- a/autoPyTorch/core/presets/feature_multilabel/tiny_cs.txt +++ /dev/null @@ -1,11 +0,0 @@ -embeddings=[none] -lr_scheduler=[cosine_annealing] -networks=[shapedresnet] -preprocessors=[truncated_svd] -batch_loss_computation_techniques=[standard] -imputation_strategies=[median] -initialization_methods=[default] -loss_modules=[cross_entropy_weighted] -normalization_strategies=[standardize] -optimizer=[sgd] -hyperparameter_search_space_updates=autoPyTorch/core/presets/tiny_cs_updates.txt \ No newline at end of file diff --git a/autoPyTorch/core/presets/feature_regression/medium_cs.txt b/autoPyTorch/core/presets/feature_regression/medium_cs.txt deleted file mode 100644 index bb8e1248e..000000000 --- a/autoPyTorch/core/presets/feature_regression/medium_cs.txt +++ /dev/null @@ -1,4 +0,0 @@ -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -networks=[shapedresnet] -preprocessors=[none,truncated_svd,power_transformer] diff --git a/autoPyTorch/core/presets/feature_regression/tiny_cs.txt b/autoPyTorch/core/presets/feature_regression/tiny_cs.txt deleted file mode 100644 index 1019acc7c..000000000 --- a/autoPyTorch/core/presets/feature_regression/tiny_cs.txt +++ /dev/null @@ -1,11 +0,0 @@ -embeddings=[none] -lr_scheduler=[cosine_annealing] -networks=[shapedresnet] -preprocessors=[truncated_svd] -batch_loss_computation_techniques=[standard] -imputation_strategies=[median] -initialization_methods=[default] -loss_modules=[l1_loss] -normalization_strategies=[standardize] -optimizer=[sgd] -hyperparameter_search_space_updates=autoPyTorch/core/presets/tiny_cs_updates.txt diff --git a/autoPyTorch/core/presets/image_classification/medium_cs.txt 
b/autoPyTorch/core/presets/image_classification/medium_cs.txt deleted file mode 100644 index f9fb6096f..000000000 --- a/autoPyTorch/core/presets/image_classification/medium_cs.txt +++ /dev/null @@ -1,4 +0,0 @@ -lr_scheduler=[cosine_annealing, step] -networks=[resnet, mobilenet] -batch_loss_computation_techniques=[standard, mixup] -optimizer=[adamw, sgd] diff --git a/autoPyTorch/core/presets/image_classification/tiny_cs.txt b/autoPyTorch/core/presets/image_classification/tiny_cs.txt deleted file mode 100644 index 29a03617c..000000000 --- a/autoPyTorch/core/presets/image_classification/tiny_cs.txt +++ /dev/null @@ -1,4 +0,0 @@ -lr_scheduler=[cosine_annealing, step] -networks=[resnet] -batch_loss_computation_techniques=[standard] -optimizer=[adamw] diff --git a/autoPyTorch/core/presets/image_classification_multiple_datasets/medium_cs.txt b/autoPyTorch/core/presets/image_classification_multiple_datasets/medium_cs.txt deleted file mode 100644 index f9fb6096f..000000000 --- a/autoPyTorch/core/presets/image_classification_multiple_datasets/medium_cs.txt +++ /dev/null @@ -1,4 +0,0 @@ -lr_scheduler=[cosine_annealing, step] -networks=[resnet, mobilenet] -batch_loss_computation_techniques=[standard, mixup] -optimizer=[adamw, sgd] diff --git a/autoPyTorch/core/presets/image_classification_multiple_datasets/tiny_cs.txt b/autoPyTorch/core/presets/image_classification_multiple_datasets/tiny_cs.txt deleted file mode 100644 index 29a03617c..000000000 --- a/autoPyTorch/core/presets/image_classification_multiple_datasets/tiny_cs.txt +++ /dev/null @@ -1,4 +0,0 @@ -lr_scheduler=[cosine_annealing, step] -networks=[resnet] -batch_loss_computation_techniques=[standard] -optimizer=[adamw] diff --git a/autoPyTorch/core/presets/tiny_cs_updates.txt b/autoPyTorch/core/presets/tiny_cs_updates.txt deleted file mode 100644 index 7deb6f710..000000000 --- a/autoPyTorch/core/presets/tiny_cs_updates.txt +++ /dev/null @@ -1,11 +0,0 @@ -CreateDataLoader batch_size [125] -InitializationSelector initializer:initialize_bias ["No"] -LearningrateSchedulerSelector cosine_annealing:T_max [10] -LearningrateSchedulerSelector cosine_annealing:eta_min [2] -NetworkSelector shapedresnet:activation ["relu"] -NetworkSelector shapedresnet:max_shake_drop_probability [0.0,0.000001] -NetworkSelector shapedresnet:resnet_shape ["brick"] -NetworkSelector shapedresnet:use_dropout [False] -NetworkSelector shapedresnet:use_shake_drop [False] -NetworkSelector shapedresnet:use_shake_shake [False] -PreprocessorSelector truncated_svd:target_dim [100] diff --git a/autoPyTorch/core/worker.py b/autoPyTorch/core/worker.py deleted file mode 100644 index 02384d0ea..000000000 --- a/autoPyTorch/core/worker.py +++ /dev/null @@ -1,140 +0,0 @@ -import logging -import torch -import time -import numpy as np -import Pyro4 -from hpbandster.core.worker import Worker - -from autoPyTorch.components.training.budget_types import BudgetTypeTime - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class AutoNetWorker(Worker): - """Worker that evaluates the hyperparameter configurations of the ML-pipelines""" - - def __init__(self, pipeline, pipeline_config, - X_train, Y_train, X_valid, Y_valid, dataset_info, budget_type, max_budget, - shutdownables, use_pynisher, *args, **kwargs): - """Initialize the worker. 
- - Arguments: - pipeline {Pipeline} -- The ML pipeline to evaluate - pipeline_config {dict} -- The configuration of the pipeline - X_train {array} -- The data used for training the neural networks - Y_train {array} -- The labels used for evaluating the neural networks - X_valid {array} -- The data used for evaluating the neural networks - Y_valid {array} -- The data used for evaluating the neural networks - dataset_info {DatasetInfo} -- Object containing basic information about the dataset - budget_type {BaseTrainingTechnique} -- The type of budget used for the optimization - max_budget {float} -- The maximum budget - shutdownables {list} -- For each element of the object, the shutdown() method is called when the worker is shutting down. - use_pynisher {bool} -- Whether to use pynisher to guarantee resource limits - """ - self.X_train = X_train #torch.from_numpy(X_train).float() - self.Y_train = Y_train #torch.from_numpy(Y_train).long() - self.X_valid = X_valid - self.Y_valid = Y_valid - self.dataset_info = dataset_info - self.shutdownables = shutdownables - - self.max_budget = max_budget - self.budget_type = budget_type - - self.pipeline = pipeline - self.pipeline_config = pipeline_config - - self.autonet_logger = logging.getLogger('autonet') - - # We can only use user defined limits (memory) if we have the required module 'resource' - not available on windows! - self.guarantee_limits = use_pynisher and module_exists("resource") and module_exists("pynisher") - if (not self.guarantee_limits): - self.autonet_logger.info("Can not guarantee memory and time limit because module 'resource' is not available") - - - super().__init__(*args, **kwargs) - - # OVERRIDE - def compute(self, config, budget, working_directory, config_id, **kwargs): - - self.autonet_logger.debug("Budget " + str(budget) + " config: " + str(config)) - - start_time = time.time() - self.autonet_logger.debug("Starting optimization!") - - # guarantee time and memory limits using pynisher - if self.guarantee_limits: - import pynisher - time_limit=None - - if self.budget_type == BudgetTypeTime: - grace_time = 10 - time_limit = int(budget + 240) - - # start optimization - limit_train = pynisher.enforce_limits(mem_in_mb=self.pipeline_config['memory_limit_mb'], wall_time_in_s=time_limit)(self.optimize_pipeline) - result = limit_train(config, config_id, budget, start_time) - - # check for exceptions - if (limit_train.exit_status == pynisher.TimeoutException): - raise Exception("Time limit reached. Took " + str((time.time()-start_time)) + " seconds with budget " + str(budget)) - elif (limit_train.exit_status == pynisher.MemorylimitException): - result = {"loss": 100000, "info":{"exception":"Memory limit exceeded"}} - return result - elif (limit_train.exit_status != 0): - self.autonet_logger.info('Exception occurred using config:\n' + str(config)) - raise Exception("Exception in train pipeline. 
Took " + str((time.time()-start_time)) + " seconds with budget " + str(budget)) - else: - result = self.optimize_pipeline(config, config_id, budget, start_time) - - loss = result['loss'] - info = result['info'] - self.autonet_logger.debug("Result: " + str(loss) + " info: " + str(info)) - - # that is not really elegant but we can want to achieve some kind of feedback - network_name = [v for k, v in config.items() if k.endswith('network')] or "None" - - self.autonet_logger.info("Training " + str(network_name) + " with budget " + str(budget) + " resulted in optimize-metric-loss: " + str(loss) + " took " + str((time.time()-start_time)) + " seconds") - - return result - - def optimize_pipeline(self, config, config_id, budget, optimize_start_time): - """Fit the pipeline using the sampled hyperparameter configuration. - - Arguments: - config {dict} -- The sampled hyperparameter configuration. - config_id {tuple} -- An ID for the configuration. Assigned by BOHB. - budget {float} -- The budget to evaluate the hyperparameter configuration. - optimize_start_time {float} -- The time when optimization started. - - Returns: - dict -- The result of fitting the pipeline. - """ - try: - self.autonet_logger.info("Fit optimization pipeline") - return self.pipeline.fit_pipeline(hyperparameter_config=config, pipeline_config=self.pipeline_config, - X_train=self.X_train, Y_train=self.Y_train, X_valid=self.X_valid, Y_valid=self.Y_valid, - budget=budget, budget_type=self.budget_type, max_budget=self.max_budget, optimize_start_time=optimize_start_time, - refit=False, rescore=False, hyperparameter_config_id=config_id, dataset_info=self.dataset_info) - except Exception as e: - if 'use_tensorboard_logger' in self.pipeline_config and self.pipeline_config['use_tensorboard_logger']: - import tensorboard_logger as tl - tl.log_value('Exceptions/' + str(e), budget, int(time.time())) - self.autonet_logger.info(str(e)) - raise e - - @Pyro4.expose - @Pyro4.oneway - def shutdown(self): - for s in self.shutdownables: - s.shutdown() - super().shutdown() - -def module_exists(module_name): - try: - __import__(module_name) - except ImportError: - return False - else: - return True diff --git a/autoPyTorch/core/worker_no_timelimit.py b/autoPyTorch/core/worker_no_timelimit.py deleted file mode 100644 index 564cc0094..000000000 --- a/autoPyTorch/core/worker_no_timelimit.py +++ /dev/null @@ -1,133 +0,0 @@ -import logging -import torch -import time -import random -from hpbandster.core.worker import Worker - -from autoPyTorch.components.training.image.budget_types import BudgetTypeTime - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -class ModuleWorkerNoTimeLimit(Worker): - def __init__(self, pipeline, pipeline_config, constant_hyperparameter, - X_train, Y_train, X_valid, Y_valid, budget_type, max_budget, working_directory, permutations=None, *args, **kwargs): - self.X_train = X_train #torch.from_numpy(X_train).float() - self.Y_train = Y_train #torch.from_numpy(Y_train).long() - self.X_valid = X_valid - self.Y_valid = Y_valid - - if permutations is None: - self.permutations = [idx for idx in range(len(X_train))] - else: - self.permutations = permutations - - self.max_budget = max_budget - self.budget_type = budget_type - - self.pipeline = pipeline - self.pipeline_config = pipeline_config - self.constant_hyperparameter = constant_hyperparameter - - self.working_directory = working_directory - - self.autonet_logger = logging.getLogger('autonet') - # self.end_time = None - - # 
We can only use user defined limits (memory) if we have the required module 'resource' - not available on windows! - self.guarantee_limits = module_exists("resource") and module_exists("pynisher") - if (not self.guarantee_limits): - self.autonet_logger.info("Can not guarantee memory and time limit because module 'resource' is not available") - - super().__init__(*args, **kwargs) - - def compute(self, config, budget, working_directory, config_id, **kwargs): - - start_time = time.time() - - self.autonet_logger.debug("Starting optimization!") - - config.update(self.constant_hyperparameter) - - self.autonet_logger.debug("Budget " + str(budget) + " config: " + str(config)) - - if self.guarantee_limits and self.budget_type == 'time': - import pynisher - - limit_train = pynisher.enforce_limits(mem_in_mb=self.pipeline_config['memory_limit_mb'], wall_time_in_s=int(budget * 4))(self.optimize_pipeline) - result = limit_train(config, budget, config_id, random.getstate()) - - if (limit_train.exit_status == pynisher.TimeoutException): - raise Exception("Time limit reached. Took " + str((time.time()-start_time)) + " seconds with budget " + str(budget)) - if (limit_train.exit_status == pynisher.MemorylimitException): - result = {"loss": 100000, "info":{}} - return result - elif (limit_train.exit_status != 0): - self.autonet_logger.info('Exception occurred using config:\n' + str(config)) - raise Exception("Exception in train pipeline. Took " + str((time.time()-start_time)) + " seconds with budget " + str(budget)) - - else: - result, randomstate = self.optimize_pipeline(config, budget, config_id, random.getstate()) - - random.setstate(randomstate) - - loss = result['loss'] - if 'losses' in result.keys(): - losses = result['losses'] - else: - losses = loss - info = result['info'] - - self.autonet_logger.debug("Result: " + str(loss) + " info: " + str(info)) - - # that is not really elegant but we can want to achieve some kind of feedback - network_name = [v for k, v in config.items() if k.endswith('network')] or "None" - - self.autonet_logger.info("Training " + str(network_name) + " with budget " + str(budget) + " resulted in score: " + str(loss) + " took " + str((time.time()-start_time)) + " seconds") - - if 'use_tensorboard_logger' in self.pipeline_config and self.pipeline_config['use_tensorboard_logger']: - import os - log_file = os.path.join(self.working_directory, "worker_logs_" + str(self.pipeline_config['task_id']), 'results.log') - sep = '\t' - with open(log_file, 'a+') as f: - f.write('Result: ' + str(round(loss, 2)) + sep + \ - 'Budget: ' + str(round(budget)) + '/' + str(round(self.pipeline_config['max_budget'])) + sep + \ - 'Used time: ' + str(round((time.time()-start_time))) + 'sec (' + str(round((time.time()-start_time)/budget, 2)) + 'x)' + sep + \ - 'ID: ' + str(config_id) + '\n') - - return { - 'loss': loss, - 'info': info, - 'losses': losses - } - - def optimize_pipeline(self, config, budget, config_id, random_state): - - random.setstate(random_state) - - if self.permutations is not None: - current_sh_run = config_id[0] - self.pipeline_config["dataset_order"] = self.permutations[current_sh_run%len(self.permutations)].tolist() - - try: - self.autonet_logger.info("Fit optimization pipeline") - return self.pipeline.fit_pipeline(hyperparameter_config=config, pipeline_config=self.pipeline_config, - X_train=self.X_train, Y_train=self.Y_train, X_valid=self.X_valid, Y_valid=self.Y_valid, - budget=budget, budget_type=self.budget_type, max_budget=self.max_budget, - config_id=config_id, 
working_directory=self.working_directory), random.getstate() - except Exception as e: - if 'use_tensorboard_logger' in self.pipeline_config and self.pipeline_config['use_tensorboard_logger']: - import tensorboard_logger as tl - tl.log_value('Exceptions/' + str(e), budget, int(time.time())) - #self.autonet_logger.exception('Exception occurred') - raise e - -def module_exists(module_name): - try: - __import__(module_name) - except ImportError: - return False - else: - return True diff --git a/autoPyTorch/data_management/data_converter.py b/autoPyTorch/data_management/data_converter.py deleted file mode 100644 index 24636cb3f..000000000 --- a/autoPyTorch/data_management/data_converter.py +++ /dev/null @@ -1,155 +0,0 @@ -import numpy as np - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class DataConverter(object): - def __init__(self, is_classification=None, - numerical_min_unique_values=3, - force_numerical=None, - force_categorical=None, - is_multilabel=None): - """ - Initialize the data_converter. - - Arguments: - X: NxM Matrix: N records with M features - Y: Vector of N labels. - is_classification: specifies, if it is a classification problem. None for autodetect. - numerical_min_unique_values: minimum number of unique values for a numerical feature. - A feature will be interpreted as categorical, if it has less. - force_numerical: Array of feature indices, which schould be treated as numerical. - force_categorical: Array of feature indices, which should be trated as categorical. - is_multilabel: True, if multivariable regression / multilabel classification - """ - self.is_classification = is_classification - self.numerical_min_unique_values= numerical_min_unique_values - self.force_numerical = force_numerical or [] - self.force_categorical = force_categorical or [] - self.is_multilabel = is_multilabel - - def convert(self, X, Y): - """ - Convert the data. - - Returns: - X_result: The converted X matrix, using one-hot-encoding for categorical features. - Y_result: The converted Y vector, using integers for categorical featues. - is_classification: If the problem is a classification problem. - """ - X_result, categorical = self.convert_matrix(X, self.force_categorical, self.force_numerical) - - if len(Y.shape) == 1 or Y.shape[1] == 1: - Y_result, Y_categorical = self.convert_matrix(Y.reshape(-1, 1), - [0] if self.is_classification else [], - [0] if self.is_classification == False else []) - self.is_classification = np.any(Y_categorical) - assert self.is_multilabel != True, "multilabel specified, but only 1-dim output vector given" - self.is_multilabel = False - else: - Y_result = self.check_multi_dim_output(Y) - - if Y_result.shape[1] == 1: - Y_result = np.reshape(Y_result, (-1, )) - elif not self.is_multilabel and self.is_classification: - Y_result = np.argmax(Y_result, axis=1) - return X_result, Y_result, self.is_classification, self.is_multilabel, categorical - - def convert_matrix(self, matrix, force_categorical, force_numerical): - """ - Covert the matrix in a matrix of floats. - Use one-hot-encoding for categorical features. - Features are categorical if at least one item is a string or it has more - unique values than specified numerical_min_unique_values - or it is listed in force_categorical. - - Arguments: - matrix: The matrix to convert. - force_cateogrical: The list of column indizes, which should be categorical. - force_numerical: The list of column indizes, which should be numerical. 
- - Result: - result: the converted matrix - categorical: boolean vector, that specifies which columns are categorical - """ - num_rows = len(matrix) - is_categorical = [] - len_values_and_indices = [] - result_width = 0 - - # iterate over the columns and get some data - for i in range(matrix.shape[1]): - - # check if it is categorical or numerical - matrix_column = matrix[0:num_rows, i] - if matrix.dtype == np.dtype("object"): - values_occurred = dict() - values = [] - indices = [] - for v in matrix_column: - if v not in values_occurred: - values_occurred[v] = len(values) - values.append(v) - indices.append(values_occurred[v]) - indices = np.array(indices) - values = np.array(values, dtype=object) - nan_indices = np.array([i for i, n in enumerate(matrix_column) if n == np.nan]) - valid_value_indices = np.array([i for i, n in enumerate(values) if n != np.nan]) - else: - values, indices = np.unique(matrix_column, return_inverse=True) - nan_indices = np.argwhere(np.isnan(matrix_column)).flatten() - valid_value_indices = np.argwhere(~np.isnan(values)).flatten() - - # check for missing values - # nan values are additional category in categorical features - if len(nan_indices) > 0: - values = np.append(values[valid_value_indices], np.nan) - indices[nan_indices] = values.shape[0] - 1 - - len_values_and_indices.append((len(values), indices)) - if len(values) == 1: - is_categorical.append(None) - elif i in force_categorical or i not in force_numerical and ( - len(values) < self.numerical_min_unique_values or - any(type(value) is str for value in values)): - # column is categorical - is_categorical.append(True) - result_width += 1 - else: - # column is numerical - is_categorical.append(False) - result_width += 1 - - # fill the result - result = np.zeros(shape=(num_rows, result_width), dtype='float32', order='F') - j = 0 - for i, is_cat in enumerate(is_categorical): - len_values, indices = len_values_and_indices[i] - if len_values == 1: - continue - if is_cat: - # column is categorical: convert to int - result[:, j] = indices - j += 1 - else: - # column is numerical - result[:, j] = matrix[:, i] - j += 1 - - return result.astype('float32', copy=False), [x for x in is_categorical if x is not None] - - - def check_multi_dim_output(self, Y): - Y = Y.astype('float32', copy=False) - unique = np.unique(Y) - if len(unique) == 2 and self.is_classification != False and 0 in unique and 1 in unique: - self.is_classification = True - if np.all(np.sum(Y, axis=1) == 1) and self.is_multilabel != True: - self.is_multilabel = False - else: - self.is_multilabel = True - else: - assert not np.any(np.isnan(Y)), "NaN in Y" - self.is_classification = False - return Y \ No newline at end of file diff --git a/autoPyTorch/data_management/data_loader.py b/autoPyTorch/data_management/data_loader.py deleted file mode 100644 index f7cac7380..000000000 --- a/autoPyTorch/data_management/data_loader.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import math -from PIL import Image -import requests -from io import BytesIO -from torchvision import transforms, utils - - - -class DataLoader(): - def __init__(self): - pass - - def load(self, url, size): - try: - response = requests.get(url) - img = Image.open(BytesIO(response.content)).convert('RGB') - except: - return None - t = transforms.Compose([transforms.Resize(size), - transforms.CenterCrop(size), - transforms.RandomHorizontalFlip(), - transforms.ToTensor(), - # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - res = (t(img) * 255).int() - return 
res.reshape((size*size*3)) - - def save_imagenet_subset(self, root, name, class_wnids, image_size, max_images=None): - with open(os.path.join(root, name) + '.data', 'w+') as data: - with open(os.path.join(root, name) + '.label', 'w+') as label: - for i, wnid in enumerate(class_wnids): - urls = requests.get('http://www.image-net.org/api/text/imagenet.synset.geturls?wnid=' + wnid).content - urls = urls.split(b"\n") - images = 0 - for u in range(len(urls)): - if max_images is not None and images+1 > max_images / len(class_wnids): - break - img = self.load(urls[u].decode('utf-8'), image_size) - if img is None: - continue - images += 1 - data.write(' '.join([str(rgb) for rgb in img.numpy()]) + '\n') - label.write(str(i) + '\n') - missing = math.floor(max_images/len(class_wnids)) - images - if missing > 0: - print('Wnid', wnid, 'needs', missing, 'more images.') \ No newline at end of file diff --git a/autoPyTorch/data_management/data_manager.py b/autoPyTorch/data_management/data_manager.py deleted file mode 100644 index b0e0d7777..000000000 --- a/autoPyTorch/data_management/data_manager.py +++ /dev/null @@ -1,278 +0,0 @@ -from __future__ import print_function, division -import torch -import torchvision -import torchvision.transforms as transforms -import numpy as np -import os -import sys -# from autoPyTorch.core.autonet_config import map_configs - -from autoPyTorch.data_management.data_reader import CSVReader, OpenMlReader, AutoMlReader -from sklearn.datasets import make_regression, make_multilabel_classification - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -from enum import Enum -class ProblemType(Enum): - FeatureClassification = 1 - ImageClassification = 2 - FeatureRegression = 3 - FeatureMultilabel = 4 - ImageClassificationMultipleDatasets = 5 - -class DataManager(object): - """ Load data from multiple sources and formants""" - - def __init__(self, verbose=0): - """Construct the DataManager - - Keyword Arguments: - verbose {bool} -- Whether to print stuff. (default: {0}) - """ - self.verbose = verbose - self.X_train, self.Y_train = None, None - self.X_test, self.Y_test = None, None - self.X_valid, self.Y_valid = None, None - self.is_classification = None - self.is_multilabel = None - self.metric = None - self.max_runtime = None - self.categorical_features = None - - def read_data(self, file_name, test_split=0.0, is_classification=None, random_seed=0, **kwargs): - """Read the data. - - Arguments: - file_name {str} -- The name of the file to load. Different Readers are associated with different filenames. 
- - Keyword Arguments: - test_split {float} -- Amount of data to use as test split (default: {0.0}) - is_classification {bool} -- Whether the data is a classification task (default: {None}) - random_seed {int} -- a random seed (default: {0}) - """ - print("Read:" + file_name) - reader = self._get_reader(file_name, is_classification) - reader.read() - self.categorical_features = reader.categorical_features - - if reader.is_multilabel: - self.problem_type = ProblemType.FeatureMultilabel - elif reader.is_classification: - self.problem_type = ProblemType.FeatureClassification - else: - self.problem_type = ProblemType.FeatureRegression - - self.X, self.Y = reader.X, reader.Y - self.X_valid, self.Y_valid = reader.X_valid, reader.Y_valid - self.X_test, self.Y_test = reader.X_test, reader.Y_test - self.max_runtime = reader.max_runtime - self.metric = reader.metric - self._split_data(test_split, random_seed) - - def _get_reader(self, file_name, is_classification): - """Get the reader associated with the filename. - - Arguments: - file_name {str} -- The file to load - is_classification {bool} -- Whether the data is a classification task or not - - Raises: - ValueError: The given file type is not supported - - Returns: - DataReader -- A reader that is able to read the data type - """ - if file_name.endswith(".csv"): - reader = CSVReader(file_name, is_classification=is_classification) - elif file_name.startswith("openml:"): - dataset_id = int(file_name.split(":")[1]) - reader = OpenMlReader(dataset_id, is_classification=is_classification) - elif file_name.endswith(".info"): - reader = AutoMlReader(file_name) - else: - raise ValueError("That filetype is not supported: " + file_name) - return reader - - def generate_classification(self, num_classes, num_features, num_samples, test_split=0.1, seed=0): - """Generate a classification task - - Arguments: - num_classes {int} -- Number of classes - num_features {int} -- Number of features - num_samples {int} -- Number of samples - - Keyword Arguments: - test_split {float} -- Size of test split (default: {0.1}) - seed {int} -- A random seed (default: {0}) - """ - #X, Y = make_classification(n_samples=800, n_features=num_feats, n_classes=num_classes, n_informative=4) - X, y = make_multilabel_classification( - n_samples=num_samples, n_features=num_features, n_classes=num_classes, n_labels=0.01, - length=50, allow_unlabeled=False, sparse=False, return_indicator='dense', - return_distributions=False, random_state=seed - ) - Y = np.argmax(y, axis=1) - self.categorical_features = [False] * num_features - self.problem_type = ProblemType.FeatureClassification - self.X, self.Y = X, Y - self._split_data(test_split, seed) - - def generate_regression(self, num_features, num_samples, test_split=0.1, seed=0): - """Generate a regression task - - Arguments: - num_features {int} -- Number of features - num_samples {int} -- Number of samples - - Keyword Arguments: - test_split {float} -- Size of test split (default: {0.1}) - seed {int} -- a random seed (default: {0}) - """ - X, Y = make_regression(n_samples=num_samples, n_features=num_features, random_state=seed) - self.categorical_features = [False] * num_features - self.problem_type = ProblemType.FeatureRegression - self.X, self.Y = X, Y - self._split_data(test_split, seed) - - def _split_data(self, test_split, seed): - """Split the data in test (, valid) and training set. 
- - Arguments: - test_split {[type]} -- [description] - seed {[type]} -- [description] - """ - valid_specified = self.X_valid is not None and self.Y_valid is not None - test_specified = self.X_test is not None and self.Y_test is not None - - if not valid_specified and not test_specified: - self.X, self.Y, self.X_train, self.Y_train, self.X_test, self.Y_test = deterministic_shuffle_and_split(self.X, self.Y, test_split, seed=seed) - return - if not test_specified: - # use validation set as test set - self.X_test = self.X_valid - self.Y_test = self.Y_valid - self.X_valid = None - self.Y_valid = None - self.X_train = self.X - self.Y_train = self.Y - - -class ImageManager(DataManager): - - def read_data(self, file_name, test_split=0.0, is_classification=None, **kwargs): - self.is_classification = True - self.is_multilabel = False - - if isinstance(file_name, list): - import numpy as np - arr = np.array(file_name) - self.X_train = arr - self.Y_train = np.array([0] * len(file_name)) - self.X_valid = self.Y_valid = self.X_test = self.Y_test = None - self.problem_type = ProblemType.ImageClassificationMultipleDatasets - elif file_name.endswith(".csv"): - import pandas as pd - import math - import numpy as np - self.data = np.array(pd.read_csv(file_name, header=None)) - - self.X_train = np.array(self.data[:,0]) - self.Y_train = np.array(self.data[:,1]) - - self.X_valid = self.Y_valid = self.X_test = self.Y_test = None - - if test_split > 0: - samples = self.X_train.shape[0] - indices = list(range(samples)) - np.random.shuffle(indices) - split = samples * test_split - test_indices, train_indices = indices[:math.ceil(split)], indices[math.floor(split):] - self.X_test, self.Y_test = self.X_train[test_indices], self.Y_train[test_indices] - self.X_train, self.Y_train = self.X_train[train_indices], self.Y_train[train_indices] - - self.problem_type = ProblemType.ImageClassification - - def generate_classification(self, problem="MNIST", test_split=0.1, force_download=False, train_size=-1, test_size=-1): - self.is_classification = True - data = None - conversion = False - if problem == "MNIST": - data = torchvision.datasets.MNIST - elif problem == "Fashion-MNIST": - data = torchvision.datasets.FashionMNIST - elif problem == "CIFAR": - conversion = True - data = torchvision.datasets.CIFAR10 - else: - raise ValueError("Dataset not supported: " + problem) - - - train_dataset = data(root='datasets/torchvision/' + problem + '/', - train=True, - transform=transforms.ToTensor(), - download=True) - - test_dataset = data(root='datasets/torchvision/' + problem + '/', - train=False, - transform=transforms.ToTensor()) - images_train = [] - labels_train = [] - - train_size = train_dataset.__len__() if train_size == -1 else min(train_size, train_dataset.__len__()) - test_size = test_dataset.__len__() if test_size == -1 else min(test_size, test_dataset.__len__()) - - for i in range(train_size): - sys.stdout.write("Reading " + problem + " train data ["+ str(train_size)+"] - progress: %d%% \r" % (int(100 * (i + 1)/ train_size) )) - sys.stdout.flush() - image, label = train_dataset.__getitem__(i) - if conversion: - label = torch.tensor(label) - images_train.append(image.numpy()) - labels_train.append(label.numpy()) - - self.X_train = np.array(images_train) - self.Y_train = np.array(labels_train) - - images_test = [] - labels_test = [] - print() - for i in range(test_size): - sys.stdout.write("Reading " + problem + " test data ["+ str(test_size)+"] - progress: %d%% \r" % (int(100 * (i + 1) / test_size) )) - sys.stdout.flush() - 
image, label = test_dataset.__getitem__(i) - if conversion: - label = torch.tensor(label) - images_test.append(image.numpy()) - labels_test.append(label.numpy()) - - self.problem_type = ProblemType.ImageClassification - self.X_test = np.array(images_test) - self.Y_test = np.array(labels_test) - - self.categorical_features = None - print() - -def deterministic_shuffle_and_split(X, Y, split, seed): - """Split the data deterministically given the seed - - Arguments: - X {array} -- The feature data - Y {array} -- The targets - split {float} -- The size of the split - seed {int} -- A random seed - - Returns: - tuple -- Tuple of full data and the two splits - """ - rng = np.random.RandomState(seed) - p = rng.permutation(X.shape[0]) - - X = X[p] - Y = Y[p] - if 0. < split < 1.: - split = int(split * X.shape[0]) - return X, Y, X[0:-split], Y[0:-split], X[-split:], Y[-split:] - else: - return X, Y, X, Y, None, None diff --git a/autoPyTorch/data_management/data_reader.py b/autoPyTorch/data_management/data_reader.py deleted file mode 100644 index 36ed25352..000000000 --- a/autoPyTorch/data_management/data_reader.py +++ /dev/null @@ -1,267 +0,0 @@ -from __future__ import print_function, division -import pandas as pd -import numpy as np -from abc import abstractmethod -import os -from scipy.sparse import csr_matrix -import math - -from autoPyTorch.data_management.data_converter import DataConverter - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -class DataReader(object): - def __init__(self, file_name, is_classification=None): - self.file_name = file_name - self.data = None - self.X = None - self.Y = None - self.X_valid = None - self.Y_valid = None - self.X_test = None - self.Y_test = None - self.is_classification = is_classification - self.categorical_features = None - self.is_multilabel = None - self.max_runtime = None - self.metric = None - - @abstractmethod - def read(self): - return - - def convert(self, **kwargs): - """ - Convert the data using standard data converter with standard settings. - - Arguments: - **kwargs: args and kwargs are passed to Dataconverter - """ - data_converter = DataConverter(is_classification=self.is_classification, is_multilabel=self.is_multilabel, **kwargs) - self.X, self.Y, self.is_classification, self.is_multilabel, self.categorical_features = data_converter.convert(self.X, self.Y) - - if self.X_valid is not None and self.Y_valid is not None: - self.X_valid, self.Y_valid, _, _, _ = data_converter.convert(self.X_valid, self.Y_valid) - - if self.X_test is not None and self.Y_test is not None: - self.X_test, self.Y_test, _, _, _ = data_converter.convert(self.X_test, self.Y_test) - - -class CSVReader(DataReader): - def __init__(self, file_name, is_classification=None): - self.num_entries = None - self.num_features = None - self.num_classes = None - super(CSVReader, self).__init__(file_name, is_classification) - - - - def read(self, auto_convert=True, **kwargs): - """ - Read the data from given csv file. - - Arguments: - auto_convert: Automatically convert data after reading. - *args, **kwargs: arguments for converting. 
- """ - self.data = pd.read_csv(self.file_name) - - - self.num_entries = len(self.data) - self.num_features = len(self.data.iloc[0]) - 1 - - self.data = np.array(self.data) - - self.X = self.data[0:self.num_entries, 0:self.num_features] #np.array( .iloc - self.Y = self.data[0:self.num_entries, -1] - - for i in range(self.X.shape[0]): - for j in range(self.X.shape[1]): - if self.X[i, j] == "?": - self.X[i, j] = np.nan - - self.num_classes = len(np.unique(self.Y)) - if (auto_convert): - self.convert(**kwargs) - -class OpenMlReader(DataReader): - def __init__(self, dataset_id, is_classification = None, api_key=None): - import openml - self.openml = openml - self.num_entries = None - self.num_features = None - self.num_classes = None - self.dataset_id = dataset_id - if api_key: - openml.config.server = "https://www.openml.org/api/v1/xml" - openml.config.apikey = api_key - super(OpenMlReader, self).__init__("openml:" + str(dataset_id), is_classification) - - def read(self, **kwargs): - """ - Read the data from given openml dataset file. - - Arguments: - auto_convert: Automatically convert data after reading. - *args, **kwargs: arguments for converting. - """ - - dataset = self.openml.datasets.get_dataset(self.dataset_id) - try: - self.X, self.Y, self.categorical_features = dataset.get_data( - target=dataset.default_target_attribute, return_categorical_indicator=True) - except Exception as e: - raise RuntimeError("An error occurred when loading the dataset and splitting it into X and Y. Please check if the dataset is suitable.") - - self.num_entries = self.X.shape[0] - self.num_features = self.X.shape[1] - self.is_multilabel = False - class_labels = dataset.retrieve_class_labels(target_name=dataset.default_target_attribute) - if class_labels: - self.is_classification = True - self.num_classes = len(class_labels) - else: - self.is_classification = False - self.num_classes = 1 - - -class AutoMlReader(DataReader): - def __init__(self, path_to_info): - self.num_entries = None - self.num_features = None - self.num_classes = None - super(AutoMlReader, self).__init__(path_to_info, None) - - def read(self, auto_convert=True, **kwargs): - path_to_info = self.file_name - info_dict = dict() - - # read info file - with open(path_to_info, "r") as f: - for line in f: - info_dict[line.split("=")[0].strip()] = line.split("=")[1].strip().strip("'") - self.is_classification = "classification" in info_dict["task"] - - name = info_dict["name"] - path = os.path.dirname(path_to_info) - self.is_multilabel = "multilabel" in info_dict["task"] if self.is_classification else None - self.metric = info_dict["metric"] - self.max_runtime = float(info_dict["time_budget"]) - - target_num = int(info_dict["target_num"]) - feat_num = int(info_dict["feat_num"]) - train_num = int(info_dict["train_num"]) - valid_num = int(info_dict["valid_num"]) - test_num = int(info_dict["test_num"]) - is_sparse = bool(int(info_dict["is_sparse"])) - feats_binary = info_dict["feat_type"].lower() == "binary" - - # read feature types - force_categorical = [] - force_numerical = [] - if info_dict["feat_type"].lower() == "binary" or info_dict["feat_type"].lower() == "numerical": - force_numerical = [i for i in range(feat_num)] - elif info_dict["feat_type"].lower() == "categorical": - force_categorical = [i for i in range(feat_num)] - elif os.path.exists(os.path.join(path, name + "_feat.type")): - with open(os.path.join(path, name + "_feat.type"), "r") as f: - for i, line in enumerate(f): - if line.strip().lower() == "numerical": - 
force_numerical.append(i) - elif line.strip().lower() == "categorical": - force_categorical.append(i) - - # read data files - reading_function = self.read_datafile if not is_sparse else ( - self.read_sparse_datafile if not feats_binary else self.read_binary_sparse_datafile) - self.X = reading_function(os.path.join(path, name + "_train.data"), (train_num, feat_num)) - self.Y = self.read_datafile(os.path.join(path, name + "_train.solution"), (train_num, target_num)) - - if os.path.exists(os.path.join(path, name + "_valid.data")) and \ - os.path.exists(os.path.join(path, name + "_valid.solution")): - self.X_valid = reading_function(os.path.join(path, name + "_valid.data"), (valid_num, feat_num)) - self.Y_valid = self.read_datafile(os.path.join(path, name + "_valid.solution"), (valid_num, target_num)) - - if os.path.exists(os.path.join(path, name + "_test.data")) and \ - os.path.exists(os.path.join(path, name + "_test.solution")): - self.X_test = reading_function(os.path.join(path, name + "_test.data"), (test_num, feat_num)) - self.Y_test = self.read_datafile(os.path.join(path, name + "_test.solution"), (test_num, target_num)) - - if not self.is_multilabel and self.is_classification and self.Y.shape[1] > 1: - self.Y = np.argmax(self.Y, axis=1) - self.Y_valid = np.argmax(self.Y_valid, axis=1) if self.Y_valid is not None else None - self.Y_test = np.argmax(self.Y_test, axis=1) if self.Y_test is not None else None - - if auto_convert and not is_sparse: - self.convert(force_categorical=force_categorical, force_numerical=force_numerical, **kwargs) - - def read_datafile(self, filepath, shape): - data = [] - with open(filepath, "r") as f: - for line in f: - data.append([float(v.strip()) for v in line.split()]) - return np.array(data) - - def read_sparse_datafile(self, filepath, shape): - data = [] - row_indizes = [] - col_indizes = [] - with open(filepath, "r") as f: - for row, line in enumerate(f): - print("\rReading line:", row, "of", shape[0], end="") - for value in line.split(): - value = value.rstrip() - - data.append(float(value.split(":")[1])) - col_indizes.append(int(value.split(":")[0]) - 1) - row_indizes.append(row) - print("Done") - return csr_matrix((data, (row_indizes, col_indizes)), shape=shape) - - def read_binary_sparse_datafile(self, filepath, shape): - row_indizes = [] - col_indizes = [] - with open(filepath, "r") as f: - for row, line in enumerate(f): - print("\rReading line:", row, "of", shape[0], end="") - for value in line.split(): - value = value.rstrip() - col_indizes.append(int(value) - 1) - row_indizes.append(row) - print("Done") - return csr_matrix(([1] * len(row_indizes), (row_indizes, col_indizes)), shape=shape) - - -class OpenMLImageReader(OpenMlReader): - def __init__(self, dataset_id, is_classification = None, api_key=None, nChannels=1): - self.channels = nChannels - super(OpenMLImageReader, self).__init__(dataset_id, is_classification, api_key) - - def read(self, auto_convert=True, **kwargs): - """ - Read the data from given openml datset file. - - Arguments: - auto_convert: Automatically convert data after reading. - *args, **kwargs: arguments for converting. 
- """ - - dataset = self.openml.datasets.get_dataset(self.dataset_id) - self.data = dataset.get_data() - - - self.num_entries = len(self.data) - self.num_features = len(self.data[0]) - 1 - - - self.X = self.data[0:self.num_entries, 0:self.num_features] / 255 - - image_size = int(math.sqrt(self.num_features / self.channels)) - self.X = np.reshape(self.X, (self.X.shape[0], self.channels, image_size, image_size)) - - self.Y = self.data[0:self.num_entries, -1] - self.num_classes = len(np.unique(self.Y)) - if self.is_classification is None: - self.is_classification = dataset.get_features_by_type("nominal")[-1] == self.num_features diff --git a/autoPyTorch/data_management/example_dataset.csv b/autoPyTorch/data_management/example_dataset.csv deleted file mode 100644 index f0c5011a0..000000000 --- a/autoPyTorch/data_management/example_dataset.csv +++ /dev/null @@ -1,10 +0,0 @@ -"id", "checking_status","duration","class" -1, '<0',6,good -2, '0<=X<200',48,bad -3, 'no checking',12,good -4, '<0',42,good -5, '<0',24,bad -6, 'no checking',36,good -7, 'no checking',24,good -8, '0<=X<200',36,good -9, 'no checking',12,excellent \ No newline at end of file diff --git a/autoPyTorch/data_management/image_loader.py b/autoPyTorch/data_management/image_loader.py deleted file mode 100644 index 00dad12c1..000000000 --- a/autoPyTorch/data_management/image_loader.py +++ /dev/null @@ -1,119 +0,0 @@ -import torch.utils.data as data - -import os -import os.path - -import logging -logging.getLogger('PIL').setLevel(logging.CRITICAL) -from PIL import Image - -def default_loader(path): - return Image.open(path).convert('RGB') - -from multiprocessing import Process, RawValue, Lock -import time - -class ThreadCounter(object): - def __init__(self): - # RawValue because we don't need it to create a Lock: - self.val = RawValue('d', 0) - self.num = RawValue('i', 0) - self.lock = Lock() - - def add(self, value): - with self.lock: - self.val.value += value - self.num.value += 1 - - def value(self): - with self.lock: - return self.val.value - - def avg(self): - with self.lock: - return self.val.value / self.num.value - - def reset(self): - with self.lock: - self.val.value = 0 - self.num.value = 0 - -class ImageFilelist(data.Dataset): - def __init__(self, image_file_list, label_list, transform=None, target_transform=None, loader=default_loader, cache_size=0, image_size=None): - self.image_file_list = image_file_list - self.label_list = label_list - self.transform = transform - self.target_transform = target_transform - self.loader = loader - # self.readTime = ThreadCounter() - # self.augmentTime = ThreadCounter() - # self.loadTime = ThreadCounter() - self.fill_cache(cache_size, image_size) - - def get_times(self, prefix): - times = dict() - # times.update({prefix + k: v for k, v in self.transform.get_times().items()}) - # times[prefix + 'read_time'] = self.readTime.value() - # times[prefix + 'read_time_avg'] = self.readTime.avg() - # times[prefix + 'augment_time'] = self.augmentTime.value() - # times[prefix + 'augment_time_avg'] = self.augmentTime.avg() - # times[prefix + 'load_time'] = self.loadTime.value() - return times - - def fill_cache(self, cache_size, image_size_pixels): - self.cache = dict() - if cache_size == 0: - return - import sys - max_image_size = 0 - cur_size = 0 - for i, impath in enumerate(self.image_file_list): - img = self.loader(impath) - image_size = sys.getsizeof(img) - max_image_size = max(max_image_size, image_size) - cur_size += image_size - if image_size_pixels is not None: - img = 
img.resize(image_size_pixels) - self.cache[impath] = img - # logging.getLogger('autonet').info('Load image: ' + str(sys.getsizeof(self.cache[impath])) + ' bytes - Cache: ' + str(cur_size)) - if cur_size + max_image_size > cache_size: - break - logging.getLogger('autonet').info('Could load ' + str(i+1) + '/' + str(len(self.image_file_list)) + ' images into cache, used ' + str(cur_size) + '/' + str(cache_size) + ' bytes') - - def __getitem__(self, index): - impath = self.image_file_list[index] - target = self.label_list[index] - # start_time = time.time() - img = self.cache[impath] if impath in self.cache else self.loader(impath) - # self.readTime.add(time.time() - start_time) - # start_time = time.time() - if self.transform is not None: - img = self.transform(img) - if self.target_transform is not None: - target = self.target_transform(target) - # self.augmentTime.add(time.time() - start_time) - # self.loadTime.add(time.time() - start_time) - return img, target - - def __len__(self): - return len(self.image_file_list) - -class XYDataset(data.Dataset): - def __init__(self, X, Y, transform=None, target_transform=None): - self.X = X - self.Y = Y - self.transform = transform - self.target_transform = target_transform - - def __getitem__(self, index): - img = self.X[index] - target = self.Y[index] - - if self.transform is not None: - img = self.transform(img) - if self.target_transform is not None: - target = self.target_transform(target) - return img, target - - def __len__(self): - return len(self.image_file_list) \ No newline at end of file diff --git a/autoPyTorch/components/ensembles/__init__.py b/autoPyTorch/datasets/__init__.py similarity index 100% rename from autoPyTorch/components/ensembles/__init__.py rename to autoPyTorch/datasets/__init__.py diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py new file mode 100644 index 000000000..565ffd4f3 --- /dev/null +++ b/autoPyTorch/datasets/base_dataset.py @@ -0,0 +1,373 @@ +from abc import ABCMeta +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, cast + +import numpy as np + +from scipy.sparse import issparse + +from sklearn.utils.multiclass import type_of_target + +from torch.utils.data import Dataset, Subset + +import torchvision + +from autoPyTorch.datasets.resampling_strategy import ( + CROSS_VAL_FN, + CrossValTypes, + DEFAULT_RESAMPLING_PARAMETERS, + HOLDOUT_FN, + HoldoutValTypes, + get_cross_validators, + get_holdout_validators, + is_stratified, +) +from autoPyTorch.utils.common import FitRequirement, hash_array_or_matrix + +BASE_DATASET_INPUT = Union[Tuple[np.ndarray, np.ndarray], Dataset] + + +def check_valid_data(data: Any) -> None: + if not (hasattr(data, '__getitem__') and hasattr(data, '__len__')): + raise ValueError( + 'The specified Data for Dataset does either not have a __getitem__ or a __len__ attribute.') + + +def type_check(train_tensors: BASE_DATASET_INPUT, val_tensors: Optional[BASE_DATASET_INPUT] = None) -> None: + for i in range(len(train_tensors)): + check_valid_data(train_tensors[i]) + if val_tensors is not None: + for i in range(len(val_tensors)): + check_valid_data(val_tensors[i]) + + +class TransformSubset(Subset): + """ + Because the BaseDataset contains all the data (train/val/test), the transformations + have to be applied with some directions. That is, if yielding train data, + we expect to apply train transformation (which have augmentations exclusively). 
+ + We achieve so by adding a train flag to the pytorch subset + """ + def __init__(self, dataset: Dataset, indices: Sequence[int], train: bool) -> None: + self.dataset = dataset + self.indices = indices + self.train = train + + def __getitem__(self, idx: int) -> np.ndarray: + return self.dataset.__getitem__(self.indices[idx], self.train) + + +class BaseDataset(Dataset, metaclass=ABCMeta): + def __init__( + self, + train_tensors: BASE_DATASET_INPUT, + dataset_name: Optional[str] = None, + val_tensors: Optional[BASE_DATASET_INPUT] = None, + test_tensors: Optional[BASE_DATASET_INPUT] = None, + resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + shuffle: Optional[bool] = True, + seed: Optional[int] = 42, + train_transforms: Optional[torchvision.transforms.Compose] = None, + val_transforms: Optional[torchvision.transforms.Compose] = None, + ): + """ + Base class for datasets used in AutoPyTorch + Args: + train_tensors (A tuple of objects that have a __len__ and a __getitem__ attribute): + training data + dataset_name (str): name of the dataset, used as experiment name. + val_tensors (An optional tuple of objects that have a __len__ and a __getitem__ attribute): + validation data + test_tensors (An optional tuple of objects that have a __len__ and a __getitem__ attribute): + test data + resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), + (default=HoldoutValTypes.holdout_validation): + strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): arguments + required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. + shuffle: Whether to shuffle the data before performing splits + seed (int), (default=1): seed to be used for reproducibility. 
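As a rough usage sketch (not part of this diff), the resampling options described above could be selected as follows; the toy arrays are invented and TabularDataset is the tabular subclass added later in this change:

import numpy as np
from autoPyTorch.datasets.resampling_strategy import CrossValTypes
from autoPyTorch.datasets.tabular_dataset import TabularDataset

X = np.random.rand(100, 4)
y = np.random.randint(0, 2, size=100)
# resampling_strategy_args overrides the defaults from DEFAULT_RESAMPLING_PARAMETERS
dataset = TabularDataset(
    X, y,
    resampling_strategy=CrossValTypes.k_fold_cross_validation,
    resampling_strategy_args={'num_splits': 5},
)
print(len(dataset.splits))  # one (train_indices, val_indices) pair per fold -> 5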
+ train_transforms (Optional[torchvision.transforms.Compose]): + Additional Transforms to be applied to the training data + val_transforms (Optional[torchvision.transforms.Compose]): + Additional Transforms to be applied to the validation/test data + """ + if dataset_name is not None: + self.dataset_name = dataset_name + else: + self.dataset_name = hash_array_or_matrix(train_tensors[0]) + if not hasattr(train_tensors[0], 'shape'): + type_check(train_tensors, val_tensors) + self.train_tensors = train_tensors + self.val_tensors = val_tensors + self.test_tensors = test_tensors + self.cross_validators: Dict[str, CROSS_VAL_FN] = {} + self.holdout_validators: Dict[str, HOLDOUT_FN] = {} + self.rand = np.random.RandomState(seed=seed) + self.shuffle = shuffle + self.resampling_strategy = resampling_strategy + self.resampling_strategy_args = resampling_strategy_args + self.task_type: Optional[str] = None + self.issparse: bool = issparse(self.train_tensors[0]) + self.input_shape: Tuple[int] = train_tensors[0].shape[1:] + self.num_classes: Optional[int] = None + if len(train_tensors) == 2 and train_tensors[1] is not None: + self.output_type: str = type_of_target(self.train_tensors[1]) + self.output_shape: int = train_tensors[1].shape[1] if train_tensors[1].shape == 2 else 1 + + # TODO: Look for a criteria to define small enough to preprocess + self.is_small_preprocess = True + + # Make sure cross validation splits are created once + self.cross_validators = get_cross_validators( + CrossValTypes.stratified_k_fold_cross_validation, + CrossValTypes.k_fold_cross_validation, + CrossValTypes.shuffle_split_cross_validation, + CrossValTypes.stratified_shuffle_split_cross_validation + ) + self.holdout_validators = get_holdout_validators( + HoldoutValTypes.holdout_validation, + HoldoutValTypes.stratified_holdout_validation + ) + self.splits = self.get_splits_from_resampling_strategy() + + # We also need to be able to transform the data, be it for pre-processing + # or for augmentation + self.train_transform = train_transforms + self.val_transform = val_transforms + + def update_transform(self, transform: Optional[torchvision.transforms.Compose], + train: bool = True, + ) -> 'BaseDataset': + """ + During the pipeline execution, the pipeline object might propose transformations + as a product of the current pipeline configuration being tested. + + This utility allows to return a self with the updated transformation, so that + a dataloader can yield this dataset with the desired transformations + + Args: + transform (torchvision.transforms.Compose): The transformations proposed + by the current pipeline + train (bool): Whether to update the train or validation transform + + Returns: + self: A copy of the update pipeline + """ + if train: + self.train_transform = transform + else: + self.val_transform = transform + return self + + def __getitem__(self, index: int, train: bool = True) -> Tuple[np.ndarray, ...]: + """ + The base dataset uses a Subset of the data. Nevertheless, the base dataset expect + both validation and test data to be present in the same dataset, which motivated the + need to dynamically give train/test data with the __getitem__ command. + + This method yields a datapoint of the whole data (after a Subset has selected a given + item, based on the resampling strategy) and applies a train/testing transformation, if any. 
+ + Args: + index (int): what element to yield from all the train/test tensors + train (bool): Whether to apply a train or test transformation, if any + + Returns: + A transformed single point prediction + """ + + if hasattr(self.train_tensors[0], 'loc'): + X = self.train_tensors[0].iloc[[index]] + else: + X = self.train_tensors[0][index] + + if self.train_transform is not None and train: + X = self.train_transform(X) + elif self.val_transform is not None and not train: + X = self.val_transform(X) + + # In case of prediction, the targets are not provided + Y = self.train_tensors[1] + if Y is not None: + Y = Y[index] + else: + Y = None + + return X, Y + + def __len__(self) -> int: + return self.train_tensors[0].shape[0] + + def _get_indices(self) -> np.ndarray: + if self.shuffle: + indices = self.rand.permutation(len(self)) + else: + indices = np.arange(len(self)) + return indices + + def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], List[int]]]: + """ + Creates a set of splits based on a resampling strategy provided + + Returns + (List[Tuple[List[int], List[int]]]): splits in the [train_indices, val_indices] format + """ + splits = [] + if isinstance(self.resampling_strategy, HoldoutValTypes): + val_share = DEFAULT_RESAMPLING_PARAMETERS[self.resampling_strategy].get( + 'val_share', None) + if self.resampling_strategy_args is not None: + val_share = self.resampling_strategy_args.get('val_share', val_share) + splits.append( + self.create_holdout_val_split( + holdout_val_type=self.resampling_strategy, + val_share=val_share, + ) + ) + elif isinstance(self.resampling_strategy, CrossValTypes): + num_splits = DEFAULT_RESAMPLING_PARAMETERS[self.resampling_strategy].get( + 'num_splits', None) + if self.resampling_strategy_args is not None: + num_splits = self.resampling_strategy_args.get('num_splits', num_splits) + # Create the split if it was not created before + splits.extend( + self.create_cross_val_splits( + cross_val_type=self.resampling_strategy, + num_splits=cast(int, num_splits), + ) + ) + else: + raise ValueError(f"Unsupported resampling strategy={self.resampling_strategy}") + return splits + + def create_cross_val_splits( + self, + cross_val_type: CrossValTypes, + num_splits: int + ) -> List[Tuple[Union[List[int], np.ndarray], Union[List[int], np.ndarray]]]: + """ + This function creates the cross validation split for the given task. + + It is done once per dataset to have comparable results among pipelines + Args: + cross_val_type (CrossValTypes): + num_splits (int): number of splits to be created + + Returns: + (List[Tuple[Union[List[int], np.ndarray], Union[List[int], np.ndarray]]]): + list containing 'num_splits' splits. + """ + # Create just the split once + # This is gonna be called multiple times, because the current dataset + # is being used for multiple pipelines. That is, to be efficient with memory + # we dump the dataset to memory and read it on a need basis. 
So this function + # should be robust against multiple calls, and it does so by remembering the splits + if not isinstance(cross_val_type, CrossValTypes): + raise NotImplementedError(f'The selected `cross_val_type` "{cross_val_type}" is not implemented.') + kwargs = {} + if is_stratified(cross_val_type): + # we need additional information about the data for stratification + kwargs["stratify"] = self.train_tensors[-1] + splits = self.cross_validators[cross_val_type.name]( + num_splits, self._get_indices(), **kwargs) + return splits + + def create_holdout_val_split( + self, + holdout_val_type: HoldoutValTypes, + val_share: float, + ) -> Tuple[np.ndarray, np.ndarray]: + """ + This function creates the holdout split for the given task. + + It is done once per dataset to have comparable results among pipelines + Args: + holdout_val_type (HoldoutValTypes): + val_share (float): share of the validation data + + Returns: + (Tuple[np.ndarray, np.ndarray]): Tuple containing (train_indices, val_indices) + """ + if holdout_val_type is None: + raise ValueError( + '`val_share` specified, but `holdout_val_type` not specified.' + ) + if self.val_tensors is not None: + raise ValueError( + '`val_share` specified, but the Dataset was a given a pre-defined split at initialization already.') + if val_share < 0 or val_share > 1: + raise ValueError(f"`val_share` must be between 0 and 1, got {val_share}.") + if not isinstance(holdout_val_type, HoldoutValTypes): + raise NotImplementedError(f'The specified `holdout_val_type` "{holdout_val_type}" is not supported.') + kwargs = {} + if is_stratified(holdout_val_type): + # we need additional information about the data for stratification + kwargs["stratify"] = self.train_tensors[-1] + train, val = self.holdout_validators[holdout_val_type.name](val_share, self._get_indices(), **kwargs) + return train, val + + def get_dataset_for_training(self, split_id: int) -> Tuple[Dataset, Dataset]: + """ + The above split methods employ the Subset to internally subsample the whole dataset. + + During training, we need access to one of those splits. This is a handy function + to provide training data to fit a pipeline + + Args: + split (int): The desired subset of the dataset to split and use + + Returns: + Dataset: the reduced dataset to be used for testing + """ + # Subset creates a dataset. Splits is a (train_indices, test_indices) tuple + return (TransformSubset(self, self.splits[split_id][0], train=True), + TransformSubset(self, self.splits[split_id][1], train=False)) + + def replace_data(self, X_train: BASE_DATASET_INPUT, X_test: Optional[BASE_DATASET_INPUT]) -> 'BaseDataset': + """ + To speed up the training of small dataset, early pre-processing of the data + can be made on the fly by the pipeline. + + In this case, we replace the original train/test tensors by this pre-processed version + + Args: + X_train (np.ndarray): the pre-processed (imputation/encoding/...) train data + X_test (np.ndarray): the pre-processed (imputation/encoding/...) test data + + Returns: + self + """ + self.train_tensors = (X_train, self.train_tensors[1]) + if X_test is not None and self.test_tensors is not None: + self.test_tensors = (X_test, self.test_tensors[1]) + return self + + def get_dataset_properties(self, dataset_requirements: List[FitRequirement]) -> Dict[str, Any]: + """ + Gets the dataset properties required in the fit dictionary + Args: + dataset_requirements (List[FitRequirement]): List of + fit requirements that the dataset properties must + contain. 
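A hedged sketch of how update_transform and get_dataset_for_training defined above work together, continuing the toy dataset from the earlier example; the empty Compose is only a stand-in for real augmentations:

import torchvision.transforms

identity = torchvision.transforms.Compose([])    # placeholder transform, acts as a no-op
dataset.update_transform(identity, train=True)   # applied when a TransformSubset has train=True
dataset.update_transform(identity, train=False)  # applied for validation/test subsets

train_subset, val_subset = dataset.get_dataset_for_training(split_id=0)
x, y = train_subset[0]  # routed through __getitem__(index, train=True), so the train transform applies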
+ + Returns: + + """ + dataset_properties = dict() + for dataset_requirement in dataset_requirements: + dataset_properties[dataset_requirement.name] = getattr(self, dataset_requirement.name) + + # Add task type, output type and issparse to dataset properties as + # they are not a dataset requirement in the pipeline + dataset_properties.update({'task_type': self.task_type, + 'output_type': self.output_type, + 'issparse': self.issparse, + 'input_shape': self.input_shape, + 'output_shape': self.output_shape, + 'num_classes': self.num_classes, + }) + return dataset_properties diff --git a/autoPyTorch/datasets/image_dataset.py b/autoPyTorch/datasets/image_dataset.py new file mode 100644 index 000000000..4664dbaf5 --- /dev/null +++ b/autoPyTorch/datasets/image_dataset.py @@ -0,0 +1,174 @@ +from typing import Any, Dict, List, Optional, Tuple, Union + +from PIL import Image + +import numpy as np + +import torch +from torch.utils.data import Dataset, TensorDataset + +import torchvision.transforms +from torchvision.transforms import functional as TF + +from autoPyTorch.constants import ( + CLASSIFICATION_OUTPUTS, + CLASSIFICATION_TASKS, + IMAGE_CLASSIFICATION, + IMAGE_REGRESSION, + REGRESSION_OUTPUTS, + STRING_TO_OUTPUT_TYPES, + STRING_TO_TASK_TYPES, + TASK_TYPES_TO_STRING, +) +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + HoldoutValTypes, +) + +IMAGE_DATASET_INPUT = Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]] + + +class ImageDataset(BaseDataset): + """ + Dataset class for images used in AutoPyTorch + Args: + train (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]): + training data + val (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]): + validation data + test (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]): + testing data + resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), + (default=HoldoutValTypes.holdout_validation): + strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): arguments + required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. + shuffle: Whether to shuffle the data before performing splits + seed (int), (default=1): seed to be used for reproducibility. 
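For clarity, a sketch of the two train input layouts the ImageDataset above accepts (file names and shapes are assumptions for illustration); either tuple would be passed as ImageDataset(train=...):

import numpy as np

# (a) in-memory images: an array of shape (n_samples, channels, height, width) plus labels,
#     wrapped internally in a torch TensorDataset
train_arrays = (np.random.rand(100, 3, 8, 8).astype(np.float32),
                np.random.randint(0, 2, size=100))

# (b) images on disk: a list of file paths plus labels, wrapped internally in _FilePathDataset
train_paths = (['images/cat_0.png', 'images/dog_1.png'],
               np.array([0, 1]))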
+ train_transforms (Optional[torchvision.transforms.Compose]): + Additional Transforms to be applied to the training data + val_transforms (Optional[torchvision.transforms.Compose]): + Additional Transforms to be applied to the validation/test data + """ + def __init__(self, + train: IMAGE_DATASET_INPUT, + val: Optional[IMAGE_DATASET_INPUT] = None, + test: Optional[IMAGE_DATASET_INPUT] = None, + resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + shuffle: Optional[bool] = True, + seed: Optional[int] = 42, + train_transforms: Optional[torchvision.transforms.Compose] = None, + val_transforms: Optional[torchvision.transforms.Compose] = None, + ): + _check_image_inputs(train=train, val=val) + train = _create_image_dataset(data=train) + if val is not None: + val = _create_image_dataset(data=val) + if test is not None: + test = _create_image_dataset(data=test) + self.mean, self.std = _calc_mean_std(train=train) + + super().__init__(train_tensors=train, val_tensors=val, test_tensors=test, shuffle=shuffle, + resampling_strategy=resampling_strategy, resampling_strategy_args=resampling_strategy_args, + seed=seed, + train_transforms=train_transforms, + val_transforms=val_transforms, + ) + if self.output_type is not None: + if STRING_TO_OUTPUT_TYPES[self.output_type] in CLASSIFICATION_OUTPUTS: + self.task_type = TASK_TYPES_TO_STRING[IMAGE_CLASSIFICATION] + elif STRING_TO_OUTPUT_TYPES[self.output_type] in REGRESSION_OUTPUTS: + self.task_type = TASK_TYPES_TO_STRING[IMAGE_REGRESSION] + else: + raise ValueError("Output type not currently supported ") + else: + raise ValueError("Task type not currently supported ") + if STRING_TO_TASK_TYPES[self.task_type] in CLASSIFICATION_TASKS: + self.num_classes: int = len(np.unique(self.train_tensors[1])) + + +def _calc_mean_std(train: Dataset) -> Tuple[torch.Tensor, torch.Tensor]: + """ + calculates channel wise mean of the dataset + Args: + train (Dataset): dataset + + Returns: + Tuple[torch.Tensor, torch.Tensor]: (mean, std) + """ + mean = torch.zeros((3,), dtype=torch.float) + var = torch.zeros((3,), dtype=torch.float) + for i in range(len(train)): + v, m = torch.var_mean(train[i][0]) # 0 used to index images + mean += m + var += v + mean /= len(train) + var /= len(var) + std = torch.sqrt(var) + return mean, std + + +def _check_image_inputs(train: IMAGE_DATASET_INPUT, + val: Optional[IMAGE_DATASET_INPUT] = None) -> None: + """ + Performs sanity checks on the given data + Args: + train (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]): + training data + val (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]): + validation data + Returns: + None + """ + if not isinstance(train, Dataset): + if len(train[0]) != len(train[1]): + raise ValueError( + f"expected train inputs to have the same length, but got lengths {len(train[0])} and {len(train[1])}") + if val is not None: + if len(val[0]) != len(val[1]): + raise ValueError( + f"expected val inputs to have the same length, but got lengths {len(train[0])} and {len(train[1])}") + + +def _create_image_dataset(data: IMAGE_DATASET_INPUT) -> Dataset: + """ + Creates a torch.utils.data.Dataset from different types of data mentioned + Args: + data (Union[Dataset, Tuple[Union[np.ndarray, List[str]], np.ndarray]]): + data + Returns: + (Dataset): torch.utils.data.Dataset object of the given data + """ + # if user already provided a dataset, use it + if isinstance(data, Dataset): + return data + # 
if user provided list of file paths, create a file path dataset + if isinstance(data[0], list): + return _FilePathDataset(file_paths=data[0], targets=data[1]) + # if user provided the images as numpy tensors use them directly + else: + return TensorDataset(torch.tensor(data[0]), torch.tensor(data[1])) + + +class _FilePathDataset(Dataset): + """ + Internal class used to handle data from file paths + Args: + file_paths (List[str]): paths of images + targets (np.ndarray): targets + """ + def __init__(self, file_paths: List[str], targets: np.ndarray): + self.file_paths = file_paths + self.targets = targets + + def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]: + with open(self.file_paths[index], "rb") as f: + img = Image.open(f).convert("RGB") + return TF.to_tensor(img), torch.tensor(self.targets[index]) + + def __len__(self) -> int: + return len(self.file_paths) diff --git a/autoPyTorch/datasets/resampling_strategy.py b/autoPyTorch/datasets/resampling_strategy.py new file mode 100644 index 000000000..1d0bc3077 --- /dev/null +++ b/autoPyTorch/datasets/resampling_strategy.py @@ -0,0 +1,153 @@ +from enum import IntEnum +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np + +from sklearn.model_selection import ( + KFold, + ShuffleSplit, + StratifiedKFold, + StratifiedShuffleSplit, + TimeSeriesSplit, + train_test_split +) + +from typing_extensions import Protocol + + +# Use callback protocol as workaround, since callable with function fields count 'self' as argument +class CROSS_VAL_FN(Protocol): + def __call__(self, + num_splits: int, + indices: np.ndarray, + stratify: Optional[Any]) -> List[Tuple[np.ndarray, np.ndarray]]: + ... + + +class HOLDOUT_FN(Protocol): + def __call__(self, val_share: float, indices: np.ndarray, stratify: Optional[Any] + ) -> Tuple[np.ndarray, np.ndarray]: + ... 
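Any function with this shape satisfies the HOLDOUT_FN protocol above; a minimal sketch (no stratification) that splits off the last val_share fraction of the indices:

import numpy as np

def last_block_holdout(val_share: float, indices: np.ndarray, stratify=None):
    cut = int(len(indices) * (1 - val_share))  # first part trains, the tail validates
    return indices[:cut], indices[cut:]

train_idx, val_idx = last_block_holdout(0.33, np.arange(10))  # -> first 6 indices / last 4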
+ + +class CrossValTypes(IntEnum): + stratified_k_fold_cross_validation = 1 + k_fold_cross_validation = 2 + stratified_shuffle_split_cross_validation = 3 + shuffle_split_cross_validation = 4 + time_series_cross_validation = 5 + + +class HoldoutValTypes(IntEnum): + holdout_validation = 6 + stratified_holdout_validation = 7 + + +RESAMPLING_STRATEGIES = [CrossValTypes, HoldoutValTypes] + +DEFAULT_RESAMPLING_PARAMETERS = { + HoldoutValTypes.holdout_validation: { + 'val_share': 0.33, + }, + HoldoutValTypes.stratified_holdout_validation: { + 'val_share': 0.33, + }, + CrossValTypes.k_fold_cross_validation: { + 'num_splits': 3, + }, + CrossValTypes.stratified_k_fold_cross_validation: { + 'num_splits': 3, + }, + CrossValTypes.shuffle_split_cross_validation: { + 'num_splits': 3, + }, + CrossValTypes.time_series_cross_validation: { + 'num_splits': 3, + }, +} # type: Dict[Union[HoldoutValTypes, CrossValTypes], Dict[str, Any]] + + +def get_cross_validators(*cross_val_types: CrossValTypes) -> Dict[str, CROSS_VAL_FN]: + cross_validators = {} # type: Dict[str, CROSS_VAL_FN] + for cross_val_type in cross_val_types: + cross_val_fn = globals()[cross_val_type.name] + cross_validators[cross_val_type.name] = cross_val_fn + return cross_validators + + +def get_holdout_validators(*holdout_val_types: HoldoutValTypes) -> Dict[str, HOLDOUT_FN]: + holdout_validators = {} # type: Dict[str, HOLDOUT_FN] + for holdout_val_type in holdout_val_types: + holdout_val_fn = globals()[holdout_val_type.name] + holdout_validators[holdout_val_type.name] = holdout_val_fn + return holdout_validators + + +def is_stratified(val_type: Union[str, CrossValTypes, HoldoutValTypes]) -> bool: + if isinstance(val_type, str): + return val_type.lower().startswith("stratified") + else: + return val_type.name.lower().startswith("stratified") + + +def holdout_validation(val_share: float, indices: np.ndarray, **kwargs: Any) -> Tuple[np.ndarray, np.ndarray]: + train, val = train_test_split(indices, test_size=val_share, shuffle=False) + return train, val + + +def stratified_holdout_validation(val_share: float, indices: np.ndarray, **kwargs: Any) \ + -> Tuple[np.ndarray, np.ndarray]: + train, val = train_test_split(indices, test_size=val_share, shuffle=False, stratify=kwargs["stratify"]) + return train, val + + +def shuffle_split_cross_validation(num_splits: int, indices: np.ndarray, **kwargs: Any) \ + -> List[Tuple[np.ndarray, np.ndarray]]: + cv = ShuffleSplit(n_splits=num_splits) + splits = list(cv.split(indices)) + return splits + + +def stratified_shuffle_split_cross_validation(num_splits: int, indices: np.ndarray, **kwargs: Any) \ + -> List[Tuple[np.ndarray, np.ndarray]]: + cv = StratifiedShuffleSplit(n_splits=num_splits) + splits = list(cv.split(indices, kwargs["stratify"])) + return splits + + +def stratified_k_fold_cross_validation(num_splits: int, indices: np.ndarray, **kwargs: Any) \ + -> List[Tuple[np.ndarray, np.ndarray]]: + cv = StratifiedKFold(n_splits=num_splits) + splits = list(cv.split(indices, kwargs["stratify"])) + return splits + + +def k_fold_cross_validation(num_splits: int, indices: np.ndarray, **kwargs: Any) -> List[Tuple[np.ndarray, np.ndarray]]: + """ + Standard k fold cross validation. 
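A toy illustration of the holdout and k-fold helpers in this module; since shuffle=False, holdout_validation simply cuts off the last val_share fraction:

import numpy as np
from autoPyTorch.datasets.resampling_strategy import holdout_validation, k_fold_cross_validation

indices = np.arange(10)
train, val = holdout_validation(0.3, indices)  # train -> [0..6], val -> [7, 8, 9]
folds = k_fold_cross_validation(3, indices)    # list of 3 (train_indices, val_indices) pairs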
+ + :param indices: array of indices to be split + :param num_splits: number of cross validation splits + :return: list of tuples of training and validation indices + """ + cv = KFold(n_splits=num_splits) + splits = list(cv.split(indices)) + return splits + + +def time_series_cross_validation(num_splits: int, indices: np.ndarray, **kwargs: Any) \ + -> List[Tuple[np.ndarray, np.ndarray]]: + """ + Returns train and validation indices respecting the temporal ordering of the data. + Dummy example: [0, 1, 2, 3] with 3 folds yields + [0] [1] + [0, 1] [2] + [0, 1, 2] [3] + + :param indices: array of indices to be split + :param num_splits: number of cross validation splits + :return: list of tuples of training and validation indices + """ + cv = TimeSeriesSplit(n_splits=num_splits) + splits = list(cv.split(indices)) + return splits diff --git a/autoPyTorch/datasets/tabular_dataset.py b/autoPyTorch/datasets/tabular_dataset.py new file mode 100644 index 000000000..ab75ce3f8 --- /dev/null +++ b/autoPyTorch/datasets/tabular_dataset.py @@ -0,0 +1,229 @@ +from enum import Enum +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np + +import pandas as pd + +from sklearn.utils import check_array + +import torchvision.transforms + +from autoPyTorch.constants import ( + CLASSIFICATION_OUTPUTS, + CLASSIFICATION_TASKS, + REGRESSION_OUTPUTS, + STRING_TO_OUTPUT_TYPES, + STRING_TO_TASK_TYPES, + TABULAR_CLASSIFICATION, + TABULAR_REGRESSION, + TASK_TYPES_TO_STRING, +) +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + HoldoutValTypes, +) + + +class DataTypes(Enum): + Canonical = 1 + Float = 2 + String = 3 + Categorical = 4 + + +class Value2Index(object): + def __init__(self, values: list): + assert all(not (pd.isna(v)) for v in values) + self.values = {v: i for i, v in enumerate(values)} + + def __getitem__(self, item: Any) -> int: + if pd.isna(item): + return 0 + else: + return self.values[item] + 1 + + +class TabularDataset(BaseDataset): + """ + Base class for datasets used in AutoPyTorch + Args: + X (Union[np.ndarray, pd.DataFrame]): input training data. + Y (Union[np.ndarray, pd.Series]): training data targets. + X_test (Optional[Union[np.ndarray, pd.DataFrame]]): input testing data. + Y_test (Optional[Union[np.ndarray, pd.DataFrame]]): testing data targets + resampling_strategy (Union[CrossValTypes, HoldoutValTypes]), + (default=HoldoutValTypes.holdout_validation): + strategy to split the training data. + resampling_strategy_args (Optional[Dict[str, Any]]): arguments + required for the chosen resampling strategy. If None, uses + the default values provided in DEFAULT_RESAMPLING_PARAMETERS + in ```datasets/resampling_strategy.py```. + shuffle: Whether to shuffle the data before performing splits + seed (int), (default=1): seed to be used for reproducibility. + train_transforms (Optional[torchvision.transforms.Compose]): + Additional Transforms to be applied to the training data. + val_transforms (Optional[torchvision.transforms.Compose]): + Additional Transforms to be applied to the validation/test data. + + Notes: Support for Numpy Arrays is missing Strings. 
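A quick check of the Value2Index mapping defined above: index 0 is reserved for missing values, and every known value maps to its position in the given list plus one:

from autoPyTorch.datasets.tabular_dataset import Value2Index

v2i = Value2Index(['blue', 'green', 'red'])
v2i['blue']        # -> 1
v2i['red']         # -> 3
v2i[float('nan')]  # -> 0, because pd.isna(nan) is True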
+ + """ + + def __init__(self, X: Union[np.ndarray, pd.DataFrame], + Y: Union[np.ndarray, pd.Series], + X_test: Optional[Union[np.ndarray, pd.DataFrame]] = None, + Y_test: Optional[Union[np.ndarray, pd.DataFrame]] = None, + resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + shuffle: Optional[bool] = True, + seed: Optional[int] = 42, + train_transforms: Optional[torchvision.transforms.Compose] = None, + val_transforms: Optional[torchvision.transforms.Compose] = None, + dataset_name: Optional[str] = None, + ): + + X, self.data_types, self.nan_mask, self.itovs, self.vtois = self.interpret_columns(X) + + if Y is not None: + Y, _, self.target_nan_mask, self.target_itov, self.target_vtoi = self.interpret_columns( + Y, assert_single_column=True) + # For tabular classification, we expect also that it complies with Sklearn + # The below check_array performs input data checks and make sure that a numpy array + # is returned, as both Pytorch/Sklearn deal directly with numpy/list objects. + # In this particular case, the interpret() returns a pandas object (needed to extract) + # the data types, yet check_array translate the np.array. When Sklearn support pandas + # the below function will simply return Pandas DataFrame. + Y = check_array(Y, ensure_2d=False) + + self.categorical_columns, self.numerical_columns, self.categories, self.num_features = \ + self.infer_dataset_properties(X) + + # Allow support for X_test, Y_test. They will NOT be used for optimization, but + # rather to have a performance through time on the test data + if X_test is not None: + X_test, self._test_data_types, _, _, _ = self.interpret_columns(X_test) + + # Some quality checks on the data + if self.data_types != self._test_data_types: + raise ValueError(f"The train data inferred types {self.data_types} are " + "different than the test inferred types {self._test_data_types}") + if Y_test is not None: + Y_test, _, _, _, _ = self.interpret_columns( + Y_test, assert_single_column=True) + Y_test = check_array(Y_test, ensure_2d=False) + + super().__init__(train_tensors=(X, Y), test_tensors=(X_test, Y_test), shuffle=shuffle, + resampling_strategy=resampling_strategy, + resampling_strategy_args=resampling_strategy_args, + seed=seed, train_transforms=train_transforms, + dataset_name=dataset_name, + val_transforms=val_transforms) + if self.output_type is not None: + if STRING_TO_OUTPUT_TYPES[self.output_type] in CLASSIFICATION_OUTPUTS: + self.task_type = TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION] + elif STRING_TO_OUTPUT_TYPES[self.output_type] in REGRESSION_OUTPUTS: + self.task_type = TASK_TYPES_TO_STRING[TABULAR_REGRESSION] + else: + raise ValueError("Output type not currently supported ") + else: + raise ValueError("Task type not currently supported ") + if STRING_TO_TASK_TYPES[self.task_type] in CLASSIFICATION_TASKS: + self.num_classes: int = len(np.unique(self.train_tensors[1])) + + def interpret_columns(self, + data: Union[np.ndarray, pd.DataFrame, pd.Series], + assert_single_column: bool = False + ) -> Tuple[Union[pd.DataFrame, Any], List[DataTypes], + Union[np.ndarray], + List[Optional[list]], + List[Optional[Value2Index]]]: + """ + Interpret information such as data, data_types, nan_mask, itovs, vtois + about the columns from the given data. + + Args: + data (Union[np.ndarray, pd.DataFrame, pd.Series]): data to be + interpreted. 
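A hedged end-to-end sketch (toy data assumed) of the column interpretation described above: string columns end up categorical, float columns numerical:

import pandas as pd
from autoPyTorch.datasets.tabular_dataset import TabularDataset

X = pd.DataFrame({'color': ['red', 'blue', 'red', 'green'],
                  'size': [1.0, 2.5, 3.3, 0.7]})
y = pd.Series([0, 1, 0, 1])
ds = TabularDataset(X=X, Y=y)
ds.categorical_columns  # expected: [0]  (the 'color' column)
ds.numerical_columns    # expected: [1]  (the 'size' column)
ds.categories           # expected: [['blue', 'green', 'red']]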
+ assert_single_column (bool), (default=False): flag for + asserting that the data contains a single column + + Returns: + Tuple[pd.DataFrame, List[DataTypes], + Union[np.ndarray], + List[Optional[list]], + List[Optional[Value2Index]]]: Tuple of information + """ + single_column = False + if isinstance(data, np.ndarray): + if len(data.shape) == 1 and ',' not in str(data.dtype): + single_column = True + data = data[:, None] + data = pd.DataFrame(data).infer_objects().convert_dtypes() + elif isinstance(data, pd.DataFrame): + data = data.infer_objects().convert_dtypes() + elif isinstance(data, pd.Series): + single_column = True + data = data.to_frame() + else: + raise ValueError('Provided data needs to be either an np.ndarray or a pd.DataFrame for TabularDataset.') + if assert_single_column: + assert single_column, \ + "The data is asserted to be only of a single column, but it isn't. \ + Most likely your targets are not a vector or series." + + data_types = [] + nan_mask = data.isna().to_numpy() + for col_index, dtype in enumerate(data.dtypes): + if dtype.kind == 'f': + data_types.append(DataTypes.Float) + elif dtype.kind in ('i', 'u', 'b'): + data_types.append(DataTypes.Canonical) + elif isinstance(dtype, pd.StringDtype): + data_types.append(DataTypes.String) + elif dtype.name == 'category': + # OpenML format categorical columns as category + # So add support for that + data_types.append(DataTypes.Categorical) + else: + raise ValueError(f"The dtype in column {col_index} is {dtype} which is not supported.") + itovs: List[Optional[List[Any]]] = [] + vtois: List[Optional[Value2Index]] = [] + for col_index, (_, col) in enumerate(data.iteritems()): + if data_types[col_index] != DataTypes.Float: + non_na_values = [v for v in set(col) if not pd.isna(v)] + non_na_values.sort() + itovs.append([np.nan] + non_na_values) + vtois.append(Value2Index(non_na_values)) + else: + itovs.append(None) + vtois.append(None) + + if single_column: + return data.iloc[:, 0], data_types, nan_mask, itovs, vtois + + return data, data_types, nan_mask, itovs, vtois + + def infer_dataset_properties(self, X: Any) \ + -> Tuple[List[int], List[int], List[object], int]: + """ + Infers the properties of the dataset like + categorical_columns, numerical_columns, categories, num_features + Args: + X: input training data + + Returns: + (Tuple[List[int], List[int], List[object], int]): + """ + categorical_columns = [] + numerical_columns = [] + for i, data_type in enumerate(self.data_types): + if data_type == DataTypes.String or data_type == DataTypes.Categorical: + categorical_columns.append(i) + else: + numerical_columns.append(i) + categories = [np.unique(X.iloc[:, a]).tolist() for a in categorical_columns] + num_features = X.shape[1] + + return categorical_columns, numerical_columns, categories, num_features diff --git a/autoPyTorch/datasets/time_series_dataset.py b/autoPyTorch/datasets/time_series_dataset.py new file mode 100644 index 000000000..7b0435d19 --- /dev/null +++ b/autoPyTorch/datasets/time_series_dataset.py @@ -0,0 +1,174 @@ +from typing import Any, Dict, Optional, Tuple, Union + +import numpy as np + +import torchvision.transforms + +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + HoldoutValTypes, + get_cross_validators, + get_holdout_validators +) + +TIME_SERIES_FORECASTING_INPUT = Tuple[np.ndarray, np.ndarray] # currently only numpy arrays are supported +TIME_SERIES_REGRESSION_INPUT = Tuple[np.ndarray, np.ndarray] 
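To make the windowing of the forecasting dataset defined below concrete: a series of length L yields L - sequence_length - n_steps + 1 training windows, each mapping sequence_length past steps to the value n_steps ahead. Toy numbers (assumed):

sequence_length, n_steps, time_series_length = 5, 2, 10
num_datapoints = time_series_length - sequence_length - n_steps + 1  # -> 4 windows
# window i uses steps [i, i + sequence_length) as input and
# step i + sequence_length + n_steps - 1 as target, so window 3 targets index 9,
# the final observation of the series.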
+TIME_SERIES_CLASSIFICATION_INPUT = Tuple[np.ndarray, np.ndarray] + + +class TimeSeriesForecastingDataset(BaseDataset): + def __init__(self, + target_variables: Tuple[int], + sequence_length: int, + n_steps: int, + train: TIME_SERIES_FORECASTING_INPUT, + val: Optional[TIME_SERIES_FORECASTING_INPUT] = None, + resampling_strategy: Union[CrossValTypes, HoldoutValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy_args: Optional[Dict[str, Any]] = None, + shuffle: Optional[bool] = False, + seed: Optional[int] = 42, + train_transforms: Optional[torchvision.transforms.Compose] = None, + val_transforms: Optional[torchvision.transforms.Compose] = None, + ): + """ + + :param target_variables: The indices of the variables you want to forecast + :param sequence_length: The amount of past data you want to use to forecast future value + :param n_steps: The number of steps you want to forecast into the future + :param train: Tuple with one tensor holding the training data + :param val: Tuple with one tensor holding the validation data + """ + _check_time_series_forecasting_inputs( + target_variables=target_variables, + sequence_length=sequence_length, + n_steps=n_steps, + train=train, + val=val) + train = _prepare_time_series_forecasting_tensor(tensor=train, + target_variables=target_variables, + sequence_length=sequence_length, + n_steps=n_steps) + if val is not None: + val = _prepare_time_series_forecasting_tensor(tensor=val, + target_variables=target_variables, + sequence_length=sequence_length, + n_steps=n_steps) + super().__init__(train_tensors=train, val_tensors=val, shuffle=shuffle, + resampling_strategy=resampling_strategy, resampling_strategy_args=resampling_strategy_args, + seed=seed, + train_transforms=train_transforms, + val_transforms=val_transforms, + ) + self.cross_validators = get_cross_validators(CrossValTypes.time_series_cross_validation) + self.holdout_validators = get_holdout_validators(HoldoutValTypes.holdout_validation) + + +def _check_time_series_forecasting_inputs(target_variables: Tuple[int], + sequence_length: int, + n_steps: int, + train: TIME_SERIES_FORECASTING_INPUT, + val: Optional[TIME_SERIES_FORECASTING_INPUT] = None) -> None: + if train[0].ndim != 3: + raise ValueError( + "The training data for time series forecasting has to be a three-dimensional tensor of shape PxLxM.") + if val is not None: + if val[0].ndim != 3: + raise ValueError( + "The validation data for time series forecasting " + "has to be a three-dimensional tensor of shape PxLxM.") + _, time_series_length, num_features = train[0].shape + if sequence_length + n_steps > time_series_length: + raise ValueError(f"Invalid sequence length: Cannot create dataset " + f"using sequence_length={sequence_length} and n_steps={n_steps} " + f"when the time series are of length {time_series_length}") + for t in target_variables: + if t < 0 or t >= num_features: + raise ValueError(f"Target variable {t} is out of bounds. 
Number of features is {num_features}, " + f"so each target variable has to be between 0 and {num_features - 1}.") + + +def _prepare_time_series_forecasting_tensor(tensor: TIME_SERIES_FORECASTING_INPUT, + target_variables: Tuple[int], + sequence_length: int, + n_steps: int) -> Tuple[np.ndarray, np.ndarray]: + population_size, time_series_length, num_features = tensor[0].shape + num_targets = len(target_variables) + num_datapoints = time_series_length - sequence_length - n_steps + 1 + x_tensor = np.zeros((num_datapoints, population_size, sequence_length, num_features), dtype=np.float) + y_tensor = np.zeros((num_datapoints, population_size, num_targets), dtype=np.float) + + for p in range(population_size): + for i in range(num_datapoints): + x_tensor[i, p, :, :] = tensor[0][p, i:i + sequence_length, :] + y_tensor[i, p, :] = tensor[0][p, i + sequence_length + n_steps - 1, target_variables] + + # get rid of population dimension by reshaping + x_tensor = x_tensor.reshape((-1, sequence_length, num_features)) + y_tensor = y_tensor.reshape((-1, num_targets)) + return x_tensor, y_tensor + + +class TimeSeriesClassificationDataset(BaseDataset): + def __init__(self, + train: TIME_SERIES_CLASSIFICATION_INPUT, + val: Optional[TIME_SERIES_CLASSIFICATION_INPUT] = None): + _check_time_series_inputs(train=train, + val=val, + task_type="time_series_classification") + super().__init__(train_tensors=train, val_tensors=val, shuffle=True) + self.cross_validators = get_cross_validators( + CrossValTypes.stratified_k_fold_cross_validation, + CrossValTypes.k_fold_cross_validation, + CrossValTypes.shuffle_split_cross_validation, + CrossValTypes.stratified_shuffle_split_cross_validation + ) + self.holdout_validators = get_holdout_validators( + HoldoutValTypes.holdout_validation, + HoldoutValTypes.stratified_holdout_validation + ) + + +class TimeSeriesRegressionDataset(BaseDataset): + def __init__(self, train: Tuple[np.ndarray, np.ndarray], val: Optional[Tuple[np.ndarray, np.ndarray]] = None): + _check_time_series_inputs(train=train, + val=val, + task_type="time_series_regression") + super().__init__(train_tensors=train, val_tensors=val, shuffle=True) + self.cross_validators = get_cross_validators( + CrossValTypes.k_fold_cross_validation, + CrossValTypes.shuffle_split_cross_validation + ) + self.holdout_validators = get_holdout_validators( + HoldoutValTypes.holdout_validation + ) + + +def _check_time_series_inputs(task_type: str, + train: Union[TIME_SERIES_CLASSIFICATION_INPUT, TIME_SERIES_REGRESSION_INPUT], + val: Optional[ + Union[TIME_SERIES_CLASSIFICATION_INPUT, TIME_SERIES_REGRESSION_INPUT]] = None + ) -> None: + if len(train) != 2: + raise ValueError(f"There must be exactly two training tensors for {task_type}. " + f"The first one containing the data and the second one containing the targets.") + if train[0].ndim != 3: + raise ValueError( + f"The training data for {task_type} has to be a three-dimensional tensor of shape NxSxM.") + if train[1].ndim != 1: + raise ValueError( + f"The training targets for {task_type} have to be of shape N." + ) + if val is not None: + if len(val) != 2: + raise ValueError( + f"There must be exactly two validation tensors for{task_type}. " + f"The first one containing the data and the second one containing the targets.") + if val[0].ndim != 3: + raise ValueError( + f"The validation data for {task_type} has to be a " + f"three-dimensional tensor of shape NxSxM.") + if val[0].ndim != 1: + raise ValueError( + f"The validation targets for {task_type} have to be of shape N." 
+ ) diff --git a/autoPyTorch/components/lr_scheduler/__init__.py b/autoPyTorch/ensemble/__init__.py similarity index 100% rename from autoPyTorch/components/lr_scheduler/__init__.py rename to autoPyTorch/ensemble/__init__.py diff --git a/autoPyTorch/ensemble/abstract_ensemble.py b/autoPyTorch/ensemble/abstract_ensemble.py new file mode 100644 index 000000000..ecbe5edfc --- /dev/null +++ b/autoPyTorch/ensemble/abstract_ensemble.py @@ -0,0 +1,78 @@ +from abc import ABCMeta, abstractmethod +from typing import List, Tuple, Union + +import numpy as np + +from autoPyTorch.pipeline.base_pipeline import BasePipeline + + +class AbstractEnsemble(object): + __metaclass__ = ABCMeta + + @abstractmethod + def fit( + self, + base_models_predictions: np.ndarray, + true_targets: np.ndarray, + model_identifiers: List[Tuple[int, int, float]], + ) -> 'AbstractEnsemble': + """Fit an ensemble given predictions of base models and targets. + Ensemble building maximizes performance (in contrast to + hyperparameter optimization)! + + Args: + base_models_predictions (np.ndarray): + array of shape = [n_base_models, n_data_points, n_targets] + This are the predictions of the individual models found by SMAC + true_targets (np.ndarray) : array of shape [n_targets] + This is the ground truth of the above predictions + model_identifiers (List[Tuple[int, int, float]]): identifier for each base model. + Can be used for practical text output of the ensemble. + + Returns: + self + """ + pass + + @abstractmethod + def predict(self, base_models_predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + """Create ensemble predictions from the base model predictions. + + Args: + base_models_predictions (Union[np.ndarray, List[np.ndarray]]) : array of + shape = [n_base_models, n_data_points, n_targets] + Same as in the fit method. + + Returns: + predicted array + """ + self + + @abstractmethod + def get_models_with_weights(self, models: BasePipeline) -> List[Tuple[float, BasePipeline]]: + """Return a list of (weight, model) pairs + + Args: + models : dict {identifier : model object} + The identifiers are the same as the one presented to the fit() + method. Models can be used for nice printing. + Returns: + array of weights : [(weight_1, model_1), ..., (weight_n, model_n)] + """ + + @abstractmethod + def get_selected_model_identifiers(self) -> List[Tuple[int, int, float]]: + """Return identifiers of models in the ensemble. + This includes models which have a weight of zero! + + Returns: + The selected models (seed, idx, budget) from smac + """ + + @abstractmethod + def get_validation_performance(self) -> float: + """Return validation performance of ensemble. 
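To illustrate the contract above, a hypothetical minimal ensemble (not part of this diff) that averages all base model predictions with equal weight:

import numpy as np
from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble

class UniformAverageEnsemble(AbstractEnsemble):
    def fit(self, base_models_predictions, true_targets, model_identifiers):
        self.identifiers_ = list(model_identifiers)
        return self

    def predict(self, base_models_predictions):
        # average over the n_base_models axis
        return np.mean(np.asarray(base_models_predictions), axis=0)

    def get_models_with_weights(self, models):
        weight = 1.0 / len(self.identifiers_)
        return [(weight, models[identifier]) for identifier in self.identifiers_]

    def get_selected_model_identifiers(self):
        return self.identifiers_

    def get_validation_performance(self):
        return float('nan')  # this toy ensemble does not track a validation score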
+ + Returns: + Score + """ diff --git a/autoPyTorch/ensemble/ensemble_builder.py b/autoPyTorch/ensemble/ensemble_builder.py new file mode 100644 index 000000000..2462ebf23 --- /dev/null +++ b/autoPyTorch/ensemble/ensemble_builder.py @@ -0,0 +1,1495 @@ +# -*- encoding: utf-8 -*- +import glob +import gzip +import logging +import logging.handlers +import math +import numbers +import os +import pickle +import re +import shutil +import time +import traceback +import zlib +from typing import Dict, List, Optional, Set, Tuple, Union + +import dask.distributed + +import numpy as np + +import pandas as pd + +import pynisher + +from sklearn.utils.validation import check_random_state + +from smac.callbacks import IncorporateRunResultCallback +from smac.optimizer.smbo import SMBO +from smac.runhistory.runhistory import RunInfo, RunValue + +from autoPyTorch.constants import BINARY +from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble +from autoPyTorch.ensemble.ensemble_selection import EnsembleSelection +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score +from autoPyTorch.utils.backend import Backend +from autoPyTorch.utils.logging_ import get_named_client_logger + +Y_ENSEMBLE = 0 +Y_TEST = 1 + +MODEL_FN_RE = r'_([0-9]*)_([0-9]*)_([0-9]+\.*[0-9]*)\.npy' + + +class EnsembleBuilderManager(IncorporateRunResultCallback): + def __init__( + self, + start_time: float, + time_left_for_ensembles: float, + backend: Backend, + dataset_name: str, + task_type: int, + output_type: int, + metrics: List[autoPyTorchMetric], + opt_metric: str, + ensemble_size: int, + ensemble_nbest: int, + max_models_on_disc: Union[float, int], + seed: int, + precision: int, + max_iterations: Optional[int], + read_at_most: int, + ensemble_memory_limit: Optional[int], + random_state: int, + logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, + ): + """ SMAC callback to handle ensemble building + Parameters + ---------- + start_time: int + the time when this job was started, to account for any latency in job allocation + time_left_for_ensemble: int + How much time is left for the task. Job should finish within this allocated time + backend: util.backend.Backend + backend to write and read files + dataset_name: str + name of dataset + task_type: int + what type of output is expected. If Binary, we need to argmax the one hot encoding. + metrics: List[autoPyTorchMetric], + A set of metrics that will be used to get performance estimates + opt_metric: str + name of the optimization metrics + ensemble_size: int + maximal size of ensemble (passed to ensemble_selection) + ensemble_nbest: int/float + if int: consider only the n best prediction + if float: consider only this fraction of the best models + Both wrt to validation predictions + If performance_range_threshold > 0, might return less models + max_models_on_disc: Union[float, int] + Defines the maximum number of models that are kept in the disc. + If int, it must be greater or equal than 1, and dictates the max number of + models to keep. + If float, it will be interpreted as the max megabytes allowed of disc space. That + is, if the number of ensemble candidates require more disc space than this float + value, the worst models will be deleted to keep within this budget. + Models and predictions of the worst-performing models will be deleted then. + If None, the feature is disabled. 
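A small check (the file name is invented for illustration) of what the MODEL_FN_RE pattern defined near the top of this file extracts from a prediction file name: the seed, the run number and the budget:

import re

MODEL_FN_RE = r'_([0-9]*)_([0-9]*)_([0-9]+\.*[0-9]*)\.npy'
re.search(MODEL_FN_RE, 'predictions_ensemble_1_23_50.0.npy').groups()
# -> ('1', '23', '50.0')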
+ It defines an upper bound on the models that can be used in the ensemble. + seed: int + random seed + max_iterations: int + maximal number of iterations to run this script + (default None --> deactivated) + precision: [16,32,64,128] + precision of floats to read the predictions + memory_limit: Optional[int] + memory limit in mb. If ``None``, no memory limit is enforced. + read_at_most: int + read at most n new prediction files in each iteration + logger_port: int + port in where to publish a msg + Returns + ------- + List[Tuple[int, float, float, float]]: + A list with the performance history of this ensemble, of the form + [[pandas_timestamp, train_performance, val_performance, test_performance], ...] + """ + self.start_time = start_time + self.time_left_for_ensembles = time_left_for_ensembles + self.backend = backend + self.dataset_name = dataset_name + self.task_type = task_type + self.output_type = output_type + self.metrics = metrics + self.opt_metric = opt_metric + self.ensemble_size = ensemble_size + self.ensemble_nbest = ensemble_nbest + self.max_models_on_disc = max_models_on_disc # type: Union[float, int] + self.seed = seed + self.precision = precision + self.max_iterations = max_iterations + self.read_at_most = read_at_most + self.ensemble_memory_limit = ensemble_memory_limit + self.random_state = random_state + self.logger_port = logger_port + + # Store something similar to SMAC's runhistory + self.history = [] # type: List[Dict[str, float]] + + # We only submit new ensembles when there is not an active ensemble job + self.futures = [] # type: List[dask.Future] + + # The last criteria is the number of iterations + self.iteration = 0 + + # Keep track of when we started to know when we need to finish! + self.start_time = time.time() + + def __call__( + self, + smbo: 'SMBO', + run_info: RunInfo, + result: RunValue, + time_left: float, + ) -> None: + self.build_ensemble(smbo.tae_runner.client) + + def build_ensemble(self, dask_client: dask.distributed.Client) -> None: + + # The second criteria is elapsed time + elapsed_time = time.time() - self.start_time + + logger = get_named_client_logger( + name='EnsembleBuilder', + port=self.logger_port, + ) + + # First test for termination conditions + if self.time_left_for_ensembles < elapsed_time: + logger.info( + "Terminate ensemble building as not time is left (run for {}s)".format( + elapsed_time + ), + ) + return + if self.max_iterations is not None and self.max_iterations <= self.iteration: + logger.info( + "Terminate ensemble building because of max iterations: {} of {}".format( + self.max_iterations, + self.iteration + ) + ) + return + + if len(self.futures) != 0: + if self.futures[0].done(): + result = self.futures.pop().result() + if result: + ensemble_history, self.ensemble_nbest, _, _ = result + logger.debug("iteration={} @ elapsed_time={} has history={}".format( + self.iteration, + elapsed_time, + ensemble_history, + )) + self.history.extend(ensemble_history) + + # Only submit new jobs if the previous ensemble job finished + if len(self.futures) == 0: + + # Add the result of the run + # On the next while iteration, no references to + # ensemble builder object, so it should be garbage collected to + # save memory while waiting for resources + # Also, notice how ensemble nbest is returned, so we don't waste + # iterations testing if the deterministic predictions size can + # be fitted in memory + try: + # Submit a Dask job from this job, to properly + # see it in the dask diagnostic dashboard + # Notice that the forked 
ensemble_builder_process will + # wait for the below function to be done + self.futures.append(dask_client.submit( + fit_and_return_ensemble, + backend=self.backend, + dataset_name=self.dataset_name, + task_type=self.task_type, + output_type=self.output_type, + metrics=self.metrics, + opt_metric=self.opt_metric, + ensemble_size=self.ensemble_size, + ensemble_nbest=self.ensemble_nbest, + max_models_on_disc=self.max_models_on_disc, + seed=self.seed, + precision=self.precision, + memory_limit=self.ensemble_memory_limit, + read_at_most=self.read_at_most, + random_state=self.seed, + logger_port=self.logger_port, + end_at=self.start_time + self.time_left_for_ensembles, + iteration=self.iteration, + return_predictions=False, + priority=100, + )) + + logger.info( + "{}/{} Started Ensemble builder job at {} for iteration {}.".format( + # Log the client to make sure we + # remain connected to the scheduler + self.futures[0], + dask_client, + time.strftime("%Y.%m.%d-%H.%M.%S"), + self.iteration, + ), + ) + self.iteration += 1 + except Exception as e: + exception_traceback = traceback.format_exc() + error_message = repr(e) + logger.critical(exception_traceback) + logger.critical(error_message) + + +def fit_and_return_ensemble( + backend: Backend, + dataset_name: str, + task_type: int, + output_type: int, + metrics: List[autoPyTorchMetric], + opt_metric: str, + ensemble_size: int, + ensemble_nbest: int, + max_models_on_disc: Union[float, int], + seed: int, + precision: int, + memory_limit: Optional[int], + read_at_most: int, + random_state: int, + logger_port: int, + end_at: float, + iteration: int, + return_predictions: bool, +) -> Tuple[ + List[Dict[str, float]], + int, + Optional[np.ndarray], + Optional[np.ndarray], +]: + """ + A short function to fit and create an ensemble. It is just a wrapper to easily send + a request to dask to create an ensemble and clean the memory when finished + Parameters + ---------- + backend: util.backend.Backend + backend to write and read files + dataset_name: str + name of dataset + metrics: List[autoPyTorchMetric], + A set of metrics that will be used to get performance estimates + opt_metric: + Name of the metric to optimize + task_type: int + type of output expected in the ground truth + ensemble_size: int + maximal size of ensemble (passed to ensemble.ensemble_selection) + ensemble_nbest: int/float + if int: consider only the n best prediction + if float: consider only this fraction of the best models + Both wrt to validation predictions + If performance_range_threshold > 0, might return less models + max_models_on_disc: int + Defines the maximum number of models that are kept in the disc. + If int, it must be greater or equal than 1, and dictates the max number of + models to keep. + If float, it will be interpreted as the max megabytes allowed of disc space. That + is, if the number of ensemble candidates require more disc space than this float + value, the worst models will be deleted to keep within this budget. + Models and predictions of the worst-performing models will be deleted then. + If None, the feature is disabled. + It defines an upper bound on the models that can be used in the ensemble. + seed: int + random seed + precision: [16,32,64,128] + precision of floats to read the predictions + memory_limit: Optional[int] + memory limit in mb. If ``None``, no memory limit is enforced. 
+ read_at_most: int + read at most n new prediction files in each iteration + logger_port: int + port in localhost where to publish msg + end_at: float + At what time the job must finish. Needs to be the endtime and not the time left + because we do not know when dask schedules the job. + iteration: int + The current iteration + Returns + ------- + List[Tuple[int, float, float, float]] + A list with the performance history of this ensemble, of the form + [[pandas_timestamp, train_performance, val_performance, test_performance], ...] + """ + result = EnsembleBuilder( + backend=backend, + dataset_name=dataset_name, + task_type=task_type, + output_type=output_type, + metrics=metrics, + opt_metric=opt_metric, + ensemble_size=ensemble_size, + ensemble_nbest=ensemble_nbest, + max_models_on_disc=max_models_on_disc, + seed=seed, + precision=precision, + memory_limit=memory_limit, + read_at_most=read_at_most, + random_state=random_state, + logger_port=logger_port, + ).run( + end_at=end_at, + iteration=iteration, + return_predictions=return_predictions, + ) + return result + + +class EnsembleBuilder(object): + def __init__( + self, + backend: Backend, + dataset_name: str, + task_type: int, + output_type: int, + metrics: List[autoPyTorchMetric], + opt_metric: str, + ensemble_size: int = 10, + ensemble_nbest: int = 100, + max_models_on_disc: Union[float, int] = 100, + performance_range_threshold: float = 0, + seed: int = 1, + precision: int = 32, + memory_limit: Optional[int] = 1024, + read_at_most: int = 5, + random_state: Optional[Union[int, np.random.RandomState]] = None, + logger_port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, + ): + """ + Constructor + Parameters + ---------- + backend: util.backend.Backend + backend to write and read files + dataset_name: str + name of dataset + task_type: int + type of ML task + metrics: List[autoPyTorchMetric], + name of metric to score predictions + opt_metric: str + name of the metric to optimize + ensemble_size: int + maximal size of ensemble (passed to ensemble.ensemble_selection) + ensemble_nbest: int/float + if int: consider only the n best prediction + if float: consider only this fraction of the best models + Both wrt to validation predictions + If performance_range_threshold > 0, might return less models + max_models_on_disc: Union[float, int] + Defines the maximum number of models that are kept in the disc. + If int, it must be greater or equal than 1, and dictates the max number of + models to keep. + If float, it will be interpreted as the max megabytes allowed of disc space. That + is, if the number of ensemble candidates require more disc space than this float + value, the worst models will be deleted to keep within this budget. + Models and predictions of the worst-performing models will be deleted then. + If None, the feature is disabled. + It defines an upper bound on the models that can be used in the ensemble. + performance_range_threshold: float + Keep only models that are better than: + dummy + (best - dummy)*performance_range_threshold + E.g dummy=2, best=4, thresh=0.5 --> only consider models with score > 3 + Will at most return the minimum between ensemble_nbest models, + and max_models_on_disc. Might return less + seed: int + random seed + precision: [16,32,64,128] + precision of floats to read the predictions + memory_limit: Optional[int] + memory limit in mb. If ``None``, no memory limit is enforced. 
+ read_at_most: int + read at most n new prediction files in each iteration + logger_port: int + port where to publish messages + """ + + super(EnsembleBuilder, self).__init__() + + self.backend = backend # communication with filesystem + self.dataset_name = dataset_name + self.task_type = task_type + self.output_type = output_type + self.metrics = metrics + self.opt_metric = opt_metric + self.ensemble_size = ensemble_size + self.performance_range_threshold = performance_range_threshold + + if isinstance(ensemble_nbest, numbers.Integral) and ensemble_nbest < 1: + raise ValueError("Integer ensemble_nbest has to be larger 1: %s" % + ensemble_nbest) + elif not isinstance(ensemble_nbest, numbers.Integral): + if ensemble_nbest < 0 or ensemble_nbest > 1: + raise ValueError( + "Float ensemble_nbest best has to be >= 0 and <= 1: %s" % + ensemble_nbest) + + self.ensemble_nbest = ensemble_nbest + + # max_models_on_disc can be a float, in such case we need to + # remember the user specified Megabytes and translate this to + # max number of ensemble models. max_resident_models keeps the + # maximum number of models in disc + if max_models_on_disc is not None and max_models_on_disc < 0: + raise ValueError( + "max_models_on_disc has to be a positive number or None" + ) + self.max_models_on_disc = max_models_on_disc + self.max_resident_models = None # type: Optional[int] + + self.seed = seed + self.precision = precision + self.memory_limit = memory_limit + self.read_at_most = read_at_most + self.random_state = check_random_state(random_state) + + # Setup the logger + self.logger_port = logger_port + self.logger = get_named_client_logger( + name='EnsembleBuilder', + port=self.logger_port, + ) + + if ensemble_nbest == 1: + self.logger.debug("Behaviour depends on int/float: %s, %s (ensemble_nbest, type)" % + (ensemble_nbest, type(ensemble_nbest))) + + self.start_time = 0.0 + self.model_fn_re = re.compile(MODEL_FN_RE) + + self.last_hash = None # hash of ensemble training data + self.y_true_ensemble = None + self.SAVE2DISC = True + + # already read prediction files + # We read in back this object to give the ensemble the possibility to have memory + # Every ensemble task is sent to dask as a function, that cannot take un-picklable + # objects as attributes. For this reason, we dump to disk the stage of the past + # ensemble iterations to kick-start the ensembling process + # {"file name": { + # "ens_score": float + # "mtime_ens": str, + # "mtime_test": str, + # "seed": int, + # "num_run": int, + # }} + self.read_scores = {} + # {"file_name": { + # Y_ENSEMBLE: np.ndarray + # Y_TEST: np.ndarray + # } + # } + self.read_preds = {} + + # Depending on the dataset dimensions, + # regenerating every iteration, the predictions + # scores for self.read_preds + # is too computationally expensive + # As the ensemble builder is stateless + # (every time the ensemble builder gets resources + # from dask, it builds this object from scratch) + # we save the state of this dictionary to memory + # and read it if available + self.ensemble_memory_file = os.path.join( + self.backend.internals_directory, + 'ensemble_read_preds.pkl' + ) + if os.path.exists(self.ensemble_memory_file): + try: + with (open(self.ensemble_memory_file, "rb")) as memory: + self.read_preds, self.last_hash = pickle.load(memory) + except Exception as e: + self.logger.warning( + "Could not load the previous iterations of ensemble_builder predictions." + "This might impact the quality of the run. 
Exception={} {}".format( + e, + traceback.format_exc(), + ) + ) + self.ensemble_score_file = os.path.join( + self.backend.internals_directory, + 'ensemble_read_scores.pkl' + ) + if os.path.exists(self.ensemble_score_file): + try: + with (open(self.ensemble_score_file, "rb")) as memory: + self.read_scores = pickle.load(memory) + except Exception as e: + self.logger.warning( + "Could not load the previous iterations of ensemble_builder scores." + "This might impact the quality of the run. Exception={} {}".format( + e, + traceback.format_exc(), + ) + ) + + # hidden feature which can be activated via an environment variable. This keeps all + # models and predictions which have ever been a candidate. This is necessary to post-hoc + # compute the whole ensemble building trajectory. + self._has_been_candidate = set() # type: Set[str] + + self.validation_performance_ = np.inf + + # Track the ensemble performance + self.y_test = None + datamanager = self.backend.load_datamanager() + if datamanager.test_tensors is not None: + self.y_test = datamanager.test_tensors[1] + del datamanager + self.ensemble_history = [] # type: List[Dict[str, float]] + + def run( + self, + iteration: int, + time_left: Optional[float] = None, + end_at: Optional[float] = None, + time_buffer: int = 5, + return_predictions: bool = False, + ) -> Tuple[ + List[Dict[str, float]], + int, + Optional[np.ndarray], + Optional[np.ndarray], + ]: + """ + This function is an interface to the main process and fundamentally calls main(), the + later has the actual ensemble selection logic. + + The motivation towards this run() method is that it can be seen as a wrapper over the + whole ensemble_builder.main() process so that pynisher can manage the memory/time limits. + + This is handy because this function reduces the number of members of the ensemble in case + we run into memory issues. It does so in a halving fashion. + + Args: + time_left (float): + How much time is left for the ensemble builder process + iteration (int): + Which is the current iteration + return_predictions (bool): + Whether we want to return the predictions of the current model or not + + Returns: + ensemble_history (Dict): + A snapshot of both test and optimization performance. For debugging. + ensemble_nbest (int): + The user provides a direction on how many models to use in ensemble selection. + This number can be reduced internally if the memory requirements force it. + train_predictions (np.ndarray): + The optimization prediction from the current ensemble. + test_predictions (np.ndarray): + The train prediction from the current ensemble. 
+ """ + + if time_left is None and end_at is None: + raise ValueError('Must provide either time_left or end_at.') + elif time_left is not None and end_at is not None: + raise ValueError('Cannot provide both time_left and end_at.') + + self.logger = get_named_client_logger( + name='EnsembleBuilder', + port=self.logger_port, + ) + + process_start_time = time.time() + while True: + + if time_left is not None: + time_elapsed = time.time() - process_start_time + time_left -= time_elapsed + elif end_at is not None: + current_time = time.time() + if current_time > end_at: + break + else: + time_left = end_at - current_time + else: + raise NotImplementedError() + + if time_left - time_buffer < 1: + break + safe_ensemble_script = pynisher.enforce_limits( + wall_time_in_s=int(time_left - time_buffer), + mem_in_mb=self.memory_limit, + logger=self.logger + )(self.main) + safe_ensemble_script(time_left, iteration, return_predictions) + if safe_ensemble_script.exit_status is pynisher.MemorylimitException: + # if ensemble script died because of memory error, + # reduce nbest to reduce memory consumption and try it again + + # ATTENTION: main will start from scratch; # all data structures are empty again + try: + os.remove(self.ensemble_memory_file) + except: # noqa E722 + pass + + if isinstance(self.ensemble_nbest, numbers.Integral) and self.ensemble_nbest <= 1: + if self.read_at_most == 1: + self.logger.error( + "Memory Exception -- Unable to further reduce the number of ensemble " + "members and can no further limit the number of ensemble members " + "loaded per iteration -- please restart autoPytorch with a higher " + "value for the argument `memory_limit` (current limit is %s MB). " + "The ensemble builder will keep running to delete files from disk in " + "case this was enabled.", self.memory_limit + ) + self.ensemble_nbest = 0 + else: + self.read_at_most = 1 + self.logger.warning( + "Memory Exception -- Unable to further reduce the number of ensemble " + "members -- Now reducing the number of predictions per call to read " + "at most to 1." + ) + else: + if isinstance(self.ensemble_nbest, numbers.Integral): + self.ensemble_nbest = max(1, int(self.ensemble_nbest / 2)) + else: + self.ensemble_nbest = int(self.ensemble_nbest / 2) + self.logger.warning("Memory Exception -- restart with " + "less ensemble_nbest: %d" % self.ensemble_nbest) + return [], self.ensemble_nbest, None, None + else: + return safe_ensemble_script.result + + return [], self.ensemble_nbest, None, None + + def main( + self, time_left: float, iteration: int, return_predictions: bool, + ) -> Tuple[ + List[Dict[str, float]], + int, + Optional[np.ndarray], + Optional[np.ndarray], + ]: + """ + This is the main function of the ensemble builder process and can be considered + a wrapper over the ensemble selection method implemented y EnsembleSelection class. + + This method is going to be called multiple times by the main process, to + build and ensemble, in case the SMAC process produced new models and to provide + anytime results. + + On this regard, this method mainly: + 1- select from all the individual models that smac created, the N-best candidates + (this in the scenario that N > ensemble_nbest argument to this class). This is + done based on a score calculated via the metrics argument. + 2- This pre-selected candidates are provided to the ensemble selection method + and if a ensemble is found under the provided memory/time constraints, a new + ensemble is proposed. 
+ 3- Because this process will be called multiple times, it performs checks to make + sure a new ensenmble is only proposed if new predictions are available, as well + as making sure we do not run out of resources (like disk space) + + Args: + time_left (float): + How much time is left for the ensemble builder process + iteration (int): + Which is the current iteration + return_predictions (bool): + Whether we want to return the predictions of the current model or not + + Returns: + ensemble_history (Dict): + A snapshot of both test and optimization performance. For debugging. + ensemble_nbest (int): + The user provides a direction on how many models to use in ensemble selection. + This number can be reduced internally if the memory requirements force it. + train_predictions (np.ndarray): + The optimization prediction from the current ensemble. + test_predictions (np.ndarray): + The train prediction from the current ensemble. + """ + + # Pynisher jobs inside dask 'forget' + # the logger configuration. So we have to set it up + # accordingly + self.logger = get_named_client_logger( + name='EnsembleBuilder', + port=self.logger_port, + ) + + self.start_time = time.time() + train_pred, test_pred = None, None + + used_time = time.time() - self.start_time + self.logger.debug( + 'Starting iteration %d, time left: %f', + iteration, + time_left - used_time, + ) + + # populates self.read_preds and self.read_scores + if not self.score_ensemble_preds(): + if return_predictions: + return self.ensemble_history, self.ensemble_nbest, train_pred, test_pred + else: + return self.ensemble_history, self.ensemble_nbest, None, None + + # Only the models with the n_best predictions are candidates + # to be in the ensemble + candidate_models = self.get_n_best_preds() + if not candidate_models: # no candidates yet + if return_predictions: + return self.ensemble_history, self.ensemble_nbest, train_pred, test_pred + else: + return self.ensemble_history, self.ensemble_nbest, None, None + + # populates predictions in self.read_preds + # reduces selected models if file reading failed + n_sel_test = self.get_test_preds(selected_keys=candidate_models) + + # If any of n_sel_* is not empty and overlaps with candidate_models, + # then ensure candidate_models AND n_sel_test are sorted the same + candidate_models_set = set(candidate_models) + if candidate_models_set.intersection(n_sel_test): + candidate_models = sorted(list(candidate_models_set.intersection( + n_sel_test))) + n_sel_test = candidate_models + else: + # This has to be the case + n_sel_test = [] + + if os.environ.get('ENSEMBLE_KEEP_ALL_CANDIDATES'): + for candidate in candidate_models: + self._has_been_candidate.add(candidate) + + # train ensemble + ensemble = self.fit_ensemble(selected_keys=candidate_models) + + # Save the ensemble for later use in the main module! 
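+        # backend.save_ensemble below persists the fitted EnsembleSelection
+        # object keyed by iteration and seed, so the main autoPyTorch process
+        # can pick up the most recent ensemble later on.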
+ if ensemble is not None and self.SAVE2DISC: + self.backend.save_ensemble(ensemble, iteration, self.seed) + + # Delete files of non-candidate models - can only be done after fitting the ensemble and + # saving it to disc so we do not accidentally delete models in the previous ensemble + if self.max_resident_models is not None: + self._delete_excess_models(selected_keys=candidate_models) + + # Save the read scores status for the next iteration + with open(self.ensemble_score_file, "wb") as memory: + pickle.dump(self.read_scores, memory) + + if ensemble is not None: + train_pred = self.predict(set_="train", + ensemble=ensemble, + selected_keys=candidate_models, + n_preds=len(candidate_models), + index_run=iteration) + # TODO if predictions fails, build the model again during the + # next iteration! + test_pred = self.predict(set_="test", + ensemble=ensemble, + selected_keys=n_sel_test, + n_preds=len(candidate_models), + index_run=iteration) + + # Add a score to run history to see ensemble progress + self._add_ensemble_trajectory( + train_pred, + test_pred + ) + + # The loaded predictions and the hash can only be saved after the ensemble has been + # built, because the hash is computed during the construction of the ensemble + with open(self.ensemble_memory_file, "wb") as memory: + pickle.dump((self.read_preds, self.last_hash), memory) + + if return_predictions: + return self.ensemble_history, self.ensemble_nbest, train_pred, test_pred + else: + return self.ensemble_history, self.ensemble_nbest, None, None + + def get_disk_consumption(self, pred_path: str) -> float: + """ + gets the cost of a model being on disc + """ + + match = self.model_fn_re.search(pred_path) + if not match: + raise ValueError("Invalid path format %s" % pred_path) + _seed = int(match.group(1)) + _num_run = int(match.group(2)) + _budget = float(match.group(3)) + + stored_files_for_run = os.listdir( + self.backend.get_numrun_directory(_seed, _num_run, _budget)) + stored_files_for_run = [ + os.path.join(self.backend.get_numrun_directory(_seed, _num_run, _budget), file_name) + for file_name in stored_files_for_run] + this_model_cost = sum([os.path.getsize(path) for path in stored_files_for_run]) + + # get the megabytes + return round(this_model_cost / math.pow(1024, 2), 2) + + def score_ensemble_preds(self) -> bool: + """ + score predictions on ensemble building data set; + populates self.read_preds and self.read_scores + """ + + self.logger.debug("Read ensemble data set predictions") + + if self.y_true_ensemble is None: + try: + self.y_true_ensemble = self.backend.load_targets_ensemble() + except FileNotFoundError: + self.logger.debug( + "Could not find true targets on ensemble data set: %s", + traceback.format_exc(), + ) + return False + + pred_path = os.path.join( + glob.escape(self.backend.get_runs_directory()), + '%d_*_*' % self.seed, + 'predictions_ensemble_%s_*_*.npy*' % self.seed, + ) + y_ens_files = glob.glob(pred_path) + y_ens_files = [y_ens_file for y_ens_file in y_ens_files + if y_ens_file.endswith('.npy') or y_ens_file.endswith('.npy.gz')] + self.y_ens_files = y_ens_files + # no validation predictions so far -- no files + if len(self.y_ens_files) == 0: + self.logger.debug("Found no prediction files on ensemble data set:" + " %s" % pred_path) + return False + + # First sort files chronologically + to_read = [] + for y_ens_fn in self.y_ens_files: + match = self.model_fn_re.search(y_ens_fn) + if match is None: + raise ValueError(f"Could not interpret file {y_ens_fn} " + "Something went wrong while scoring 
predictions") + _seed = int(match.group(1)) + _num_run = int(match.group(2)) + _budget = float(match.group(3)) + + to_read.append([y_ens_fn, match, _seed, _num_run, _budget]) + + n_read_files = 0 + # Now read file wrt to num_run + # Mypy assumes sorted returns an object because of the lambda. Can't get to recognize the list + # as a returning list, so as a work-around we skip next line + for y_ens_fn, match, _seed, _num_run, _budget in sorted(to_read, key=lambda x: x[3]): # type: ignore + if self.read_at_most and n_read_files >= self.read_at_most: + # limit the number of files that will be read + # to limit memory consumption + break + + if not y_ens_fn.endswith(".npy") and not y_ens_fn.endswith(".npy.gz"): + self.logger.info('Error loading file (not .npy or .npy.gz): %s', y_ens_fn) + continue + + if not self.read_scores.get(y_ens_fn): + self.read_scores[y_ens_fn] = { + "ens_score": -np.inf, + "mtime_ens": 0, + "mtime_test": 0, + "seed": _seed, + "num_run": _num_run, + "budget": _budget, + "disc_space_cost_mb": None, + # Lazy keys so far: + # 0 - not loaded + # 1 - loaded and in memory + # 2 - loaded but dropped again + # 3 - deleted from disk due to space constraints + "loaded": 0 + } + if not self.read_preds.get(y_ens_fn): + self.read_preds[y_ens_fn] = { + Y_ENSEMBLE: None, + Y_TEST: None, + } + + if self.read_scores[y_ens_fn]["mtime_ens"] == os.path.getmtime(y_ens_fn): + # same time stamp; nothing changed; + continue + + # actually read the predictions and score them + y_ensemble = self._read_np_fn(y_ens_fn) + scores = calculate_score( + metrics=self.metrics, + target=self.y_true_ensemble, + prediction=y_ensemble, + task_type=self.task_type, + ) + try: + y_ensemble = self._read_np_fn(y_ens_fn) + scores = calculate_score( + metrics=self.metrics, + target=self.y_true_ensemble, + prediction=y_ensemble, + task_type=self.task_type, + ) + + if np.isfinite(self.read_scores[y_ens_fn]["ens_score"]): + self.logger.debug( + 'Changing ensemble score for file %s from %f to %f ' + 'because file modification time changed? %f - %f', + y_ens_fn, + self.read_scores[y_ens_fn]["ens_score"], + scores[self.opt_metric], + self.read_scores[y_ens_fn]["mtime_ens"], + os.path.getmtime(y_ens_fn), + ) + + self.read_scores[y_ens_fn]["ens_score"] = scores[self.opt_metric] + + # It is not needed to create the object here + # To save memory, we just score the object. + self.read_scores[y_ens_fn]["mtime_ens"] = os.path.getmtime(y_ens_fn) + self.read_scores[y_ens_fn]["loaded"] = 2 + self.read_scores[y_ens_fn]["disc_space_cost_mb"] = self.get_disk_consumption( + y_ens_fn + ) + + n_read_files += 1 + + except Exception: + self.logger.warning( + 'Error loading %s: %s', + y_ens_fn, + traceback.format_exc(), + ) + self.read_scores[y_ens_fn]["ens_score"] = -np.inf + + self.logger.debug( + 'Done reading %d new prediction files. Loaded %d predictions in ' + 'total.', + n_read_files, + np.sum([pred["loaded"] > 0 for pred in self.read_scores.values()]) + ) + return True + + def get_n_best_preds(self) -> List[str]: + """ + get best n predictions (i.e., keys of self.read_scores) + according to score on "ensemble set" + n: self.ensemble_nbest + Side effects: + ->Define the n-best models to use in ensemble + ->Only the best models are loaded + ->Any model that is not best is candidate to deletion + if max models in disc is exceeded. 
+ """ + + sorted_keys = self._get_list_of_sorted_preds() + + # number of models available + num_keys = len(sorted_keys) + # remove all that are at most as good as random + # note: dummy model must have run_id=1 (there is no run_id=0) + dummy_scores = list(filter(lambda x: x[2] == 1, sorted_keys)) + + # Leave this here for when we enable dummy classifier/scorer + if dummy_scores: + # number of dummy models + num_dummy = len(dummy_scores) + dummy_score = dummy_scores[0] + self.logger.debug("Use %f as dummy score" % dummy_score[1]) + sorted_keys = list(filter(lambda x: x[1] > dummy_score[1], sorted_keys)) + + # remove Dummy Classifier + sorted_keys = list(filter(lambda x: x[2] > 1, sorted_keys)) + if not sorted_keys: + # no model left; try to use dummy score (num_run==0) + # log warning when there are other models but not better than dummy model + if num_keys > num_dummy: + self.logger.warning("No models better than random - using Dummy Score!" + "Number of models besides current dummy model: %d. " + "Number of dummy models: %d", + num_keys - 1, + num_dummy) + sorted_keys = [ + (k, v["ens_score"], v["num_run"]) for k, v in self.read_scores.items() + if v["seed"] == self.seed and v["num_run"] == 1 + ] + # reload predictions if scores changed over time and a model is + # considered to be in the top models again! + if not isinstance(self.ensemble_nbest, numbers.Integral): + # Transform to number of models to keep. Keep at least one + keep_nbest = max(1, min(len(sorted_keys), + int(len(sorted_keys) * self.ensemble_nbest))) + self.logger.debug( + "Library pruning: using only top %f percent of the models for ensemble " + "(%d out of %d)", + self.ensemble_nbest * 100, keep_nbest, len(sorted_keys) + ) + else: + # Keep only at most ensemble_nbest + keep_nbest = min(self.ensemble_nbest, len(sorted_keys)) + self.logger.debug("Library Pruning: using for ensemble only " + " %d (out of %d) models" % (keep_nbest, len(sorted_keys))) + + # If max_models_on_disc is None, do nothing + # One can only read at most max_models_on_disc models + if self.max_models_on_disc is not None: + if not isinstance(self.max_models_on_disc, numbers.Integral): + consumption = [ + [ + v["ens_score"], + v["disc_space_cost_mb"], + ] for v in self.read_scores.values() if v["disc_space_cost_mb"] is not None + ] + max_consumption = max(c[1] for c in consumption) + + # We are pessimistic with the consumption limit indicated by + # max_models_on_disc by 1 model. Such model is assumed to spend + # max_consumption megabytes + if (sum(c[1] for c in consumption) + max_consumption) > self.max_models_on_disc: + + # just leave the best -- higher is better! 
+ # This list is in descending order, to preserve the best models + sorted_cum_consumption = np.cumsum([ + c[1] for c in list(reversed(sorted(consumption))) + ]) + max_consumption + max_models = np.argmax(sorted_cum_consumption > self.max_models_on_disc) + + # Make sure that at least 1 model survives + self.max_resident_models = max(1, max_models) + self.logger.warning( + "Limiting num of models via float max_models_on_disc={}" + " as accumulated={} worst={} num_models={}".format( + self.max_models_on_disc, + (sum(c[1] for c in consumption) + max_consumption), + max_consumption, + self.max_resident_models + ) + ) + else: + self.max_resident_models = None + else: + self.max_resident_models = self.max_models_on_disc + + if self.max_resident_models is not None and keep_nbest > self.max_resident_models: + self.logger.debug( + "Restricting the number of models to %d instead of %d due to argument " + "max_models_on_disc", + self.max_resident_models, keep_nbest, + ) + keep_nbest = self.max_resident_models + + # consider performance_range_threshold + if self.performance_range_threshold > 0: + best_score = sorted_keys[0][1] + min_score = dummy_score[1] + min_score += (best_score - min_score) * self.performance_range_threshold + if sorted_keys[keep_nbest - 1][1] < min_score: + # We can further reduce number of models + # since worst model is worse than thresh + for i in range(0, keep_nbest): + # Look at most at keep_nbest models, + # but always keep at least one model + current_score = sorted_keys[i][1] + if current_score <= min_score: + self.logger.debug("Dynamic Performance range: " + "Further reduce from %d to %d models", + keep_nbest, max(1, i)) + keep_nbest = max(1, i) + break + ensemble_n_best = keep_nbest + + # reduce to keys + reduced_sorted_keys = list(map(lambda x: x[0], sorted_keys)) + + # remove loaded predictions for non-winning models + for k in reduced_sorted_keys[ensemble_n_best:]: + if k in self.read_preds: + self.read_preds[k][Y_ENSEMBLE] = None + self.read_preds[k][Y_TEST] = None + if self.read_scores[k]['loaded'] == 1: + self.logger.debug( + 'Dropping model %s (%d,%d) with score %f.', + k, + self.read_scores[k]['seed'], + self.read_scores[k]['num_run'], + self.read_scores[k]['ens_score'], + ) + self.read_scores[k]['loaded'] = 2 + + # Load the predictions for the winning + for k in reduced_sorted_keys[:ensemble_n_best]: + if ( + ( + k not in self.read_preds or self.read_preds[k][Y_ENSEMBLE] is None + ) + and self.read_scores[k]['loaded'] != 3 + ): + self.read_preds[k][Y_ENSEMBLE] = self._read_np_fn(k) + # No need to load test here because they are loaded + # only if the model ends up in the ensemble + self.read_scores[k]['loaded'] = 1 + + # return best scored keys of self.read_scores + return reduced_sorted_keys[:ensemble_n_best] + + def get_test_preds(self, selected_keys: List[str]) -> List[str]: + """ + test predictions from disc + and store them in self.read_preds + Parameters + --------- + selected_keys: list + list of selected keys of self.read_preds + Return + ------ + success_keys: + all keys in selected keys for which we could read the valid and + test predictions + """ + success_keys_test = [] + + for k in selected_keys: + test_fn = glob.glob( + os.path.join( + glob.escape(self.backend.get_runs_directory()), + '%d_%d_%s' % ( + self.read_scores[k]["seed"], + self.read_scores[k]["num_run"], + self.read_scores[k]["budget"], + ), + 'predictions_test_%d_%d_%s.npy*' % ( + self.read_scores[k]["seed"], + self.read_scores[k]["num_run"], + self.read_scores[k]["budget"] + ) + ) + ) 
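+            # The glob pattern above matches '.npy', compressed '.npy.gz' and any
+            # other suffix starting with '.npy'; the filter below keeps only the
+            # first two formats.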
+ test_fn = [tfn for tfn in test_fn if tfn.endswith('.npy') or tfn.endswith('.npy.gz')] + + if len(test_fn) == 0: + # self.logger.debug("Not found test prediction file (although " + # "ensemble predictions available):%s" % + # test_fn) + pass + else: + if ( + self.read_scores[k]["mtime_test"] == os.path.getmtime(test_fn[0]) + and k in self.read_preds + and self.read_preds[k][Y_TEST] is not None + ): + success_keys_test.append(k) + continue + try: + y_test = self._read_np_fn(test_fn[0]) + self.read_preds[k][Y_TEST] = y_test + success_keys_test.append(k) + self.read_scores[k]["mtime_test"] = os.path.getmtime(test_fn[0]) + except Exception: + self.logger.warning('Error loading %s: %s', + test_fn, traceback.format_exc()) + + return success_keys_test + + def fit_ensemble(self, selected_keys: List[str]) -> Optional[EnsembleSelection]: + """ + fit ensemble + Parameters + --------- + selected_keys: list + list of selected keys of self.read_scores + Returns + ------- + ensemble: EnsembleSelection + trained Ensemble + """ + predictions_train = [self.read_preds[k][Y_ENSEMBLE] for k in selected_keys] + include_num_runs = [ + ( + self.read_scores[k]["seed"], + self.read_scores[k]["num_run"], + self.read_scores[k]["budget"], + ) + for k in selected_keys] + + # check hash if ensemble training data changed + current_hash = "".join([ + str(zlib.adler32(predictions_train[i].data.tobytes())) + for i in range(len(predictions_train)) + ]) + if self.last_hash == current_hash: + self.logger.debug( + "No new model predictions selected -- skip ensemble building " + "-- current performance: %f", + self.validation_performance_, + ) + + return None + self.last_hash = current_hash + + opt_metric = [m for m in self.metrics if m.name == self.opt_metric][0] + if not opt_metric: + raise ValueError(f"Cannot optimize for {self.opt_metric} in {self.metrics} " + "as more than one unique optimization metric was found.") + + ensemble = EnsembleSelection( + ensemble_size=self.ensemble_size, + metric=opt_metric, + random_state=self.random_state, + task_type=self.task_type, + ) + + try: + self.logger.debug( + "Fitting the ensemble on %d models.", + len(predictions_train), + ) + start_time = time.time() + ensemble.fit(predictions_train, self.y_true_ensemble, + include_num_runs) + end_time = time.time() + self.logger.debug( + "Fitting the ensemble took %.2f seconds.", + end_time - start_time, + ) + self.logger.info(str(ensemble)) + self.validation_performance_ = min( + self.validation_performance_, + ensemble.get_validation_performance(), + ) + + except ValueError: + self.logger.error('Caught ValueError: %s', traceback.format_exc()) + return None + except IndexError: + self.logger.error('Caught IndexError: %s' + traceback.format_exc()) + return None + finally: + # Explicitly free memory + del predictions_train + + return ensemble + + def predict(self, set_: str, + ensemble: AbstractEnsemble, + selected_keys: list, + n_preds: int, + index_run: int) -> np.ndarray: + """ + save preditions on ensemble, validation and test data on disc + Parameters + ---------- + set_: ["test"] + data split name + ensemble: EnsembleSelection + trained Ensemble + selected_keys: list + list of selected keys of self.read_scores + n_preds: int + number of prediction models used for ensemble building + same number of predictions on valid and test are necessary + index_run: int + n-th time that ensemble predictions are written to disc + Return + ------ + y: np.ndarray + """ + self.logger.debug("Predicting the %s set with the ensemble!", set_) + + if set_ == 
'test': + pred_set = Y_TEST + else: + pred_set = Y_ENSEMBLE + predictions = [self.read_preds[k][pred_set] for k in selected_keys] + + if n_preds == len(predictions): + y = ensemble.predict(predictions) + if self.output_type == BINARY: + y = y[:, 1] + if self.SAVE2DISC: + self.backend.save_predictions_as_txt( + predictions=y, + subset=set_, + idx=index_run, + prefix=self.dataset_name, + precision=8, + ) + return y + else: + self.logger.info( + "Found inconsistent number of predictions and models (%d vs " + "%d) for subset %s", + len(predictions), + n_preds, + set_, + ) + return None + + def _add_ensemble_trajectory(self, train_pred: np.ndarray, test_pred: np.ndarray) -> None: + """ + Records a snapshot of how the performance look at a given training + time. + Parameters + ---------- + ensemble: EnsembleSelection + The ensemble selection object to record + test_pred: np.ndarray + The predictions on the test set using ensemble + """ + performance_stamp = { + 'Timestamp': pd.Timestamp.now(), + } + if self.output_type == BINARY: + if len(train_pred.shape) == 1 or train_pred.shape[1] == 1: + train_pred = np.vstack( + ((1 - train_pred).reshape((1, -1)), train_pred.reshape((1, -1))) + ).transpose() + if test_pred is not None and (len(test_pred.shape) == 1 or test_pred.shape[1] == 1): + test_pred = np.vstack( + ((1 - test_pred).reshape((1, -1)), test_pred.reshape((1, -1))) + ).transpose() + + train_scores = calculate_score( + metrics=self.metrics, + target=self.y_true_ensemble, + prediction=train_pred, + task_type=self.task_type, + ) + performance_stamp.update({'train_' + str(key): val for key, val in train_scores.items()}) + if self.y_test is not None: + test_scores = calculate_score( + metrics=self.metrics, + target=self.y_test, + prediction=test_pred, + task_type=self.task_type, + ) + performance_stamp.update( + {'test_' + str(key): val for key, val in test_scores.items()}) + + self.ensemble_history.append(performance_stamp) + + def _get_list_of_sorted_preds(self) -> List[Tuple[str, float, int]]: + """ + Returns a list of sorted predictions in descending order + Scores are taken from self.read_scores. + Parameters + ---------- + None + Return + ------ + sorted_keys: list + """ + # Sort by score - higher is better! + # First sort by num_run + sorted_keys = list(reversed(sorted( + [ + (k, v["ens_score"], v["num_run"]) + for k, v in self.read_scores.items() + ], + key=lambda x: x[2], + ))) # type: List[Tuple[str, float, int]] + # Then by score + sorted_keys = list(reversed(sorted(sorted_keys, key=lambda x: x[1]))) + return sorted_keys + + def _delete_excess_models(self, selected_keys: List[str]) -> None: + """ + Deletes models excess models on disc. self.max_models_on_disc + defines the upper limit on how many models to keep. + Any additional model with a worst score than the top + self.max_models_on_disc is deleted. 
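+        Note:
+            Current candidates, models recorded in ``self._has_been_candidate``
+            and the dummy run (num_run == 1) are never deleted.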
+ """ + + # Comply with mypy + if self.max_resident_models is None: + return + + # Obtain a list of sorted pred keys + pre_sorted_keys = self._get_list_of_sorted_preds() + sorted_keys = list(map(lambda x: x[0], pre_sorted_keys)) + + if len(sorted_keys) <= self.max_resident_models: + # Don't waste time if not enough models to delete + return + + # The top self.max_resident_models models would be the candidates + # Any other low performance model will be deleted + # The list is in ascending order of score + candidates = sorted_keys[:self.max_resident_models] + + # Loop through the files currently in the directory + for pred_path in self.y_ens_files: + + # Do not delete candidates + if pred_path in candidates: + continue + + if pred_path in self._has_been_candidate: + continue + + match = self.model_fn_re.search(pred_path) + if match is None: + raise ValueError("Could not interpret file {pred_path} " + "Something went wrong while reading predictions") + _seed = int(match.group(1)) + _num_run = int(match.group(2)) + _budget = float(match.group(3)) + + # Do not delete the dummy prediction + if _num_run == 1: + continue + + numrun_dir = self.backend.get_numrun_directory(_seed, _num_run, _budget) + try: + os.rename(numrun_dir, numrun_dir + '.old') + shutil.rmtree(numrun_dir + '.old') + self.logger.info("Deleted files of non-candidate model %s", pred_path) + self.read_scores[pred_path]["disc_space_cost_mb"] = None + self.read_scores[pred_path]["loaded"] = 3 + self.read_scores[pred_path]["ens_score"] = -np.inf + except Exception as e: + self.logger.error( + "Failed to delete files of non-candidate model %s due" + " to error %s", pred_path, e + ) + + def _read_np_fn(self, path: str) -> np.ndarray: + + # Support for string precision + if isinstance(self.precision, str): + precision = int(self.precision) + self.logger.warning("Interpreted str-precision as {}".format( + precision + )) + else: + precision = self.precision + + if path.endswith("gz"): + fp = gzip.open(path, 'rb') + elif path.endswith("npy"): + fp = open(path, 'rb') + else: + raise ValueError("Unknown filetype %s" % path) + if precision == 16: + predictions = np.load(fp, allow_pickle=True).astype(dtype=np.float16) + elif precision == 32: + predictions = np.load(fp, allow_pickle=True).astype(dtype=np.float32) + elif precision == 64: + predictions = np.load(fp, allow_pickle=True).astype(dtype=np.float64) + else: + predictions = np.load(fp, allow_pickle=True) + fp.close() + return predictions diff --git a/autoPyTorch/ensemble/ensemble_selection.py b/autoPyTorch/ensemble/ensemble_selection.py new file mode 100644 index 000000000..11090989a --- /dev/null +++ b/autoPyTorch/ensemble/ensemble_selection.py @@ -0,0 +1,216 @@ +from collections import Counter +from typing import Any, Dict, List, Tuple, Union + +import numpy as np + +from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score + + +class EnsembleSelection(AbstractEnsemble): + def __init__( + self, + ensemble_size: int, + metric: autoPyTorchMetric, + task_type: int, + random_state: np.random.RandomState, + ) -> None: + self.ensemble_size = ensemble_size + self.metric = metric + self.random_state = random_state + self.task_type = task_type + + def __getstate__(self) -> Dict[str, Any]: + # Cannot serialize a metric if + # it is user defined. 
+ # That is, if doing pickle dump + # the metric won't be the same as the + # one in __main__. we don't use the metric + # in the EnsembleSelection so this should + # be fine + self.metric = None # type: ignore + return self.__dict__ + + def fit( + self, + predictions: List[np.ndarray], + labels: np.ndarray, + identifiers: List[Tuple[int, int, float]], + ) -> AbstractEnsemble: + self.ensemble_size = int(self.ensemble_size) + if self.ensemble_size < 1: + raise ValueError('Ensemble size cannot be less than one!') + + self._fit(predictions, labels) + self._calculate_weights() + self.identifiers_ = identifiers + return self + + def _fit( + self, + predictions: List[np.ndarray], + labels: np.ndarray, + ) -> None: + """Fast version of Rich Caruana's ensemble selection method.""" + self.num_input_models_ = len(predictions) + + ensemble = [] # type: List[np.ndarray] + trajectory = [] + order = [] + + ensemble_size = self.ensemble_size + + weighted_ensemble_prediction = np.zeros( + predictions[0].shape, + dtype=np.float64, + ) + fant_ensemble_prediction = np.zeros( + weighted_ensemble_prediction.shape, + dtype=np.float64, + ) + for i in range(ensemble_size): + scores = np.zeros( + (len(predictions)), + dtype=np.float64, + ) + s = len(ensemble) + if s == 0: + weighted_ensemble_prediction.fill(0.0) + else: + weighted_ensemble_prediction.fill(0.0) + for pred in ensemble: + np.add( + weighted_ensemble_prediction, + pred, + out=weighted_ensemble_prediction, + ) + np.multiply( + weighted_ensemble_prediction, + 1 / s, + out=weighted_ensemble_prediction, + ) + np.multiply( + weighted_ensemble_prediction, + (s / float(s + 1)), + out=weighted_ensemble_prediction, + ) + + for j, pred in enumerate(predictions): + # Memory-efficient averaging! + fant_ensemble_prediction.fill(0.0) + np.add( + fant_ensemble_prediction, + weighted_ensemble_prediction, + out=fant_ensemble_prediction + ) + np.add( + fant_ensemble_prediction, + (1. / float(s + 1)) * pred, + out=fant_ensemble_prediction + ) + + # Calculate score is versatile and can return a dict of score + # when all_scoring_functions=False, we know it will be a float + score = calculate_score( + metrics=[self.metric], + target=labels, + prediction=fant_ensemble_prediction, + task_type=self.task_type, + ) + scores[j] = self.metric._optimum - score[self.metric.name] + + all_best = np.argwhere(scores == np.nanmin(scores)).flatten() + best = self.random_state.choice(all_best) + ensemble.append(predictions[best]) + trajectory.append(scores[best]) + order.append(best) + + # Handle special case + if len(predictions) == 1: + break + + self.indices_ = order + self.trajectory_ = trajectory + self.train_score_ = trajectory[-1] + + def _calculate_weights(self) -> None: + ensemble_members = Counter(self.indices_).most_common() + weights = np.zeros( + (self.num_input_models_,), + dtype=np.float64, + ) + for ensemble_member in ensemble_members: + weight = float(ensemble_member[1]) / self.ensemble_size + weights[ensemble_member[0]] = weight + + if np.sum(weights) < 1: + weights = weights / np.sum(weights) + + self.weights_ = weights + + def predict(self, predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + + average = np.zeros_like(predictions[0], dtype=np.float64) + tmp_predictions = np.empty_like(predictions[0], dtype=np.float64) + + # if predictions.shape[0] == len(self.weights_), + # predictions include those of zero-weight models. 
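+        # Whichever branch applies below, each model's prediction is scaled by
+        # its weight into the preallocated tmp_predictions buffer and accumulated
+        # into average in place, avoiding one temporary array per model.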
+ if len(predictions) == len(self.weights_): + for pred, weight in zip(predictions, self.weights_): + np.multiply(pred, weight, out=tmp_predictions) + np.add(average, tmp_predictions, out=average) + + # if prediction model.shape[0] == len(non_null_weights), + # predictions do not include those of zero-weight models. + elif len(predictions) == np.count_nonzero(self.weights_): + non_null_weights = [w for w in self.weights_ if w > 0] + for pred, weight in zip(predictions, non_null_weights): + np.multiply(pred, weight, out=tmp_predictions) + np.add(average, tmp_predictions, out=average) + + # If none of the above applies, then something must have gone wrong. + else: + raise ValueError("The dimensions of ensemble predictions" + " and ensemble weights do not match!") + del tmp_predictions + return average + + def __str__(self) -> str: + return 'Ensemble Selection:\n\tTrajectory: %s\n\tMembers: %s' \ + '\n\tWeights: %s\n\tIdentifiers: %s' % \ + (' '.join(['%d: %5f' % (idx, performance) + for idx, performance in enumerate(self.trajectory_)]), + self.indices_, self.weights_, + ' '.join([str(identifier) for idx, identifier in + enumerate(self.identifiers_) + if self.weights_[idx] > 0])) + + def get_models_with_weights( + self, + models: BasePipeline + ) -> List[Tuple[float, BasePipeline]]: + output = [] + for i, weight in enumerate(self.weights_): + if weight > 0.0: + identifier = self.identifiers_[i] + model = models[identifier] + output.append((weight, model)) + + output.sort(reverse=True, key=lambda t: t[0]) + + return output + + def get_selected_model_identifiers(self) -> List[Tuple[int, int, float]]: + output = [] + + for i, weight in enumerate(self.weights_): + identifier = self.identifiers_[i] + if weight > 0.0: + output.append(identifier) + + return output + + def get_validation_performance(self) -> float: + return self.trajectory_[-1] diff --git a/autoPyTorch/ensemble/singlebest_ensemble.py b/autoPyTorch/ensemble/singlebest_ensemble.py new file mode 100644 index 000000000..821e32a87 --- /dev/null +++ b/autoPyTorch/ensemble/singlebest_ensemble.py @@ -0,0 +1,153 @@ +import os +from typing import List, Tuple, Union + +import numpy as np + +from smac.runhistory.runhistory import RunHistory + +from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.utils.backend import Backend + + +class SingleBest(AbstractEnsemble): + """ + In the case of a crash, this class searches + for the best individual model. + Such model is returned as an ensemble of a single + object, to comply with the expected interface of an + AbstractEnsemble. 
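+
+    Note:
+        ``fit`` is a no-op and ``predict`` simply returns the predictions of the
+        single member; the best model is chosen from the SMAC run history in
+        ``get_identifiers_from_run_history``.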
+ """ + def __init__( + self, + metric: autoPyTorchMetric, + run_history: RunHistory, + seed: int, + backend: Backend, + ): + self.metric = metric + self.seed = seed + self.backend = backend + + # Add some default values -- at least 1 model in ensemble is assumed + self.indices_ = [0] + self.weights_ = [1.0] + self.run_history = run_history + self.identifiers_ = self.get_identifiers_from_run_history() + self.best_performance = self.metric._worst_possible_result + + def get_identifiers_from_run_history(self) -> List[Tuple[int, int, float]]: + """ + This method parses the run history, to identify + the best performing model + It populates the identifiers attribute, which is used + by the backend to access the actual model + """ + best_model_identifier = [] + best_model_score = self.metric._worst_possible_result + + for run_key in self.run_history.data.keys(): + run_value = self.run_history.data[run_key] + score = self.metric._optimum - (self.metric._sign * run_value.cost) + + if (score > best_model_score and self.metric._sign > 0) \ + or (score < best_model_score and self.metric._sign < 0): + + # Make sure that the individual best model actually exists + model_dir = self.backend.get_numrun_directory( + self.seed, + run_value.additional_info['num_run'], + run_key.budget, + ) + model_file_name = self.backend.get_model_filename( + self.seed, + run_value.additional_info['num_run'], + run_key.budget, + ) + file_path = os.path.join(model_dir, model_file_name) + if not os.path.exists(file_path): + continue + + best_model_identifier = [( + self.seed, + run_value.additional_info['num_run'], + run_key.budget, + )] + best_model_score = score + + if not best_model_identifier: + raise ValueError( + "No valid model found in run history. This means smac was not able to fit" + " a valid model. Please check the log file for errors." + ) + + self.best_performance = best_model_score + + return best_model_identifier + + def predict(self, predictions: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + return predictions[0] + + def __str__(self) -> str: + return 'Single Model Selection:\n\tMembers: %s' \ + '\n\tWeights: %s\n\tIdentifiers: %s' % \ + (self.indices_, self.weights_, + ' '.join([str(identifier) for idx, identifier in + enumerate(self.identifiers_) + if self.weights_[idx] > 0])) + + def get_models_with_weights(self, models: BasePipeline + ) -> List[Tuple[float, BasePipeline]]: + output = [] + for i, weight in enumerate(self.weights_): + if weight > 0.0: + identifier = self.identifiers_[i] + model = models[identifier] + output.append((weight, model)) + + output.sort(reverse=True, key=lambda t: t[0]) + + return output + + def get_selected_model_identifiers(self) -> List[Tuple[int, int, float]]: + output = [] + + for i, weight in enumerate(self.weights_): + identifier = self.identifiers_[i] + if weight > 0.0: + output.append(identifier) + + return output + + def fit( + self, + base_models_predictions: np.ndarray, + true_targets: np.ndarray, + model_identifiers: List[Tuple[int, int, float]], + ) -> 'SingleBest': + """Fit an ensemble given predictions of base models and targets. + Ensemble building maximizes performance (in contrast to + hyperparameter optimization)! 
+ + Args: + base_models_predictions (np.ndarray): + array of shape = [n_base_models, n_data_points, n_targets] + This are the predictions of the individual models found by SMAC + true_targets (np.ndarray) : array of shape [n_targets] + This is the ground truth of the above predictions + model_identifiers (List[Tuple[int, int, float]]): identifier for each base model. + Can be used for practical text output of the ensemble. + + Returns: + self + """ + return self + + def get_validation_performance(self) -> float: + """Return validation performance of ensemble. + + Returns: + Score + """ + return self.best_performance diff --git a/autoPyTorch/components/networks/__init__.py b/autoPyTorch/evaluation/__init__.py similarity index 100% rename from autoPyTorch/components/networks/__init__.py rename to autoPyTorch/evaluation/__init__.py diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py new file mode 100644 index 000000000..69619bd2e --- /dev/null +++ b/autoPyTorch/evaluation/abstract_evaluator.py @@ -0,0 +1,608 @@ +import logging.handlers +import time +import warnings +from multiprocessing.queues import Queue +from typing import Any, Dict, List, Optional, Tuple, Union, no_type_check + +from ConfigSpace import Configuration + +import numpy as np + +import pandas as pd + +from sklearn.base import BaseEstimator +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import VotingClassifier, VotingRegressor + +from smac.tae import StatusType + +import autoPyTorch.pipeline.image_classification +import autoPyTorch.pipeline.tabular_classification +import autoPyTorch.pipeline.tabular_regression +import autoPyTorch.pipeline.traditional_tabular_classification +from autoPyTorch.constants import ( + CLASSIFICATION_TASKS, + IMAGE_TASKS, + MULTICLASS, + REGRESSION_TASKS, + STRING_TO_OUTPUT_TYPES, + STRING_TO_TASK_TYPES, + TABULAR_TASKS, +) +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.evaluation.utils import ( + convert_multioutput_multiclass_to_multilabel, + subsampler +) +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.pipeline.components.training.metrics.utils import ( + calculate_score, + get_metrics, +) +from autoPyTorch.utils.backend import Backend +from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger +from autoPyTorch.utils.pipeline import get_dataset_requirements + +__all__ = [ + 'AbstractEvaluator', + 'fit_and_suppress_warnings' +] + + +class MyTraditionalTabularClassificationPipeline(BaseEstimator): + def __init__(self, config: str, + dataset_properties: Dict[str, Any], + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None): + self.pipeline = autoPyTorch.pipeline.traditional_tabular_classification.\ + TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties) + configuration_space = self.pipeline.get_hyperparameter_search_space() + default_configuration = configuration_space.get_default_configuration().get_dictionary() + default_configuration['model_trainer:tabular_classifier:classifier'] = config + configuration = Configuration(configuration_space, default_configuration) + self.pipeline.set_hyperparameters(configuration) + + def fit(self, X: Dict[str, Any], y: Any, + sample_weight: Optional[np.ndarray] = None) 
-> object: + return self.pipeline.fit(X, y) + + def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.array: + return self.pipeline.predict_proba(X, batch_size=batch_size) + + def predict(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.array: + return self.pipeline.predict(X, batch_size=batch_size) + + def estimator_supports_iterative_fit(self) -> bool: # pylint: disable=R0201 + return False + + def get_additional_run_info(self) -> None: # pylint: disable=R0201 + return None + + @staticmethod + def get_default_pipeline_options() -> Dict[str, Any]: + return autoPyTorch.pipeline.traditional_tabular_classification. \ + TraditionalTabularClassificationPipeline.get_default_pipeline_options() + + +class DummyClassificationPipeline(DummyClassifier): + def __init__(self, config: Configuration, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None + ) -> None: + self.configuration = config + if config == 1: + super(DummyClassificationPipeline, self).__init__(strategy="uniform") + else: + super(DummyClassificationPipeline, self).__init__(strategy="most_frequent") + + def fit(self, X: Dict[str, Any], y: Any, + sample_weight: Optional[np.ndarray] = None) -> object: + X_train = subsampler(X['X_train'], X['train_indices']) + y_train = subsampler(X['y_train'], X['train_indices']) + return super(DummyClassificationPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, + sample_weight=sample_weight) + + def predict_proba(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.array: + new_X = np.ones((X.shape[0], 1)) + probas = super(DummyClassificationPipeline, self).predict_proba(new_X) + probas = convert_multioutput_multiclass_to_multilabel(probas).astype( + np.float32) + return probas + + def predict(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.array: + new_X = np.ones((X.shape[0], 1)) + return super(DummyClassificationPipeline, self).predict(new_X).astype(np.float32) + + def estimator_supports_iterative_fit(self) -> bool: # pylint: disable=R0201 + return False + + def get_additional_run_info(self) -> None: # pylint: disable=R0201 + return None + + @staticmethod + def get_default_pipeline_options() -> Dict[str, Any]: + return {'budget_type': 'epochs', + 'epochs': 1, + 'runtime': 1} + + +class DummyRegressionPipeline(DummyRegressor): + def __init__(self, config: Configuration, + random_state: Optional[Union[int, np.random.RandomState]] = None, + init_params: Optional[Dict] = None) -> None: + self.configuration = config + if config == 1: + super(DummyRegressionPipeline, self).__init__(strategy='mean') + else: + super(DummyRegressionPipeline, self).__init__(strategy='median') + + def fit(self, X: Dict[str, Any], y: Any, + sample_weight: Optional[np.ndarray] = None) -> object: + X_train = subsampler(X['X_train'], X['train_indices']) + y_train = subsampler(X['y_train'], X['train_indices']) + return super(DummyRegressionPipeline, self).fit(np.ones((X_train.shape[0], 1)), y_train, + sample_weight=sample_weight) + + def predict(self, X: Union[np.ndarray, pd.DataFrame], + batch_size: int = 1000) -> np.array: + new_X = np.ones((X.shape[0], 1)) + return super(DummyRegressionPipeline, self).predict(new_X).astype(np.float32) + + def estimator_supports_iterative_fit(self) -> bool: # pylint: disable=R0201 + return False + + def get_additional_run_info(self) -> None: # pylint: disable=R0201 + return None + + @staticmethod + def 
get_default_pipeline_options() -> Dict[str, Any]: + return {'budget_type': 'epochs', + 'epochs': 1, + 'runtime': 1} + + +def fit_and_suppress_warnings(logger: PicklableClientLogger, pipeline: BaseEstimator, + X: Dict[str, Any], y: Any + ) -> BaseEstimator: + @no_type_check + def send_warnings_to_log(message, category, filename, lineno, + file=None, line=None) -> None: + logger.debug('%s:%s: %s:%s', + filename, lineno, category.__name__, message) + return + + with warnings.catch_warnings(): + warnings.showwarning = send_warnings_to_log + pipeline.fit(X, y) + + return pipeline + + +class AbstractEvaluator(object): + def __init__(self, backend: Backend, + queue: Queue, + metric: autoPyTorchMetric, + budget: float, + budget_type: str = None, + pipeline_config: Optional[Dict[str, Any]] = None, + configuration: Optional[Configuration] = None, + seed: int = 1, + output_y_hat_optimization: bool = True, + num_run: Optional[int] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + disable_file_output: Union[bool, List[str]] = False, + init_params: Optional[Dict[str, Any]] = None, + logger_port: Optional[int] = None, + all_supported_metrics: bool = True) -> None: + + self.starttime = time.time() + + self.configuration = configuration + self.backend: Backend = backend + self.queue = queue + + self.datamanager: BaseDataset = self.backend.load_datamanager() + + assert self.datamanager.task_type is not None, \ + "Expected dataset {} to have task_type got None".format(self.datamanager.__class__.__name__) + self.task_type = STRING_TO_TASK_TYPES[self.datamanager.task_type] + self.output_type = STRING_TO_OUTPUT_TYPES[self.datamanager.output_type] + self.issparse = self.datamanager.issparse + + self.include = include + self.exclude = exclude + + self.X_train, self.y_train = self.datamanager.train_tensors + + if self.datamanager.val_tensors is not None: + self.X_valid, self.y_valid = self.datamanager.val_tensors + else: + self.X_valid, self.y_valid = None, None + + if self.datamanager.test_tensors is not None: + self.X_test, self.y_test = self.datamanager.test_tensors + else: + self.X_test, self.y_test = None, None + + self.metric = metric + + self.seed = seed + + # Flag to save target for ensemble + self.output_y_hat_optimization = output_y_hat_optimization + + if isinstance(disable_file_output, bool): + self.disable_file_output: bool = disable_file_output + elif isinstance(disable_file_output, List): + self.disabled_file_outputs: List[str] = disable_file_output + else: + raise ValueError('disable_file_output should be either a bool or a list') + + self.pipeline_class: Optional[Union[BaseEstimator, BasePipeline]] = None + info: Dict[str, Any] = {'task_type': self.datamanager.task_type, + 'output_type': self.datamanager.output_type, + 'issparse': self.issparse} + if self.task_type in REGRESSION_TASKS: + if isinstance(self.configuration, int): + self.pipeline_class = DummyClassificationPipeline + elif isinstance(self.configuration, str): + raise ValueError("Only tabular classifications tasks " + "are currently supported with traditional methods") + elif isinstance(self.configuration, Configuration): + self.pipeline_class = autoPyTorch.pipeline.tabular_regression.TabularRegressionPipeline + else: + raise ValueError('task {} not available'.format(self.task_type)) + self.predict_function = self._predict_regression + else: + if isinstance(self.configuration, int): + self.pipeline_class = DummyClassificationPipeline + elif isinstance(self.configuration, str): + if 
self.task_type in TABULAR_TASKS: + self.pipeline_class = MyTraditionalTabularClassificationPipeline + else: + raise ValueError("Only tabular classifications tasks " + "are currently supported with traditional methods") + elif isinstance(self.configuration, Configuration): + if self.task_type in TABULAR_TASKS: + self.pipeline_class = autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline + elif self.task_type in IMAGE_TASKS: + self.pipeline_class = autoPyTorch.pipeline.image_classification.ImageClassificationPipeline + else: + raise ValueError('task {} not available'.format(self.task_type)) + self.predict_function = self._predict_proba + if self.task_type in TABULAR_TASKS: + assert isinstance(self.datamanager, TabularDataset) + info.update({'numerical_columns': self.datamanager.numerical_columns, + 'categorical_columns': self.datamanager.categorical_columns}) + self.dataset_properties = self.datamanager.get_dataset_properties(get_dataset_requirements(info)) + + self.additional_metrics: Optional[List[autoPyTorchMetric]] = None + if all_supported_metrics: + self.additional_metrics = get_metrics(dataset_properties=self.dataset_properties, + all_supported_metrics=all_supported_metrics) + + self.fit_dictionary: Dict[str, Any] = {'dataset_properties': self.dataset_properties} + self._init_params = init_params + self.fit_dictionary.update({ + 'X_train': self.X_train, + 'y_train': self.y_train, + 'X_test': self.X_test, + 'y_test': self.y_test, + 'backend': self.backend, + 'logger_port': logger_port, + }) + assert self.pipeline_class is not None, "Could not infer pipeline class" + pipeline_config = pipeline_config if pipeline_config is not None \ + else self.pipeline_class.get_default_pipeline_options() + self.budget_type = pipeline_config['budget_type'] if budget_type is None else budget_type + self.budget = pipeline_config[self.budget_type] if budget == 0 else budget + self.fit_dictionary = {**pipeline_config, **self.fit_dictionary} + + # If the budget is epochs, we want to limit that in the fit dictionary + if self.budget_type == 'epochs': + self.fit_dictionary['epochs'] = budget + + self.num_run = 0 if num_run is None else num_run + + logger_name = '%s(%d)' % (self.__class__.__name__.split('.')[-1], + self.seed) + if logger_port is None: + logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT + self.logger = get_named_client_logger( + name=logger_name, + port=logger_port, + ) + self.Y_optimization: Optional[np.ndarray] = None + self.Y_actual_train: Optional[np.ndarray] = None + self.pipelines: Optional[List[BaseEstimator]] = None + self.pipeline: Optional[BaseEstimator] = None + self.logger.debug("Fit dictionary in Abstract evaluator: {}".format(self.fit_dictionary)) + + def _get_pipeline(self) -> BaseEstimator: + assert self.pipeline_class is not None, "Can't return pipeline, pipeline_class not initialised" + if isinstance(self.configuration, int): + pipeline = self.pipeline_class(config=self.configuration, + random_state=np.random.RandomState(self.seed), + init_params=self.fit_dictionary) + elif isinstance(self.configuration, Configuration): + pipeline = self.pipeline_class(config=self.configuration, + dataset_properties=self.dataset_properties, + random_state=np.random.RandomState(self.seed), + include=self.include, + exclude=self.exclude, + init_params=self._init_params) + elif isinstance(self.configuration, str): + pipeline = self.pipeline_class(config=self.configuration, + dataset_properties=self.dataset_properties, + random_state=np.random.RandomState(self.seed), + 
init_params=self.fit_dictionary) + else: + raise ValueError("Invalid configuration entered") + return pipeline + + def _loss(self, y_true: np.ndarray, y_hat: np.ndarray) -> Dict[str, float]: + """SMAC follows a minimization goal, so the make_scorer + sign is used as a guide to obtain the value to reduce. + + On this regard, to optimize a metric: + 1- score is calculared with calculate_score, with the caveat, that if + for the metric greater is not better, a negative score is returned. + 2- the err (the optimization goal) is then: + optimum - (metric.sign * actual_score) + For accuracy for example: optimum(1) - (+1 * actual score) + For logloss for example: optimum(0) - (-1 * actual score) + """ + + if not isinstance(self.configuration, Configuration): + return {self.metric.name: 1.0} + + if self.additional_metrics is not None: + metrics = self.additional_metrics + else: + metrics = [self.metric] + score = calculate_score( + y_true, y_hat, self.task_type, metrics) + + err = {metric.name: metric._optimum - score[metric.name] for metric in metrics + if metric.name in score.keys()} + + return err + + def finish_up(self, loss: Dict[str, float], train_loss: Dict[str, float], + opt_pred: np.ndarray, valid_pred: Optional[np.ndarray], + test_pred: Optional[np.ndarray], additional_run_info: Optional[Dict], + file_output: bool, status: StatusType + ) -> Optional[Tuple[float, float, int, Dict]]: + """This function does everything necessary after the fitting is done: + + * predicting + * saving the files for the ensembles_statistics + * generate output for SMAC + We use it as the signal handler so we can recycle the code for the + normal usecase and when the runsolver kills us here :)""" + + self.duration = time.time() - self.starttime + + if file_output: + loss_, additional_run_info_ = self.file_output( + opt_pred, valid_pred, test_pred, + ) + else: + loss_ = None + additional_run_info_ = {} + + validation_loss, test_loss = self.calculate_auxiliary_losses( + valid_pred, test_pred + ) + + if loss_ is not None: + return self.duration, loss_, self.seed, additional_run_info_ + + cost = loss[self.metric.name] + + additional_run_info = ( + {} if additional_run_info is None else additional_run_info + ) + for metric_name, value in loss.items(): + additional_run_info[metric_name] = value + additional_run_info['duration'] = self.duration + additional_run_info['num_run'] = self.num_run + if train_loss is not None: + additional_run_info['train_loss'] = train_loss + if validation_loss is not None: + additional_run_info['validation_loss'] = validation_loss + if test_loss is not None: + additional_run_info['test_loss'] = test_loss + + rval_dict = {'loss': cost, + 'additional_run_info': additional_run_info, + 'status': status} + + self.queue.put(rval_dict) + return None + + def calculate_auxiliary_losses( + self, + Y_valid_pred: np.ndarray, + Y_test_pred: np.ndarray, + ) -> Tuple[Optional[float], Optional[float]]: + + validation_loss: Optional[float] = None + + if Y_valid_pred is not None: + if self.y_valid is not None: + validation_loss_dict = self._loss(self.y_valid, Y_valid_pred) + validation_loss = validation_loss_dict[self.metric.name] + + test_loss: Optional[float] = None + if Y_test_pred is not None: + if self.y_test is not None: + test_loss_dict = self._loss(self.y_test, Y_test_pred) + test_loss = test_loss_dict[self.metric.name] + + return validation_loss, test_loss + + def file_output( + self, + Y_optimization_pred: np.ndarray, + Y_valid_pred: np.ndarray, + Y_test_pred: np.ndarray + ) -> 
Tuple[Optional[float], Dict]: + # Abort if self.Y_optimization is None + # self.Y_optimization can be None if we use partial-cv, then, + # obviously no output should be saved. + if self.Y_optimization is None: + return None, {} + + # Abort in case of shape misalignment + if self.Y_optimization.shape[0] != Y_optimization_pred.shape[0]: + return ( + 1.0, + { + 'error': + "Targets %s and prediction %s don't have " + "the same length. Probably training didn't " + "finish" % (self.Y_optimization.shape, Y_optimization_pred.shape) + }, + ) + + # Abort if predictions contain NaNs + for y, s in [ + # Y_train_pred deleted here. Fix unittest accordingly. + [Y_optimization_pred, 'optimization'], + [Y_valid_pred, 'validation'], + [Y_test_pred, 'test'] + ]: + if y is not None and not np.all(np.isfinite(y)): + return ( + 1.0, + { + 'error': + 'Model predictions for %s set contains NaNs.' % s + }, + ) + + # Abort if we don't want to output anything. + if hasattr(self, 'disable_file_output'): + if self.disable_file_output: + return None, {} + else: + self.disabled_file_outputs = [] + + # This file can be written independently of the others down bellow + if 'y_optimization' not in self.disabled_file_outputs: + if self.output_y_hat_optimization: + self.backend.save_targets_ensemble(self.Y_optimization) + + if hasattr(self, 'pipelines') and self.pipelines is not None: + if self.pipelines[0] is not None and len(self.pipelines) > 0: + if 'pipelines' not in self.disabled_file_outputs: + if self.task_type in CLASSIFICATION_TASKS: + pipelines = VotingClassifier(estimators=None, voting='soft', ) + else: + pipelines = VotingRegressor(estimators=None) + pipelines.estimators_ = self.pipelines + else: + pipelines = None + else: + pipelines = None + else: + pipelines = None + + if hasattr(self, 'pipeline') and self.pipeline is not None: + if 'pipeline' not in self.disabled_file_outputs: + pipeline = self.pipeline + else: + pipeline = None + else: + pipeline = None + + self.backend.save_numrun_to_dir( + seed=int(self.seed), + idx=int(self.num_run), + budget=float(self.budget), + model=pipeline, + cv_model=pipelines, + ensemble_predictions=( + Y_optimization_pred if 'y_optimization' not in + self.disabled_file_outputs else None + ), + valid_predictions=( + Y_valid_pred if 'y_valid' not in + self.disabled_file_outputs else None + ), + test_predictions=( + Y_test_pred if 'y_test' not in + self.disabled_file_outputs else None + ), + ) + + return None, {} + + def _predict_proba(self, X: np.ndarray, pipeline: BaseEstimator, + Y_train: Optional[np.ndarray] = None) -> np.ndarray: + @no_type_check + def send_warnings_to_log(message, category, filename, lineno, + file=None, line=None): + self.logger.debug('%s:%s: %s:%s' % + (filename, lineno, category.__name__, message)) + return + + with warnings.catch_warnings(): + warnings.showwarning = send_warnings_to_log + Y_pred = pipeline.predict_proba(X, batch_size=1000) + + Y_pred = self._ensure_prediction_array_sizes(Y_pred, Y_train) + return Y_pred + + def _predict_regression(self, X: np.ndarray, pipeline: BaseEstimator, + Y_train: Optional[np.ndarray] = None) -> np.ndarray: + @no_type_check + def send_warnings_to_log(message, category, filename, lineno, + file=None, line=None): + self.logger.debug('%s:%s: %s:%s' % + (filename, lineno, category.__name__, message)) + return + + with warnings.catch_warnings(): + warnings.showwarning = send_warnings_to_log + Y_pred = pipeline.predict(X, batch_size=1000) + + if len(Y_pred.shape) == 1: + Y_pred = Y_pred.reshape((-1, 1)) + + return Y_pred + 
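# --- Illustrative sketch (not part of this diff) -------------------------------------
# fit_and_suppress_warnings and the _predict_* helpers above share one idiom: swap in a
# showwarning hook inside warnings.catch_warnings() so library warnings are routed to
# the run logger instead of stderr. A minimal, self-contained version of that idiom,
# using made-up names, looks like this:

import logging
import warnings

_sketch_logger = logging.getLogger("warning_redirect_sketch")


def _send_warnings_to_log(message, category, filename, lineno, file=None, line=None):
    # Same signature that warnings.showwarning expects; forward everything to the logger.
    _sketch_logger.debug('%s:%s: %s:%s', filename, lineno, category.__name__, message)


with warnings.catch_warnings():
    # catch_warnings() restores the previous showwarning hook and filters on exit, so
    # the override only applies inside this block (as the evaluator scopes it per call).
    warnings.simplefilter("always")
    warnings.showwarning = _send_warnings_to_log
    warnings.warn("this warning goes to the logger, not to stderr")
# --------------------------------------------------------------------------------------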
+ def _ensure_prediction_array_sizes(self, prediction: np.ndarray, + Y_train: np.ndarray) -> np.ndarray: + assert self.datamanager.num_classes is not None, "Called function on wrong task" + num_classes: int = self.datamanager.num_classes + + if self.output_type == MULTICLASS and \ + prediction.shape[1] < num_classes: + if Y_train is None: + raise ValueError('Y_train must not be None!') + classes = list(np.unique(Y_train)) + + mapping = dict() + for class_number in range(num_classes): + if class_number in classes: + index = classes.index(class_number) + mapping[index] = class_number + new_predictions = np.zeros((prediction.shape[0], num_classes), + dtype=np.float32) + + for index in mapping: + class_index = mapping[index] + new_predictions[:, class_index] = prediction[:, index] + + return new_predictions + + return prediction diff --git a/autoPyTorch/evaluation/tae.py b/autoPyTorch/evaluation/tae.py new file mode 100644 index 000000000..9562d8051 --- /dev/null +++ b/autoPyTorch/evaluation/tae.py @@ -0,0 +1,423 @@ +# -*- encoding: utf-8 -*- +import functools +import json +import logging.handlers +import math +import multiprocessing +import time +import traceback +import typing +import warnings +from queue import Empty + +from ConfigSpace import Configuration + +import numpy as np + +import pynisher + +from smac.runhistory.runhistory import RunInfo, RunValue +from smac.stats.stats import Stats +from smac.tae import StatusType, TAEAbortException +from smac.tae.execute_func import AbstractTAFunc + +import autoPyTorch.evaluation.train_evaluator +from autoPyTorch.evaluation.utils import empty_queue, extract_learning_curve, read_queue +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.utils.backend import Backend +from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger + + +def fit_predict_try_except_decorator( + ta: typing.Callable, + queue: multiprocessing.Queue, cost_for_crash: float, **kwargs: typing.Any) -> None: + try: + return ta(queue=queue, **kwargs) + except Exception as e: + if isinstance(e, (MemoryError, pynisher.TimeoutException)): + # Re-raise the memory error to let the pynisher handle that correctly + raise e + + exception_traceback = traceback.format_exc() + error_message = repr(e) + + # Print also to STDOUT in case of broken handlers + warnings.warn("Exception handling in `fit_predict_try_except_decorator`: " + "traceback: %s \nerror message: %s" % (exception_traceback, error_message)) + + queue.put({'loss': cost_for_crash, + 'additional_run_info': {'traceback': exception_traceback, + 'error': error_message}, + 'status': StatusType.CRASHED, + 'final_queue_element': True}, block=True) + queue.close() + + +def get_cost_of_crash(metric: autoPyTorchMetric) -> float: + # The metric must always be defined to extract optimum/worst + if not isinstance(metric, autoPyTorchMetric): + raise ValueError("The metric must be stricly be an instance of autoPyTorchMetric") + + # Autopytorch optimizes the err. This function translates + # worst_possible_result to be a minimization problem. + # For metrics like accuracy that are bounded to [0,1] + # metric.optimum==1 is the worst cost. 
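# --- Illustrative worked example (not part of this diff) -------------------------------
# The crash-cost translation described in the comments above can be checked with
# stand-in metric objects; the attribute names mirror the ones used in this module,
# while the concrete values below are assumptions for the sketch only:

from types import SimpleNamespace


def _sketch_cost_of_crash(metric) -> float:
    # Mirrors get_cost_of_crash: losses are minimized as "optimum - sign * score",
    # so the crash cost is the worst value expressible on that scale.
    if metric._sign < 0:
        return metric._worst_possible_result
    return metric._optimum - metric._worst_possible_result


# accuracy-like metric: optimum 1, worst score 0, greater is better (sign +1)
accuracy_like = SimpleNamespace(_sign=1, _optimum=1.0, _worst_possible_result=0.0)
# log-loss-like metric: lower is better (sign -1); its worst value is already a loss
log_loss_like = SimpleNamespace(_sign=-1, _optimum=0.0, _worst_possible_result=4.0)

assert _sketch_cost_of_crash(accuracy_like) == 1.0  # 1 - 0: worst accuracy as a loss
assert _sketch_cost_of_crash(log_loss_like) == 4.0  # the worst possible log loss itself
# ----------------------------------------------------------------------------------------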
+ # A simple guide is to use greater_is_better embedded as sign + if metric._sign < 0: + worst_possible_result = metric._worst_possible_result + else: + worst_possible_result = metric._optimum - metric._worst_possible_result + + return worst_possible_result + + +def _encode_exit_status(exit_status: multiprocessing.connection.Connection + ) -> str: + try: + encoded_exit_status: str = json.dumps(exit_status) + return encoded_exit_status + except (TypeError, OverflowError): + return str(exit_status) + + +class ExecuteTaFuncWithQueue(AbstractTAFunc): + """ + Wrapper class that executes the target algorithm with + queues according to what SMAC expects. This allows us to + run our target algorithm with different configurations + in parallel + """ + + def __init__( + self, + backend: Backend, + seed: int, + metric: autoPyTorchMetric, + logger: PicklableClientLogger, + cost_for_crash: float, + abort_on_first_run_crash: bool, + pipeline_config: typing.Optional[typing.Dict[str, typing.Any]] = None, + initial_num_run: int = 1, + stats: typing.Optional[Stats] = None, + run_obj: str = 'quality', + par_factor: int = 1, + output_y_hat_optimization: bool = True, + include: typing.Optional[typing.Dict[str, typing.Any]] = None, + exclude: typing.Optional[typing.Dict[str, typing.Any]] = None, + memory_limit: typing.Optional[int] = None, + disable_file_output: bool = False, + init_params: typing.Dict[str, typing.Any] = None, + budget_type: str = None, + ta: typing.Optional[typing.Callable] = None, + logger_port: int = None, + all_supported_metrics: bool = True, + ): + + eval_function = autoPyTorch.evaluation.train_evaluator.eval_function + + self.worst_possible_result = cost_for_crash + + eval_function = functools.partial( + fit_predict_try_except_decorator, + ta=eval_function, + cost_for_crash=self.worst_possible_result, + ) + + super().__init__( + ta=ta if ta is not None else eval_function, + stats=stats, + run_obj=run_obj, + par_factor=par_factor, + cost_for_crash=self.worst_possible_result, + abort_on_first_run_crash=abort_on_first_run_crash, + ) + + self.backend = backend + self.seed = seed + self.initial_num_run = initial_num_run + self.metric = metric + self.output_y_hat_optimization = output_y_hat_optimization + self.include = include + self.exclude = exclude + self.disable_file_output = disable_file_output + self.init_params = init_params + self.pipeline_config = pipeline_config + self.budget_type = pipeline_config['budget_type'] if pipeline_config is not None else budget_type + self.logger = logger + self.logger_port = logger_port if logger_port is not None else logging.handlers.DEFAULT_TCP_LOGGING_PORT + self.all_supported_metrics = all_supported_metrics + + if memory_limit is not None: + memory_limit = int(math.ceil(memory_limit)) + self.memory_limit = memory_limit + + dm = self.backend.load_datamanager() + if dm.val_tensors is not None: + self._get_validation_loss = True + else: + self._get_validation_loss = False + if dm.test_tensors is not None: + self._get_test_loss = True + else: + self._get_test_loss = False + + self.resampling_strategy = dm.resampling_strategy + self.resampling_strategy_args = dm.resampling_strategy_args + + def run_wrapper( + self, + run_info: RunInfo, + ) -> typing.Tuple[RunInfo, RunValue]: + """ + wrapper function for ExecuteTARun.run_wrapper() to cap the target algorithm + runtime if it would run over the total allowed runtime. + + Args: + run_info (RunInfo): Object that contains enough information + to execute a configuration run in isolation. 
+ Returns: + RunInfo: + an object containing the configuration launched + RunValue: + Contains information about the status/performance of config + """ + if self.budget_type is None: + if run_info.budget != 0: + raise ValueError( + 'If budget_type is None, budget must be.0, but is %f' % run_info.budget + ) + else: + if run_info.budget == 0: + run_info = run_info._replace(budget=100.0) + elif run_info.budget <= 0 or run_info.budget > 100: + raise ValueError('Illegal value for budget, must be >0 and <=100, but is %f' % + run_info.budget) + if self.budget_type not in ('epochs', 'runtime'): + raise ValueError("Illegal value for budget type, must be one of " + "('epochs', 'runtime'), but is : %s" % + self.budget_type) + + remaining_time = self.stats.get_remaing_time_budget() + + if remaining_time - 5 < run_info.cutoff: + run_info = run_info._replace(cutoff=int(remaining_time - 5)) + + if run_info.cutoff < 1.0: + return run_info, RunValue( + status=StatusType.STOP, + cost=self.worst_possible_result, + time=0.0, + additional_info={}, + starttime=time.time(), + endtime=time.time(), + ) + elif ( + run_info.cutoff != int(np.ceil(run_info.cutoff)) + and not isinstance(run_info.cutoff, int) + ): + run_info = run_info._replace(cutoff=int(np.ceil(run_info.cutoff))) + + return super().run_wrapper(run_info=run_info) + + def run( + self, + config: Configuration, + instance: typing.Optional[str] = None, + cutoff: typing.Optional[float] = None, + seed: int = 12345, + budget: float = 0.0, + instance_specific: typing.Optional[str] = None, + ) -> typing.Tuple[StatusType, float, float, typing.Dict[str, typing.Any]]: + + queue: multiprocessing.queues.Queue = multiprocessing.Queue() + + if not (instance_specific is None or instance_specific == '0'): + raise ValueError(instance_specific) + init_params = {'instance': instance} + if self.init_params is not None: + init_params.update(self.init_params) + + pynisher_arguments = dict( + logger=get_named_client_logger(name="pynisher", port=self.logger_port), + # Pynisher expects seconds as a time indicator + wall_time_in_s=int(cutoff) if cutoff is not None else None, + mem_in_mb=self.memory_limit, + capture_output=True, + ) + + if isinstance(config, (int, str)): + num_run = self.initial_num_run + else: + num_run = config.config_id + self.initial_num_run + + obj_kwargs = dict( + queue=queue, + config=config, + backend=self.backend, + metric=self.metric, + seed=self.seed, + num_run=num_run, + output_y_hat_optimization=self.output_y_hat_optimization, + include=self.include, + exclude=self.exclude, + disable_file_output=self.disable_file_output, + instance=instance, + init_params=init_params, + budget=budget, + budget_type=self.budget_type, + pipeline_config=self.pipeline_config, + logger_port=self.logger_port, + all_supported_metrics=self.all_supported_metrics + ) + + info: typing.Optional[typing.List[RunValue]] + additional_run_info: typing.Dict[str, typing.Any] + try: + obj = pynisher.enforce_limits(**pynisher_arguments)(self.ta) + obj(**obj_kwargs) + except Exception as e: + exception_traceback = traceback.format_exc() + error_message = repr(e) + additional_run_info = { + 'traceback': exception_traceback, + 'error': error_message + } + return StatusType.CRASHED, self.cost_for_crash, 0.0, additional_run_info + + if obj.exit_status in (pynisher.TimeoutException, pynisher.MemorylimitException): + # Even if the pynisher thinks that a timeout or memout occured, + # it can be that the target algorithm wrote something into the queue + # - then we treat it as a successful run + 
try: + info = read_queue(queue) + result = info[-1]['loss'] + status = info[-1]['status'] + additional_run_info = info[-1]['additional_run_info'] + + if obj.stdout: + additional_run_info['subprocess_stdout'] = obj.stdout + if obj.stderr: + additional_run_info['subprocess_stderr'] = obj.stderr + + if obj.exit_status is pynisher.TimeoutException: + additional_run_info['info'] = 'Run stopped because of timeout.' + elif obj.exit_status is pynisher.MemorylimitException: + additional_run_info['info'] = 'Run stopped because of memout.' + + if status in [StatusType.SUCCESS, StatusType.DONOTADVANCE]: + cost = result + else: + cost = self.worst_possible_result + + except Empty: + info = None + if obj.exit_status is pynisher.TimeoutException: + status = StatusType.TIMEOUT + additional_run_info = {'error': 'Timeout'} + elif obj.exit_status is pynisher.MemorylimitException: + status = StatusType.MEMOUT + additional_run_info = { + 'error': 'Memout (used more than {} MB).'.format(self.memory_limit) + } + else: + raise ValueError(obj.exit_status) + cost = self.worst_possible_result + + elif obj.exit_status is TAEAbortException: + info = None + status = StatusType.ABORT + cost = self.worst_possible_result + additional_run_info = {'error': 'Your configuration of ' + 'autoPyTorch does not work!', + 'exit_status': _encode_exit_status(obj.exit_status), + 'subprocess_stdout': obj.stdout, + 'subprocess_stderr': obj.stderr, + } + + else: + try: + info = read_queue(queue) + result = info[-1]['loss'] + status = info[-1]['status'] + additional_run_info = info[-1]['additional_run_info'] + + if obj.exit_status == 0: + cost = result + else: + status = StatusType.CRASHED + cost = self.worst_possible_result + additional_run_info['info'] = 'Run treated as crashed ' \ + 'because the pynisher exit ' \ + 'status %s is unknown.' 
% \ + str(obj.exit_status) + additional_run_info['exit_status'] = _encode_exit_status(obj.exit_status) + additional_run_info['subprocess_stdout'] = obj.stdout + additional_run_info['subprocess_stderr'] = obj.stderr + except Empty: + info = None + additional_run_info = { + 'error': 'Result queue is empty', + 'exit_status': _encode_exit_status(obj.exit_status), + 'subprocess_stdout': obj.stdout, + 'subprocess_stderr': obj.stderr, + 'exitcode': obj.exitcode + } + status = StatusType.CRASHED + cost = self.worst_possible_result + + if ( + (self.budget_type is None or budget == 0) + and status == StatusType.DONOTADVANCE + ): + status = StatusType.SUCCESS + + if not isinstance(additional_run_info, dict): + additional_run_info = {'message': additional_run_info} + + if ( + info is not None + and self.resampling_strategy in ['holdout-iterative-fit', 'cv-iterative-fit'] + and status != StatusType.CRASHED + ): + learning_curve = extract_learning_curve(info) + learning_curve_runtime = extract_learning_curve(info, 'duration') + if len(learning_curve) > 1: + additional_run_info['learning_curve'] = learning_curve + additional_run_info['learning_curve_runtime'] = learning_curve_runtime + + train_learning_curve = extract_learning_curve(info, 'train_loss') + if len(train_learning_curve) > 1: + additional_run_info['train_learning_curve'] = train_learning_curve + additional_run_info['learning_curve_runtime'] = learning_curve_runtime + + if self._get_validation_loss: + validation_learning_curve = extract_learning_curve(info, 'validation_loss') + if len(validation_learning_curve) > 1: + additional_run_info['validation_learning_curve'] = \ + validation_learning_curve + additional_run_info[ + 'learning_curve_runtime'] = learning_curve_runtime + + if self._get_test_loss: + test_learning_curve = extract_learning_curve(info, 'test_loss') + if len(test_learning_curve) > 1: + additional_run_info['test_learning_curve'] = test_learning_curve + additional_run_info[ + 'learning_curve_runtime'] = learning_curve_runtime + + if isinstance(config, int): + origin = 'DUMMY' + elif isinstance(config, str): + origin = 'traditional' + else: + origin = getattr(config, 'origin', 'UNKNOWN') + additional_run_info['configuration_origin'] = origin + + runtime = float(obj.wall_clock_time) + + empty_queue(queue) + self.logger.debug( + 'Finished function evaluation. 
Status: %s, Cost: %f, Runtime: %f, Additional %s', + status, cost, runtime, additional_run_info, + ) + return status, cost, runtime, additional_run_info diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py new file mode 100644 index 000000000..5e175df9b --- /dev/null +++ b/autoPyTorch/evaluation/train_evaluator.py @@ -0,0 +1,343 @@ +from multiprocessing.queues import Queue +from typing import Any, Dict, List, Optional, Tuple, Union + +from ConfigSpace.configuration_space import Configuration + +import numpy as np + +from sklearn.base import BaseEstimator + +from smac.tae import StatusType + +from autoPyTorch.constants import ( + CLASSIFICATION_TASKS, + MULTICLASSMULTIOUTPUT, +) +from autoPyTorch.evaluation.abstract_evaluator import ( + AbstractEvaluator, + fit_and_suppress_warnings +) +from autoPyTorch.evaluation.utils import subsampler +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.utils.backend import Backend + +__all__ = ['TrainEvaluator', 'eval_function'] + + +def _get_y_array(y: np.ndarray, task_type: int) -> np.ndarray: + if task_type in CLASSIFICATION_TASKS and task_type != \ + MULTICLASSMULTIOUTPUT: + return y.ravel() + else: + return y + + +class TrainEvaluator(AbstractEvaluator): + def __init__(self, backend: Backend, queue: Queue, + metric: autoPyTorchMetric, + budget: float, + budget_type: str = None, + pipeline_config: Optional[Dict[str, Any]] = None, + configuration: Optional[Configuration] = None, + seed: int = 1, + output_y_hat_optimization: bool = True, + num_run: Optional[int] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + disable_file_output: Union[bool, List] = False, + init_params: Optional[Dict[str, Any]] = None, + logger_port: Optional[int] = None, + keep_models: Optional[bool] = None, + all_supported_metrics: bool = True) -> None: + super().__init__( + backend=backend, + queue=queue, + configuration=configuration, + metric=metric, + seed=seed, + output_y_hat_optimization=output_y_hat_optimization, + num_run=num_run, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + init_params=init_params, + budget=budget, + budget_type=budget_type, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + pipeline_config=pipeline_config + ) + + self.splits = self.datamanager.splits + if self.splits is None: + raise AttributeError("Must have called create_splits on {}".format(self.datamanager.__class__.__name__)) + self.num_folds: int = len(self.splits) + self.Y_targets: List[Optional[np.ndarray]] = [None] * self.num_folds + self.Y_train_targets: np.ndarray = np.ones(self.y_train.shape) * np.NaN + self.pipelines: List[Optional[BaseEstimator]] = [None] * self.num_folds + self.indices: List[Optional[Tuple[Union[np.ndarray, List], Union[np.ndarray, List]]]] = [None] * self.num_folds + + self.keep_models = keep_models + + def fit_predict_and_loss(self) -> None: + """Fit, predict and compute the loss for cross-validation and + holdout""" + assert self.splits is not None, "Can't fit pipeline in {} is datamanager.splits is None" \ + .format(self.__class__.__name__) + additional_run_info: Optional[Dict] = None + if self.num_folds == 1: + split_id = 0 + self.logger.info("Starting fit {}".format(split_id)) + + pipeline = self._get_pipeline() + + train_split, test_split = self.splits[split_id] + self.Y_optimization = self.y_train[test_split] + self.Y_actual_train = self.y_train[train_split] + 
y_train_pred, y_opt_pred, y_valid_pred, y_test_pred = self._fit_and_predict(pipeline, split_id, + train_indices=train_split, + test_indices=test_split, + add_pipeline_to_self=True) + train_loss = self._loss(self.y_train[train_split], y_train_pred) + loss = self._loss(self.y_train[test_split], y_opt_pred) + + additional_run_info = pipeline.get_additional_run_info() if hasattr( + pipeline, 'get_additional_run_info') else {} + + status = StatusType.SUCCESS + + self.finish_up( + loss=loss, + train_loss=train_loss, + opt_pred=y_opt_pred, + valid_pred=y_valid_pred, + test_pred=y_test_pred, + additional_run_info=additional_run_info, + file_output=True, + status=status, + ) + + else: + Y_train_pred: List[Optional[np.ndarray]] = [None] * self.num_folds + Y_optimization_pred: List[Optional[np.ndarray]] = [None] * self.num_folds + Y_valid_pred: List[Optional[np.ndarray]] = [None] * self.num_folds + Y_test_pred: List[Optional[np.ndarray]] = [None] * self.num_folds + train_splits: List[Optional[Union[np.ndarray, List]]] = [None] * self.num_folds + + self.pipelines = [self._get_pipeline() for _ in range(self.num_folds)] + + # stores train loss of each fold. + train_losses = [np.NaN] * self.num_folds + # used as weights when averaging train losses. + train_fold_weights = [np.NaN] * self.num_folds + # stores opt (validation) loss of each fold. + opt_losses = [np.NaN] * self.num_folds + # weights for opt_losses. + opt_fold_weights = [np.NaN] * self.num_folds + + for i, (train_split, test_split) in enumerate(self.splits): + + pipeline = self.pipelines[i] + train_pred, opt_pred, valid_pred, test_pred = self._fit_and_predict(pipeline, i, + train_indices=train_split, + test_indices=test_split, + add_pipeline_to_self=False) + Y_train_pred[i] = train_pred + Y_optimization_pred[i] = opt_pred + Y_valid_pred[i] = valid_pred + Y_test_pred[i] = test_pred + train_splits[i] = train_split + + self.Y_train_targets[train_split] = self.y_train[train_split] + self.Y_targets[i] = self.y_train[test_split] + # Compute train loss of this fold and store it. train_loss could + # either be a scalar or a dict of scalars with metrics as keys. + train_loss = self._loss( + self.Y_train_targets[train_split], + train_pred, + ) + train_losses[i] = train_loss + # number of training data points for this fold. Used for weighting + # the average. + train_fold_weights[i] = len(train_split) + + # Compute validation loss of this fold and store it. + optimization_loss = self._loss( + self.Y_targets[i], + opt_pred, + ) + opt_losses[i] = optimization_loss + # number of optimization data points for this fold. + # Used for weighting the average. + opt_fold_weights[i] = len(train_split) + + # Compute weights of each fold based on the number of samples in each + # fold. + train_fold_weights = [w / sum(train_fold_weights) + for w in train_fold_weights] + opt_fold_weights = [w / sum(opt_fold_weights) + for w in opt_fold_weights] + + # train_losses is a list of dicts. It is + # computed using the target metric (self.metric). 
+ train_loss = np.average([train_losses[i][str(self.metric)] + for i in range(self.num_folds)], + weights=train_fold_weights, + ) + + opt_loss = {} + # self.logger.debug("OPT LOSSES: {}".format(opt_losses if opt_losses is not None else None)) + for metric in opt_losses[0].keys(): + opt_loss[metric] = np.average( + [ + opt_losses[i][metric] + for i in range(self.num_folds) + ], + weights=opt_fold_weights, + ) + Y_targets = self.Y_targets + Y_train_targets = self.Y_train_targets + + Y_optimization_preds = np.concatenate( + [Y_optimization_pred[i] for i in range(self.num_folds) + if Y_optimization_pred[i] is not None]) + Y_targets = np.concatenate([ + Y_targets[i] for i in range(self.num_folds) + if Y_targets[i] is not None + ]) + + if self.X_valid is not None: + Y_valid_preds = np.array([Y_valid_pred[i] + for i in range(self.num_folds) + if Y_valid_pred[i] is not None]) + # Average the predictions of several pipelines + if len(Y_valid_preds.shape) == 3: + Y_valid_preds = np.nanmean(Y_valid_preds, axis=0) + else: + Y_valid_preds = None + + if self.X_test is not None: + Y_test_preds = np.array([Y_test_pred[i] + for i in range(self.num_folds) + if Y_test_pred[i] is not None]) + # Average the predictions of several pipelines + if len(Y_test_preds.shape) == 3: + Y_test_preds = np.nanmean(Y_test_preds, axis=0) + else: + Y_test_preds = None + + self.Y_optimization = Y_targets + self.Y_actual_train = Y_train_targets + + self.pipeline = self._get_pipeline() + + status = StatusType.SUCCESS + self.logger.debug("In train evaluator fit_predict_and_loss, loss:{}".format(opt_loss)) + self.finish_up( + loss=opt_loss, + train_loss=train_loss, + opt_pred=Y_optimization_preds, + valid_pred=Y_valid_preds, + test_pred=Y_test_preds, + additional_run_info=additional_run_info, + file_output=True, + status=status, + ) + + def _fit_and_predict(self, pipeline: BaseEstimator, fold: int, train_indices: Union[np.ndarray, List], + test_indices: Union[np.ndarray, List], + add_pipeline_to_self: bool + ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray], Optional[np.ndarray]]: + + self.indices[fold] = ((train_indices, test_indices)) + + X = {'train_indices': train_indices, + 'val_indices': test_indices, + 'split_id': fold, + 'job_id': self.num_run, + **self.fit_dictionary} # fit dictionary + y = None + fit_and_suppress_warnings(self.logger, pipeline, X, y) + self.logger.info("Model fitted, now predicting") + ( + Y_train_pred, + Y_opt_pred, + Y_valid_pred, + Y_test_pred + ) = self._predict( + pipeline, + train_indices=train_indices, + test_indices=test_indices, + ) + + if add_pipeline_to_self: + self.pipeline = pipeline + else: + self.pipelines[fold] = pipeline + + return Y_train_pred, Y_opt_pred, Y_valid_pred, Y_test_pred + + def _predict(self, pipeline: BaseEstimator, + test_indices: Union[np.ndarray, List], + train_indices: Union[np.ndarray, List] + ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray], Optional[np.ndarray]]: + + train_pred = self.predict_function(subsampler(self.X_train, train_indices), pipeline, + self.y_train[train_indices]) + + opt_pred = self.predict_function(subsampler(self.X_train, test_indices), pipeline, + self.y_train[train_indices]) + + if self.X_valid is not None: + valid_pred = self.predict_function(self.X_valid, pipeline, + self.y_valid) + else: + valid_pred = None + if self.X_test is not None: + test_pred = self.predict_function(self.X_test, pipeline, + self.y_train[train_indices]) + else: + test_pred = None + + return train_pred, opt_pred, valid_pred, test_pred + + +# create closure 
for evaluating an algorithm +def eval_function( + backend: Backend, + queue: Queue, + metric: autoPyTorchMetric, + budget: float, + config: Optional[Configuration], + seed: int, + output_y_hat_optimization: bool, + num_run: int, + include: Optional[Dict[str, Any]], + exclude: Optional[Dict[str, Any]], + disable_file_output: Union[bool, List], + pipeline_config: Optional[Dict[str, Any]] = None, + budget_type: str = None, + init_params: Optional[Dict[str, Any]] = None, + logger_port: Optional[int] = None, + all_supported_metrics: bool = True, + instance: str = None, +) -> None: + evaluator = TrainEvaluator( + backend=backend, + queue=queue, + metric=metric, + configuration=config, + seed=seed, + num_run=num_run, + output_y_hat_optimization=output_y_hat_optimization, + include=include, + exclude=exclude, + disable_file_output=disable_file_output, + init_params=init_params, + budget=budget, + budget_type=budget_type, + logger_port=logger_port, + all_supported_metrics=all_supported_metrics, + pipeline_config=pipeline_config + ) + evaluator.fit_predict_and_loss() diff --git a/autoPyTorch/evaluation/utils.py b/autoPyTorch/evaluation/utils.py new file mode 100644 index 000000000..d783413ca --- /dev/null +++ b/autoPyTorch/evaluation/utils.py @@ -0,0 +1,88 @@ +import queue +from multiprocessing.queues import Queue +from typing import List, Optional, Union + +import numpy as np + +import pandas as pd + +from smac.runhistory.runhistory import RunValue + +__all__ = [ + 'read_queue', + 'convert_multioutput_multiclass_to_multilabel', + 'extract_learning_curve', + 'empty_queue' +] + + +def subsampler(data: Union[np.ndarray, pd.DataFrame], + x: Union[np.ndarray, List[int]] + ) -> Union[np.ndarray, pd.DataFrame]: + return data[x] if isinstance(data, np.ndarray) else data.iloc[x] + + +def read_queue(queue_: Queue) -> List[RunValue]: + stack: List[RunValue] = [] + while True: + try: + rval: RunValue = queue_.get(timeout=1) + except queue.Empty: + break + + # Check if there is a special placeholder value which tells us that + # we don't have to wait until the queue times out in order to + # retrieve the final value! 
+ if 'final_queue_element' in rval: + del rval['final_queue_element'] + do_break = True + else: + do_break = False + stack.append(rval) + if do_break: + break + + if len(stack) == 0: + raise queue.Empty + else: + return stack + + +def empty_queue(queue_: Queue) -> None: + while True: + try: + queue_.get(block=False) + except queue.Empty: + break + + queue_.close() + + +def extract_learning_curve(stack: List[RunValue], key: Optional[str] = None) -> List[List]: + learning_curve = [] + for entry in stack: + if key is not None: + learning_curve.append(entry['additional_run_info'][key]) + else: + learning_curve.append(entry['loss']) + return list(learning_curve) + + +def convert_multioutput_multiclass_to_multilabel(probas: Union[List, np.ndarray]) -> np.ndarray: + if isinstance(probas, np.ndarray) and len(probas.shape) > 2: + raise ValueError('New unsupported sklearn output!') + if isinstance(probas, list): + multioutput_probas = np.ndarray((probas[0].shape[0], len(probas))) + for i, output in enumerate(probas): + if output.shape[1] > 2: + raise ValueError('Multioutput-Multiclass supported by ' + 'scikit-learn, but not by auto-pytorch!') + # Only copy the probability of something having class 1 + elif output.shape[1] == 2: + multioutput_probas[:, i] = output[:, 1] + # This label was never observed positive in the training data, + # therefore it is only the probability for the label being False + else: + multioutput_probas[:, i] = 0 + probas = multioutput_probas + return probas diff --git a/autoPyTorch/metrics/__init__.py b/autoPyTorch/metrics/__init__.py new file mode 100755 index 000000000..6014b4fbc --- /dev/null +++ b/autoPyTorch/metrics/__init__.py @@ -0,0 +1 @@ +from autoPyTorch.pipeline.components.training.metrics.metrics import * # noqa diff --git a/autoPyTorch/components/networks/image/darts/__init__.py b/autoPyTorch/optimizer/__init__.py similarity index 100% rename from autoPyTorch/components/networks/image/darts/__init__.py rename to autoPyTorch/optimizer/__init__.py diff --git a/autoPyTorch/optimizer/smbo.py b/autoPyTorch/optimizer/smbo.py new file mode 100644 index 000000000..efc54a516 --- /dev/null +++ b/autoPyTorch/optimizer/smbo.py @@ -0,0 +1,343 @@ +import copy +import json +import logging.handlers +import typing + +import ConfigSpace + +import dask.distributed + +from smac.facade.smac_ac_facade import SMAC4AC +from smac.intensification.hyperband import Hyperband +from smac.runhistory.runhistory import RunHistory +from smac.runhistory.runhistory2epm import RunHistory2EPM4LogCost +from smac.scenario.scenario import Scenario +from smac.tae.dask_runner import DaskParallelRunner +from smac.tae.serial_runner import SerialRunner +from smac.utils.io.traj_logging import TrajEntry + +# TODO: Enable when merged Ensemble +# from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + DEFAULT_RESAMPLING_PARAMETERS, + HoldoutValTypes, +) +from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.utils.backend import Backend +from autoPyTorch.utils.logging_ import get_named_client_logger +from autoPyTorch.utils.stopwatch import StopWatch + + +def get_smac_object( + scenario_dict: typing.Dict[str, typing.Any], + seed: int, + ta: typing.Callable, + ta_kwargs: typing.Dict[str, typing.Any], + n_jobs: int, + initial_budget: int, 
+ max_budget: int, + dask_client: typing.Optional[dask.distributed.Client], +) -> SMAC4AC: + """ + This function returns an SMAC object that is gonna be used as + optimizer of pipelines + + Args: + scenario_dict (typing.Dict[str, typing.Any]): constrain on how to run + the jobs + seed (int): to make the job deterministic + ta (typing.Callable): the function to be intensifier by smac + ta_kwargs (typing.Dict[str, typing.Any]): Arguments to the above ta + n_jobs (int): Amount of cores to use for this task + dask_client (dask.distributed.Client): User provided scheduler + + Returns: + (SMAC4AC): sequential model algorithm configuration object + + """ + intensifier = Hyperband + + rh2EPM = RunHistory2EPM4LogCost + return SMAC4AC( + scenario=Scenario(scenario_dict), + rng=seed, + runhistory2epm=rh2EPM, + tae_runner=ta, + tae_runner_kwargs=ta_kwargs, + initial_configurations=None, + run_id=seed, + intensifier=intensifier, + intensifier_kwargs={'initial_budget': initial_budget, 'max_budget': max_budget, + 'eta': 3, 'min_chall': 1, 'instance_order': 'shuffle_once'}, + dask_client=dask_client, + n_jobs=n_jobs, + ) + + +class AutoMLSMBO(object): + + def __init__(self, + config_space: ConfigSpace.ConfigurationSpace, + dataset_name: str, + backend: Backend, + total_walltime_limit: float, + func_eval_time_limit: float, + memory_limit: typing.Optional[int], + metric: autoPyTorchMetric, + watcher: StopWatch, + n_jobs: int, + dask_client: typing.Optional[dask.distributed.Client], + pipeline_config: typing.Dict[str, typing.Any], + start_num_run: int = 1, + seed: int = 1, + resampling_strategy: typing.Union[HoldoutValTypes, CrossValTypes] = HoldoutValTypes.holdout_validation, + resampling_strategy_args: typing.Optional[typing.Dict[str, typing.Any]] = None, + include: typing.Optional[typing.Dict[str, typing.Any]] = None, + exclude: typing.Optional[typing.Dict[str, typing.Any]] = None, + disable_file_output: typing.List = [], + smac_scenario_args: typing.Optional[typing.Dict[str, typing.Any]] = None, + get_smac_object_callback: typing.Optional[typing.Callable] = None, + all_supported_metrics: bool = True, + # TODO: Re-enable when ensemble merged + # ensemble_callback: typing.Optional[EnsembleBuilderManager] = None, + ensemble_callback: typing.Any = None, + logger_port: typing.Optional[int] = None, + ): + """ + Interface to SMAC. This method calls the SMAC optimize method, and allows + to pass a callback (ensemble_callback) to make launch task at the end of each + optimize() algorithm. The later is needed due to the nature of blocking long running + tasks in Dask. + + Args: + config_space (ConfigSpace.ConfigurationSpac): + The configuration space of the whole process + dataset_name (str): + The name of the dataset, used to identify the current job + backend (Backend): + An interface with disk + total_walltime_limit (float): + The maximum allowed time for this job + func_eval_time_limit (float): + How much each individual task is allowed to last + memory_limit (typing.Optional[int]): + Maximum allowed CPU memory this task can use + metric (autoPyTorchMetric): + An scorer object to evaluate the performance of each jon + watcher (StopWatch): + A stopwatch object to debug time consumption + n_jobs (int): + How many workers are allowed in each task + dask_client (typing.Optional[dask.distributed.Client]): + An user provided scheduler. Else smac will create its own. 
+ start_num_run (int): + The ID index to start runs + seed (int): + To make the run deterministic + resampling_strategy (str): + What strategy to use for performance validation + resampling_strategy_args (typing.Optional[typing.Dict[str, typing.Any]]): + Arguments to the resampling strategy -- like number of folds + include (typing.Optional[typing.Dict[str, typing.Any]] = None): + Optimal Configuration space modifiers + exclude (typing.Optional[typing.Dict[str, typing.Any]] = None): + Optimal Configuration space modifiers + disable_file_output List: + Support to disable file output to disk -- to reduce space + smac_scenario_args (typing.Optional[typing.Dict[str, typing.Any]]): + Additional arguments to the smac scenario + get_smac_object_callback (typing.Optional[typing.Callable]): + Allows to create a user specified SMAC object + ensemble_callback (typing.Optional[EnsembleBuilderManager]): + A callback used in this scenario to start ensemble building subtasks + + """ + super(AutoMLSMBO, self).__init__() + # data related + self.dataset_name = dataset_name + self.datamanager: typing.Optional[BaseDataset] = None + self.metric = metric + self.task: typing.Optional[str] = None + self.backend = backend + self.all_supported_metrics = all_supported_metrics + + self.pipeline_config = pipeline_config + # the configuration space + self.config_space = config_space + + # the number of parallel workers/jobs + self.n_jobs = n_jobs + self.dask_client = dask_client + + # Evaluation + self.resampling_strategy = resampling_strategy + if resampling_strategy_args is None: + resampling_strategy_args = DEFAULT_RESAMPLING_PARAMETERS[resampling_strategy] + self.resampling_strategy_args = resampling_strategy_args + + # and a bunch of useful limits + self.worst_possible_result = get_cost_of_crash(self.metric) + self.total_walltime_limit = int(total_walltime_limit) + self.func_eval_time_limit = int(func_eval_time_limit) + self.memory_limit = memory_limit + self.watcher = watcher + self.seed = seed + self.start_num_run = start_num_run + self.include = include + self.exclude = exclude + self.disable_file_output = disable_file_output + self.smac_scenario_args = smac_scenario_args + self.get_smac_object_callback = get_smac_object_callback + + self.ensemble_callback = ensemble_callback + + dataset_name_ = "" if dataset_name is None else dataset_name + if logger_port is None: + self.logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT + else: + self.logger_port = logger_port + logger_name = '%s(%d):%s' % (self.__class__.__name__, self.seed, ":" + dataset_name_) + self.logger = get_named_client_logger(name=logger_name, + port=self.logger_port) + self.logger.info("initialised {}".format(self.__class__.__name__)) + + def reset_data_manager(self) -> None: + if self.datamanager is not None: + del self.datamanager + self.datamanager = self.backend.load_datamanager() + + if self.datamanager is not None and self.datamanager.task_type is not None: + self.task = self.datamanager.task_type + + def run_smbo(self, func: typing.Optional[typing.Callable] = None + ) -> typing.Tuple[RunHistory, typing.List[TrajEntry], str]: + + self.watcher.start_task('SMBO') + self.logger.info("Started run of SMBO") + # == first things first: load the datamanager + self.reset_data_manager() + + # == Initialize non-SMBO stuff + # first create a scenario + seed = self.seed + self.config_space.seed(seed) + # allocate a run history + num_run = self.start_num_run + + # Initialize some SMAC dependencies + + if isinstance(self.resampling_strategy, 
CrossValTypes): + num_splits = self.resampling_strategy_args['num_splits'] + instances = [[json.dumps({'task_id': self.dataset_name, + 'fold': fold_number})] + for fold_number in range(num_splits)] + else: + instances = [[json.dumps({'task_id': self.dataset_name})]] + + # TODO rebuild target algorithm to be it's own target algorithm + # evaluator, which takes into account that a run can be killed prior + # to the model being fully fitted; thus putting intermediate results + # into a queue and querying them once the time is over + ta_kwargs = dict( + backend=copy.deepcopy(self.backend), + seed=seed, + initial_num_run=num_run, + logger=self.logger, + include=self.include if self.include is not None else dict(), + exclude=self.exclude if self.exclude is not None else dict(), + metric=self.metric, + memory_limit=self.memory_limit, + disable_file_output=self.disable_file_output, + ta=func, + logger_port=self.logger_port, + all_supported_metrics=self.all_supported_metrics, + pipeline_config=self.pipeline_config + ) + ta = ExecuteTaFuncWithQueue + self.logger.info("Created TA") + + startup_time = self.watcher.wall_elapsed(self.dataset_name) + total_walltime_limit = self.total_walltime_limit - startup_time - 5 + scenario_dict = { + 'abort_on_first_run_crash': False, + 'cs': self.config_space, + 'cutoff_time': self.func_eval_time_limit, + 'deterministic': 'true', + 'instances': instances, + 'memory_limit': self.memory_limit, + 'output-dir': self.backend.get_smac_output_directory(), + 'run_obj': 'quality', + 'wallclock_limit': total_walltime_limit, + 'cost_for_crash': self.worst_possible_result, + } + if self.smac_scenario_args is not None: + for arg in [ + 'abort_on_first_run_crash', + 'cs', + 'deterministic', + 'instances', + 'output-dir', + 'run_obj', + 'shared-model', + 'cost_for_crash', + ]: + if arg in self.smac_scenario_args: + self.logger.warning('Cannot override scenario argument %s, ' + 'will ignore this.', arg) + del self.smac_scenario_args[arg] + for arg in [ + 'cutoff_time', + 'memory_limit', + 'wallclock_limit', + ]: + if arg in self.smac_scenario_args: + self.logger.warning( + 'Overriding scenario argument %s: %s with value %s', + arg, + scenario_dict[arg], + self.smac_scenario_args[arg] + ) + scenario_dict.update(self.smac_scenario_args) + + initial_budget = self.pipeline_config['min_epochs'] + max_budget = self.pipeline_config['epochs'] + + if self.get_smac_object_callback is not None: + smac = self.get_smac_object_callback(scenario_dict=scenario_dict, + seed=seed, + ta=ta, + ta_kwargs=ta_kwargs, + n_jobs=self.n_jobs, + initial_budget=initial_budget, + max_budget=max_budget, + dask_client=self.dask_client) + else: + smac = get_smac_object(scenario_dict=scenario_dict, + seed=seed, + ta=ta, + ta_kwargs=ta_kwargs, + n_jobs=self.n_jobs, + initial_budget=initial_budget, + max_budget=max_budget, + dask_client=self.dask_client) + + if self.ensemble_callback is not None: + smac.register_callback(self.ensemble_callback) + + self.logger.info("initialised smac, running optimise") + + smac.optimize() + + self.runhistory = smac.solver.runhistory + self.trajectory = smac.solver.intensifier.traj_logger.trajectory + if isinstance(smac.solver.tae_runner, DaskParallelRunner): + self._budget_type = smac.solver.tae_runner.single_worker.budget_type + elif isinstance(smac.solver.tae_runner, SerialRunner): + self._budget_type = smac.solver.tae_runner.budget_type + else: + raise NotImplementedError(type(smac.solver.tae_runner)) + + return self.runhistory, self.trajectory, self._budget_type diff --git 
a/autoPyTorch/pipeline/base/node.py b/autoPyTorch/pipeline/base/node.py deleted file mode 100644 index 45d43448e..000000000 --- a/autoPyTorch/pipeline/base/node.py +++ /dev/null @@ -1,211 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -import gc -import inspect -import logging - - -class Node(): - def __init__(self): - self.child_node = None - self.fit_output = None - self.predict_output = None - self.logger = logging.getLogger('autonet') - - def fit(self, **kwargs): - """Fit pipeline node. - Each node computes its fit function in linear order by fit_traverse().. - All args have to be specified in a parent node fit output. - - Returns: - dict -- output values that will be passed to child nodes, if required - """ - return dict() - - def predict(self, **kwargs): - """Predict pipeline node. - Each node computes its predict function in linear order by predict_traverse(). - All args have to be specified in a parent node predict output or in the fit output of this node - - Returns: - dict -- output values that will be passed to child nodes, if required - """ - return dict() - - def get_fit_argspec(self): - """Get the necessary keywords of the fit method for this node - - Returns: - tuple -- The keywords and their defaults - """ - possible_keywords, _, _, defaults, _, _, _ = inspect.getfullargspec(self.fit) - possible_keywords = [k for k in possible_keywords if k != 'self'] - return possible_keywords, defaults - - def get_predict_argspec(self): - """Get the necessary keywords of the predict method for this node - - Returns: - tuple -- The keywords and their defaults - """ - possible_keywords, _, _, defaults, _, _, _ = inspect.getfullargspec(self.predict) - possible_keywords = [k for k in possible_keywords if k != 'self'] - return possible_keywords, defaults - - def clean_fit_data(self): - node = self - - # clear outputs - while (node is not None): - node.fit_output = None - node.predict_output = None - node = node.child_node - - def fit_traverse(self, **kwargs): - """ - Calls fit function of child nodes. - The fit function can have different keyword arguments. - All keywords have to be either defined in kwargs or in an fit output of a parent node. - - The fit method of each node specifies a list of keyword arguments. - The fit method of each node returns a dictionary of values for keywords of follwing nodes. - - This method collects the results of each fit method call and calls the fit methods with the collected values. - """ - - self.clean_fit_data() - gc.collect() - - base = Node() - base.fit_output = kwargs - - # map all collected kwargs to node whose result the kwarg was - available_kwargs = {key: base for key in kwargs.keys()} - - node = self - prev_node = base - - while (node is not None): - prev_node = node - # get necessary kwargs of current node - possible_keywords, defaults = node.get_fit_argspec() - - last_required_keyword_index = len(possible_keywords) - len(defaults or []) - required_kwargs = dict() - - # get the values to the necessary keywords if available. Use default if not. 
- for index, keyword in enumerate(possible_keywords): - if (keyword in available_kwargs): - required_kwargs[keyword] = available_kwargs[keyword].fit_output[keyword] - - elif index >= last_required_keyword_index: - required_kwargs[keyword] = defaults[index - last_required_keyword_index] - - else: # Neither default specified nor keyword available - print ("Available keywords:", sorted(available_kwargs.keys())) - raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.') - - if type(node) != Node: - self.logger.debug('Fit: ' + str(type(node).__name__)) - - # call fit method - node.fit_output = node.fit(**required_kwargs) - if (not isinstance(node.fit_output, dict)): - raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.') - - # collect resulting keyword-value pairs - for keyword in node.fit_output.keys(): - if keyword in available_kwargs: - # delete old values - if (keyword not in available_kwargs[keyword].get_predict_argspec()[0]): - del available_kwargs[keyword].fit_output[keyword] - available_kwargs[keyword] = node - node = node.child_node - - gc.collect() - - return prev_node.fit_output - - def predict_traverse(self, **kwargs): - """Calls predict function of child nodes. - The predict function can have different keyword arguments. - All keywords have to be either defined in kwargs, in a predict output of a parent node or in the nodes own fit output. - - The predict method of each node specifies a list of keyword arguments. - The predict method of each node returns a dictionary of values for keywords of follwing nodes. - - This method collects the results of each predict method call and calls the predict methods with the collected values. - For each node, the results of the fit call can also be passed to the predict method - - """ - - base = Node() - base.predict_output = kwargs - - # map all collected kwargs to node whose whose result the kwarg was - available_kwargs = {key: base for key in kwargs.keys()} - - node = self - - # clear outputs - while (node is not None): - node.predict_output = None - node = node.child_node - - gc.collect() - - node = self - prev_node = base - - while (node is not None): - prev_node = node - # get necessary kwargs of current node - possible_keywords, defaults = node.get_predict_argspec() - - last_required_keyword_index = len(possible_keywords) - len(defaults or []) - required_kwargs = dict() - - # get the values to the necessary keywords if available. Use fit result or default if not. 
- for index, keyword in enumerate(possible_keywords): - if (keyword in available_kwargs): - if (available_kwargs[keyword].predict_output is None): - print(str(type(available_kwargs[keyword]))) - required_kwargs[keyword] = available_kwargs[keyword].predict_output[keyword] - - elif (node.fit_output is not None and keyword in node.fit_output): - required_kwargs[keyword] = node.fit_output[keyword] - - elif index >= last_required_keyword_index: - required_kwargs[keyword] = defaults[index - last_required_keyword_index] - - else: # Neither default specified nor keyword available nor available in fit result of the node - raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + keyword + ' which is not available.') - - node.predict_output = node.predict(**required_kwargs) - if (not isinstance(node.predict_output, dict)): - raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.') - - # collect keyword arguments - for keyword in node.predict_output.keys(): - if keyword in available_kwargs: - # delete old values - if (available_kwargs[keyword].predict_output[keyword] is not None): - del available_kwargs[keyword].predict_output[keyword] - available_kwargs[keyword] = node - node = node.child_node - - gc.collect() - - return prev_node.predict_output - - - - - - - - - diff --git a/autoPyTorch/pipeline/base/pipeline.py b/autoPyTorch/pipeline/base/pipeline.py deleted file mode 100644 index af246f8a5..000000000 --- a/autoPyTorch/pipeline/base/pipeline.py +++ /dev/null @@ -1,196 +0,0 @@ -import time -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.pipeline.base.node import Node -import ConfigSpace -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates -import traceback - - -class Pipeline(): - """A machine learning pipeline""" - - def __init__(self, pipeline_nodes=[]): - """Construct a Pipeline - - Keyword Arguments: - pipeline_nodes {list} -- The nodes of the pipeline (default: {[]}) - """ - self.root = Node() - self._pipeline_nodes = dict() - self._parent_pipeline = None - - # add all the given nodes to the pipeline - last_node = self.root - for node in pipeline_nodes: - last_node.child_node = node - self.add_pipeline_node(node) - last_node = node - - def __getitem__(self, key): - return self._pipeline_nodes[key] - - def __contains__(self, key): - if isinstance(key, str): - return key in self._pipeline_nodes - elif issubclass(key, PipelineNode): - return key.get_name() in self._pipeline_nodes - else: - raise ValueError("Cannot check if instance " + str(key) + " of type " + str(type(key)) + " is contained in pipeline") - - def set_parent_pipeline(self, pipeline): - """Set this pipeline as a child pipeline of the given pipeline. - This will allow the parent pipeline to access the pipeline nodes of its child pipelines. 
- - Arguments: - pipeline {Pipeline} -- parent pipeline - """ - - if (not issubclass(type(pipeline), Pipeline)): - raise ValueError("Given pipeline has to be of type Pipeline, got " + str(type(pipeline))) - - self._parent_pipeline = pipeline - - for _, node in self._pipeline_nodes.items(): - self._parent_pipeline.add_pipeline_node(node) - - - def fit_pipeline(self, **kwargs): - return self.root.fit_traverse(**kwargs) - - def predict_pipeline(self, **kwargs): - return self.root.predict_traverse(**kwargs) - - def add_pipeline_node(self, pipeline_node): - """Add a node to the pipeline - - Arguments: - pipeline_node {PipelineNode} -- node - - Returns: - PipelineNode -- return input node - """ - - if (not issubclass(type(pipeline_node), PipelineNode)): - raise ValueError("You can only add PipelineElement subclasses to the pipeline") - - self._pipeline_nodes[pipeline_node.get_name()] = pipeline_node - pipeline_node.set_pipeline(self) - - if (self._parent_pipeline): - self._parent_pipeline.add_pipeline_node(pipeline_node) - - return pipeline_node - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - """Get the search space of the pipeline. - - Keyword Arguments: - dataset_info {DatasetInfo} -- Object describing the dataset. (default: {None}) - - Returns: - ConfigurationSpace -- The search space of the pipeline - """ - pipeline_config = self.get_pipeline_config(**pipeline_config) - - # check for hyperparameter search space updates and apply them - if "hyperparameter_search_space_updates" in pipeline_config and pipeline_config["hyperparameter_search_space_updates"] is not None: - assert isinstance(pipeline_config["hyperparameter_search_space_updates"], HyperparameterSearchSpaceUpdates) - pipeline_config["hyperparameter_search_space_updates"].apply(self, pipeline_config) - - # initialize the config space - if "random_seed" in pipeline_config: - cs = ConfigSpace.ConfigurationSpace(seed=pipeline_config["random_seed"]) - else: - cs = ConfigSpace.ConfigurationSpace() - - # add the config space of each node - for name, node in self._pipeline_nodes.items(): - #print("dataset_info" in pipeline_config.keys()) - config_space = node.get_hyperparameter_search_space(**pipeline_config) - cs.add_configuration_space(prefix=name, configuration_space=config_space, delimiter=ConfigWrapper.delimiter) - - # add the dependencies between the nodes - for name, node in self._pipeline_nodes.items(): - cs = node.insert_inter_node_hyperparameter_dependencies(cs, dataset_info=dataset_info, **pipeline_config) - - return cs - - def get_pipeline_config(self, throw_error_if_invalid=True, **pipeline_config): - """Get the full pipeline config given a partial pipeline config - - Keyword Arguments: - throw_error_if_invalid {bool} -- Throw an error if invalid config option is defined (default: {True}) - - Returns: - dict -- the full config for the pipeline, containing values for all options - """ - options = self.get_pipeline_config_options() - conditions = self.get_pipeline_config_conditions() - - parser = ConfigFileParser(options) - pipeline_config = parser.set_defaults(pipeline_config, throw_error_if_invalid=throw_error_if_invalid) - - # check the conditions e.g. max_budget > min_budget - for c in conditions: - try: - c(pipeline_config) - except Exception as e: - if throw_error_if_invalid: - raise - print(e) - traceback.print_exc() - - return pipeline_config - - - def get_pipeline_config_options(self): - """Get all ConfigOptions of all nodes in the pipeline. 
- - Returns: - list -- A list of ConfigOptions. - """ - if (self._parent_pipeline is not None): - return self._parent_pipeline.get_pipeline_config_options() - - options = [] - - for node in self._pipeline_nodes.values(): - options += node.get_pipeline_config_options() - - return options - - def get_pipeline_config_conditions(self): - """Get all ConfigConditions of all the nodes in the pipeline. - - Returns: - list -- A list of ConfigConditions - """ - if (self._parent_pipeline is not None): - return self._parent_pipeline.get_pipeline_config_options() - - conditions = [] - - for node in self._pipeline_nodes.values(): - conditions += node.get_pipeline_config_conditions() - - return conditions - - def clean(self): - self.root.clean_fit_data() - - def clone(self): - """Clone the pipeline - - Returns: - Pipeline -- The cloned pipeline - """ - pipeline_nodes = [] - - current_node = self.root.child_node - while current_node is not None: - pipeline_nodes.append(current_node.clone()) - current_node = current_node.child_node - - return type(self)(pipeline_nodes) diff --git a/autoPyTorch/pipeline/base/pipeline_node.py b/autoPyTorch/pipeline/base/pipeline_node.py deleted file mode 100644 index 57f673c45..000000000 --- a/autoPyTorch/pipeline/base/pipeline_node.py +++ /dev/null @@ -1,175 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -from copy import deepcopy -import ConfigSpace -import inspect -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.pipeline.base.node import Node - - -class PipelineNode(Node): - """ A node in the ML Pipeline""" - - def __init__(self): - """A pipeline node is a step in a pipeline. - It can implement a fit function: - Returns a dictionary. - Input parameter (kwargs) are given by previous fit function computations in the pipeline. - It can implement a predict function: - Returns a dictionary. - Input parameter (kwargs) are given by previous predict function computations in the pipeline or defined in fit function output of this node. - - Each node can provide a list of config options that the user can specify/customize. - Each node can provide a config space for optimization. - - """ - - super(PipelineNode, self).__init__() - self._cs_updates = dict() - self.pipeline = None - - @classmethod - def get_name(cls): - return cls.__name__ - - def clone(self, skip=("pipeline", "fit_output", "predict_output", "child_node")): - """Clone a pipeline node - - Keyword Arguments: - skip {tuple} -- attributes that should not be cloned (default: {("pipeline", "fit_output", "predict_output", "child_node")}) - - Returns: - PipelineNode -- The cloned node - """ - node_type = type(self) - new_node = node_type.__new__(node_type) - for key, value in self.__dict__.items(): - if key not in skip: - setattr(new_node, key, deepcopy(value)) - else: - setattr(new_node, key, None) - return new_node - - def set_pipeline(self, pipeline): - """Set the pipeline of this node - - Arguments: - pipeline {Pipeline} -- The pipeline to set - """ - self.pipeline = pipeline - - # VIRTUAL - def get_pipeline_config_options(self): - """Get available ConfigOption parameter. - - Returns: - List[ConfigOption] -- list of available config options - """ - - return [] - - # VIRTUAL - def get_pipeline_config_conditions(self): - """Get the conditions on the pipeline config (e.g. 
max_budget > min_budget) - - Returns: - List[ConfigCondition] -- list of functions, that take a pipeline config and raise an Error, if faulty configuration is detected. - """ - - return [] - - - # VIRTUAL - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - """Get hyperparameter that should be optimized. - - Returns: - ConfigSpace -- config space - """ - return ConfigSpace.ConfigurationSpace() - - # VIRTUAL - def insert_inter_node_hyperparameter_dependencies(self, config_space, dataset_info=None, **pipeline_config): - """Insert Conditions and Forbiddens of hyperparameters of different nodes - - Returns: - ConfigSpace -- config space - """ - return config_space - - def _apply_search_space_update(self, name, new_value_range, log=False): - """Allows the user to update a hyperparameter - - Arguments: - name {string} -- name of hyperparameter - new_value_range {List[?] -- value range can be either lower, upper or a list of possible conditionals - log {bool} -- is hyperparameter logscale - """ - - if (len(new_value_range) == 0): - raise ValueError("The new value range needs at least one value") - self._cs_updates[name] = tuple([new_value_range, log]) - - def _check_search_space_updates(self, *allowed_hps): - """Check if the given search space updates are valid. - - Arguments: - *allowed_hps: List of allowed hps. A list of lists, tuples or strings. - If a allowed hp is a string, hyperparameter updates with given string as name are allowed. - If allowed hp update is a star "*", all hyperparameter updates are allowed. - If allowed hp is a list, all elements in the list are allowed. - If allowed hp is a tuple, we join the values with the ConfigWrapper delimiter. - The elements here can also be lists or stars, with meaning explained above. - - Raises: - ValueError: The given search space updates are not valid. - """ - # process all allowed hps given and add them to this list - exploded_allowed_hps = list() - - # iterate over all given allowed hps - for allowed_hp in allowed_hps: - add = [list()] # the list of allowed hps to add to exploded_allowed_hps. - allowed_hp = (allowed_hp, ) if isinstance(allowed_hp, str) else allowed_hp - - # if tuple, iterate over all parts of allowed hp - for part in allowed_hp: - # add the part to each element of add. Check if part is str or list. - if isinstance(part, str): - add = [x + [part] for x in add] - else: - add = [x + [p] for p in part for x in add] - exploded_allowed_hps += add - - # join the allowed hps with ConfigWrapper delimiter - exploded_allowed_hps = [ConfigWrapper.delimiter.join(x) for x in exploded_allowed_hps] - - # Check given hyperparameter updates and raise exception if invalid hyperparameter update is given. - for key in self._get_search_space_updates().keys(): - if key not in exploded_allowed_hps and \ - ConfigWrapper.delimiter.join(key.split(ConfigWrapper.delimiter)[:-1] + ["*"]) not in exploded_allowed_hps: - raise ValueError("Invalid search space update given: %s" % key) - - def _get_search_space_updates(self, prefix=None): - """Get the search space updates with the given prefix - - Keyword Arguments: - prefix {str} -- Only return search space updates with given prefix (default: {None}) - - Returns: - dict -- Mapping of search space updates. Keys don't contain the prefix. 
- """ - if prefix is None: - return self._cs_updates - if isinstance(prefix, tuple): - prefix = ConfigWrapper.delimiter.join(prefix) - result = dict() - - # iterate over all search space updates of this node and filter the ones out, that have the given prefix - for key in self._cs_updates.keys(): - if key.startswith(prefix + ConfigWrapper.delimiter): - result[key[len(prefix + ConfigWrapper.delimiter):]] = self._cs_updates[key] - return result diff --git a/autoPyTorch/pipeline/base/sub_pipeline_node.py b/autoPyTorch/pipeline/base/sub_pipeline_node.py deleted file mode 100644 index c54b6d3df..000000000 --- a/autoPyTorch/pipeline/base/sub_pipeline_node.py +++ /dev/null @@ -1,32 +0,0 @@ - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.pipeline.base.pipeline import Pipeline - -class SubPipelineNode(PipelineNode): - """A Pipeline node that contains a sub-pipeline""" - def __init__(self, sub_pipeline_nodes): - """Construct the node and the sub pipeline - - Arguments: - sub_pipeline_nodes {list} -- A list of nodes of the sub-pipeline - """ - super(SubPipelineNode, self).__init__() - - self.sub_pipeline = Pipeline(sub_pipeline_nodes) - - def set_pipeline(self, pipeline): - super(SubPipelineNode, self).set_pipeline(pipeline) - self.sub_pipeline.set_parent_pipeline(pipeline) - - def fit(self, **kwargs): - return self.sub_pipeline.fit_pipeline(**kwargs) - - def predict(self, **kwargs): - return self.sub_pipeline.predict_pipeline(**kwargs) - - def clone(self): - sub_pipeline = self.sub_pipeline.clone() - new_node = super().clone(skip=("pipeline", "fit_output", "predict_output", "child_node", "sub_pipeline")) - new_node.sub_pipeline = sub_pipeline - return new_node - diff --git a/autoPyTorch/pipeline/base_pipeline.py b/autoPyTorch/pipeline/base_pipeline.py new file mode 100644 index 000000000..2d7449c03 --- /dev/null +++ b/autoPyTorch/pipeline/base_pipeline.py @@ -0,0 +1,418 @@ +import warnings +from abc import ABCMeta +from collections import Counter +from typing import Any, Dict, List, Optional, Tuple + +from ConfigSpace import Configuration +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from sklearn.pipeline import Pipeline +from sklearn.utils.validation import check_random_state + +import torch + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent +from autoPyTorch.pipeline.create_searchspace_util import ( + add_forbidden, + find_active_choices, + get_match_array +) +from autoPyTorch.utils.common import FitRequirement + + +class BasePipeline(Pipeline): + """Base class for all pipeline objects. + Notes + ----- + This class should not be instantiated, only subclassed. + + Args: + config (Optional[Configuration]): Allows to directly specify a configuration space + steps (Optional[List[Tuple[str, autoPyTorchChoice]]]): the list of steps that + build the pipeline. If provided, they won't be dynamically produced. + include (Optional[Dict[str, Any]]): Allows the caller to specify which configurations + to honor during the creation of the configuration space. + exclude (Optional[Dict[str, Any]]): Allows the caller to specify which configurations + to avoid during the creation of the configuration space. 
+ random_state (np.random.RandomState): allows to produce reproducible results by + setting a seed for randomized settings + init_params (Optional[Dict[str, Any]]) + + + Attributes: + steps (List[Tuple[str, autoPyTorchChoice]]]): the steps of the current pipeline + config (Configuration): a configuration to delimit the current component choice + random_state (Optional[np.random.RandomState]): allows to produce reproducible + results by setting a seed for randomized settings + + """ + __metaclass__ = ABCMeta + + def __init__( + self, + config: Optional[Configuration] = None, + steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, + dataset_properties: Optional[Dict[str, Any]] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict[str, Any]] = None + ): + + self.init_params = init_params if init_params is not None else {} + self.dataset_properties = dataset_properties if \ + dataset_properties is not None else {} + self.include = include if include is not None else {} + self.exclude = exclude if exclude is not None else {} + + if steps is None: + self.steps = self._get_pipeline_steps(dataset_properties) + else: + self.steps = steps + + self.config_space = self.get_hyperparameter_search_space() + + if config is None: + self.config = self.config_space.get_default_configuration() + else: + if isinstance(config, dict): + config = Configuration(self.config_space, config) + if self.config_space != config.configuration_space: + warnings.warn(self.config_space._children) + warnings.warn(config.configuration_space._children) + import difflib + diff = difflib.unified_diff( + str(self.config_space).splitlines(), + str(config.configuration_space).splitlines()) + diff_msg = '\n'.join(diff) + raise ValueError('Configuration passed does not come from the ' + 'same configuration space. Differences are: ' + '%s' % diff_msg) + self.config = config + + self.set_hyperparameters(self.config, init_params=init_params) + + if random_state is None: + self.random_state = check_random_state(1) + else: + self.random_state = check_random_state(random_state) + super().__init__(steps=self.steps) + + self._additional_run_info = {} # type: Dict[str, str] + + def get_max_iter(self) -> int: + if self.estimator_supports_iterative_fit(): + return self._final_estimator.get_max_iter() + else: + raise NotImplementedError() + + def configuration_fully_fitted(self) -> bool: + return self._final_estimator.configuration_fully_fitted() + + def get_current_iter(self) -> int: + return self._final_estimator.get_current_iter() + + def predict(self, X: np.ndarray, batch_size: Optional[int] = None + ) -> np.ndarray: + """Predict the output using the selected model. + + Args: + X (np.ndarray): input data to the array + batch_size (Optional[int]): batch_size controls whether the pipeline will be + called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. + + Returns: + np.ndarray: the predicted values given input X + """ + + # Pre-process X + if batch_size is None: + warnings.warn("Batch size not provided. 
" + "Will predict on the whole data in a single iteration") + batch_size = X.shape[0] + loader = self.named_steps['data_loader'].get_loader(X=X, batch_size=batch_size) + return self.named_steps['network'].predict(loader) + + def set_hyperparameters( + self, + configuration: Configuration, + init_params: Optional[Dict] = None + ) -> 'Pipeline': + """Method to set the hyperparameter configuration of the pipeline. + + It iterates over the components of the pipeline and applies a given + configuration accordingly. + + Args: + configuration (Configuration): configuration object to search and overwrite in + the pertinent spaces + init_params (Optional[Dict]): optional initial settings for the config + + """ + self.configuration = configuration + + for node_idx, n_ in enumerate(self.steps): + node_name, node = n_ + + sub_configuration_space = node.get_hyperparameter_search_space(self.dataset_properties) + sub_config_dict = {} + for param in configuration: + if param.startswith('%s:' % node_name): + value = configuration[param] + new_name = param.replace('%s:' % node_name, '', 1) + sub_config_dict[new_name] = value + + sub_configuration = Configuration(sub_configuration_space, + values=sub_config_dict) + + if init_params is not None: + sub_init_params_dict = {} + for param in init_params: + if param.startswith('%s:' % node_name): + value = init_params[param] + new_name = param.replace('%s:' % node_name, '', 1) + sub_init_params_dict[new_name] = value + + if isinstance(node, (autoPyTorchChoice, autoPyTorchComponent, BasePipeline)): + node.set_hyperparameters( + configuration=sub_configuration, + init_params=None if init_params is None else sub_init_params_dict, + ) + else: + raise NotImplementedError('Not supported yet!') + + return self + + def get_hyperparameter_search_space(self) -> ConfigurationSpace: + """Return the configuration space for the CASH problem. + + Returns: + ConfigurationSpace: The configuration space describing the Pipeline. + """ + if not hasattr(self, 'config_space') or self.config_space is None: + self.config_space = self._get_hyperparameter_search_space( + dataset_properties=self.dataset_properties, + include=self.include, + exclude=self.exclude, + ) + return self.config_space + + def get_model(self) -> torch.nn.Module: + """ + Returns the fitted model to the user + """ + return self.named_steps['network'].get_network() + + def _get_hyperparameter_search_space(self, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + ) -> ConfigurationSpace: + """Return the configuration space for the CASH problem. + This method should be called by the method + get_hyperparameter_search_space of a subclass. After the subclass + assembles a list of available estimators and preprocessor components, + _get_hyperparameter_search_space can be called to do the work of + creating the actual ConfigSpace.configuration_space.ConfigurationSpace object. + + Args: + include (Dict): Overwrite to include user desired components to the pipeline + exclude (Dict): Overwrite to exclude user desired components to the pipeline + + Returns: + Configuration: The configuration space describing the AutoPytorch estimator. 
+ """ + raise NotImplementedError() + + def __repr__(self) -> str: + """Retrieves a str representation of the current pipeline + + Returns: + str: A formatted representation of the pipeline stages + and components + """ + string = '' + string += '_' * 40 + string += "\n\t" + self.__class__.__name__ + "\n" + string += '_' * 40 + string += "\n" + for i, (stage_name, component) in enumerate(self.named_steps.items()): + string += str(i) + "-) " + stage_name + ": " + string += "\n\t" + string += str(component.choice) if hasattr(component, 'choice') else str(component) + string += "\n" + string += "\n" + string += '_' * 40 + return string + + def _get_base_search_space( + self, + cs: ConfigurationSpace, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]], + exclude: Optional[Dict[str, Any]], + pipeline: List[Tuple[str, autoPyTorchChoice]] + ) -> ConfigurationSpace: + if include is None: + if self.include is None: + include = {} + else: + include = self.include + + keys = [pair[0] for pair in pipeline] + for key in include: + if key not in keys: + raise ValueError('Invalid key in include: %s; should be one ' + 'of %s' % (key, keys)) + + if exclude is None: + if self.exclude is None: + exclude = {} + else: + exclude = self.exclude + + keys = [pair[0] for pair in pipeline] + for key in exclude: + if key not in keys: + raise ValueError('Invalid key in exclude: %s; should be one ' + 'of %s' % (key, keys)) + + matches = get_match_array( + pipeline, dataset_properties, include=include, exclude=exclude) + + # Now we have only legal combinations at this step of the pipeline + # Simple sanity checks + assert np.sum(matches) != 0, "No valid pipeline found." + + assert np.sum(matches) <= np.size(matches), \ + "'matches' is not binary; %s <= %d, %s" % \ + (str(np.sum(matches)), np.size(matches), str(matches.shape)) + + # Iterate each dimension of the matches array (each step of the + # pipeline) to see if we can add a hyperparameter for that step + for node_idx, n_ in enumerate(pipeline): + node_name, node = n_ + + is_choice = isinstance(node, autoPyTorchChoice) + + # if the node isn't a choice we can add it immediately because it + # must be active (if it wasn't, np.sum(matches) would be zero + if not is_choice: + cs.add_configuration_space( + node_name, + node.get_hyperparameter_search_space(dataset_properties), + ) + # If the node is a choice, we have to figure out which of its + # choices are actually legal choices + else: + choices_list = find_active_choices( + matches, node, node_idx, + dataset_properties, + include.get(node_name), + exclude.get(node_name) + ) + sub_config_space = node.get_hyperparameter_search_space( + dataset_properties, include=choices_list) + cs.add_configuration_space(node_name, sub_config_space) + + # And now add forbidden parameter configurations + # According to matches + if np.sum(matches) < np.size(matches): + cs = add_forbidden( + conf_space=cs, pipeline=pipeline, matches=matches, + dataset_properties=dataset_properties, include=include, + exclude=exclude) + + return cs + + def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]] + ) -> List[Tuple[str, autoPyTorchChoice]]: + """ + Defines what steps a pipeline should follow. + The step itself has choices given via autoPyTorchChoices. + + Returns: + List[Tuple[str, autoPyTorchChoices]]: list of steps sequentially exercised + by the pipeline. 
+ """ + raise NotImplementedError() + + def get_fit_requirements(self) -> List[FitRequirement]: + """ + Utility function that goes through all the components in + the pipeline and gets the fit requirement of that components. + All the fit requirements are then aggregated into a list + Returns: + List[NamedTuple]: List of FitRequirements + """ + fit_requirements = list() # List[FitRequirement] + for name, step in self.steps: + step_requirements = step.get_fit_requirements() + if step_requirements: + fit_requirements.extend(step_requirements) + + # remove duplicates in the list + fit_requirements = list(set(fit_requirements)) + fit_requirements = [req for req in fit_requirements if (req.user_defined and not req.dataset_property)] + req_names = [req.name for req in fit_requirements] + + # check wether requirement names are unique + if len(set(req_names)) != len(fit_requirements): + name_occurences = Counter(req_names) + multiple_names = [name for name, num_occ in name_occurences.items() if num_occ > 1] + multiple_fit_requirements = [req for req in fit_requirements if req.name in multiple_names] + raise ValueError("Found fit requirements with different values %s" % multiple_fit_requirements) + return fit_requirements + + def get_dataset_requirements(self) -> List[FitRequirement]: + """ + Utility function that goes through all the components in + the pipeline and gets the fit requirement that are expected to be + computed by the dataset for that components. All the fit requirements + are then aggregated into a list. + Returns: + List[NamedTuple]: List of FitRequirements + """ + fit_requirements = list() # type: List[FitRequirement] + for name, step in self.steps: + step_requirements = step.get_fit_requirements() + if step_requirements: + fit_requirements.extend(step_requirements) + + # remove duplicates in the list + fit_requirements = list(set(fit_requirements)) + fit_requirements = [req for req in fit_requirements if (req.user_defined and req.dataset_property)] + return fit_requirements + + def _get_estimator_hyperparameter_name(self) -> str: + """The name of the current pipeline estimator, for representation purposes""" + raise NotImplementedError() + + def get_additional_run_info(self) -> Dict: + """Allows retrieving additional run information from the pipeline. + Can be overridden by subclasses to return additional information to + the optimization algorithm. 
+ + Returns: + Dict: Additional information about the pipeline + """ + return self._additional_run_info + + @staticmethod + def get_default_pipeline_options() -> Dict[str, Any]: + return { + 'job_id': '1', + 'device': 'cpu', + 'budget_type': 'epochs', + 'epochs': 5, + 'runtime': 3600, + 'torch_num_threads': 1, + 'early_stopping': 10, + 'use_tensorboard_logger': True, + 'use_pynisher': False, + 'metrics_during_training': True + } diff --git a/autoPyTorch/components/networks/image/utils/__init__.py b/autoPyTorch/pipeline/components/__init__.py similarity index 100% rename from autoPyTorch/components/networks/image/utils/__init__.py rename to autoPyTorch/pipeline/components/__init__.py diff --git a/autoPyTorch/pipeline/components/base_choice.py b/autoPyTorch/pipeline/components/base_choice.py new file mode 100644 index 000000000..0cf61a12f --- /dev/null +++ b/autoPyTorch/pipeline/components/base_choice.py @@ -0,0 +1,246 @@ +import warnings +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace + +import numpy as np + +from sklearn.utils import check_random_state + +from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent +from autoPyTorch.utils.common import FitRequirement + + +class autoPyTorchChoice(object): + """Allows for the dynamically generation of components as pipeline steps. + + Args: + dataset_properties (Dict[str, Union[str, int]]): Describes the dataset + to work on + random_state (Optional[np.random.RandomState]): allows to produce reproducible + results by setting a seed for randomized settings + + Attributes: + random_state (Optional[np.random.RandomState]): allows to produce reproducible + results by setting a seed for randomized settings + choice (autoPyTorchComponent): the choice of components for this stage + """ + def __init__(self, + dataset_properties: Dict[str, Any], + random_state: Optional[np.random.RandomState] = None + ): + + # Since all calls to get_hyperparameter_search_space will be done by the + # pipeline on construction, it is not necessary to construct a + # configuration space at this location! + # self.configuration = self.get_hyperparameter_search_space( + # dataset_properties).get_default_configuration() + + if random_state is None: + self.random_state = check_random_state(1) + else: + self.random_state = check_random_state(random_state) + + self.dataset_properties = dataset_properties + self._check_dataset_properties(dataset_properties) + # Since the pipeline will initialize the hyperparameters, it is not + # necessary to do this upon the construction of this object + # self.set_hyperparameters(self.configuration) + self.choice: Optional[autoPyTorchComponent] = None + + def get_fit_requirements(self) -> Optional[List[FitRequirement]]: + if self.choice is not None: + return self.choice.get_fit_requirements() + else: + raise AttributeError("Expected choice attribute to be autoPyTorchComponent" + " but got None, to get fit requirements for {}, " + "call get_fit_requirements of the component".format(self.__class__.__name__)) + + def get_components(cls: 'autoPyTorchChoice') -> Dict[str, autoPyTorchComponent]: + """Returns and ordered dict with the components available + for current step. 
+ + Args: + cls (autoPyTorchChoice): The choice object from which to query the valid + components + + Returns: + Dict[str, autoPyTorchComponent]: The available components via a mapping + from the module name to the component class + + """ + raise NotImplementedError() + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> Dict[str, autoPyTorchComponent]: + """ + Wrapper over get components to incorporate include/exclude + user specification + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + + Results: + Dict[str, autoPyTorchComponent]: A dictionary with valid components for this + choice object + + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + if 'issparse' in dataset_properties: + if dataset_properties['issparse'] and \ + not available_comp[name].get_properties(dataset_properties)['handles_sparse']: + continue + components_dict[name] = available_comp[name] + + return components_dict + + def set_hyperparameters(self, + configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None + ) -> 'autoPyTorchChoice': + """ + Applies a configuration to the given component. + This method translate a hierarchical configuration key, + to an actual parameter of the autoPyTorch component. + + Args: + configuration (Configuration): which configuration to apply to + the chosen component + init_params (Optional[Dict[str, any]]): Optional arguments to + initialize the chosen component + + Returns: + self: returns an instance of self + """ + new_params = {} + + params = configuration.get_dictionary() + choice = params['__choice__'] + del params['__choice__'] + + for param, value in params.items(): + param = param.replace(choice + ':', '') + new_params[param] = value + + if init_params is not None: + for param, value in init_params.items(): + param = param.replace(choice + ':', '') + new_params[param] = value + + new_params['random_state'] = self.random_state + + self.new_params = new_params + self.choice = self.get_components()[choice](**new_params) + + return self + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default: (Optional[str]) : Default component to use in hyperparameters + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. 
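# Editorial sketch (not part of the diff): the flat parameter layout consumed by
# autoPyTorchChoice.set_hyperparameters (above). '__choice__' names the selected
# component and the remaining keys carry that component's name as a prefix; the
# hyperparameter 'some_param' used here is hypothetical.
params = {
    '__choice__': 'ImageNormalizer',
    'ImageNormalizer:some_param': 0.5,
}
choice_name = params.pop('__choice__')
component_kwargs = {k.replace(choice_name + ':', '', 1): v for k, v in params.items()}
assert component_kwargs == {'some_param': 0.5}
# The choice object then builds the component roughly as
# get_components()[choice_name](**component_kwargs, random_state=...)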
+ exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + raise NotImplementedError() + + def fit(self, X: Dict[str, Any], y: Any) -> autoPyTorchComponent: + """Handy method to check if a component is fitted + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + """ + # Allows to use check_is_fitted on the choice object + self.fitted_ = True + assert self.choice is not None, "Cannot call fit without initializing the component" + return self.choice.fit(X, y) + + def predict(self, X: np.ndarray) -> np.ndarray: + """Predicts the target given an input, by using the chosen component + + Args: + X (np.ndarray): input features from which to predict the target + + Returns: + np.ndarray: the predicted target + """ + assert self.choice is not None, "Cannot call predict without initializing the component" + return self.choice.predict(X) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the current choice in the fit dictionary + Args: + X (Dict[str, Any]): fit dictionary + + Returns: + (Dict[str, Any]) + """ + assert self.choice is not None, "Can not call transform without initialising the component" + return self.choice.transform(X) + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + assert isinstance(X, dict), "The input X to the pipeline must be a dictionary" + + if y is not None: + warnings.warn("Provided y argument, yet only X is required") + + def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None: + """ + A mechanism in code to ensure the correctness of the initialised dataset properties. 
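# Editorial sketch (not part of the diff): the fit-dictionary message passing used
# by these components. transform() receives the dict X and returns it augmented
# with whatever later pipeline stages need; the key 'normalise' mirrors
# BaseNormalizer.transform further below, while DummyStep itself is hypothetical.
class DummyStep:
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X.update({'normalise': self})   # hand the fitted step to downstream components
        return X

X = {'dataset_properties': {}}
X = DummyStep().fit(X).transform(X)
assert 'normalise' in X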
+ Args: + dataset_properties: + + """ + assert isinstance(dataset_properties, dict), "dataset_properties must be a dictionary" diff --git a/autoPyTorch/pipeline/components/base_component.py b/autoPyTorch/pipeline/components/base_component.py new file mode 100644 index 000000000..7918bca2f --- /dev/null +++ b/autoPyTorch/pipeline/components/base_component.py @@ -0,0 +1,254 @@ +import importlib +import inspect +import pkgutil +import sys +import warnings +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace + +from sklearn.base import BaseEstimator + +from autoPyTorch.utils.common import FitRequirement + + +def find_components( + package: str, + directory: str, + base_class: BaseEstimator +) -> Dict[str, BaseEstimator]: + """Utility to find component on a given directory, + that inherit from base_class + Args: + package (str): The associated package that contains the components + directory (str): The directory from which to extract the components + base_class (BaseEstimator): base class to filter out desired components + that don't inherit from this class + """ + components = OrderedDict() + + for module_loader, module_name, ispkg in pkgutil.iter_modules([directory]): + full_module_name = "%s.%s" % (package, module_name) + if full_module_name not in sys.modules and not ispkg: + module = importlib.import_module(full_module_name) + + for member_name, obj in inspect.getmembers(module): + if inspect.isclass(obj) and issubclass(obj, base_class) and \ + obj != base_class: + # TODO test if the obj implements the interface + # Keep in mind that this only instantiates the ensemble_wrapper, + # but not the real target classifier + classifier = obj + components[module_name] = classifier + + return components + + +class ThirdPartyComponents(object): + """ + This class allow the user to create a new component for any stage of the pipeline. + Inheriting from the base class of each component does not provide any checks, + to make sure that the hyperparameter space is properly specified. + + This class ensures the minimum component checking for the configuration + space to work. + + Args: + base_class (BaseEstimator) component type desired to be created + """ + + def __init__(self, base_class: BaseEstimator) -> None: + self.base_class = base_class + self.components = OrderedDict() # type: Dict[str, BaseEstimator] + + def add_component(self, obj: BaseEstimator) -> None: + if inspect.isclass(obj) and self.base_class in obj.__bases__: + name = obj.__name__ + classifier = obj + else: + raise TypeError('add_component works only with a subclass of %s' % + str(self.base_class)) + + properties = set(classifier.get_properties()) + class_specific_properties = classifier.get_required_properties() + # TODO: Add desired properties when we define them + should_be_there = {'shortname', 'name'} + if class_specific_properties is not None: + should_be_there = should_be_there.union(class_specific_properties) + for property in properties: + if property not in should_be_there: + raise ValueError('Property %s must not be specified for ' + 'algorithm %s. 
Only the following properties ' + 'can be specified: %s' % + (property, name, str(should_be_there))) + for property in should_be_there: + if property not in properties: + raise ValueError('Property %s not specified for algorithm %s' % + (property, name)) + + self.components[name] = classifier + + +class autoPyTorchComponent(BaseEstimator): + + _required_properties: Optional[List[str]] = None + + def __init__(self) -> None: + super().__init__() + self._fit_requirements: List[FitRequirement] = list() + + @classmethod + def get_required_properties(cls) -> Optional[List[str]]: + """ + Function to get the properties in the component + that are required for the properly fitting the pipeline. + Usually defined in the base class of the component + Returns: + List[str]: list of properties autopytorch component must have for proper functioning of the pipeline + """ + return cls._required_properties + + def get_fit_requirements(self) -> Optional[List[FitRequirement]]: + """ + Function to get the required keys by the component + that need to be in the fit dictionary + Returns: + List[FitRequirement]: a list containing required keys + in a named tuple (name: str, type: object) + """ + return self._fit_requirements + + def add_fit_requirements(self, requirements: List[FitRequirement]) -> None: + if self._fit_requirements is not None: + self._fit_requirements.extend(requirements) + else: + self._fit_requirements = requirements + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + """Get the properties of the underlying algorithm. + + Args: + dataset_properties (Optional[Dict[str, Union[str, int]]): Describes the dataset + to work on + Returns: + Dict[str, Any]: Properties of the algorithm + """ + raise NotImplementedError() + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + """Return the configuration space of this classification algorithm. + + Args: + dataset_properties (Optional[Dict[str, Union[str, int]]): Describes the dataset + to work on + + Returns: + ConfigurationSpace: The configuration space of this algorithm. + """ + raise NotImplementedError() + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: + """The fit function calls the fit function of the underlying + model and returns `self`. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + y (Any): Not Used -- to comply with API + + Returns: + self : returns an instance of self. + Notes + ----- + Please see the `scikit-learn API documentation + `_ for further information.""" + raise NotImplementedError() + + def set_hyperparameters(self, + configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None + ) -> BaseEstimator: + """ + Applies a configuration to the given component. + This method translate a hierarchical configuration key, + to an actual parameter of the autoPyTorch component. 
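# Editorial sketch (not part of the diff): the registration flow enforced by
# ThirdPartyComponents (above). 'MyComponent' is hypothetical; a real add-on would
# subclass one of the concrete component base classes and be registered through
# the add_* helper of that component family (e.g. add_normalizer further below).
from autoPyTorch.pipeline.components.base_component import (
    ThirdPartyComponents,
    autoPyTorchComponent,
)

class MyComponent(autoPyTorchComponent):
    @staticmethod
    def get_properties(dataset_properties=None):
        # must cover every property from get_required_properties() plus shortname/name
        return {'shortname': 'my_comp', 'name': 'My Component'}

third_party = ThirdPartyComponents(base_class=autoPyTorchComponent)
third_party.add_component(MyComponent)   # now listed under the key 'MyComponent'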
+ + Args: + configuration (Configuration): which configuration to apply to + the chosen component + init_params (Optional[Dict[str, any]]): Optional arguments to + initialize the chosen component + + Returns: + An instance of self + """ + params = configuration.get_dictionary() + + for param, value in params.items(): + if not hasattr(self, param): + raise ValueError('Cannot set hyperparameter %s for %s because ' + 'the hyperparameter does not exist.' % + (param, str(self))) + setattr(self, param, value) + + if init_params is not None: + for param, value in init_params.items(): + if not hasattr(self, param): + raise ValueError('Cannot set init param %s for %s because ' + 'the init param does not exist.' % + (param, str(self))) + setattr(self, param, value) + + return self + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + assert isinstance(X, dict), "The input X to the pipeline must be a dictionary" + + if y is not None: + warnings.warn("Provided y argument, yet only X is required") + if 'dataset_properties' not in X: + raise ValueError( + "To fit a pipeline, expected fit dictionary to have a dataset_properties key") + + for requirement in self._fit_requirements: + check_dict = X['dataset_properties'] if requirement.dataset_property else X + if requirement.name not in check_dict.keys(): + if requirement.name in ['X_train', 'backend']: + if 'X_train' in check_dict.keys() or 'backend' in check_dict.keys(): + continue + else: + raise ValueError( + "To fit {}, expected fit dictionary to have '{}'" + " but got \n {}".format( + self.__class__.__name__, + requirement.name, list(check_dict.keys()))) + else: + TYPE_SUPPORTED = isinstance(check_dict[requirement.name], tuple(requirement.supported_types)) + if not TYPE_SUPPORTED: + raise TypeError("Expected {} to be instance of {} got {}" + .format(requirement.name, + requirement.supported_types, + type(check_dict[requirement.name]))) + + def __str__(self) -> str: + """Representation of the current Component""" + name = self.get_properties()['name'] + return "autoPyTorch.pipeline %s" % name diff --git a/autoPyTorch/components/optimizer/__init__.py b/autoPyTorch/pipeline/components/preprocessing/__init__.py similarity index 100% rename from autoPyTorch/components/optimizer/__init__.py rename to autoPyTorch/pipeline/components/preprocessing/__init__.py diff --git a/autoPyTorch/pipeline/components/preprocessing/base_preprocessing.py b/autoPyTorch/pipeline/components/preprocessing/base_preprocessing.py new file mode 100644 index 000000000..1bcbc5a51 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/base_preprocessing.py @@ -0,0 +1,69 @@ +from typing import Any, Dict, Optional, Union + +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +import pandas as pd + +from scipy.sparse import csr_matrix + +import torch + +from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent +from autoPyTorch.utils.backend import Backend +from autoPyTorch.utils.common import FitRequirement + + +class autoPyTorchPreprocessingComponent(autoPyTorchComponent): + """ + Provides abstract 
interface for preprocessing algorithms in AutoPyTorch. + """ + def __init__(self) -> None: + super().__init__() + self.add_fit_requirements([ + FitRequirement('X_train', + (np.ndarray, pd.DataFrame, csr_matrix), + user_defined=True, dataset_property=False), + FitRequirement('backend', + (Backend, ), + user_defined=True, dataset_property=False)]) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the fitted early_preprocessor into the 'X' dictionary and returns it. + Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + (Dict[str, Any]): the updated 'X' dictionary + """ + raise NotImplementedError() + + def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + """ + Makes the autoPyTorchPreprocessingComponent Callable. Calling the component + calls the transform function of the underlying early_preprocessor and + returns the transformed array. + Args: + X (Union[np.ndarray, torch.tensor]): input data tensor + + Returns: + Union[np.ndarray, torch.tensor]: Transformed data tensor + """ + raise NotImplementedError() + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + """Return the configuration space of this classification algorithm. + + Args: + dataset_properties (Optional[Dict[str, Union[str, int]]): Describes the dataset + to work on + + Returns: + ConfigurationSpace: The configuration space of this algorithm. + """ + return ConfigurationSpace() diff --git a/autoPyTorch/components/preprocessing/__init__.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/__init__.py similarity index 100% rename from autoPyTorch/components/preprocessing/__init__.py rename to autoPyTorch/pipeline/components/preprocessing/image_preprocessing/__init__.py diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/base_image_preprocessor.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/base_image_preprocessor.py new file mode 100644 index 000000000..1d4a39d6f --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/base_image_preprocessor.py @@ -0,0 +1,22 @@ +from typing import Any, Dict, Optional + +from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchPreprocessingComponent + + +class autoPyTorchImagePreprocessingComponent(autoPyTorchPreprocessingComponent): + """ + Provides abstract interface for preprocessing algorithms in AutoPyTorch. + """ + + def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "autoPyTorchImagePreprocessingComponent": + """ + Initialises early_preprocessor and returns self. 
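# Editorial sketch (not part of the diff): the minimal fit dictionary an image
# preprocessing component expects. The array shapes and statistics are
# placeholders (channels-last so the element-wise (X - mean) / std of
# ImageNormalizer below broadcasts); 'mean'/'std' are the dataset properties
# required by BaseNormalizer, and 'X_train' satisfies the base requirement above.
import numpy as np

fit_dict = {
    'X_train': np.random.rand(8, 32, 32, 3),
    'dataset_properties': {
        'mean': np.array([0.485, 0.456, 0.406]),
        'std': np.array([0.229, 0.224, 0.225]),
    },
}
# normalizer = ImageNormalizer().fit(fit_dict)   # reads mean/std from dataset_properties
# X_norm = normalizer(fit_dict['X_train'])       # applies (X - mean) / std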
+ Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + autoPyTorchImagePreprocessingComponent: self + """ + self.check_requirements(X, y) + + return self diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py new file mode 100644 index 000000000..4327d6346 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/ImageNormalizer.py @@ -0,0 +1,53 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +import torch.tensor + +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer + + +class ImageNormalizer(BaseNormalizer): + + def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None + ): + super().__init__() + self.random_state = random_state + self.mean = None # type: Optional[np.ndarray] + self.std = None # type: Optional[np.ndarray] + + def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "ImageNormalizer": + """ + Initialises early_preprocessor and returns self. + Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + autoPyTorchImagePreprocessingComponent: self + """ + self.check_requirements(X, y) + self.mean = X['dataset_properties']['mean'] + self.std = X['dataset_properties']['std'] + return self + + def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + """ + Makes the autoPyTorchPreprocessingComponent Callable. Calling the component + calls the transform function of the underlying early_preprocessor and + returns the transformed array. + Args: + X (Union[np.ndarray, torch.tensor]): input data tensor + + Returns: + Union[np.ndarray, torch.tensor]: Transformed data tensor + """ + X = (X - self.mean) / self.std + return X + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return { + 'shortname': 'normalize', + 'name': 'Image Normalizer Node', + } diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py new file mode 100644 index 000000000..7aeb83a9c --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/NoNormalizer.py @@ -0,0 +1,56 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +import torch.tensor + +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import ( + BaseNormalizer + +) + + +class NoNormalizer(BaseNormalizer): + def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None + ): + super().__init__() + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Optional[Any] = None) -> "NoNormalizer": + """ + Initialises early_preprocessor and returns self. + Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + autoPyTorchImagePreprocessingComponent: self + """ + self.check_requirements(X, y) + + return self + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + + X.update({'normalise': self}) + return X + + def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + """ + Makes the autoPyTorchPreprocessingComponent Callable. 
Calling the component + calls the transform function of the underlying early_preprocessor and + returns the transformed array. + Args: + X (Union[np.ndarray, torch.tensor]): input data tensor + + Returns: + Union[np.ndarray, torch.tensor]: Transformed data tensor + """ + return X + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return { + 'shortname': 'no-normalize', + 'name': 'No Normalizer Node', + } diff --git a/autoPyTorch/components/preprocessing/image_preprocessing/__init__.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/__init__.py similarity index 100% rename from autoPyTorch/components/preprocessing/image_preprocessing/__init__.py rename to autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/__init__.py diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer.py new file mode 100644 index 000000000..2ea12fae8 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer.py @@ -0,0 +1,46 @@ +from typing import Any, Dict + +import numpy as np + +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.base_image_preprocessor import \ + autoPyTorchImagePreprocessingComponent +from autoPyTorch.utils.common import FitRequirement + + +class BaseNormalizer(autoPyTorchImagePreprocessingComponent): + + def __init__(self) -> None: + super(BaseNormalizer, self).__init__() + self.add_fit_requirements([ + FitRequirement('mean', (np.ndarray,), user_defined=True, dataset_property=True), + FitRequirement('std', (np.ndarray,), user_defined=True, dataset_property=True)]) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + + X.update({'normalise': self}) + return X + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. 
It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + super().check_requirements(X, y) + + if 0 in X['dataset_properties']['std']: + raise ZeroDivisionError("Can't normalise when std is zero") + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('random_state', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer_choice.py b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer_choice.py new file mode 100644 index 000000000..696601b4f --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/image_preprocessing/normalise/base_normalizer_choice.py @@ -0,0 +1,95 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer import BaseNormalizer + + +normalise_directory = os.path.split(__file__)[0] +_normalizers = find_components(__package__, + normalise_directory, + BaseNormalizer) + +_addons = ThirdPartyComponents(BaseNormalizer) + + +def add_normalizer(normalizer: BaseNormalizer) -> None: + _addons.add_component(normalizer) + + +class NormalizerChoice(autoPyTorchChoice): + """ + Allows for dynamically choosing encoding component at runtime + """ + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available normalizer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseNormalise components available + as choices for encoding the categorical columns + """ + components = OrderedDict() + components.update(_normalizers) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space(self, + dataset_properties: Optional[Dict[str, Any]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = dict() + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + available_preprocessors = self.get_available_components(dataset_properties=dataset_properties, + include=include, + exclude=exclude) + + if len(available_preprocessors) == 0: + raise ValueError("no image normalizers found, please add an image normalizer") + + if default is None: + defaults = ['ImageNormalizer', 'NoNormalizer'] + for default_ in defaults: + if default_ in available_preprocessors: + if include is not None and default_ not in include: + continue + if exclude is not None and default_ in exclude: + continue + default = default_ + break + + preprocessor = CSH.CategoricalHyperparameter('__choice__', + list(available_preprocessors.keys()), + default_value=default) + + cs.add_hyperparameter(preprocessor) + + # add only child hyperparameters of early_preprocessor choices + for name in preprocessor.choices: + 
preprocessor_configuration_space = available_preprocessors[name].\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': preprocessor, 'value': name} + cs.add_configuration_space(name, preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter) + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py new file mode 100644 index 000000000..e77c65be2 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py @@ -0,0 +1,94 @@ +from typing import Any, Dict, List, Optional, Union + +import numpy as np + +from sklearn.compose import ColumnTransformer, make_column_transformer +from sklearn.pipeline import make_pipeline + +import torch + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( + autoPyTorchTabularPreprocessingComponent +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers +from autoPyTorch.utils.common import FitRequirement + + +class TabularColumnTransformer(autoPyTorchTabularPreprocessingComponent): + + def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None): + super().__init__() + self.random_state = random_state + self.preprocessor: Optional[ColumnTransformer] = None + self.add_fit_requirements([ + FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True), + FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True)]) + + def get_column_transformer(self) -> ColumnTransformer: + """ + Get fitted column transformer that is wrapped around + the sklearn early_preprocessor. Can only be called if fit() + has been called on the object. 
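
The *Choice classes in this patch (NormalizerChoice above, and the encoder and scaler choices further down) all build their search space the same way: a categorical '__choice__' hyperparameter selects the component, and each component's own space is attached as a conditional child. A standalone ConfigSpace sketch of that pattern, with a made-up child hyperparameter:

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformFloatHyperparameter

cs = ConfigurationSpace()
choice = CategoricalHyperparameter('__choice__', ['ImageNormalizer', 'NoNormalizer'],
                                   default_value='ImageNormalizer')
cs.add_hyperparameter(choice)

# attach a (made-up) child space that is only active when its component is chosen
child = ConfigurationSpace()
child.add_hyperparameter(UniformFloatHyperparameter('dummy_param', lower=0.0, upper=1.0))
cs.add_configuration_space('ImageNormalizer', child,
                           parent_hyperparameter={'parent': choice, 'value': 'ImageNormalizer'})

print(cs.sample_configuration())
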
+ Returns: + BaseEstimator: Fitted sklearn column transformer + """ + if self.preprocessor is None: + raise AttributeError("{} can't return column transformer before transform is called" + .format(self.__class__.__name__)) + return self.preprocessor + + def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer": + """ + Creates a column transformer for the chosen tabular + preprocessors + Args: + X (Dict[str, Any]): fit dictionary + + Returns: + "TabularColumnTransformer": an instance of self + """ + self.check_requirements(X, y) + + numerical_pipeline = 'drop' + categorical_pipeline = 'drop' + + preprocessors = get_tabular_preprocessers(X) + if len(X['dataset_properties']['numerical_columns']): + numerical_pipeline = make_pipeline(*preprocessors['numerical']) + if len(X['dataset_properties']['categorical_columns']): + categorical_pipeline = make_pipeline(*preprocessors['categorical']) + + self.preprocessor = make_column_transformer( + (numerical_pipeline, X['dataset_properties']['numerical_columns']), + (categorical_pipeline, X['dataset_properties']['categorical_columns']), + remainder='passthrough' + ) + + # Where to get the data -- Prioritize X_train if any else + # get from backend + if 'X_train' in X: + X_train = X['X_train'] + else: + X_train = X['backend'].load_datamanager().train_tensors[0] + self.preprocessor.fit(X_train) + + return self + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the column transformer to fit dictionary + Args: + X (Dict[str, Any]): fit dictionary + + Returns: + X (Dict[str, Any]): updated fit dictionary + """ + X.update({'tabular_transformer': self}) + return X + + def __call__(self, X: Union[np.ndarray, torch.tensor]) -> Union[np.ndarray, torch.tensor]: + + if self.preprocessor is None: + raise ValueError("cant call {} without fitting the column transformer first." + .format(self.__class__.__name__)) + return self.preprocessor.transform(X) diff --git a/autoPyTorch/components/regularization/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/__init__.py similarity index 100% rename from autoPyTorch/components/regularization/__init__.py rename to autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/__init__.py diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/base_tabular_preprocessing.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/base_tabular_preprocessing.py new file mode 100644 index 000000000..6e7c2f8f1 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/base_tabular_preprocessing.py @@ -0,0 +1,45 @@ +from typing import Dict, List, Optional, Union + +from sklearn.base import BaseEstimator + +from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchPreprocessingComponent + + +class autoPyTorchTabularPreprocessingComponent(autoPyTorchPreprocessingComponent): + """ + Provides abstract interface for preprocessing algorithms in AutoPyTorch. + """ + _required_properties: List[str] = ['handles_sparse'] + + def __init__(self) -> None: + super().__init__() + self.preprocessor: Union[Dict[str, Optional[BaseEstimator]], BaseEstimator] = dict( + numerical=None, categorical=None) + + def get_preprocessor_dict(self) -> Dict[str, BaseEstimator]: + """ + Returns early_preprocessor dictionary containing the sklearn numerical + and categorical early_preprocessor with "numerical" and "categorical" + keys. 
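
TabularColumnTransformer.fit above assembles exactly this sklearn construct: one make_pipeline per column type, combined via make_column_transformer. Below is a self-contained sketch with invented column indices and toy preprocessors (in the real component the pipelines come from the fitted imputer, scaler and encoder nodes):

import numpy as np
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

numerical_columns = [0, 1]        # invented column indices
categorical_columns = [2]

numerical_pipeline = make_pipeline(SimpleImputer(strategy='mean'), StandardScaler())
categorical_pipeline = make_pipeline(SimpleImputer(strategy='most_frequent'),
                                     OneHotEncoder(handle_unknown='ignore'))

preprocessor = make_column_transformer(
    (numerical_pipeline, numerical_columns),
    (categorical_pipeline, categorical_columns),
    remainder='passthrough',
)

X_train = np.array([[1.0, 2.0, 0.0],
                    [3.0, np.nan, 1.0],
                    [5.0, 6.0, 0.0]])
print(preprocessor.fit_transform(X_train).shape)   # (3, 4): 2 scaled + 2 one-hot columns
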
May contain None for a key if early_preprocessor does not + handle the datatype defined by key + + Returns: + Dict[str, BaseEstimator]: early_preprocessor dictionary + """ + if (self.preprocessor['numerical'] and self.preprocessor['categorical']) is None: + raise AttributeError("{} can't return early_preprocessor dict without fitting first" + .format(self.__class__.__name__)) + return self.preprocessor + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('early_preprocessor', None) + info.pop('column_transformer', None) + info.pop('random_state', None) + info.pop('_fit_requirements', None) + if len(info.keys()) != 0: + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py new file mode 100644 index 000000000..3ea9d4f94 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py @@ -0,0 +1,51 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder + + +class NoEncoder(BaseEncoder): + """ + Don't perform encoding on categorical features + """ + def __init__(self, + random_state: Optional[Union[np.random.RandomState, int]] = None + ): + super().__init__() + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEncoder: + """ + The fit function calls the fit function of the underlying model + and returns the transformed array. + Args: + X (np.ndarray): input features + y (Optional[np.ndarray]): input labels + + Returns: + instance of self + """ + self.check_requirements(X, y) + + return self + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the self into the 'X' dictionary and returns it. 
+ Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + (Dict[str, Any]): the updated 'X' dictionary + """ + X.update({'encoder': self.preprocessor}) + return X + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'NoEncoder', + 'name': 'No Encoder', + 'handles_sparse': True + } diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OneHotEncoder.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OneHotEncoder.py new file mode 100644 index 000000000..8a781a986 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OneHotEncoder.py @@ -0,0 +1,32 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +from sklearn.preprocessing import OneHotEncoder as OHE + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder + + +class OneHotEncoder(BaseEncoder): + """ + Encode categorical features as a one-hot numerical array + """ + def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None): + super().__init__() + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEncoder: + + self.check_requirements(X, y) + + self.preprocessor['categorical'] = OHE(categories=X['dataset_properties']['categories'], + sparse=False, handle_unknown='error') + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'OneHotEncoder', + 'name': 'One Hot Encoder', + 'handles_sparse': False + } diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OrdinalEncoder.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OrdinalEncoder.py new file mode 100644 index 000000000..7b127f00a --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/OrdinalEncoder.py @@ -0,0 +1,31 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +from sklearn.preprocessing import OrdinalEncoder as OE + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder + + +class OrdinalEncoder(BaseEncoder): + """ + Encode categorical features as a one-hot numerical array + """ + def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None): + super().__init__() + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEncoder: + + self.check_requirements(X, y) + + self.preprocessor['categorical'] = OE(categories=X['dataset_properties']['categories']) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'OrdinalEncoder', + 'name': 'Ordinal Encoder', + 'handles_sparse': False + } diff --git a/autoPyTorch/components/training/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/__init__.py similarity index 100% rename from autoPyTorch/components/training/__init__.py rename to autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/__init__.py diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py 
b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py new file mode 100644 index 000000000..eadc0a188 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py @@ -0,0 +1,32 @@ +from typing import Any, Dict, List + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( + autoPyTorchTabularPreprocessingComponent +) +from autoPyTorch.utils.common import FitRequirement + + +class BaseEncoder(autoPyTorchTabularPreprocessingComponent): + """ + Base class for encoder + """ + def __init__(self) -> None: + super().__init__() + self.add_fit_requirements([ + FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True), + FitRequirement('categories', (List,), user_defined=True, dataset_property=True)]) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the self into the 'X' dictionary and returns it. + Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + (Dict[str, Any]): the updated 'X' dictionary + """ + if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: + raise ValueError("cant call transform on {} without fitting first." + .format(self.__class__.__name__)) + X.update({'encoder': self.preprocessor}) + return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py new file mode 100644 index 000000000..019861c92 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder_choice.py @@ -0,0 +1,116 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder import BaseEncoder + + +encoding_directory = os.path.split(__file__)[0] +_encoders = find_components(__package__, + encoding_directory, + BaseEncoder) +_addons = ThirdPartyComponents(BaseEncoder) + + +def add_encoder(encoder: BaseEncoder) -> None: + _addons.add_component(encoder) + + +class EncoderChoice(autoPyTorchChoice): + """ + Allows for dynamically choosing encoding component at runtime + """ + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available encoder components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseEncoder components available + as choices for encoding the categorical columns + """ + components = OrderedDict() + components.update(_encoders) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space(self, + dataset_properties: Optional[Dict[str, Any]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = dict() + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + available_preprocessors = 
self.get_available_components(dataset_properties=dataset_properties, + include=include, + exclude=exclude) + + if len(available_preprocessors) == 0: + raise ValueError("no encoders found, please add a encoder") + + if default is None: + defaults = ['OneHotEncoder', 'OrdinalEncoder', 'NoEncoder'] + for default_ in defaults: + if default_ in available_preprocessors: + if include is not None and default_ not in include: + continue + if exclude is not None and default_ in exclude: + continue + default = default_ + break + + # add only no encoder to choice hyperparameters in case the dataset is only numerical + if len(dataset_properties['categorical_columns']) == 0: + default = 'NoEncoder' + preprocessor = CSH.CategoricalHyperparameter('__choice__', + ['NoEncoder'], + default_value=default) + else: + preprocessor = CSH.CategoricalHyperparameter('__choice__', + list(available_preprocessors.keys()), + default_value=default) + + cs.add_hyperparameter(preprocessor) + + # add only child hyperparameters of early_preprocessor choices + for name in preprocessor.choices: + preprocessor_configuration_space = available_preprocessors[name].\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': preprocessor, 'value': name} + cs.add_configuration_space(name, preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter) + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def _check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + Args: + dataset_properties: + + """ + super()._check_dataset_properties(dataset_properties) + assert 'numerical_columns' in dataset_properties.keys(), \ + "Dataset properties must contain information about numerical columns" + assert 'categorical_columns' in dataset_properties.keys(), \ + "Dataset properties must contain information about categorical columns" diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/SimpleImputer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/SimpleImputer.py new file mode 100644 index 000000000..4ae9d8d40 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/SimpleImputer.py @@ -0,0 +1,83 @@ +from typing import Any, Dict, Optional, Union + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter +) + +import numpy as np + +from sklearn.impute import SimpleImputer as SklearnSimpleImputer + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.base_imputer import BaseImputer + + +class SimpleImputer(BaseImputer): + """ + Impute missing values for categorical columns with '!missing!' + """ + + def __init__(self, + random_state: Optional[Union[np.random.RandomState, int]] = None, + numerical_strategy: str = 'mean', + categorical_strategy: str = 'most_frequent'): + super().__init__() + self.random_state = random_state + self.numerical_strategy = numerical_strategy + self.categorical_strategy = categorical_strategy + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImputer: + """ + The fit function calls the fit function of the underlying model + and returns the transformed array. 
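
The OneHotEncoder and OrdinalEncoder components defined earlier in this patch are thin wrappers that pass dataset_properties['categories'] to the corresponding sklearn encoders. A standalone sketch with an invented column and category list:

import numpy as np
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

column = np.array([['cat'], ['dog'], ['cat']])
categories = [['cat', 'dog', 'fish']]   # as supplied via dataset_properties['categories']

one_hot = OneHotEncoder(categories=categories, sparse=False, handle_unknown='error')
ordinal = OrdinalEncoder(categories=categories)

print(one_hot.fit_transform(column))    # rows: cat -> [1 0 0], dog -> [0 1 0], cat -> [1 0 0]
print(ordinal.fit_transform(column))    # rows: [0.], [1.], [0.]
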
+ Args: + X (np.ndarray): input features + y (Optional[np.ndarray]): input labels + + Returns: + instance of self + """ + self.check_requirements(X, y) + if len(X['dataset_properties']['categorical_columns']) != 0: + if self.categorical_strategy == 'constant_!missing!': + self.preprocessor['categorical'] = SklearnSimpleImputer(strategy='constant', + fill_value='!missing!', + copy=False) + else: + self.preprocessor['categorical'] = SklearnSimpleImputer(strategy=self.categorical_strategy, + copy=False) + if len(X['dataset_properties']['numerical_columns']) != 0: + if self.numerical_strategy == 'constant_zero': + self.preprocessor['numerical'] = SklearnSimpleImputer(strategy='constant', + fill_value=0, + copy=False) + else: + self.preprocessor['numerical'] = SklearnSimpleImputer(strategy=self.numerical_strategy, copy=False) + + return self + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, Any]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + assert dataset_properties is not None, "To create hyperparameter search space" \ + ", dataset_properties should not be None" + if len(dataset_properties['numerical_columns']) != 0: + numerical_strategy = CategoricalHyperparameter("numerical_strategy", + ["mean", "median", "most_frequent", "constant_zero"], + default_value="mean") + cs.add_hyperparameter(numerical_strategy) + + if len(dataset_properties['categorical_columns']) != 0: + categorical_strategy = CategoricalHyperparameter("categorical_strategy", + ["most_frequent", "constant_!missing!"], + default_value="most_frequent") + cs.add_hyperparameter(categorical_strategy) + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'SimpleImputer', + 'name': 'Simple Imputer', + 'handles_sparse': True + } diff --git a/autoPyTorch/components/training/image/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/__init__.py similarity index 100% rename from autoPyTorch/components/training/image/__init__.py rename to autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/__init__.py diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py new file mode 100644 index 000000000..b65f3c229 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py @@ -0,0 +1,33 @@ +from typing import Any, Dict, List + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( + autoPyTorchTabularPreprocessingComponent +) +from autoPyTorch.utils.common import FitRequirement + + +class BaseImputer(autoPyTorchTabularPreprocessingComponent): + """ + Provides abstract class interface for Imputers in AutoPyTorch + """ + + def __init__(self) -> None: + super().__init__() + self.add_fit_requirements([ + FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True), + FitRequirement('categorical_columns', (List,), user_defined=True, dataset_property=True)]) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds self into the 'X' dictionary and returns it. 
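
The 'constant_zero' and 'constant_!missing!' strategies above map onto sklearn's SimpleImputer with an explicit fill_value. The sketch below (toy columns invented) shows the resulting behaviour:

import numpy as np
from sklearn.impute import SimpleImputer as SklearnSimpleImputer

numerical = np.array([[1.0], [np.nan], [3.0]])
categorical = np.array([['red'], [np.nan], ['blue']], dtype=object)

num_imputer = SklearnSimpleImputer(strategy='constant', fill_value=0)
cat_imputer = SklearnSimpleImputer(strategy='constant', fill_value='!missing!')

print(num_imputer.fit_transform(numerical).ravel())     # [1. 0. 3.]
print(cat_imputer.fit_transform(categorical).ravel())   # ['red' '!missing!' 'blue']
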
+ Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + (Dict[str, Any]): the updated 'X' dictionary + """ + if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: + raise ValueError("cant call transform on {} without fitting first." + .format(self.__class__.__name__)) + X.update({'imputer': self.preprocessor}) + return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/MinMaxScaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/MinMaxScaler.py new file mode 100644 index 000000000..d94274108 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/MinMaxScaler.py @@ -0,0 +1,34 @@ +from typing import Any, Dict, Optional, Tuple, Union + +import numpy as np + +from sklearn.preprocessing import MinMaxScaler as SklearnMinMaxScaler + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler + + +class MinMaxScaler(BaseScaler): + """ + Scale numerical columns/features into feature_range + """ + def __init__(self, + random_state: Optional[Union[np.random.RandomState, int]] = None, + feature_range: Tuple[Union[int, float], Union[int, float]] = (0, 1)): + super().__init__() + self.random_state = random_state + self.feature_range = feature_range + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: + + self.check_requirements(X, y) + + self.preprocessor['numerical'] = SklearnMinMaxScaler(feature_range=self.feature_range, copy=False) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'MinMaxScaler', + 'name': 'MinMaxScaler', + 'handles_sparse': True + } diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py new file mode 100644 index 000000000..281e99d65 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py @@ -0,0 +1,54 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler + + +class NoScaler(BaseScaler): + """ + No scaling performed + """ + def __init__(self, + random_state: Optional[Union[np.random.RandomState, int]] = None + ): + super().__init__() + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: + """ + The fit function calls the fit function of the underlying model + and returns the transformed array. + Args: + X (np.ndarray): input features + y (Optional[np.ndarray]): input labels + + Returns: + instance of self + """ + + self.check_requirements(X, y) + + return self + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + The transform function calls the transform function of the + underlying model and returns the transformed array. 
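
The MinMaxScaler component above simply forwards its feature_range to sklearn. A minimal sketch of the default (0, 1) scaling on an invented column:

import numpy as np
from sklearn.preprocessing import MinMaxScaler as SklearnMinMaxScaler

X = np.array([[1.0], [5.0], [9.0]])        # invented numerical column
scaler = SklearnMinMaxScaler(feature_range=(0, 1))
print(scaler.fit_transform(X).ravel())     # [0.  0.5 1. ]
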
+ + Args: + X (np.ndarray): input features + + Returns: + np.ndarray: Transformed features + """ + X.update({'scaler': self.preprocessor}) + return X + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'NoScaler', + 'name': 'No Scaler', + 'handles_sparse': True + } diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/Normalizer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/Normalizer.py new file mode 100644 index 000000000..a28791b2c --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/Normalizer.py @@ -0,0 +1,52 @@ +from typing import Any, Dict, Optional, Union + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter +) + +import numpy as np + +from sklearn.preprocessing import Normalizer as SklearnNormalizer + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler + + +class Normalizer(BaseScaler): + """ + Normalises samples individually according to norm {mean_abs, mean_squared, max} + """ + + def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None, norm: str = 'mean_squared'): + """ + Args: + random_state (Optional[Union[np.random.RandomState, int]]): Determines random number generation for + subsampling and smoothing noise. + norm (str): {mean_abs, mean_squared, max} default: mean_squared + """ + super().__init__() + self.random_state = random_state + self.norm = norm + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: + + self.check_requirements(X, y) + + map_norm = dict({"mean_abs": "l1", "mean_squared": "l2", "max": "max"}) + self.preprocessor['numerical'] = SklearnNormalizer(norm=map_norm[self.norm], copy=False) + return self + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, Any]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + norm = CategoricalHyperparameter("norm", ["mean_abs", "mean_squared", "max"], default_value="mean_squared") + cs.add_hyperparameter(norm) + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'Normalizer', + 'name': 'Normalizer', + 'handles_sparse': True + } diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/StandardScaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/StandardScaler.py new file mode 100644 index 000000000..84eff7d9c --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/StandardScaler.py @@ -0,0 +1,38 @@ +from typing import Any, Dict, Optional, Union + +import numpy as np + +from sklearn.preprocessing import StandardScaler as SklearnStandardScaler + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler +from autoPyTorch.utils.common import FitRequirement + + +class StandardScaler(BaseScaler): + """ + Standardise numerical columns/features by removing mean and scaling to unit/variance + """ + def __init__(self, + random_state: Optional[Union[np.random.RandomState, int]] = None + ): + super().__init__() + self.random_state = random_state + self.add_fit_requirements([ + FitRequirement('issparse', (bool,), user_defined=True, 
dataset_property=True) + ]) + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler: + + self.check_requirements(X, y) + + with_mean, with_std = (False, False) if X['dataset_properties']['issparse'] else (True, True) + self.preprocessor['numerical'] = SklearnStandardScaler(with_mean=with_mean, with_std=with_std, copy=False) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, Union[str, bool]]: + return { + 'shortname': 'StandardScaler', + 'name': 'Standard Scaler', + 'handles_sparse': True + } diff --git a/autoPyTorch/components/training/image/checkpoints/__init__.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/__init__.py similarity index 100% rename from autoPyTorch/components/training/image/checkpoints/__init__.py rename to autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/__init__.py diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py new file mode 100644 index 000000000..39834dd2b --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py @@ -0,0 +1,32 @@ +from typing import Any, Dict, List + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import ( + autoPyTorchTabularPreprocessingComponent +) +from autoPyTorch.utils.common import FitRequirement + + +class BaseScaler(autoPyTorchTabularPreprocessingComponent): + """ + Provides abstract class interface for Scalers in AutoPytorch + """ + + def __init__(self) -> None: + super().__init__() + self.add_fit_requirements([ + FitRequirement('numerical_columns', (List,), user_defined=True, dataset_property=True)]) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the fitted scalar into the 'X' dictionary and returns it. + Args: + X (Dict[str, Any]): 'X' dictionary + + Returns: + (Dict[str, Any]): the updated 'X' dictionary + """ + if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None: + raise ValueError("cant call transform on {} without fitting first." 
+ .format(self.__class__.__name__)) + X.update({'scaler': self.preprocessor}) + return X diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py new file mode 100644 index 000000000..0c3357026 --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler_choice.py @@ -0,0 +1,109 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler + +scaling_directory = os.path.split(__file__)[0] +_scalers = find_components(__package__, + scaling_directory, + BaseScaler) + +_addons = ThirdPartyComponents(BaseScaler) + + +def add_scaler(scaler: BaseScaler) -> None: + _addons.add_component(scaler) + + +class ScalerChoice(autoPyTorchChoice): + """ + Allows for dynamically choosing scaling component at runtime + """ + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available scaler components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseScalers components available + as choices for scaling + """ + components = OrderedDict() + components.update(_scalers) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space(self, + dataset_properties: Optional[Dict[str, Any]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = dict() + + dataset_properties = {**self.dataset_properties, **dataset_properties} + + available_preprocessors = self.get_available_components(dataset_properties=dataset_properties, + include=include, + exclude=exclude) + + if len(available_preprocessors) == 0: + raise ValueError("no scalers found, please add a scaler") + + if default is None: + defaults = ['StandardScaler', 'Normalizer', 'MinMaxScaler', 'NoScaler'] + for default_ in defaults: + if default_ in available_preprocessors: + default = default_ + break + + # add only no scaler to choice hyperparameters in case the dataset is only categorical + if len(dataset_properties['numerical_columns']) == 0: + default = 'NoScaler' + preprocessor = CSH.CategoricalHyperparameter('__choice__', + ['NoScaler'], + default_value=default) + else: + preprocessor = CSH.CategoricalHyperparameter('__choice__', + list(available_preprocessors.keys()), + default_value=default) + cs.add_hyperparameter(preprocessor) + + # add only child hyperparameters of early_preprocessor choices + for name in preprocessor.choices: + preprocessor_configuration_space = available_preprocessors[name].\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': preprocessor, 'value': name} + cs.add_configuration_space(name, preprocessor_configuration_space, + parent_hyperparameter=parent_hyperparameter) + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def 
_check_dataset_properties(self, dataset_properties: Dict[str, Any]) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + Args: + dataset_properties: + + """ + super()._check_dataset_properties(dataset_properties) + assert 'numerical_columns' in dataset_properties.keys() and 'categorical_columns' in dataset_properties.keys(),\ + "Dataset properties must contain information about the type of columns" diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/utils.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/utils.py new file mode 100644 index 000000000..d67e9c61d --- /dev/null +++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/utils.py @@ -0,0 +1,29 @@ +from typing import Any, Dict, List + +from sklearn.base import BaseEstimator + + +def get_tabular_preprocessers(X: Dict[str, Any]) -> Dict[str, List[BaseEstimator]]: + """ + Expects fit_dictionary(X) to have numerical/categorical preprocessors + (fited numerical/categorical preprocessing nodes) that will build the + column transformer in the TabularColumnTransformer. This function + parses X and extracts such components. + Creates a dictionary with two keys, + numerical- containing list of numerical preprocessors + categorical- containing list of categorical preprocessors + Args: + X: fit dictionary + Returns: + (Dict[str, List[BaseEstimator]]): dictionary with list of numerical and categorical preprocessors + """ + preprocessor = dict(numerical=list(), categorical=list()) # type: Dict[str, List[BaseEstimator]] + for key, value in X.items(): + if isinstance(value, dict): + # as each preprocessor is child of BaseEstimator + if 'numerical' in value and isinstance(value['numerical'], BaseEstimator): + preprocessor['numerical'].append(value['numerical']) + if 'categorical' in value and isinstance(value['categorical'], BaseEstimator): + preprocessor['categorical'].append(value['categorical']) + + return preprocessor diff --git a/autoPyTorch/core/__init__.py b/autoPyTorch/pipeline/components/setup/__init__.py similarity index 100% rename from autoPyTorch/core/__init__.py rename to autoPyTorch/pipeline/components/setup/__init__.py diff --git a/autoPyTorch/core/hpbandster_extensions/__init__.py b/autoPyTorch/pipeline/components/setup/augmentation/__init__.py similarity index 100% rename from autoPyTorch/core/hpbandster_extensions/__init__.py rename to autoPyTorch/pipeline/components/setup/augmentation/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/GaussianBlur.py b/autoPyTorch/pipeline/components/setup/augmentation/image/GaussianBlur.py new file mode 100644 index 000000000..6a4ab0c27 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/GaussianBlur.py @@ -0,0 +1,51 @@ +from typing import Any, Dict, Optional, Union + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter + + +class GaussianBlur(BaseImageAugmenter): + def __init__(self, use_augmenter: bool = True, sigma_min: float = 0, sigma_offset: float = 0.5, + random_state: 
Optional[Union[int, np.random.RandomState]] = None): + super().__init__(use_augmenter=use_augmenter) + self.random_state = random_state + self.sigma = (sigma_min, sigma_min + sigma_offset) + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + if self.use_augmenter: + self.augmenter: Augmenter = iaa.GaussianBlur(sigma=self.sigma, name=self.get_properties()['name']) + + return self + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + use_augmenter = CategoricalHyperparameter('use_augmenter', choices=[True, False], default_value=True) + sigma_min = UniformFloatHyperparameter('sigma_min', lower=0, upper=3, default_value=0) + sigma_offset = UniformFloatHyperparameter('sigma_offset', lower=0, upper=3, default_value=0.5) + cs.add_hyperparameters([use_augmenter, sigma_min, sigma_offset]) + + # only add hyperparameters to configuration space if we are using the augmenter + cs.add_condition(CS.EqualsCondition(sigma_min, use_augmenter, True)) + cs.add_condition(CS.EqualsCondition(sigma_offset, use_augmenter, True)) + + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'GaussianBlur'} diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/GaussianNoise.py b/autoPyTorch/pipeline/components/setup/augmentation/image/GaussianNoise.py new file mode 100644 index 000000000..3f5be3173 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/GaussianNoise.py @@ -0,0 +1,46 @@ +from typing import Any, Dict, Optional, Union + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter + + +class GaussianNoise(BaseImageAugmenter): + def __init__(self, use_augmenter: bool = True, sigma_offset: float = 0.3, + random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__(use_augmenter=use_augmenter) + self.random_state = random_state + self.sigma = (0, sigma_offset) + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + if self.use_augmenter: + self.augmenter: Augmenter = iaa.AdditiveGaussianNoise(scale=self.sigma, name=self.get_properties()['name']) + return self + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + sigma_offset = UniformFloatHyperparameter('sigma_offset', lower=0, upper=3, default_value=0.3) + use_augmenter = CategoricalHyperparameter('use_augmenter', choices=[True, False], default_value=True) + cs.add_hyperparameters([use_augmenter, sigma_offset]) + # only add hyperparameters to configuration space if we are using the augmenter + cs.add_condition(CS.EqualsCondition(sigma_offset, use_augmenter, True)) + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'GaussianNoise'} diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/HorizontalFlip.py b/autoPyTorch/pipeline/components/setup/augmentation/image/HorizontalFlip.py new file mode 
100644 index 000000000..3077e3a79 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/HorizontalFlip.py @@ -0,0 +1,24 @@ +from typing import Any, Dict, Optional, Union + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter + + +class HorizontalFlip(BaseImageAugmenter): + def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__() + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + self.augmenter: Augmenter = iaa.Fliplr(p=0.5, name=self.get_properties()['name']) + + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'HorizontalFlip'} diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/ImageAugmenter.py b/autoPyTorch/pipeline/components/setup/augmentation/image/ImageAugmenter.py new file mode 100644 index 000000000..a718dec26 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/ImageAugmenter.py @@ -0,0 +1,144 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, Optional, Union + +from ConfigSpace.configuration_space import ( + Configuration, + ConfigurationSpace +) + +import imgaug.augmenters as iaa + +import numpy as np + +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + find_components, +) +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter + + +augmenter_directory = os.path.split(__file__)[0] +_augmenters = find_components(__package__, + augmenter_directory, + BaseImageAugmenter) +_addons = ThirdPartyComponents(BaseImageAugmenter) + + +def add_augmenter(augmenter: BaseImageAugmenter) -> None: + _addons.add_component(augmenter) + + +def get_components() -> Dict[str, BaseImageAugmenter]: + """Returns the available augmenter components + + Args: + None + + Returns: + Dict[str, BaseImageAugmenter]: all BaseImageAugmenter components available + as choices + """ + components = OrderedDict() + components.update(_augmenters) + components.update(_addons.components) + return components + + +class ImageAugmenter(BaseImageAugmenter): + + def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__() + self.available_augmenters = get_components() # type: Dict[str, BaseImageAugmenter] + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + # aggregate all the imgaug augmenters from the fitted augmenter component if they are set to use + fitted_augmenters = [augmenter.fit(X).get_image_augmenter() for _, augmenter in + self.available_augmenters.items() if augmenter.use_augmenter] + + # Create sequantial from the fitted augmenters augmenters + self.augmenter = iaa.Sequential(fitted_augmenters, name=self.get_properties()['name']) + return self + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + X.update({'image_augmenter': self}) + return X + + def set_hyperparameters(self, + configuration: Configuration, + init_params: Optional[Dict[str, Any]] = None + ) -> 'ImageAugmenter': + """ + Applies a configuration to the given component. + This method translate a hierarchical configuration key, + to an actual parameter of the autoPyTorch component. 
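
Each augmenter component above wraps a single imgaug augmenter, and ImageAugmenter.fit chains the enabled ones into an iaa.Sequential. A standalone imgaug sketch of that composition, using an invented uint8 batch and two of the augmenters shown:

import imgaug.augmenters as iaa
import numpy as np

images = np.random.randint(0, 255, size=(4, 32, 32, 3), dtype=np.uint8)   # invented batch

pipeline = iaa.Sequential([
    iaa.GaussianBlur(sigma=(0.0, 0.5)),   # what the GaussianBlur component builds
    iaa.Fliplr(p=0.5),                    # what HorizontalFlip builds
])

augmented = np.array(pipeline(images=images))   # same np.array conversion used by the base class
print(augmented.shape)                          # (4, 32, 32, 3)
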
+ + Args: + configuration (Configuration): which configuration to apply to + the chosen component + init_params (Optional[Dict[str, any]]): Optional arguments to + initialize the chosen component + + Returns: + self: returns an instance of self + """ + available_augmenters = get_components() + for name, augmenter in available_augmenters.items(): + new_params = {} + + params = configuration.get_dictionary() + + for param, value in params.items(): + if name in param: + param = param.replace(name, '').replace(':', '') + new_params[param] = value + + if init_params is not None: + for param, value in init_params.items(): + if name in param: + param = param.replace(name, '').replace(':', '') + new_params[param] = value + + new_params['random_state'] = self.random_state + + self.available_augmenters[name] = augmenter(**new_params) + + return self + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None) -> ConfigurationSpace: + cs = ConfigurationSpace() + + available_augmenters = get_components() # type: Dict[str, BaseImageAugmenter] + + if dataset_properties is None: + dataset_properties = dict() + + # add child hyperparameters + for name in available_augmenters.keys(): + preprocessor_configuration_space = available_augmenters[name].\ + get_hyperparameter_search_space(dataset_properties) + cs.add_configuration_space(name, preprocessor_configuration_space) + + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'ImageAugmenter'} + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.__class__.__name__ + info = vars(self) + augmenters = list() + for augmenter in info['augmenter']: + augmenters.append(augmenter.name) + info['augmenters'] = augmenters + # Remove unwanted info + info.pop('random_state', None) + info.pop('available_augmenters', None) + info.pop('augmenter', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/RandomAffine.py b/autoPyTorch/pipeline/components/setup/augmentation/image/RandomAffine.py new file mode 100644 index 000000000..01d6f16e5 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/RandomAffine.py @@ -0,0 +1,67 @@ +from typing import Any, Dict, Optional, Union + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter + + +class RandomAffine(BaseImageAugmenter): + def __init__(self, use_augmenter: bool = True, scale_offset: float = 0.2, + translate_percent_offset: float = 0.3, shear: int = 30, + rotate: int = 45, random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__(use_augmenter=use_augmenter) + self.random_state = random_state + self.scale = (1, 1 - scale_offset) + self.translate_percent = (0, translate_percent_offset) + self.shear = (-shear, shear) + self.rotate = (-rotate, rotate) + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + if self.use_augmenter: + self.augmenter: Augmenter = iaa.Affine(scale=self.scale, 
translate_percent=self.translate_percent, + rotate=self.rotate, shear=self.shear, mode='symmetric', + name=self.get_properties()['name']) + + return self + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + scale_offset = UniformFloatHyperparameter('scale_offset', lower=0, upper=0.4, default_value=0.2) + + translate_percent_offset = UniformFloatHyperparameter('translate_percent_offset', lower=0, upper=0.4, + default_value=0.2) + shear = UniformIntegerHyperparameter('shear', lower=0, upper=45, default_value=30) + rotate = UniformIntegerHyperparameter('rotate', lower=0, upper=360, default_value=45) + + use_augmenter = CategoricalHyperparameter('use_augmenter', choices=[True, False], default_value=True) + + cs.add_hyperparameters([scale_offset, translate_percent_offset]) + cs.add_hyperparameters([shear, rotate, use_augmenter]) + + # only add hyperparameters to configuration space if we are using the augmenter + cs.add_condition(CS.EqualsCondition(scale_offset, use_augmenter, True)) + cs.add_condition(CS.EqualsCondition(translate_percent_offset, use_augmenter, True)) + cs.add_condition(CS.EqualsCondition(shear, use_augmenter, True)) + cs.add_condition(CS.EqualsCondition(rotate, use_augmenter, True)) + + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'RandomAffine'} diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/RandomCutout.py b/autoPyTorch/pipeline/components/setup/augmentation/image/RandomCutout.py new file mode 100644 index 000000000..4a12bbdef --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/RandomCutout.py @@ -0,0 +1,49 @@ +from typing import Any, Dict, Optional, Union + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, +) + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter + + +class RandomCutout(BaseImageAugmenter): + def __init__(self, use_augmenter: bool = True, p: float = 0.5, + random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__(use_augmenter=use_augmenter) + self.p = p + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + if self.use_augmenter: + self.augmenter: Augmenter = iaa.Sometimes(self.p, iaa.Cutout(nb_iterations=(1, 10), size=(0.1, 0.5), + random_state=self.random_state), + name=self.get_properties()['name']) + return self + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + p = UniformFloatHyperparameter('p', lower=0.2, upper=1, default_value=0.5) + use_augmenter = CategoricalHyperparameter('use_augmenter', choices=[True, False], default_value=True) + cs.add_hyperparameters([p, use_augmenter]) + + # only add hyperparameters to configuration space if we are using the augmenter + cs.add_condition(CS.EqualsCondition(p, use_augmenter, True)) + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'RandomCutout'} diff --git 
a/autoPyTorch/pipeline/components/setup/augmentation/image/Resize.py b/autoPyTorch/pipeline/components/setup/augmentation/image/Resize.py new file mode 100644 index 000000000..7ee10d8d3 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/Resize.py @@ -0,0 +1,49 @@ +from typing import Any, Dict, Optional, Union + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, +) + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter +from autoPyTorch.utils.common import FitRequirement + + +class Resize(BaseImageAugmenter): + + def __init__(self, use_augmenter: bool = True, + random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__(use_augmenter=use_augmenter) + self.random_state = random_state + self.add_fit_requirements([ + FitRequirement('image_height', (int,), user_defined=True, dataset_property=True), + FitRequirement('image_width', (int,), user_defined=True, dataset_property=True)]) + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + self.check_requirements(X, y) + if self.use_augmenter: + self.augmenter: Augmenter = iaa.Resize(size=(X['dataset_properties']['image_height'], + X['dataset_properties']['image_width']), + interpolation='linear', name=self.get_properties()['name']) + + return self + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + use_augmenter = CategoricalHyperparameter('use_augmenter', choices=[True, False], default_value=True) + cs.add_hyperparameters([use_augmenter]) + + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'Resize'} diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/VerticalFlip.py b/autoPyTorch/pipeline/components/setup/augmentation/image/VerticalFlip.py new file mode 100644 index 000000000..ad19b5519 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/VerticalFlip.py @@ -0,0 +1,24 @@ +from typing import Any, Dict, Optional, Union + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter + + +class VerticalFlip(BaseImageAugmenter): + def __init__(self, random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__() + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + self.augmenter: Augmenter = iaa.Flipud(p=0.5, name=self.get_properties()['name']) + + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'VerticalFlip'} diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/ZeroPadAndCrop.py b/autoPyTorch/pipeline/components/setup/augmentation/image/ZeroPadAndCrop.py new file mode 100644 index 000000000..bf8ce63fe --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/ZeroPadAndCrop.py @@ -0,0 +1,55 @@ +from typing import Any, Dict, Optional, Union + +from ConfigSpace.configuration_space import ConfigurationSpace +from 
ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter +) + +import imgaug.augmenters as iaa +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.augmentation.image.base_image_augmenter import BaseImageAugmenter +from autoPyTorch.utils.common import FitRequirement + + +class ZeroPadAndCrop(BaseImageAugmenter): + + def __init__(self, percent: float = 0.1, + random_state: Optional[Union[int, np.random.RandomState]] = None): + super().__init__() + self.random_state = random_state + self.percent = percent + self.pad_augmenter: Optional[Augmenter] = None + self.crop_augmenter: Optional[Augmenter] = None + self.add_fit_requirements([ + FitRequirement('image_height', (int,), user_defined=True, dataset_property=True), + FitRequirement('image_width', (int,), user_defined=True, dataset_property=True)]) + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseImageAugmenter: + self.check_requirements(X, y) + self.pad_augmenter = iaa.Pad(percent=self.percent, keep_size=False) + self.crop_augmenter = iaa.CropToFixedSize(height=X['dataset_properties']['image_height'], + width=X['dataset_properties']['image_width']) + self.augmenter: Augmenter = iaa.Sequential([ + self.pad_augmenter, + self.crop_augmenter + ], name=self.get_properties()['name']) + + return self + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + percent = UniformFloatHyperparameter('percent', lower=0, upper=0.5, default_value=0.1) + cs.add_hyperparameters([percent]) + return cs + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None + ) -> Dict[str, Any]: + return {'name': 'ZeroPadAndCrop'} diff --git a/autoPyTorch/core/presets/__init__.py b/autoPyTorch/pipeline/components/setup/augmentation/image/__init__.py similarity index 100% rename from autoPyTorch/core/presets/__init__.py rename to autoPyTorch/pipeline/components/setup/augmentation/image/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/augmentation/image/base_image_augmenter.py b/autoPyTorch/pipeline/components/setup/augmentation/image/base_image_augmenter.py new file mode 100644 index 000000000..6702e5e42 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/augmentation/image/base_image_augmenter.py @@ -0,0 +1,43 @@ +from typing import Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace + +from imgaug.augmenters.meta import Augmenter + +import numpy as np + +from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent + + +class BaseImageAugmenter(autoPyTorchSetupComponent): + def __init__(self, use_augmenter: bool = True) -> None: + super().__init__() + self.use_augmenter = use_augmenter + self.augmenter: Optional[Augmenter] = None + + def get_image_augmenter(self) -> Optional[Augmenter]: + """ + Get fitted augmenter. Can only be called if fit() + has been called on the object. + Returns: + BaseEstimator: Fitted augmentor + """ + if self.augmenter is None and self.use_augmenter: + raise AttributeError("Can't return augmenter for {}, as augmenter is " + "set to be used but it has not been fitted" + " yet".format(self.__class__.__name__)) + return self.augmenter + + def __call__(self, X: np.ndarray) -> np.ndarray: + if self.augmenter is None: + raise ValueError("cant call {} without fitting first." 
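For reference, a minimal self-contained sketch of how the imgaug augmenters assembled by these components behave outside the pipeline. The probability, the Cutout ranges, and the toy image batch are illustrative values taken from or inspired by the RandomCutout component above, not requirements; inside the pipeline the same call goes through BaseImageAugmenter.__call__, which also converts the result back to a numpy array.

import imgaug.augmenters as iaa
import numpy as np

# Roughly what RandomCutout.fit builds: apply Cutout with probability p
augmenter = iaa.Sometimes(0.5, iaa.Cutout(nb_iterations=(1, 10), size=(0.1, 0.5)),
                          name='RandomCutout')
images = np.random.randint(0, 255, size=(4, 32, 32, 3), dtype=np.uint8)  # toy batch (N, H, W, C)
augmented = np.array(augmenter(images=images))  # explicit np.array, as in BaseImageAugmenter.__call__
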
+ .format(self.__class__.__name__)) + # explicitly converting to np array as currently zeropadandcrop gives a list + return np.array(self.augmenter(images=X)) + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + return cs diff --git a/autoPyTorch/pipeline/components/setup/base_setup.py b/autoPyTorch/pipeline/components/setup/base_setup.py new file mode 100644 index 000000000..43bb41b56 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/base_setup.py @@ -0,0 +1,21 @@ +from typing import Any, Dict + +from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent + + +class autoPyTorchSetupComponent(autoPyTorchComponent): + """Provide an abstract interface for schedulers + in Auto-Pytorch""" + + def __init__(self) -> None: + super(autoPyTorchSetupComponent, self).__init__() + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the fitted component into the fit dictionary 'X' and returns it. + Args: + X (Dict[str, Any]): 'X' dictionary + Returns: + (Dict[str, Any]): the updated 'X' dictionary + """ + raise NotImplementedError() diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py new file mode 100644 index 000000000..ad6dfabd6 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py @@ -0,0 +1,68 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +import pandas as pd + +from scipy.sparse import csr_matrix + +from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent +from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import get_preprocess_transforms, preprocess +from autoPyTorch.utils.common import FitRequirement + + +class EarlyPreprocessing(autoPyTorchSetupComponent): + + def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None: + super().__init__() + self.random_state = random_state + self.add_fit_requirements([ + FitRequirement('is_small_preprocess', (bool,), user_defined=True, dataset_property=True), + FitRequirement('X_train', (np.ndarray, pd.DataFrame, csr_matrix), user_defined=True, + dataset_property=False)]) + + def fit(self, X: Dict[str, Any], y: Any = None) -> "EarlyPreprocessing": + self.check_requirements(X, y) + + return self + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + + transforms = get_preprocess_transforms(X) + + if X['dataset_properties']['is_small_preprocess']: + if 'X_train' in X: + X_train = X['X_train'] + else: + # Incorporate the transform to the dataset + X_train = X['backend'].load_datamanager().train_tensors[0] + + X['X_train'] = preprocess(dataset=X_train, transforms=transforms) + + # We need to also save the preprocess transforms for inference + X.update({'preprocess_transforms': transforms}) + return X + + @staticmethod + def get_hyperparameter_search_space( + dataset_properties: Optional[Dict[str, str]] = None + ) -> ConfigurationSpace: + return ConfigurationSpace() + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'EarlyPreprocessing', + 'name': 'Early Preprocessing Node', + } + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = 
self.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('random_state', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/core/presets/feature_classification/__init__.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/__init__.py similarity index 100% rename from autoPyTorch/core/presets/feature_classification/__init__.py rename to autoPyTorch/pipeline/components/setup/early_preprocessor/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/utils.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/utils.py new file mode 100644 index 000000000..3f7c1adb6 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/early_preprocessor/utils.py @@ -0,0 +1,39 @@ +import copy +from typing import Any, Dict, List + +import numpy as np + +from sklearn.utils import check_array + +import torchvision.transforms + +from autoPyTorch.pipeline.components.preprocessing.base_preprocessing import autoPyTorchPreprocessingComponent + + +def get_preprocess_transforms(X: Dict[str, Any]) -> torchvision.transforms.Compose: + candidate_transforms = list() # type: List[autoPyTorchPreprocessingComponent] + for key, value in X.items(): + if isinstance(value, autoPyTorchPreprocessingComponent): + candidate_transforms.append(copy.deepcopy(value)) + + return candidate_transforms + + +def preprocess(dataset: np.ndarray, transforms: torchvision.transforms.Compose, + indices: List[int] = None) -> np.ndarray: + + composite_transforms = torchvision.transforms.Compose(transforms) + if indices is None: + dataset = composite_transforms(dataset) + else: + dataset[indices, :] = composite_transforms(np.take(dataset, indices, axis=0)) + # In case the configuration space is so that no + # sklearn transformation is proposed, we perform + # check array to convert object to float + return check_array( + dataset, + force_all_finite=False, + accept_sparse='csr', + ensure_2d=False, + allow_nd=True, + ) diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingLR.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingLR.py new file mode 100644 index 000000000..870302747 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingLR.py @@ -0,0 +1,70 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformIntegerHyperparameter, +) + +import numpy as np + +import torch.optim.lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +class CosineAnnealingLR(BaseLRComponent): + """ + Set the learning rate of each parameter group using a cosine annealing schedule + + Args: + T_max (int): Maximum number of iterations. + + """ + def __init__( + self, + T_max: int, + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__() + self.T_max = T_max + self.random_state = random_state + self.scheduler = None # type: Optional[_LRScheduler] + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
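As a hedged illustration of the early-preprocessing path above: when 'is_small_preprocess' is set, the collected preprocessing components are chained with torchvision.transforms.Compose and applied to the whole training array once, roughly as preprocess() does in early_preprocessor/utils.py. The IdentityScaler below is a hypothetical stand-in for a fitted autoPyTorchPreprocessingComponent; any callable mapping an array to an array composes the same way.

import numpy as np
import torchvision.transforms

class IdentityScaler:
    # hypothetical stand-in for a fitted preprocessing component
    def __call__(self, X: np.ndarray) -> np.ndarray:
        return X

transforms = [IdentityScaler()]
composite = torchvision.transforms.Compose(transforms)
X_train = np.random.rand(100, 4)
X_train = composite(X_train)  # applied once up front for small datasets
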
To comply with sklearn API + + Returns: + An instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + + self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( + optimizer=X['optimizer'], + T_max=int(self.T_max) + ) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'CosineAnnealingLR', + 'name': 'Cosine Annealing LR', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + T_max = UniformIntegerHyperparameter( + "T_max", 10, 500, default_value=200) + cs = ConfigurationSpace() + cs.add_hyperparameters([T_max]) + return cs diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py new file mode 100644 index 000000000..fea2d30c4 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/CosineAnnealingWarmRestarts.py @@ -0,0 +1,78 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import UniformFloatHyperparameter, UniformIntegerHyperparameter + +import numpy as np + +import torch.optim.lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +class CosineAnnealingWarmRestarts(BaseLRComponent): + """ + Set the learning rate of each parameter group using a cosine annealing schedule, + where \eta_{max} is set to the initial lr, T_{cur} is the number of epochs + since the last restart and T_{i} is the number of epochs between two warm + restarts in SGDR + + Args: + T_0 (int): Number of iterations for the first restart + T_mult (int): A factor by which T_{i} increases after a restart + random_state (Optional[np.random.RandomState]): random state + """ + + def __init__( + self, + T_0: int, + T_mult: int, + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__() + self.T_0 = T_0 + self.T_mult = T_mult + self.random_state = random_state + self.scheduler = None # type: Optional[_LRScheduler] + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used.
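A minimal sketch of the plain PyTorch objects that the CosineAnnealingLR component above wires together. The toy model and SGD optimizer are placeholders for whatever the pipeline stores under X['optimizer']; T_max=200 is the default proposed by this diff.

from torch import nn, optim
from torch.optim import lr_scheduler

model = nn.Linear(10, 2)                           # placeholder model
optimizer = optim.SGD(model.parameters(), lr=0.1)  # stands in for X['optimizer']
scheduler = lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=200)
for epoch in range(3):
    optimizer.step()
    scheduler.step()                               # anneal the learning rate once per epoch
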
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + + self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts( + optimizer=X['optimizer'], + T_0=int(self.T_0), + T_mult=int(self.T_mult), + ) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'CosineAnnealingWarmRestarts', + 'name': 'Cosine Annealing WarmRestarts', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + T_0 = UniformIntegerHyperparameter( + "T_0", 1, 20, default_value=1) + T_mult = UniformFloatHyperparameter( + "T_mult", 1.0, 2.0, default_value=1.0) + cs = ConfigurationSpace() + cs.add_hyperparameters([T_0, T_mult]) + return cs diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/CyclicLR.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/CyclicLR.py new file mode 100644 index 000000000..8bbf5c237 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/CyclicLR.py @@ -0,0 +1,100 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) + +import numpy as np + +import torch.optim.lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +class CyclicLR(BaseLRComponent): + """ + Sets the learning rate of each parameter group according to cyclical learning rate + policy (CLR). The policy cycles the learning rate between two boundaries with a + constant frequency. + + Args: + base_lr (float): Initial learning rate which is the lower boundary in the + cycle for each parameter group. + mode (str): policy for the cycle + step_size_up (int): Number of training iterations in the increasing half of a cycle. + max_lr (float): Upper learning rate boundaries in the cycle for each parameter group. + In this implementation, to make sure max_lr>base_lr, max_lr is the increment from + base_lr. This simplifies the learning space + + """ + def __init__( + self, + base_lr: float, + mode: str, + step_size_up: int, + max_lr: float = 0.1, + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__() + self.base_lr = base_lr + self.mode = mode + self.max_lr = max_lr + self.step_size_up = step_size_up + self.random_state = random_state + self.scheduler = None # type: Optional[_LRScheduler] + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + + # No momentum to cycle in adam + cycle_momentum = True + if 'Adam' in X['optimizer'].__class__.__name__: + cycle_momentum = False + + self.scheduler = torch.optim.lr_scheduler.CyclicLR( + optimizer=X['optimizer'], + base_lr=float(self.base_lr), + max_lr=float(self.max_lr), + step_size_up=int(self.step_size_up), + cycle_momentum=cycle_momentum, + mode=self.mode, + ) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'CyclicLR', + 'name': 'CyclicLR', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + base_lr = UniformFloatHyperparameter( + "base_lr", 1e-6, 1e-1, default_value=0.01) + mode = CategoricalHyperparameter('mode', ['triangular', 'triangular2', 'exp_range']) + step_size_up = UniformIntegerHyperparameter( + "step_size_up", 1000, 4000, default_value=2000) + max_lr = UniformFloatHyperparameter( + "max_lr", 1e-3, 1e-1, default_value=0.1) + cs = ConfigurationSpace() + cs.add_hyperparameters([base_lr, mode, step_size_up, max_lr]) + return cs diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/ExponentialLR.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/ExponentialLR.py new file mode 100644 index 000000000..0e5584da7 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/ExponentialLR.py @@ -0,0 +1,71 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, +) + +import numpy as np + +import torch.optim.lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +class ExponentialLR(BaseLRComponent): + """ + Decays the learning rate of each parameter group by gamma every epoch. + When last_epoch=-1, sets initial lr as lr. + + Args: + gamma (float): Multiplicative factor of learning rate decay. + + """ + def __init__( + self, + gamma: float, + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__() + self.gamma = gamma + self.random_state = random_state + self.scheduler = None # type: Optional[_LRScheduler] + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
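To illustrate why the CyclicLR component above disables momentum cycling for Adam-style optimizers: Adam keeps no 'momentum' entry in its defaults, so PyTorch's CyclicLR cannot cycle it. A small sketch with illustrative boundary values follows.

from torch import nn, optim
from torch.optim import lr_scheduler

adam = optim.Adam(nn.Linear(5, 1).parameters(), lr=1e-3)
# cycle_momentum=False is required for Adam; SGD with momentum could keep it True
scheduler = lr_scheduler.CyclicLR(adam, base_lr=1e-4, max_lr=1e-2,
                                  step_size_up=2000, mode='triangular',
                                  cycle_momentum=False)
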
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + + self.scheduler = torch.optim.lr_scheduler.ExponentialLR( + optimizer=X['optimizer'], + gamma=float(self.gamma) + ) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'ExponentialLR', + 'name': 'ExponentialLR', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + gamma = UniformFloatHyperparameter( + "gamma", 0.7, 0.9999, default_value=0.9) + cs = ConfigurationSpace() + cs.add_hyperparameters([gamma]) + return cs diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/NoScheduler.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/NoScheduler.py new file mode 100644 index 000000000..a67fa46f1 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/NoScheduler.py @@ -0,0 +1,53 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +class NoScheduler(BaseLRComponent): + """ + Performs no scheduling via a LambdaLR with lambda==1. + + """ + def __init__( + self, + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__() + self.random_state = random_state + self.scheduler = None # type: Optional[_LRScheduler] + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'NoScheduler', + 'name': 'No LR Scheduling', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + return cs diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py new file mode 100644 index 000000000..0eb9dcbff --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/ReduceLROnPlateau.py @@ -0,0 +1,89 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter +) + +import numpy as np + +import torch.optim.lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +class ReduceLROnPlateau(BaseLRComponent): + """ + Reduce learning rate when a metric has stopped improving. Models often benefit from + reducing the learning rate by a factor of 2-10 once learning stagnates. This scheduler + reads a metrics quantity and if no improvement is seen for a ‘patience’ number of epochs, + the learning rate is reduced. + + Args: + mode (str): One of min, max. 
In min mode, lr will be reduced when the quantity + monitored has stopped decreasing; in max mode it will be reduced when + the quantity monitored has stopped increasing + factor (float): Factor by which the learning rate will be reduced. new_lr = lr * factor. + patience (int): Number of epochs with no improvement after which learning + rate will be reduced. + random_state (Optional[np.random.RandomState]): random state + """ + def __init__( + self, + mode: str, + factor: float, + patience: int, + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__() + self.mode = mode + self.factor = factor + self.patience = patience + self.random_state = random_state + self.scheduler = None # type: Optional[_LRScheduler] + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + + self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( + optimizer=X['optimizer'], + mode=self.mode, + factor=float(self.factor), + patience=int(self.patience), + ) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'ReduceLROnPlateau', + 'name': 'ReduceLROnPlateau', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + mode = CategoricalHyperparameter('mode', ['min', 'max']) + patience = UniformIntegerHyperparameter( + "patience", 5, 20, default_value=10) + factor = UniformFloatHyperparameter( + "factor", 0.01, 0.9, default_value=0.1) + cs = ConfigurationSpace() + cs.add_hyperparameters([mode, patience, factor]) + return cs diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/StepLR.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/StepLR.py new file mode 100644 index 000000000..8c94a38b6 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/StepLR.py @@ -0,0 +1,79 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, + UniformIntegerHyperparameter, +) + +import numpy as np + +import torch.optim.lr_scheduler +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +class StepLR(BaseLRComponent): + """ + Decays the learning rate of each parameter group by gamma every step_size epochs. + Notice that such decay can happen simultaneously with other changes to the learning + rate from outside this scheduler. When last_epoch=-1, sets initial lr as lr. + + Args: + step_size (int) – Period of learning rate decay. + gamma (float) – Multiplicative factor of learning rate decay. Default: 0.1. 
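A short usage sketch for the scheduler built by the ReduceLROnPlateau component: unlike the other schedulers in this package, its step() must be fed the monitored metric. The validation loss below is a placeholder; mode, factor and patience are the defaults proposed in this diff.

from torch import nn, optim
from torch.optim import lr_scheduler

optimizer = optim.SGD(nn.Linear(5, 1).parameters(), lr=0.1)
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
for epoch in range(3):
    val_loss = 1.0 / (epoch + 1)  # placeholder for the monitored validation metric
    scheduler.step(val_loss)      # step() takes the metric, not just the epoch
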
+ + """ + def __init__( + self, + step_size: int, + gamma: float, + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__() + self.gamma = gamma + self.step_size = step_size + self.random_state = random_state + self.scheduler = None # type: Optional[_LRScheduler] + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseLRComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + + self.scheduler = torch.optim.lr_scheduler.StepLR( + optimizer=X['optimizer'], + step_size=int(self.step_size), + gamma=float(self.gamma), + ) + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'StepLR', + 'name': 'StepLR', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + gamma = UniformFloatHyperparameter( + "gamma", 0.001, 0.9, default_value=0.1) + step_size = UniformIntegerHyperparameter( + "step_size", 1, 10, default_value=5) + cs = ConfigurationSpace() + cs.add_hyperparameters([gamma, step_size]) + return cs diff --git a/autoPyTorch/core/presets/feature_multilabel/__init__.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/__init__.py similarity index 100% rename from autoPyTorch/core/presets/feature_multilabel/__init__.py rename to autoPyTorch/pipeline/components/setup/lr_scheduler/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py new file mode 100644 index 000000000..221e4e9a5 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py @@ -0,0 +1,48 @@ +from typing import Any, Dict, Optional + +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler + +from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent +from autoPyTorch.utils.common import FitRequirement + + +class BaseLRComponent(autoPyTorchSetupComponent): + """Provide an abstract interface for schedulers + in Auto-Pytorch""" + + def __init__(self) -> None: + super().__init__() + self.scheduler = None # type: Optional[_LRScheduler] + + self.add_fit_requirements([ + FitRequirement('optimizer', (Optimizer,), user_defined=False, dataset_property=False)]) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + Adds the scheduler into the fit dictionary 'X' and returns it. + Args: + X (Dict[str, Any]): 'X' dictionary + Returns: + (Dict[str, Any]): the updated 'X' dictionary + """ + X.update({'lr_scheduler': self.scheduler}) + return X + + def get_scheduler(self) -> _LRScheduler: + """Return the underlying scheduler object. 
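A hedged sketch of the fit-dictionary protocol that BaseLRComponent implements: fit() reads the optimizer from X and builds the scheduler, and transform() writes it back under 'lr_scheduler'. The StepLR values are the defaults from this diff; the dictionary below mimics only the relevant keys.

from torch import nn, optim
from torch.optim import lr_scheduler

X = {'optimizer': optim.SGD(nn.Linear(4, 2).parameters(), lr=0.1)}  # minimal fit dictionary
scheduler = lr_scheduler.StepLR(X['optimizer'], step_size=5, gamma=0.1)  # what fit() builds
X.update({'lr_scheduler': scheduler})  # equivalent of BaseLRComponent.transform(X)
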
+ Returns: + scheduler : the underlying scheduler object + """ + assert self.scheduler is not None, "No scheduler was fit" + return self.scheduler + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.scheduler.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('scheduler', None) + info.pop('random_state', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler_choice.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler_choice.py new file mode 100644 index 000000000..a12c8abad --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler_choice.py @@ -0,0 +1,175 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler import BaseLRComponent + + +directory = os.path.split(__file__)[0] +_schedulers = find_components(__package__, + directory, + BaseLRComponent) +_addons = ThirdPartyComponents(BaseLRComponent) + + +def add_scheduler(scheduler: BaseLRComponent) -> None: + _addons.add_component(scheduler) + + +class SchedulerChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available scheduler components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all baseScheduler components available + as choices for learning rate scheduling + """ + components = OrderedDict() + components.update(_schedulers) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of learning + rate schedulers + + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == SchedulerChoice or hasattr(entry, 'get_components'): + continue + + # target_type = 
dataset_properties['target_type'] + # Apply some automatic filtering here for + # schedulers based on the dataset! + # TODO: Think if there is any case where a scheduler + # is not recommended for a certain dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default scheduler to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_schedulers = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_schedulers) == 0: + raise ValueError("No scheduler found") + + if default is None: + defaults = ['no_LRScheduler', + 'LambdaLR', + 'StepLR', + 'ExponentialLR', + 'CosineAnnealingLR', + 'ReduceLROnPlateau' + ] + for default_ in defaults: + if default_ in available_schedulers: + default = default_ + break + + scheduler = CSH.CategoricalHyperparameter( + '__choice__', + list(available_schedulers.keys()), + default_value=default + ) + cs.add_hyperparameter(scheduler) + for name in available_schedulers: + scheduler_configuration_space = available_schedulers[name]. 
\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': scheduler, 'value': name} + cs.add_configuration_space( + name, + scheduler_configuration_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network/BackboneHeadNet.py b/autoPyTorch/pipeline/components/setup/network/BackboneHeadNet.py new file mode 100644 index 000000000..3a3773b25 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/BackboneHeadNet.py @@ -0,0 +1,112 @@ +from typing import Any, Dict, Optional, Tuple, Type + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter +) + +import numpy as np + +from torch import nn + +from autoPyTorch.pipeline.components.setup.network.backbone import BaseBackbone, get_available_backbones +from autoPyTorch.pipeline.components.setup.network.base_network import BaseNetworkComponent +from autoPyTorch.pipeline.components.setup.network.head import BaseHead, get_available_heads +from autoPyTorch.utils import common + + +class BackboneHeadNet(BaseNetworkComponent): + """ + Implementation of a dynamic network, that consists of a backbone and a head + """ + + def __init__( + self, + network: Optional[BaseNetworkComponent] = None, + random_state: Optional[np.random.RandomState] = None, + **kwargs: Any + ): + super().__init__( + network=network, + random_state=random_state, + ) + self.config = kwargs + self._backbones = get_available_backbones() + self._heads = get_available_heads() + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + return { + "shortname": "BackboneHeadNet", + "name": "BackboneHeadNet", + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + **kwargs: Any) -> ConfigurationSpace: + cs = ConfigurationSpace() + backbones: Dict[str, Type[BaseBackbone]] = get_available_backbones() + heads: Dict[str, Type[BaseHead]] = get_available_heads() + + # filter backbones and heads for those who support the current task type + if dataset_properties is not None and "task_type" in dataset_properties: + task = dataset_properties["task_type"] + backbones = {name: backbone for name, backbone in backbones.items() if task in backbone.supported_tasks} + heads = {name: head for name, head in heads.items() if task in head.supported_tasks} + + backbone_defaults = [ + 'ShapedMLPBackbone', + 'MLPBackbone', + 'ConvNetImageBackbone', + 'InceptionTimeBackbone', + ] + for default_ in backbone_defaults: + if default_ in backbones.keys(): + backbone_default = default_ + break + + backbone_hp = CategoricalHyperparameter("backbone", choices=backbones.keys(), default_value=backbone_default) + head_hp = CategoricalHyperparameter("head", choices=heads.keys()) + cs.add_hyperparameters([backbone_hp, head_hp]) + + # for each backbone and head, add a conditional search space if this backbone or head is chosen + for backbone_name in backbones.keys(): + backbone_cs = backbones[backbone_name].get_hyperparameter_search_space(dataset_properties) + cs.add_configuration_space(backbone_name, + backbone_cs, + parent_hyperparameter={"parent": backbone_hp, "value": 
backbone_name}) + + for head_name in heads.keys(): + head_cs: ConfigurationSpace = heads[head_name].get_hyperparameter_search_space(dataset_properties) + cs.add_configuration_space(head_name, + head_cs, + parent_hyperparameter={"parent": head_hp, "value": head_name}) + return cs + + def build_network(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> nn.Module: + """ + This method returns a pytorch network, that is dynamically built using + a self.config that is network specific, and contains the additional + configuration hyperparameters to build a domain specific network + """ + backbone_name = self.config["backbone"] + head_name = self.config["head"] + Backbone = self._backbones[backbone_name] + Head = self._heads[head_name] + + backbone = Backbone(**common.replace_prefix_in_config_dict(self.config, backbone_name)) + backbone_module = backbone.build_backbone(input_shape=input_shape) + backbone_output_shape = backbone.get_output_shape(input_shape=input_shape) + + head = Head(**common.replace_prefix_in_config_dict(self.config, head_name)) + head_module = head.build_head(input_shape=backbone_output_shape, output_shape=output_shape) + + return nn.Sequential(backbone_module, head_module) + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + info = vars(self) + # Remove unwanted info + info.pop('network', None) + info.pop('random_state', None) + return f"BackboneHeadNet: {self.config['backbone']} -> {self.config['head']} ({str(info)})" diff --git a/autoPyTorch/core/presets/feature_regression/__init__.py b/autoPyTorch/pipeline/components/setup/network/__init__.py similarity index 100% rename from autoPyTorch/core/presets/feature_regression/__init__.py rename to autoPyTorch/pipeline/components/setup/network/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/network/backbone/__init__.py b/autoPyTorch/pipeline/components/setup/network/backbone/__init__.py new file mode 100644 index 000000000..97b0392ee --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/backbone/__init__.py @@ -0,0 +1,32 @@ +from typing import Any, Dict, Type, Union + +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, +) +from autoPyTorch.pipeline.components.setup.network.backbone.base_backbone import BaseBackbone +from autoPyTorch.pipeline.components.setup.network.backbone.image import ConvNetImageBackbone, DenseNetBackbone +from autoPyTorch.pipeline.components.setup.network.backbone.tabular import MLPBackbone, ResNetBackbone, \ + ShapedMLPBackbone +from autoPyTorch.pipeline.components.setup.network.backbone.time_series import InceptionTimeBackbone, TCNBackbone + +_backbones = { + ConvNetImageBackbone.get_name(): ConvNetImageBackbone, + DenseNetBackbone.get_name(): DenseNetBackbone, + ResNetBackbone.get_name(): ResNetBackbone, + ShapedMLPBackbone.get_name(): ShapedMLPBackbone, + MLPBackbone.get_name(): MLPBackbone, + TCNBackbone.get_name(): TCNBackbone, + InceptionTimeBackbone.get_name(): InceptionTimeBackbone +} +_addons = ThirdPartyComponents(BaseBackbone) + + +def add_backbone(backbone: BaseBackbone) -> None: + _addons.add_component(backbone) + + +def get_available_backbones() -> Dict[str, Union[Type[BaseBackbone], Any]]: + backbones = dict() + backbones.update(_backbones) + backbones.update(_addons.components) + return backbones diff --git a/autoPyTorch/pipeline/components/setup/network/backbone/base_backbone.py b/autoPyTorch/pipeline/components/setup/network/backbone/base_backbone.py new file mode 
100644 index 000000000..62089d892 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/backbone/base_backbone.py @@ -0,0 +1,62 @@ +from abc import abstractmethod +from typing import Any, Dict, Set, Tuple + +import torch +from torch import nn + +from autoPyTorch.pipeline.components.base_component import BaseEstimator +from autoPyTorch.pipeline.components.base_component import ( + autoPyTorchComponent, +) + + +class BaseBackbone(autoPyTorchComponent): + """ + Backbone base class + """ + supported_tasks: Set = set() + + def __init__(self, + **kwargs: Any): + super().__init__() + self.backbone: nn.Module = None + self.config = kwargs + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: + """ + Not used. Just for API compatibility. + """ + return self + + @abstractmethod + def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module: + """ + + Builds the backbone module and assigns it to self.backbone + + :param input_shape: shape of the input + :return: the backbone module + """ + raise NotImplementedError() + + def get_output_shape(self, input_shape: Tuple[int, ...]) -> Tuple[int, ...]: + """ + Run a dummy forward pass to get the output shape of the backbone. + Can and should be overridden by subclasses that know the output shape + without running a dummy forward pass. + + :param input_shape: shape of the input + :return: output_shape + """ + placeholder = torch.randn((2, *input_shape), dtype=torch.float) + with torch.no_grad(): + output = self.backbone(placeholder) + return tuple(output.shape[1:]) + + @classmethod + def get_name(cls) -> str: + """ + Get the name of the backbone + :return: name of the backbone + """ + return cls.get_properties()["shortname"] diff --git a/autoPyTorch/pipeline/components/setup/network/backbone/image.py b/autoPyTorch/pipeline/components/setup/network/backbone/image.py new file mode 100644 index 000000000..b980bc1bb --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/backbone/image.py @@ -0,0 +1,282 @@ +import logging +import math +from collections import OrderedDict +from typing import Any, Dict, List, Optional, Tuple + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter +) + +import torch +from torch import nn +from torch.nn import functional as F + +from autoPyTorch.pipeline.components.setup.network.backbone.base_backbone import BaseBackbone + +_activations: Dict[str, nn.Module] = { + "relu": nn.ReLU, + "tanh": nn.Tanh, + "sigmoid": nn.Sigmoid +} + + +class ConvNetImageBackbone(BaseBackbone): + supported_tasks = {"image_classification", "image_regression"} + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + self.bn_args = {"eps": 1e-5, "momentum": 0.1} + + def _get_layer_size(self, w: int, h: int) -> Tuple[int, int]: + cw = ((w - self.config["conv_kernel_size"] + 2 * self.config["conv_kernel_padding"]) + // self.config["conv_kernel_stride"]) + 1 + ch = ((h - self.config["conv_kernel_size"] + 2 * self.config["conv_kernel_padding"]) + // self.config["conv_kernel_stride"]) + 1 + cw, ch = cw // self.config["pool_size"], ch // self.config["pool_size"] + return cw, ch + + def _add_layer(self, layers: List[nn.Module], in_filters: int, out_filters: int) -> None: + layers.append(nn.Conv2d(in_filters, out_filters, + kernel_size=self.config["conv_kernel_size"], + stride=self.config["conv_kernel_stride"], + 
padding=self.config["conv_kernel_padding"])) + layers.append(nn.BatchNorm2d(out_filters, **self.bn_args)) + layers.append(_activations[self.config["activation"]]()) + layers.append(nn.MaxPool2d(kernel_size=self.config["pool_size"], stride=self.config["pool_size"])) + + def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module: + channels, iw, ih = input_shape + layers: List[nn.Module] = [] + init_filter = self.config["conv_init_filters"] + self._add_layer(layers, channels, init_filter) + + cw, ch = self._get_layer_size(iw, ih) + for i in range(2, self.config["num_layers"] + 1): + cw, ch = self._get_layer_size(cw, ch) + if cw == 0 or ch == 0: + logging.info("> reduce network size due to too small layers.") + break + self._add_layer(layers, init_filter, init_filter * 2) + init_filter *= 2 + backbone = nn.Sequential(*layers) + self.backbone = backbone + return backbone + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + return { + 'shortname': 'ConvNetImageBackbone', + 'name': 'ConvNetImageBackbone', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + min_num_layers: int = 2, + max_num_layers: int = 5, + min_init_filters: int = 16, + max_init_filters: int = 64, + min_kernel_size: int = 2, + max_kernel_size: int = 5, + min_stride: int = 1, + max_stride: int = 3, + min_padding: int = 2, + max_padding: int = 3, + min_pool_size: int = 2, + max_pool_size: int = 3) -> ConfigurationSpace: + cs = CS.ConfigurationSpace() + + cs.add_hyperparameter(UniformIntegerHyperparameter('num_layers', + lower=min_num_layers, + upper=max_num_layers)) + cs.add_hyperparameter(CategoricalHyperparameter('activation', + choices=list(_activations.keys()))) + cs.add_hyperparameter(UniformIntegerHyperparameter('conv_init_filters', + lower=min_init_filters, + upper=max_init_filters)) + cs.add_hyperparameter(UniformIntegerHyperparameter('conv_kernel_size', + lower=min_kernel_size, + upper=max_kernel_size)) + cs.add_hyperparameter(UniformIntegerHyperparameter('conv_kernel_stride', + lower=min_stride, + upper=max_stride)) + cs.add_hyperparameter(UniformIntegerHyperparameter('conv_kernel_padding', + lower=min_padding, + upper=max_padding)) + cs.add_hyperparameter(UniformIntegerHyperparameter('pool_size', + lower=min_pool_size, + upper=max_pool_size)) + return cs + + +class _DenseLayer(nn.Sequential): + def __init__(self, + num_input_features: int, + activation: str, + growth_rate: int, + bn_size: int, + drop_rate: float, + bn_args: Dict[str, Any]): + super(_DenseLayer, self).__init__() + self.add_module('norm1', nn.BatchNorm2d(num_input_features, **bn_args)), + self.add_module('relu1', _activations[activation]()), + self.add_module('conv1', nn.Conv2d(num_input_features, bn_size * growth_rate, + kernel_size=1, stride=1, bias=False)), + self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate, **bn_args)), + self.add_module('relu2', _activations[activation]()), + self.add_module('conv2', nn.Conv2d(bn_size * growth_rate, growth_rate, + kernel_size=3, stride=1, padding=1, bias=False)), + self.drop_rate = drop_rate + + def forward(self, x: torch.Tensor) -> torch.Tensor: + new_features = super(_DenseLayer, self).forward(x) + if self.drop_rate > 0: + new_features = F.dropout(new_features, p=self.drop_rate, training=self.training) + return torch.cat([x, new_features], 1) + + +class _DenseBlock(nn.Sequential): + def __init__(self, + num_layers: int, + num_input_features: int, + activation: str, + 
bn_size: int, + growth_rate: int, + drop_rate: float, + bn_args: Dict[str, Any]): + super(_DenseBlock, self).__init__() + for i in range(num_layers): + layer = _DenseLayer(num_input_features=num_input_features + i * growth_rate, + activation=activation, + growth_rate=growth_rate, + bn_size=bn_size, + drop_rate=drop_rate, + bn_args=bn_args) + self.add_module('denselayer%d' % (i + 1), layer) + + +class _Transition(nn.Sequential): + def __init__(self, + num_input_features: int, + activation: str, + num_output_features: int, + pool_size: int, + bn_args: Dict[str, Any]): + super(_Transition, self).__init__() + self.add_module('norm', nn.BatchNorm2d(num_input_features, **bn_args)) + self.add_module('relu', _activations[activation]()) + self.add_module('conv', nn.Conv2d(num_input_features, num_output_features, + kernel_size=1, stride=1, bias=False)) + self.add_module('pool', nn.AvgPool2d(kernel_size=pool_size, stride=pool_size)) + + +class DenseNetBackbone(BaseBackbone): + supported_tasks = {"image_classification", "image_regression"} + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + self.bn_args = {"eps": 1e-5, "momentum": 0.1} + + def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module: + channels, iw, ih = input_shape + + growth_rate = self.config['growth_rate'] + block_config = [self.config['layer_in_block_%d' % (i + 1)] for i in range(self.config['blocks'])] + num_init_features = 2 * growth_rate + bn_size = 4 + drop_rate = self.config['dropout'] if self.config['use_dropout'] else 0 + + image_size, min_image_size = min(iw, ih), 1 + + division_steps = math.floor(math.log2(image_size) - math.log2(min_image_size) - 1e-5) + 1 + + if division_steps > len(block_config) + 1: + # First convolution + features = nn.Sequential(OrderedDict([ + ('conv0', nn.Conv2d(channels, num_init_features, kernel_size=7, stride=2, padding=3, bias=False)), + ('norm0', nn.BatchNorm2d(num_init_features, **self.bn_args)), + ('relu0', nn.ReLU(inplace=True)), + ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)), + ])) + division_steps -= 2 + else: + features = nn.Sequential(OrderedDict([ + ('conv0', nn.Conv2d(channels, num_init_features, kernel_size=3, stride=1, padding=1, bias=False)) + ])) + + # Each denseblock + num_features = num_init_features + for i, num_layers in enumerate(block_config): + block = _DenseBlock(num_layers=num_layers, + activation=self.config["activation"], + num_input_features=num_features, + bn_size=bn_size, + growth_rate=growth_rate, + drop_rate=drop_rate, + bn_args=self.bn_args) + features.add_module('denseblock%d' % (i + 1), block) + num_features = num_features + num_layers * growth_rate + if i != len(block_config) - 1: + trans = _Transition(num_input_features=num_features, + activation=self.config["activation"], + num_output_features=num_features // 2, + pool_size=2 if i > len(block_config) - division_steps else 1, + bn_args=self.bn_args) + features.add_module('transition%d' % (i + 1), trans) + num_features = num_features // 2 + + # Final batch norm + features.add_module('last_norm', nn.BatchNorm2d(num_features, **self.bn_args)) + self.backbone = features + return features + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + return { + 'shortname': 'DenseNetBackbone', + 'name': 'DenseNetBackbone', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + min_growth_rate: int = 12, + max_growth_rate: int = 40, + min_num_blocks: int = 3, + 
max_num_blocks: int = 4, + min_num_layers: int = 4, + max_num_layers: int = 64) -> ConfigurationSpace: + cs = CS.ConfigurationSpace() + growth_rate_hp = UniformIntegerHyperparameter('growth_rate', + lower=min_growth_rate, + upper=max_growth_rate) + cs.add_hyperparameter(growth_rate_hp) + + blocks_hp = UniformIntegerHyperparameter('blocks', + lower=min_num_blocks, + upper=max_num_blocks) + cs.add_hyperparameter(blocks_hp) + + activation_hp = CategoricalHyperparameter('activation', + choices=list(_activations.keys())) + cs.add_hyperparameter(activation_hp) + + use_dropout = CategoricalHyperparameter('use_dropout', choices=[True, False]) + dropout = UniformFloatHyperparameter('dropout', + lower=0.0, + upper=1.0) + cs.add_hyperparameters([use_dropout, dropout]) + cs.add_condition(CS.EqualsCondition(dropout, use_dropout, True)) + + for i in range(1, max_num_blocks + 1): + layer_hp = UniformIntegerHyperparameter('layer_in_block_%d' % i, + lower=min_num_layers, + upper=max_num_layers) + cs.add_hyperparameter(layer_hp) + + if i > min_num_blocks: + cs.add_condition(CS.GreaterThanCondition(layer_hp, blocks_hp, i - 1)) + + return cs diff --git a/autoPyTorch/pipeline/components/setup/network/backbone/tabular.py b/autoPyTorch/pipeline/components/setup/network/backbone/tabular.py new file mode 100644 index 000000000..a24424a06 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/backbone/tabular.py @@ -0,0 +1,620 @@ +from typing import Any, Callable, Dict, List, Optional, Tuple + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter +) + +import torch +from torch import nn + +from autoPyTorch.pipeline.components.setup.network.backbone.base_backbone import BaseBackbone +from autoPyTorch.pipeline.components.setup.network.utils import ( + get_shaped_neuron_counts, + shake_drop, + shake_drop_get_bl, + shake_get_alpha_beta, + shake_shake +) + +_activations = { + "relu": nn.ReLU, + "tanh": nn.Tanh, + "sigmoid": nn.Sigmoid +} + + +class MLPBackbone(BaseBackbone): + """ + This component automatically creates a Multi Layer Perceptron based on a given config. + + This MLP allows for: + - Different number of layers + - Specifying the activation. 
But this activation is shared among layers + - Using or not dropout + - Specifying the number of units per layers + """ + supported_tasks = {"tabular_classification", "tabular_regression"} + + def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module: + layers = list() # type: List[nn.Module] + in_features = input_shape[0] + + self._add_layer(layers, in_features, self.config['num_units_1'], 1) + + for i in range(2, self.config['num_groups'] + 1): + self._add_layer(layers, self.config["num_units_%d" % (i - 1)], + self.config["num_units_%d" % i], i) + backbone = nn.Sequential(*layers) + self.backbone = backbone + return backbone + + def get_output_shape(self, input_shape: Tuple[int, ...]) -> Tuple[int, ...]: + return (self.config["num_units_%d" % self.config["num_groups"]],) + + def _add_layer(self, layers: List[nn.Module], in_features: int, out_features: int, + layer_id: int) -> None: + """ + Dynamically add a layer given the in->out specification + + Args: + layers (List[nn.Module]): The list where all modules are added + in_features (int): input dimensionality of the new layer + out_features (int): output dimensionality of the new layer + + """ + layers.append(nn.Linear(in_features, out_features)) + layers.append(_activations[self.config["activation"]]()) + if self.config['use_dropout']: + layers.append(nn.Dropout(self.config["dropout_%d" % layer_id])) + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'MLPBackbone', + 'name': 'MLPBackbone', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + min_mlp_layers: int = 1, + max_mlp_layers: int = 15, + dropout: bool = True, + min_num_units: int = 10, + max_num_units: int = 1024, + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + + # The number of hidden layers the network will have. 
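A minimal ConfigSpace sketch of the per-layer conditioning pattern used by MLPBackbone's search space: each num_units_i hyperparameter only becomes active once num_groups is large enough to reach layer i. The bounds and defaults below are illustrative, not the ones proposed by the component.

import ConfigSpace as CS
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformIntegerHyperparameter

cs = ConfigurationSpace()
num_groups = UniformIntegerHyperparameter('num_groups', 1, 3, default_value=2)
cs.add_hyperparameter(num_groups)
for i in range(1, 4):
    units = UniformIntegerHyperparameter('num_units_%d' % i, 10, 100, default_value=50)
    cs.add_hyperparameter(units)
    if i > 1:
        # num_units_i is only sampled when num_groups > i - 1
        cs.add_condition(CS.GreaterThanCondition(units, num_groups, i - 1))
print(cs.sample_configuration())
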
+ # Layer blocks are meant to have the same architecture, differing only + # by the number of units + num_groups = UniformIntegerHyperparameter( + "num_groups", min_mlp_layers, max_mlp_layers, default_value=5) + + activation = CategoricalHyperparameter( + "activation", choices=list(_activations.keys()) + ) + cs.add_hyperparameters([num_groups, activation]) + + # We can have dropout in the network for + # better generalization + if dropout: + use_dropout = CategoricalHyperparameter( + "use_dropout", choices=[True, False], default_value=False) + cs.add_hyperparameters([use_dropout]) + + for i in range(1, max_mlp_layers + 1): + n_units_hp = UniformIntegerHyperparameter("num_units_%d" % i, + lower=min_num_units, + upper=max_num_units, + default_value=200) + cs.add_hyperparameter(n_units_hp) + + if i > min_mlp_layers: + # The units of layer i should only exist + # if there are at least i layers + cs.add_condition( + CS.GreaterThanCondition( + n_units_hp, num_groups, i - 1 + ) + ) + + if dropout: + dropout_hp = UniformFloatHyperparameter( + "dropout_%d" % i, + lower=0.0, + upper=0.8, + default_value=0.5 + ) + cs.add_hyperparameter(dropout_hp) + dropout_condition_1 = CS.EqualsCondition(dropout_hp, use_dropout, True) + + if i > min_mlp_layers: + dropout_condition_2 = CS.GreaterThanCondition(dropout_hp, num_groups, i - 1) + cs.add_condition(CS.AndConjunction(dropout_condition_1, dropout_condition_2)) + else: + cs.add_condition(dropout_condition_1) + + return cs + + +class ShapedMLPBackbone(BaseBackbone): + """ + Implementation of a Shaped MLP -- an MLP with the number of units + arranged so that a given shape is honored + """ + supported_tasks = {"tabular_classification", "tabular_regression"} + + def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module: + layers = list() # type: List[nn.Module] + in_features = input_shape[0] + out_features = self.config["output_dim"] + neuron_counts = get_shaped_neuron_counts(self.config['mlp_shape'], + in_features, + out_features, + self.config['max_units'], + self.config['num_groups']) + if self.config["use_dropout"] and self.config["max_dropout"] > 0.05: + dropout_shape = get_shaped_neuron_counts( + self.config['mlp_shape'], 0, 0, 1000, self.config['num_groups'] + ) + + previous = in_features + for i in range(self.config['num_groups'] - 1): + if i >= len(neuron_counts): + break + if self.config["use_dropout"] and self.config["max_dropout"] > 0.05: + dropout = dropout_shape[i] / 1000 * self.config["max_dropout"] + else: + dropout = 0.0 + self._add_layer(layers, previous, neuron_counts[i], dropout) + previous = neuron_counts[i] + layers.append(nn.Linear(previous, out_features)) + + backbone = nn.Sequential(*layers) + self.backbone = backbone + return backbone + + def get_output_shape(self, input_shape: Tuple[int, ...]) -> Tuple[int, ...]: + return (self.config["output_dim"],) + + def _add_layer(self, layers: List[nn.Module], + in_features: int, out_features: int, dropout: float + ) -> None: + layers.append(nn.Linear(in_features, out_features)) + layers.append(_activations[self.config["activation"]]()) + if self.config["use_dropout"] and self.config["max_dropout"] > 0.05: + layers.append(nn.Dropout(dropout)) + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'ShapedMLPBackbone', + 'name': 'ShapedMLPBackbone', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + min_num_gropus: int = 1, + max_num_groups: int = 15, + 
min_num_units: int = 10, + max_num_units: int = 1024, + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + + # The number of groups that will compose the resnet. That is, + # a group can have N Resblock. The M number of this N resblock + # repetitions is num_groups + num_groups = UniformIntegerHyperparameter( + "num_groups", lower=min_num_gropus, upper=max_num_groups, default_value=5) + + mlp_shape = CategoricalHyperparameter('mlp_shape', choices=[ + 'funnel', 'long_funnel', 'diamond', 'hexagon', 'brick', 'triangle', 'stairs' + ]) + + activation = CategoricalHyperparameter( + "activation", choices=list(_activations.keys()) + ) + + max_units = UniformIntegerHyperparameter( + "max_units", + lower=min_num_units, + upper=max_num_units, + default_value=200, + ) + + output_dim = UniformIntegerHyperparameter( + "output_dim", + lower=min_num_units, + upper=max_num_units + ) + + cs.add_hyperparameters([num_groups, activation, mlp_shape, max_units, output_dim]) + + # We can have dropout in the network for + # better generalization + use_dropout = CategoricalHyperparameter( + "use_dropout", choices=[True, False]) + max_dropout = UniformFloatHyperparameter("max_dropout", lower=0.0, upper=1.0) + cs.add_hyperparameters([use_dropout, max_dropout]) + cs.add_condition(CS.EqualsCondition(max_dropout, use_dropout, True)) + + return cs + + +class ResNetBackbone(BaseBackbone): + """ + Implementation of a Residual Network backbone + + """ + supported_tasks = {"tabular_classification", "tabular_regression"} + + def build_backbone(self, input_shape: Tuple[int, ...]) -> None: + layers = list() # type: List[nn.Module] + in_features = input_shape[0] + layers.append(nn.Linear(in_features, self.config["num_units_0"])) + + # build num_groups-1 groups each consisting of blocks_per_group ResBlocks + # the output features of each group is defined by num_units_i + for i in range(1, self.config['num_groups'] + 1): + layers.append( + self._add_group( + in_features=self.config["num_units_%d" % (i - 1)], + out_features=self.config["num_units_%d" % i], + blocks_per_group=self.config["blocks_per_group_%d" % i], + last_block_index=(i - 1) * self.config["blocks_per_group_%d" % i], + dropout=self.config['use_dropout'] + ) + ) + + layers.append(nn.BatchNorm1d(self.config["num_units_%i" % self.config['num_groups']])) + layers.append(_activations[self.config["activation"]]()) + backbone = nn.Sequential(*layers) + self.backbone = backbone + return backbone + + def _add_group(self, in_features: int, out_features: int, + blocks_per_group: int, last_block_index: int, dropout: bool + ) -> nn.Module: + """ + Adds a group into the main backbone. 
+ In the case of ResNet a group is a set of blocks_per_group + ResBlocks + + Args: + in_features (int): number of inputs for the current block + out_features (int): output dimensionality for the current block + blocks_per_group (int): Number of ResNet per group + last_block_index (int): block index for shake regularization + dropout (bool): whether or not use dropout + """ + blocks = list() + for i in range(blocks_per_group): + blocks.append( + ResBlock( + config=self.config, + in_features=in_features, + out_features=out_features, + blocks_per_group=blocks_per_group, + block_index=last_block_index + i, + dropout=dropout, + activation=_activations[self.config["activation"]] + ) + ) + in_features = out_features + return nn.Sequential(*blocks) + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'ResNetBackbone', + 'name': 'ResidualNetworkBackbone', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + min_num_gropus: int = 1, + max_num_groups: int = 9, + min_blocks_per_groups: int = 1, + max_blocks_per_groups: int = 4, + min_num_units: int = 10, + max_num_units: int = 1024, + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + + # The number of groups that will compose the resnet. That is, + # a group can have N Resblock. The M number of this N resblock + # repetitions is num_groups + num_groups = UniformIntegerHyperparameter( + "num_groups", lower=min_num_gropus, upper=max_num_groups, default_value=5) + + activation = CategoricalHyperparameter( + "activation", choices=list(_activations.keys()) + ) + cs.add_hyperparameters([num_groups, activation]) + + # We can have dropout in the network for + # better generalization + use_dropout = CategoricalHyperparameter( + "use_dropout", choices=[True, False]) + cs.add_hyperparameters([use_dropout]) + + use_shake_shake = CategoricalHyperparameter("use_shake_shake", choices=[True, False]) + use_shake_drop = CategoricalHyperparameter("use_shake_drop", choices=[True, False]) + shake_drop_prob = UniformFloatHyperparameter( + "max_shake_drop_probability", lower=0.0, upper=1.0) + cs.add_hyperparameters([use_shake_shake, use_shake_drop, shake_drop_prob]) + cs.add_condition(CS.EqualsCondition(shake_drop_prob, use_shake_drop, True)) + + # It is the upper bound of the nr of groups, + # since the configuration will actually be sampled. 
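The conditional structure built in the loop that follows can be reproduced in isolation with a small, self-contained ConfigSpace sketch (illustrative bounds only, not taken from this diff):

import ConfigSpace as CS
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformIntegerHyperparameter

cs = ConfigurationSpace()
num_groups = UniformIntegerHyperparameter("num_groups", 1, 3, default_value=2)
cs.add_hyperparameter(num_groups)

for i in range(1, 4):
    units = UniformIntegerHyperparameter("num_units_%d" % i, 10, 100)
    cs.add_hyperparameter(units)
    if i > 1:
        # num_units_i is only active once at least i groups are requested
        cs.add_condition(CS.GreaterThanCondition(units, num_groups, i - 1))

# sampled configurations simply omit the inactive hyperparameters
print(cs.sample_configuration())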
+ for i in range(0, max_num_groups + 1): + + n_units = UniformIntegerHyperparameter( + "num_units_%d" % i, + lower=min_num_units, + upper=max_num_units, + ) + blocks_per_group = UniformIntegerHyperparameter( + "blocks_per_group_%d" % i, lower=min_blocks_per_groups, + upper=max_blocks_per_groups) + + cs.add_hyperparameters([n_units, blocks_per_group]) + + if i > 1: + cs.add_condition(CS.GreaterThanCondition(n_units, num_groups, i - 1)) + cs.add_condition(CS.GreaterThanCondition(blocks_per_group, num_groups, i - 1)) + + this_dropout = UniformFloatHyperparameter( + "dropout_%d" % i, lower=0.0, upper=1.0 + ) + cs.add_hyperparameters([this_dropout]) + + dropout_condition_1 = CS.EqualsCondition(this_dropout, use_dropout, True) + + if i > 1: + + dropout_condition_2 = CS.GreaterThanCondition(this_dropout, num_groups, i - 1) + + cs.add_condition(CS.AndConjunction(dropout_condition_1, dropout_condition_2)) + else: + cs.add_condition(dropout_condition_1) + return cs + + +class ResBlock(nn.Module): + """ + __author__ = "Max Dippel, Michael Burkart and Matthias Urban" + """ + + def __init__( + self, + config: Dict[str, Any], + in_features: int, + out_features: int, + blocks_per_group: int, + block_index: int, + dropout: bool, + activation: nn.Module + ): + super(ResBlock, self).__init__() + self.config = config + self.dropout = dropout + self.activation = activation + + self.shortcut = None + self.start_norm = None # type: Optional[Callable] + + # if in != out the shortcut needs a linear layer to match the result dimensions + # if the shortcut needs a layer we apply batchnorm and activation to the shortcut + # as well (start_norm) + if in_features != out_features: + self.shortcut = nn.Linear(in_features, out_features) + self.start_norm = nn.Sequential( + nn.BatchNorm1d(in_features), + self.activation() + ) + + self.block_index = block_index + self.num_blocks = blocks_per_group * self.config["num_groups"] + self.layers = self._build_block(in_features, out_features) + + if config["use_shake_shake"]: + self.shake_shake_layers = self._build_block(in_features, out_features) + + # each bloack consists of two linear layers with batch norm and activation + def _build_block(self, in_features: int, out_features: int) -> nn.Module: + layers = list() + + if self.start_norm is None: + layers.append(nn.BatchNorm1d(in_features)) + layers.append(self.activation()) + layers.append(nn.Linear(in_features, out_features)) + + layers.append(nn.BatchNorm1d(out_features)) + layers.append(self.activation()) + + if self.config["use_dropout"]: + layers.append(nn.Dropout(self.dropout)) + layers.append(nn.Linear(out_features, out_features)) + + return nn.Sequential(*layers) + + def forward(self, x: torch.FloatTensor) -> torch.FloatTensor: + residual = x + + # if shortcut is not none we need a layer such that x matches the output dimension + if self.shortcut is not None and self.start_norm is not None: + # in this case self.start_norm is also != none + # apply start_norm to x in order to have batchnorm+activation + # in front of shortcut and layers. Note that in this case layers + # does not start with batchnorm+activation but with the first linear layer + # (see _build_block). 
As a result if in_features == out_features + # -> result = x + W(~D(A(BN(W(A(BN(x)))))) + # if in_features != out_features + # -> result = W_shortcut(A(BN(x))) + W_2(~D(A(BN(W_1(A(BN(x)))))) + x = self.start_norm(x) + residual = self.shortcut(x) + + if self.config["use_shake_shake"]: + x1 = self.layers(x) + x2 = self.shake_shake_layers(x) + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + x = shake_shake(x1, x2, alpha, beta) + else: + x = self.layers(x) + + if self.config["use_shake_drop"]: + alpha, beta = shake_get_alpha_beta(self.training, x.is_cuda) + bl = shake_drop_get_bl( + self.block_index, + 1 - self.config["max_shake_drop_probability"], + self.num_blocks, + self.training, + x.is_cuda + ) + x = shake_drop(x, alpha, beta, bl) + + x = x + residual + return x + + +class ShapedResNetBackbone(ResNetBackbone): + """ + Implementation of a Residual Network builder with support + for shaped number of units per group. + + """ + + def build_backbone(self, input_shape: Tuple[int, ...]) -> None: + layers = list() # type: List[nn.Module] + in_features = input_shape[0] + out_features = self.config["output_dim"] + + # use the get_shaped_neuron_counts to update the number of units + neuron_counts = get_shaped_neuron_counts(self.config['resnet_shape'], + in_features, + out_features, + self.config['max_units'], + self.config['num_groups'] + 2)[:-1] + self.config.update( + {"num_units_%d" % (i): num for i, num in enumerate(neuron_counts)} + ) + if self.config['use_dropout'] and self.config["max_dropout"] > 0.05: + dropout_shape = get_shaped_neuron_counts( + self.config['resnet_shape'], 0, 0, 1000, self.config['num_groups'] + ) + + dropout_shape = [ + dropout / 1000 * self.config["max_dropout"] for dropout in dropout_shape + ] + + self.config.update( + {"dropout_%d" % (i + 1): dropout for i, dropout in enumerate(dropout_shape)} + ) + layers.append(nn.Linear(in_features, self.config["num_units_0"])) + + # build num_groups-1 groups each consisting of blocks_per_group ResBlocks + # the output features of each group is defined by num_units_i + for i in range(1, self.config['num_groups'] + 1): + layers.append( + self._add_group( + in_features=self.config["num_units_%d" % (i - 1)], + out_features=self.config["num_units_%d" % i], + blocks_per_group=self.config["blocks_per_group"], + last_block_index=(i - 1) * self.config["blocks_per_group"], + dropout=self.config['use_dropout'] + ) + ) + + layers.append(nn.BatchNorm1d(self.config["num_units_%i" % self.config['num_groups']])) + backbone = nn.Sequential(*layers) + self.backbone = backbone + return backbone + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'ShapedResNetBackbone', + 'name': 'ShapedResidualNetworkBackbone', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + min_num_gropus: int = 1, + max_num_groups: int = 9, + min_blocks_per_groups: int = 1, + max_blocks_per_groups: int = 4, + min_num_units: int = 10, + max_num_units: int = 1024, + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + + # Support for different shapes + resnet_shape = CategoricalHyperparameter( + 'resnet_shape', + choices=[ + 'funnel', + 'long_funnel', + 'diamond', + 'hexagon', + 'brick', + 'triangle', + 'stairs' + ] + ) + cs.add_hyperparameter(resnet_shape) + + # The number of groups that will compose the resnet. That is, + # a group can have N Resblock. 
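For a feel of the shape helper used by build_backbone above (get_shaped_neuron_counts is added later in this diff, in autoPyTorch/pipeline/components/setup/network/utils.py), a small illustrative call:

from autoPyTorch.pipeline.components.setup.network.utils import get_shaped_neuron_counts

# a 'funnel' over 5 layers shrinks linearly from max_neurons towards out_feat
counts = get_shaped_neuron_counts('funnel', in_feat=10, out_feat=4,
                                  max_neurons=64, layer_count=5)
print(counts)  # [64, 49, 34, 19, 4]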
The M number of this N resblock + # repetitions is num_groups + num_groups = UniformIntegerHyperparameter( + "num_groups", lower=min_num_gropus, upper=max_num_groups, default_value=5) + + blocks_per_group = UniformIntegerHyperparameter( + "blocks_per_group", lower=min_blocks_per_groups, upper=max_blocks_per_groups) + + activation = CategoricalHyperparameter( + "activation", choices=list(_activations.keys()) + ) + + output_dim = UniformIntegerHyperparameter( + "output_dim", + lower=min_num_units, + upper=max_num_units + ) + + cs.add_hyperparameters([num_groups, blocks_per_group, activation, output_dim]) + + # We can have dropout in the network for + # better generalization + use_dropout = CategoricalHyperparameter( + "use_dropout", choices=[True, False]) + cs.add_hyperparameters([use_dropout]) + + use_shake_shake = CategoricalHyperparameter("use_shake_shake", choices=[True, False]) + use_shake_drop = CategoricalHyperparameter("use_shake_drop", choices=[True, False]) + shake_drop_prob = UniformFloatHyperparameter( + "max_shake_drop_probability", lower=0.0, upper=1.0) + cs.add_hyperparameters([use_shake_shake, use_shake_drop, shake_drop_prob]) + cs.add_condition(CS.EqualsCondition(shake_drop_prob, use_shake_drop, True)) + + max_units = UniformIntegerHyperparameter( + "max_units", + lower=min_num_units, + upper=max_num_units, + ) + cs.add_hyperparameters([max_units]) + + max_dropout = UniformFloatHyperparameter( + "max_dropout", lower=0.0, upper=1.0 + ) + cs.add_hyperparameters([max_dropout]) + cs.add_condition(CS.EqualsCondition(max_dropout, use_dropout, True)) + + return cs diff --git a/autoPyTorch/pipeline/components/setup/network/backbone/time_series.py b/autoPyTorch/pipeline/components/setup/network/backbone/time_series.py new file mode 100644 index 000000000..5ecb5f94c --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/backbone/time_series.py @@ -0,0 +1,321 @@ +from typing import Any, Dict, List, Optional, Tuple + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, + UniformFloatHyperparameter, + UniformIntegerHyperparameter +) + +import torch +from torch import nn +from torch.nn.utils import weight_norm + +from autoPyTorch.pipeline.components.setup.network.backbone.base_backbone import BaseBackbone + + +# Code inspired by https://github.com/hfawaz/InceptionTime +# Paper: https://arxiv.org/pdf/1909.04939.pdf +class _InceptionBlock(nn.Module): + def __init__(self, + n_inputs: int, + n_filters: int, + kernel_size: int, + bottleneck: int = None): + super(_InceptionBlock, self).__init__() + self.n_filters = n_filters + self.bottleneck = None \ + if bottleneck is None \ + else nn.Conv1d(n_inputs, bottleneck, kernel_size=1) + + kernel_sizes = [kernel_size // (2 ** i) for i in range(3)] + n_inputs = n_inputs if bottleneck is None else bottleneck + + # create 3 conv layers with different kernel sizes which are applied in parallel + self.pad1 = nn.ConstantPad1d( + padding=self._padding(kernel_sizes[0]), value=0) + self.conv1 = nn.Conv1d(n_inputs, n_filters, kernel_sizes[0]) + + self.pad2 = nn.ConstantPad1d( + padding=self._padding(kernel_sizes[1]), value=0) + self.conv2 = nn.Conv1d(n_inputs, n_filters, kernel_sizes[1]) + + self.pad3 = nn.ConstantPad1d( + padding=self._padding(kernel_sizes[2]), value=0) + self.conv3 = nn.Conv1d(n_inputs, n_filters, kernel_sizes[2]) + + # create 1 maxpool and conv layer which are also applied in parallel + self.maxpool = 
nn.MaxPool1d(kernel_size=3, stride=1, padding=1) + self.convpool = nn.Conv1d(n_inputs, n_filters, 1) + + self.bn = nn.BatchNorm1d(4 * n_filters) + + def _padding(self, kernel_size: int) -> Tuple[int, int]: + if kernel_size % 2 == 0: + return kernel_size // 2, kernel_size // 2 - 1 + else: + return kernel_size // 2, kernel_size // 2 + + def get_n_outputs(self) -> int: + return 4 * self.n_filters + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.bottleneck is not None: + x = self.bottleneck(x) + x1 = self.conv1(self.pad1(x)) + x2 = self.conv2(self.pad2(x)) + x3 = self.conv3(self.pad3(x)) + x4 = self.convpool(self.maxpool(x)) + x = torch.cat([x1, x2, x3, x4], dim=1) + x = self.bn(x) + return torch.relu(x) + + +class _ResidualBlock(nn.Module): + def __init__(self, n_res_inputs: int, n_outputs: int): + super(_ResidualBlock, self).__init__() + self.shortcut = nn.Conv1d(n_res_inputs, n_outputs, 1, bias=False) + self.bn = nn.BatchNorm1d(n_outputs) + + def forward(self, x: torch.Tensor, res: torch.Tensor) -> torch.Tensor: + shortcut = self.shortcut(res) + shortcut = self.bn(shortcut) + x += shortcut + return torch.relu(x) + + +class _InceptionTime(nn.Module): + def __init__(self, + in_features: int, + config: Dict[str, Any]) -> None: + super().__init__() + self.config = config + n_inputs = in_features + n_filters = self.config["num_filters"] + bottleneck_size = self.config["bottleneck_size"] + kernel_size = self.config["kernel_size"] + n_res_inputs = in_features + for i in range(self.config["num_blocks"]): + block = _InceptionBlock(n_inputs=n_inputs, + n_filters=n_filters, + bottleneck=bottleneck_size, + kernel_size=kernel_size) + self.__setattr__(f"inception_block_{i}", block) + + # add a residual block after every 3 inception blocks + if i % 3 == 2: + n_res_outputs = block.get_n_outputs() + self.__setattr__(f"residual_block_{i}", _ResidualBlock(n_res_inputs=n_res_inputs, + n_outputs=n_res_outputs)) + n_res_inputs = n_res_outputs + n_inputs = block.get_n_outputs() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + # swap sequence and feature dimensions for use with convolutional nets + x = x.transpose(1, 2).contiguous() + res = x + for i in range(self.config["num_blocks"]): + x = self.__getattr__(f"inception_block_{i}")(x) + if i % 3 == 2: + x = self.__getattr__(f"residual_block_{i}")(x, res) + res = x + x = x.transpose(1, 2).contiguous() + return x + + +class InceptionTimeBackbone(BaseBackbone): + supported_tasks = {"time_series_classification", "time_series_regression"} + + def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module: + backbone = _InceptionTime(in_features=input_shape[-1], + config=self.config) + self.backbone = backbone + return backbone + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + return { + 'shortname': 'InceptionTimeBackbone', + 'name': 'InceptionTimeBackbone', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + min_num_blocks: int = 1, + max_num_blocks: int = 10, + min_num_filters: int = 16, + max_num_filters: int = 64, + min_kernel_size: int = 32, + max_kernel_size: int = 64, + min_bottleneck_size: int = 16, + max_bottleneck_size: int = 64, + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + + num_blocks_hp = UniformIntegerHyperparameter("num_blocks", + lower=min_num_blocks, + upper=max_num_blocks) + cs.add_hyperparameter(num_blocks_hp) + + num_filters_hp = UniformIntegerHyperparameter("num_filters", + 
lower=min_num_filters, + upper=max_num_filters) + cs.add_hyperparameter(num_filters_hp) + + bottleneck_size_hp = UniformIntegerHyperparameter("bottleneck_size", + lower=min_bottleneck_size, + upper=max_bottleneck_size) + cs.add_hyperparameter(bottleneck_size_hp) + + kernel_size_hp = UniformIntegerHyperparameter("kernel_size", + lower=min_kernel_size, + upper=max_kernel_size) + cs.add_hyperparameter(kernel_size_hp) + return cs + + +# Chomp1d, TemporalBlock and TemporalConvNet copied from +# https://github.com/locuslab/TCN/blob/master/TCN/tcn.py, Carnegie Mellon University Locus Labs +# Paper: https://arxiv.org/pdf/1803.01271.pdf +class _Chomp1d(nn.Module): + def __init__(self, chomp_size: int): + super(_Chomp1d, self).__init__() + self.chomp_size = chomp_size + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x[:, :, :-self.chomp_size].contiguous() + + +class _TemporalBlock(nn.Module): + def __init__(self, + n_inputs: int, + n_outputs: int, + kernel_size: int, + stride: int, + dilation: int, + padding: int, + dropout: float = 0.2): + super(_TemporalBlock, self).__init__() + self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size, + stride=stride, padding=padding, dilation=dilation)) + self.chomp1 = _Chomp1d(padding) + self.relu1 = nn.ReLU() + self.dropout1 = nn.Dropout(dropout) + + self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size, + stride=stride, padding=padding, dilation=dilation)) + self.chomp2 = _Chomp1d(padding) + self.relu2 = nn.ReLU() + self.dropout2 = nn.Dropout(dropout) + + self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1, + self.conv2, self.chomp2, self.relu2, self.dropout2) + self.downsample = nn.Conv1d( + n_inputs, n_outputs, 1) if n_inputs != n_outputs else None + self.relu = nn.ReLU() + # self.init_weights() + + def init_weights(self) -> None: + self.conv1.weight.data.normal_(0, 0.01) + self.conv2.weight.data.normal_(0, 0.01) + if self.downsample is not None: + self.downsample.weight.data.normal_(0, 0.01) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + out = self.net(x) + res = x if self.downsample is None else self.downsample(x) + return self.relu(out + res) + + +class _TemporalConvNet(nn.Module): + def __init__(self, num_inputs: int, num_channels: List[int], kernel_size: int = 2, dropout: float = 0.2): + super(_TemporalConvNet, self).__init__() + layers: List[Any] = [] + num_levels = len(num_channels) + for i in range(num_levels): + dilation_size = 2 ** i + in_channels = num_inputs if i == 0 else num_channels[i - 1] + out_channels = num_channels[i] + layers += [_TemporalBlock(in_channels, + out_channels, + kernel_size, + stride=1, + dilation=dilation_size, + padding=(kernel_size - 1) * dilation_size, + dropout=dropout)] + self.network = nn.Sequential(*layers) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + # swap sequence and feature dimensions for use with convolutional nets + x = x.transpose(1, 2).contiguous() + x = self.network(x) + x = x.transpose(1, 2).contiguous() + return x + + +class TCNBackbone(BaseBackbone): + supported_tasks = {"time_series_classification", "time_series_regression"} + + def build_backbone(self, input_shape: Tuple[int, ...]) -> nn.Module: + num_channels = [self.config["num_filters_0"]] + for i in range(1, self.config["num_blocks"]): + num_channels.append(self.config[f"num_filters_{i}"]) + backbone = _TemporalConvNet(input_shape[-1], + num_channels, + kernel_size=self.config["kernel_size"], + dropout=self.config["dropout"] if self.config["use_dropout"] else 
0.0 + ) + self.backbone = backbone + return backbone + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + return { + "shortname": "TCNBackbone", + "name": "TCNBackbone", + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + min_num_blocks: int = 1, + max_num_blocks: int = 10, + min_num_filters: int = 4, + max_num_filters: int = 64, + min_kernel_size: int = 4, + max_kernel_size: int = 64, + min_dropout: float = 0.0, + max_dropout: float = 0.5 + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + + num_blocks_hp = UniformIntegerHyperparameter("num_blocks", + lower=min_num_blocks, + upper=max_num_blocks) + cs.add_hyperparameter(num_blocks_hp) + + kernel_size_hp = UniformIntegerHyperparameter("kernel_size", + lower=min_kernel_size, + upper=max_kernel_size) + cs.add_hyperparameter(kernel_size_hp) + + use_dropout_hp = CategoricalHyperparameter("use_dropout", + choices=[True, False]) + cs.add_hyperparameter(use_dropout_hp) + + dropout_hp = UniformFloatHyperparameter("dropout", + lower=min_dropout, + upper=max_dropout) + cs.add_hyperparameter(dropout_hp) + cs.add_condition(CS.EqualsCondition(dropout_hp, use_dropout_hp, True)) + + for i in range(0, max_num_blocks): + num_filters_hp = UniformIntegerHyperparameter(f"num_filters_{i}", + lower=min_num_filters, + upper=max_num_filters) + cs.add_hyperparameter(num_filters_hp) + if i >= min_num_blocks: + cs.add_condition(CS.GreaterThanCondition( + num_filters_hp, num_blocks_hp, i)) + + return cs diff --git a/autoPyTorch/pipeline/components/setup/network/base_network.py b/autoPyTorch/pipeline/components/setup/network/base_network.py new file mode 100644 index 000000000..83872b55f --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/base_network.py @@ -0,0 +1,136 @@ +from abc import abstractmethod +from typing import Any, Dict, Optional, Tuple + +import numpy as np + +import torch +from torch import nn + +from autoPyTorch.constants import CLASSIFICATION_TASKS, STRING_TO_TASK_TYPES +from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent +from autoPyTorch.utils.common import FitRequirement + + +class BaseNetworkComponent(autoPyTorchSetupComponent): + """ + Provide an abstract interface for networks + in Auto-Pytorch + """ + + def __init__( + self, + network: Optional[torch.nn.Module] = None, + random_state: Optional[np.random.RandomState] = None, + device: Optional[torch.device] = None + ) -> None: + super(BaseNetworkComponent, self).__init__() + self.network = network + self.random_state = random_state + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") if device is None else device + self.add_fit_requirements([FitRequirement('task_type', (str,), user_defined=True, dataset_property=True), + FitRequirement('input_shape', (tuple,), user_defined=True, dataset_property=True), + ]) + + def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchSetupComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
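In effect, fit reads the training data and dataset properties from this dictionary and transform later writes the built module back under the 'network' key. A simplified stand-in of that flow in plain Python (an illustration only, not the component itself):

import numpy as np
from torch import nn

X = {
    'dataset_properties': {'task_type': 'tabular_classification', 'num_classes': 2},
    'X_train': np.zeros((100, 10), dtype=np.float32),
}

input_shape = X['X_train'].shape[1:]                        # (10,)
output_shape = (X['dataset_properties']['num_classes'],)    # (2,) for classification
network = nn.Linear(input_shape[0], output_shape[0])        # stand-in for build_network()
X.update({'network': network})                              # what transform() exports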
To comply with sklearn API + + Returns: + A instance of self + """ + # Make sure that input dictionary X has the required + # information to fit this stage + self.check_requirements(X, y) + + output_shape = (X['dataset_properties']['num_classes'],) if \ + STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']] in \ + CLASSIFICATION_TASKS else X['dataset_properties']['output_shape'] + input_shape = X['X_train'].shape[1:] + self.network = self.build_network(input_shape=input_shape, + output_shape=output_shape) + + # Properly set the network training device + self.to(self.device) + + return self + + @abstractmethod + def build_network(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> torch.nn.Module: + """ + This method returns a pytorch network, that is dynamically built using + a self.config that is network specific, and contains the additional + configuration hyperparameters to build a domain specific network + """ + raise NotImplementedError() + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + The transform function updates the network in the X dictionary. + """ + X.update({'network': self.network}) + return X + + def get_network(self) -> nn.Module: + """ + Return the underlying network object. + Returns: + model : the underlying network object + """ + assert self.network is not None, "No network was initialized" + return self.network + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + This common utility makes sure that the input dictionary X, + used to fit a given component class, contains the minimum information + to fit the given component, and it's parents + """ + + # Honor the parent requirements + super().check_requirements(X, y) + + def get_network_weights(self) -> torch.nn.parameter.Parameter: + """Returns the weights of the network""" + assert self.network is not None, "No network was initialized" + return self.network.parameters() + + def to(self, device: Optional[torch.device] = None) -> None: + """Setups the network in cpu or gpu""" + assert self.network is not None, "No network was initialized" + if device is not None: + self.network = self.network.to(device) + else: + self.network = self.network.to(self.device) + + def predict(self, loader: torch.utils.data.DataLoader) -> torch.Tensor: + """ + Performs batched prediction given a loader object + """ + assert self.network is not None + self.network.eval() + + # Batch prediction + Y_batch_preds = list() + + for i, (X_batch, Y_batch) in enumerate(loader): + # Predict on batch + X_batch = torch.autograd.Variable(X_batch).float().to(self.device) + + Y_batch_pred = self.network(X_batch).detach().cpu() + Y_batch_preds.append(Y_batch_pred) + + return torch.cat(Y_batch_preds, 0).cpu().numpy() + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.network.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('network', None) + info.pop('random_state', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/setup/network/base_network_choice.py b/autoPyTorch/pipeline/components/setup/network/base_network_choice.py new file mode 100644 index 000000000..2f840a508 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/base_network_choice.py @@ -0,0 +1,161 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import 
ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.network.base_network import BaseNetworkComponent + +directory = os.path.split(__file__)[0] +_networks = find_components(__package__, + directory, + BaseNetworkComponent) +_addons = ThirdPartyComponents(BaseNetworkComponent) + + +def add_network(network: BaseNetworkComponent) -> None: + _addons.add_component(network) + + +class NetworkChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available network components + Args: + None + Returns: + Dict[str, autoPyTorchComponent]: all baseNetwork components available + as choices + """ + components = OrderedDict() + components.update(_networks) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of Network + components + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError( + "Trying to include unknown component: {} from {}".format( + incl, + available_comp, + )) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == NetworkChoice or hasattr(entry, 'get_components'): + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here based on dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default component to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. 
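The nesting performed at the end of this method follows the usual ConfigSpace pattern of attaching each component's sub-space under the categorical __choice__ hyperparameter. A minimal standalone sketch (the nested hyperparameter name is illustrative only):

import ConfigSpace.hyperparameters as CSH
from ConfigSpace.configuration_space import ConfigurationSpace

cs = ConfigurationSpace()
choice = CSH.CategoricalHyperparameter('__choice__', ['BackboneHeadNet'])
cs.add_hyperparameter(choice)

sub_cs = ConfigurationSpace()
sub_cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('num_layers', 1, 4))

# nested names become 'BackboneHeadNet:num_layers' and are only active
# while __choice__ == 'BackboneHeadNet'
cs.add_configuration_space('BackboneHeadNet', sub_cs,
                           parent_hyperparameter={'parent': choice,
                                                  'value': 'BackboneHeadNet'})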
+ exclude: Optional[Dict[str, Any]]: which components to skip + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_networks = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_networks) == 0: + raise ValueError("No Network found") + + if default is None: + defaults = ['BackboneHeadNet'] + for default_ in defaults: + if default_ in available_networks: + default = default_ + break + + network = CSH.CategoricalHyperparameter( + '__choice__', + list(available_networks.keys()), + default_value=default + ) + cs.add_hyperparameter(network) + for name in available_networks: + network_configuration_space = available_networks[name]. \ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': network, 'value': name} + cs.add_configuration_space( + name, + network_configuration_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network/head/__init__.py b/autoPyTorch/pipeline/components/setup/network/head/__init__.py new file mode 100644 index 000000000..dc07a268a --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/head/__init__.py @@ -0,0 +1,26 @@ +from collections import OrderedDict +from typing import Dict, Type + +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents +) +from autoPyTorch.pipeline.components.setup.network.head.base_head import BaseHead +from autoPyTorch.pipeline.components.setup.network.head.fully_connected import FullyConnectedHead +from autoPyTorch.pipeline.components.setup.network.head.fully_convolutional import FullyConvolutional2DHead + +_heads = { + FullyConnectedHead.get_name(): FullyConnectedHead, + FullyConvolutional2DHead.get_name(): FullyConvolutional2DHead +} +_addons = ThirdPartyComponents(BaseHead) + + +def add_head(head: BaseHead) -> None: + _addons.add_component(head) + + +def get_available_heads() -> Dict[str, Type[BaseHead]]: + heads = OrderedDict() + heads.update(_heads) + heads.update(_addons.components) + return heads diff --git a/autoPyTorch/pipeline/components/setup/network/head/base_head.py b/autoPyTorch/pipeline/components/setup/network/head/base_head.py new file mode 100644 index 000000000..c4d17fd5f --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/head/base_head.py @@ -0,0 +1,45 @@ +from abc import abstractmethod +from typing import Any, Dict, Set, Tuple + +import torch.nn as nn + +from autoPyTorch.pipeline.components.base_component import BaseEstimator, autoPyTorchComponent + + +class BaseHead(autoPyTorchComponent): + """ + Head base class + """ + supported_tasks: Set = set() + + def __init__(self, + **kwargs: Any): + super().__init__() + self.head: nn.Module = None + self.config = kwargs + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator: + """ + Not used. Just for API compatibility. 
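The head registry introduced in head/__init__.py above is keyed by each head's shortname; a quick look (the expected output is an assumption based on the two built-in heads):

from autoPyTorch.pipeline.components.setup.network.head import (
    add_head,
    get_available_heads,
)

print(list(get_available_heads()))
# expected: ['FullyConnectedHead', 'FullyConvolutionalHead']
# third-party heads can be registered with add_head(<your BaseHead subclass>)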
+ """ + return self + + @abstractmethod + def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> nn.Module: + """ + + Builds the head module and assigns it to self.head + + :param input_shape: shape of the input (usually the shape of the backbone output) + :param output_shape: shape of the output + :return: the head module + """ + raise NotImplementedError() + + @classmethod + def get_name(cls) -> str: + """ + Get the name of the head + :return: name of the head + """ + return cls.get_properties()["shortname"] diff --git a/autoPyTorch/pipeline/components/setup/network/head/fully_connected.py b/autoPyTorch/pipeline/components/setup/network/head/fully_connected.py new file mode 100644 index 000000000..b17f390a0 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/head/fully_connected.py @@ -0,0 +1,76 @@ +from typing import Any, Dict, Optional, Tuple + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformIntegerHyperparameter + +import numpy as np + +from torch import nn + +from autoPyTorch.pipeline.components.setup.network.head.base_head import BaseHead + +_activations: Dict[str, nn.Module] = { + "relu": nn.ReLU, + "tanh": nn.Tanh, + "sigmoid": nn.Sigmoid +} + + +class FullyConnectedHead(BaseHead): + """ + Standard head consisting of a number of fully connected layers. + Flattens any input in a array of shape [B, prod(input_shape)]. + """ + supported_tasks = {"tabular_classification", "tabular_regression", + "image_classification", "image_regression", + "time_series_classification", "time_series_regression"} + + def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> nn.Module: + layers = [nn.Flatten()] + in_features = np.prod(input_shape).item() + for i in range(1, self.config["num_layers"]): + layers.append(nn.Linear(in_features=in_features, + out_features=self.config[f"layer_{i}_units"])) + layers.append(_activations[self.config["activation"]]()) + in_features = self.config[f"layer_{i}_units"] + out_features = np.prod(output_shape).item() + layers.append(nn.Linear(in_features=in_features, + out_features=out_features)) + return nn.Sequential(*layers) + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'FullyConnectedHead', + 'name': 'FullyConnectedHead', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + min_num_layers: int = 1, + max_num_layers: int = 4, + min_num_units: int = 64, + max_num_units: int = 512) -> ConfigurationSpace: + cs = ConfigurationSpace() + + num_layers_hp = UniformIntegerHyperparameter("num_layers", + lower=min_num_layers, + upper=max_num_layers) + + activation_hp = CategoricalHyperparameter("activation", + choices=list(_activations.keys())) + + cs.add_hyperparameters([num_layers_hp, activation_hp]) + cs.add_condition(CS.GreaterThanCondition(activation_hp, num_layers_hp, 1)) + + for i in range(1, max_num_layers): + + num_units_hp = UniformIntegerHyperparameter(f"layer_{i}_units", + lower=min_num_units, + upper=max_num_units) + cs.add_hyperparameter(num_units_hp) + if i >= min_num_layers: + cs.add_condition(CS.GreaterThanCondition(num_units_hp, num_layers_hp, i)) + + return cs diff --git a/autoPyTorch/pipeline/components/setup/network/head/fully_convolutional.py b/autoPyTorch/pipeline/components/setup/network/head/fully_convolutional.py new 
file mode 100644 index 000000000..54ea887ec --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/head/fully_convolutional.py @@ -0,0 +1,104 @@ +from typing import Any, Dict, List, Optional, Tuple + +import ConfigSpace as CS +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import CategoricalHyperparameter, UniformIntegerHyperparameter + +import torch +from torch import nn + +from autoPyTorch.pipeline.components.setup.network.head.base_head import BaseHead + +_activations: Dict[str, nn.Module] = { + "relu": nn.ReLU, + "tanh": nn.Tanh, + "sigmoid": nn.Sigmoid +} + + +class _FullyConvolutional2DHead(nn.Module): + def __init__(self, + input_shape: Tuple[int, ...], + output_shape: Tuple[int, ...], + pooling_method: str, + activation: str, + num_layers: int, + num_channels: List[int]): + super().__init__() + + layers = [] + in_channels = input_shape[0] + for i in range(1, num_layers): + layers.append(nn.Conv2d(in_channels=in_channels, + out_channels=num_channels[i - 1], + kernel_size=1)) + layers.append(_activations[activation]()) + in_channels = num_channels[i - 1] + out_channels = output_shape[0] + layers.append(nn.Conv2d(in_channels=in_channels, + out_channels=out_channels, + kernel_size=1)) + if pooling_method == "average": + layers.append(nn.AdaptiveAvgPool2d(output_size=1)) + else: + layers.append(nn.AdaptiveMaxPool2d(output_size=1)) + self.head = nn.Sequential(*layers) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + B, C, H, W = x.size() + return self.head(x).view(B, -1) + + +class FullyConvolutional2DHead(BaseHead): + """ + Head consisting of a number of 2d convolutional connected layers. + Applies a global pooling operation in the end. + """ + supported_tasks = {"image_classification", "image_regression"} + + def build_head(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> nn.Module: + return _FullyConvolutional2DHead(input_shape=input_shape, + output_shape=output_shape, + pooling_method=self.config["pooling_method"], + activation=self.config.get("activation", None), + num_layers=self.config["num_layers"], + num_channels=[self.config[f"layer_{i}_filters"] + for i in range(1, self.config["num_layers"])]) + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'FullyConvolutionalHead', + 'name': 'FullyConvolutionalHead', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + min_num_layers: int = 1, + max_num_layers: int = 4, + min_num_filters: int = 16, + max_num_filters: int = 256) -> ConfigurationSpace: + cs = ConfigurationSpace() + + num_layers_hp = UniformIntegerHyperparameter("num_layers", + lower=min_num_layers, + upper=max_num_layers) + + pooling_method_hp = CategoricalHyperparameter("pooling_method", + choices=["average", "max"]) + + activation_hp = CategoricalHyperparameter('activation', + choices=list(_activations.keys())) + + cs.add_hyperparameters([num_layers_hp, pooling_method_hp, activation_hp]) + cs.add_condition(CS.GreaterThanCondition(activation_hp, num_layers_hp, 1)) + + for i in range(1, max_num_layers): + num_filters_hp = UniformIntegerHyperparameter(f"layer_{i}_filters", + lower=min_num_filters, + upper=max_num_filters) + cs.add_hyperparameter(num_filters_hp) + if i >= min_num_layers: + cs.add_condition(CS.GreaterThanCondition(num_filters_hp, num_layers_hp, i)) + + return cs diff --git 
a/autoPyTorch/pipeline/components/setup/network/utils.py b/autoPyTorch/pipeline/components/setup/network/utils.py new file mode 100644 index 000000000..8ae1f0f39 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network/utils.py @@ -0,0 +1,281 @@ +import random +import typing +import warnings + +import torch +from torch.autograd import Function + +__author__ = "Max Dippel, Michael Burkart and Matthias Urban" +__version__ = "0.0.1" +__license__ = "BSD" + + +class ShakeShakeFunction(Function): + @staticmethod + def forward( + ctx: typing.Any, # No typing for AutogradContext + x1: torch.Tensor, + x2: torch.Tensor, + alpha: torch.tensor, + beta: torch.tensor, + ) -> torch.Tensor: + ctx.save_for_backward(x1, x2, alpha, beta) + + y = x1 * alpha + x2 * (1 - alpha) + return y + + @staticmethod + def backward(ctx: typing.Any, + grad_output: torch.Tensor + ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + x1, x2, alpha, beta = ctx.saved_variables + grad_x1 = grad_x2 = grad_alpha = grad_beta = None + + if ctx.needs_input_grad[0]: + grad_x1 = grad_output * beta + if ctx.needs_input_grad[1]: + grad_x2 = grad_output * (1 - beta) + + return grad_x1, grad_x2, grad_alpha, grad_beta + + +shake_shake = ShakeShakeFunction.apply + + +class ShakeDropFunction(Function): + @staticmethod + def forward(ctx: typing.Any, + x: torch.tensor, + alpha: torch.tensor, + beta: torch.tensor, + bl: torch.tensor, + ) -> torch.Tensor: + ctx.save_for_backward(x, alpha, beta, bl) + + y = (bl + alpha - bl * alpha) * x + return y + + @staticmethod + def backward(ctx: typing.Any, + grad_output: torch.Tensor + ) -> typing.Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + x, alpha, beta, bl = ctx.saved_variables + grad_x = grad_alpha = grad_beta = grad_bl = None + + if ctx.needs_input_grad[0]: + grad_x = grad_output * (bl + beta - bl * beta) + + return grad_x, grad_alpha, grad_beta, grad_bl + + +shake_drop = ShakeDropFunction.apply + + +def shake_get_alpha_beta(is_training: bool, is_cuda: bool + ) -> typing.Tuple[torch.tensor, torch.tensor]: + if is_training: + result = (torch.FloatTensor([0.5]), torch.FloatTensor([0.5])) + return result if not is_cuda else (result[0].cuda(), result[1].cuda()) + + # TODO implement other update methods + alpha = torch.rand(1) + beta = torch.rand(1) + + if is_cuda: + alpha = alpha.cuda() + beta = beta.cuda() + + return alpha, beta + + +def shake_drop_get_bl( + block_index: int, + min_prob_no_shake: float, + num_blocks: int, + is_training: bool, + is_cuda: bool +) -> torch.tensor: + pl = 1 - ((block_index + 1) / num_blocks) * (1 - min_prob_no_shake) + + if not is_training: + bl = torch.tensor(1.0) if random.random() <= pl else torch.tensor(0.0) + if is_training: + bl = torch.tensor(pl) + + if is_cuda: + bl = bl.cuda() + + return bl + + +def get_shaped_neuron_counts( + shape: str, + in_feat: int, + out_feat: int, + max_neurons: int, + layer_count: int +) -> typing.List[int]: + counts = [] # type: typing.List[int] + + if (layer_count <= 0): + return counts + + if (layer_count == 1): + counts.append(out_feat) + return counts + + max_neurons = max(in_feat, max_neurons) + # https://mikkokotila.github.io/slate/#shapes + + if shape == 'brick': + # + # | | + # | | + # | | + # | | + # | | + # |___ ___| + # + for _ in range(layer_count - 1): + counts.append(max_neurons) + counts.append(out_feat) + + if shape == 'triangle': + # + # / \ + # / \ + # / \ + # / \ + # / \ + # /_____ _____\ + # + previous = in_feat + step_size = int((max_neurons - previous) / (layer_count 
- 1)) + step_size = max(0, step_size) + for _ in range(layer_count - 2): + previous = previous + step_size + counts.append(previous) + counts.append(max_neurons) + counts.append(out_feat) + + if shape == 'funnel': + # + # \ / + # \ / + # \ / + # \ / + # \ / + # \ / + # + previous = max_neurons + counts.append(previous) + + step_size = int((previous - out_feat) / (layer_count - 1)) + step_size = max(0, step_size) + for _ in range(layer_count - 2): + previous = previous - step_size + counts.append(previous) + + counts.append(out_feat) + + if shape == 'long_funnel': + # + # | | + # | | + # | | + # \ / + # \ / + # \ / + # + brick_layer = int(layer_count / 2) + funnel_layer = layer_count - brick_layer + counts.extend(get_shaped_neuron_counts( + 'brick', in_feat, max_neurons, max_neurons, brick_layer)) + counts.extend(get_shaped_neuron_counts( + 'funnel', in_feat, out_feat, max_neurons, funnel_layer)) + + if (len(counts) != layer_count): + warnings.warn("\nWarning: long funnel layer count does not match " + "" + str(layer_count) + " != " + str(len(counts)) + "\n") + + if shape == 'diamond': + # + # / \ + # / \ + # / \ + # \ / + # \ / + # \ / + # + triangle_layer = int(layer_count / 2) + 1 + funnel_layer = layer_count - triangle_layer + counts.extend(get_shaped_neuron_counts( + 'triangle', in_feat, max_neurons, max_neurons, triangle_layer)) + remove_triangle_layer = len(counts) > 1 + if (remove_triangle_layer): + # remove the last two layers since max_neurons == out_features + # (-> two layers with the same size) + counts = counts[0:-2] + counts.extend(get_shaped_neuron_counts( + 'funnel', + max_neurons, + out_feat, + max_neurons, + funnel_layer + (2 if remove_triangle_layer else 0))) + + if (len(counts) != layer_count): + warnings.warn("\nWarning: diamond layer count does not match " + "" + str(layer_count) + " != " + str(len(counts)) + "\n") + + if shape == 'hexagon': + # + # / \ + # / \ + # | | + # | | + # \ / + # \ / + # + triangle_layer = int(layer_count / 3) + 1 + funnel_layer = triangle_layer + brick_layer = layer_count - triangle_layer - funnel_layer + counts.extend(get_shaped_neuron_counts( + 'triangle', in_feat, max_neurons, max_neurons, triangle_layer)) + counts.extend(get_shaped_neuron_counts( + 'brick', max_neurons, max_neurons, max_neurons, brick_layer)) + counts.extend(get_shaped_neuron_counts( + 'funnel', max_neurons, out_feat, max_neurons, funnel_layer)) + + if (len(counts) != layer_count): + warnings.warn("\nWarning: hexagon layer count does not match " + "" + str(layer_count) + " != " + str(len(counts)) + "\n") + + if shape == 'stairs': + # + # | | + # |_ _| + # | | + # |_ _| + # | | + # | | + # + previous = max_neurons + counts.append(previous) + + if layer_count % 2 == 1: + counts.append(previous) + + step_size = 2 * int((max_neurons - out_feat) / (layer_count - 1)) + step_size = max(0, step_size) + for _ in range(int(layer_count / 2 - 1)): + previous = previous - step_size + counts.append(previous) + counts.append(previous) + + counts.append(out_feat) + + if (len(counts) != layer_count): + warnings.warn("\nWarning: stairs layer count does not match " + "" + str(layer_count) + " != " + str(len(counts)) + "\n") + + return counts diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/KaimingInit.py b/autoPyTorch/pipeline/components/setup/network_initializer/KaimingInit.py new file mode 100644 index 000000000..8e1867856 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network_initializer/KaimingInit.py @@ -0,0 +1,32 @@ +from typing import Callable + 
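The initializers defined in this new package all return a closure intended for torch.nn.Module.apply. A usage sketch for the Kaiming variant that follows (illustrative network; 'Zero' is the bias strategy the closure checks for):

import torch.nn as nn

from autoPyTorch.pipeline.components.setup.network_initializer.KaimingInit import KaimingInit

net = nn.Sequential(nn.Linear(10, 32), nn.ReLU(), nn.Linear(32, 2))
initializer = KaimingInit(bias_strategy='Zero')
# apply() visits every submodule: Linear/Conv weights get kaiming_normal_,
# biases are zeroed because bias_strategy == 'Zero'
net.apply(initializer.weights_init())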
+import torch + +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( + BaseNetworkInitializerComponent +) + + +class KaimingInit(BaseNetworkInitializerComponent): + """ + Fills the input Tensor with values according to the method described in + Delving deep into rectifiers: Surpassing human-level performance on + ImageNet classification + """ + + def weights_init(self) -> Callable: + """Returns the actual PyTorch model, that is dynamically created + from a self.config object. + + self.config is a dictionary created form a given config in the config space. + It contains the necessary information to build a network. + """ + def initialization(m: torch.nn.Module) -> None: + if isinstance(m, (torch.nn.Conv1d, + torch.nn.Conv2d, + torch.nn.Conv3d, + torch.nn.Linear)): + torch.nn.init.kaiming_normal_(m.weight.data) + if m.bias is not None and self.bias_strategy == 'Zero': + torch.nn.init.constant_(m.bias.data, 0.0) + return initialization diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/NoInit.py b/autoPyTorch/pipeline/components/setup/network_initializer/NoInit.py new file mode 100644 index 000000000..38a1aeec7 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network_initializer/NoInit.py @@ -0,0 +1,24 @@ +from typing import Callable + +import torch + +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( + BaseNetworkInitializerComponent +) + + +class NoInit(BaseNetworkInitializerComponent): + """ + No initialization on the weights/bias + """ + + def weights_init(self) -> Callable: + """Returns the actual PyTorch model, that is dynamically created + from a self.config object. + + self.config is a dictionary created form a given config in the config space. + It contains the necessary information to build a network. + """ + def initialization(m: torch.nn.Module) -> None: + pass + return initialization diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/OrthogonalInit.py b/autoPyTorch/pipeline/components/setup/network_initializer/OrthogonalInit.py new file mode 100644 index 000000000..d7a310243 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network_initializer/OrthogonalInit.py @@ -0,0 +1,30 @@ +from typing import Callable + +import torch + +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( + BaseNetworkInitializerComponent +) + + +class OrthogonalInit(BaseNetworkInitializerComponent): + """ + Fills the input Tensor with a (semi) orthogonal matrix + """ + + def weights_init(self) -> Callable: + """Returns the actual PyTorch model, that is dynamically created + from a self.config object. + + self.config is a dictionary created form a given config in the config space. + It contains the necessary information to build a network. 
+ """ + def initialization(m: torch.nn.Module) -> None: + if isinstance(m, (torch.nn.Conv1d, + torch.nn.Conv2d, + torch.nn.Conv3d, + torch.nn.Linear)): + torch.nn.init.orthogonal_(m.weight.data) + if m.bias is not None and self.bias_strategy == 'Zero': + torch.nn.init.constant_(m.bias.data, 0.0) + return initialization diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/SparseInit.py b/autoPyTorch/pipeline/components/setup/network_initializer/SparseInit.py new file mode 100644 index 000000000..1e6dbdbf3 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network_initializer/SparseInit.py @@ -0,0 +1,30 @@ +from typing import Callable + +import torch + +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( + BaseNetworkInitializerComponent +) + + +class SparseInit(BaseNetworkInitializerComponent): + """ + Fills the 2D input Tensor as a sparse matrix + """ + + def weights_init(self) -> Callable: + """Returns the actual PyTorch model, that is dynamically created + from a self.config object. + + self.config is a dictionary created form a given config in the config space. + It contains the necessary information to build a network. + """ + def initialization(m: torch.nn.Module) -> None: + if isinstance(m, (torch.nn.Conv1d, + torch.nn.Conv2d, + torch.nn.Conv3d, + torch.nn.Linear)): + torch.nn.init.sparse_(m.weight.data, 0.9) + if m.bias is not None and self.bias_strategy == 'Zero': + torch.nn.init.constant_(m.bias.data, 0.0) + return initialization diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py b/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py new file mode 100644 index 000000000..240cd4f14 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network_initializer/XavierInit.py @@ -0,0 +1,30 @@ +from typing import Callable + +import torch + +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( + BaseNetworkInitializerComponent +) + + +class XavierInit(BaseNetworkInitializerComponent): + """ + Fills the input Tensor with a (semi) orthogonal matrix + """ + + def weights_init(self) -> Callable: + """Returns the actual PyTorch model, that is dynamically created + from a self.config object. + + self.config is a dictionary created form a given config in the config space. + It contains the necessary information to build a network. 
+ """ + def initialization(m: torch.nn.Module) -> None: + if isinstance(m, (torch.nn.Conv1d, + torch.nn.Conv2d, + torch.nn.Conv3d, + torch.nn.Linear)): + torch.nn.init.xavier_uniform_(m.weight.data) + if m.bias is not None and self.bias_strategy == 'Zero': + torch.nn.init.constant_(m.bias.data, 0.0) + return initialization diff --git a/autoPyTorch/core/presets/image_classification/__init__.py b/autoPyTorch/pipeline/components/setup/network_initializer/__init__.py similarity index 100% rename from autoPyTorch/core/presets/image_classification/__init__.py rename to autoPyTorch/pipeline/components/setup/network_initializer/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/base_network_init_choice.py b/autoPyTorch/pipeline/components/setup/network_initializer/base_network_init_choice.py new file mode 100644 index 000000000..8f4b734d1 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network_initializer/base_network_init_choice.py @@ -0,0 +1,168 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import ( + BaseNetworkInitializerComponent +) + + +directory = os.path.split(__file__)[0] +_initializers = find_components(__package__, + directory, + BaseNetworkInitializerComponent) +_addons = ThirdPartyComponents(BaseNetworkInitializerComponent) + + +def add_network_initializer(initializer: BaseNetworkInitializerComponent) -> None: + _addons.add_component(initializer) + + +class NetworkInitializerChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available initializer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseInitializerComponent components available + as choices + """ + components = OrderedDict() + components.update(_initializers) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of initializer + components + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name 
in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == NetworkInitializerChoice or hasattr(entry, 'get_components'): + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here based on dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default component to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_initializers = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_initializers) == 0: + raise ValueError("No initializers found") + + if default is None: + defaults = ['XavierInit', + ] + for default_ in defaults: + if default_ in available_initializers: + default = default_ + break + + initializer = CSH.CategoricalHyperparameter( + '__choice__', + list(available_initializers.keys()), + default_value=default + ) + cs.add_hyperparameter(initializer) + for name in available_initializers: + initializer_configuration_space = available_initializers[name]. 
\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': initializer, 'value': name} + cs.add_configuration_space( + name, + initializer_configuration_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/pipeline/components/setup/network_initializer/base_network_initializer.py b/autoPyTorch/pipeline/components/setup/network_initializer/base_network_initializer.py new file mode 100644 index 000000000..cf89ab067 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/network_initializer/base_network_initializer.py @@ -0,0 +1,99 @@ +from abc import abstractmethod +from typing import Any, Callable, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, +) + +import numpy as np + +import torch + +from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent +from autoPyTorch.utils.common import FitRequirement + + +class BaseNetworkInitializerComponent(autoPyTorchSetupComponent): + """Provide an abstract interface for weight initialization + strategies in Auto-Pytorch + """ + + def __init__( + self, + bias_strategy: str, + random_state: Optional[np.random.RandomState] = None, + ) -> None: + super().__init__() + self.bias_strategy = bias_strategy + self.random_state = random_state + self.add_fit_requirements([ + FitRequirement('network', (torch.nn.Module,), user_defined=False, dataset_property=False)]) + + def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchSetupComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + # Make sure that input dictionary X has the required + # information to fit this stage + self.check_requirements(X, y) + + X['network'].apply(self.weights_init()) + + return self + + @abstractmethod + def weights_init(self) -> Callable: + """ A weight initialization strategy to be applied to the network. It can be a custom + implementation, a method from torch.init or even pre-trained weights + + Returns: + Callable: a function to apply to each module in the network + """ + raise NotImplementedError() + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """The transform function calls the transform function of the + underlying model and returns the transformed array. 
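For context, here is a minimal sketch of how a third-party strategy could plug into this interface. It is not part of this diff; the class name KaimingInit, its get_properties entries, and the choice of torch.nn.init.kaiming_uniform_ are illustrative assumptions only.

from typing import Callable, Dict, Optional

import torch

from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import (
    add_network_initializer
)
from autoPyTorch.pipeline.components.setup.network_initializer.base_network_initializer import (
    BaseNetworkInitializerComponent
)


class KaimingInit(BaseNetworkInitializerComponent):
    """Hypothetical add-on: Kaiming (He) uniform initialization for conv/linear layers."""

    def weights_init(self) -> Callable:
        # Returned closure is applied to every submodule via network.apply()
        def initialization(m: torch.nn.Module) -> None:
            if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d,
                              torch.nn.Conv3d, torch.nn.Linear)):
                torch.nn.init.kaiming_uniform_(m.weight.data)
                if m.bias is not None and self.bias_strategy == 'Zero':
                    torch.nn.init.constant_(m.bias.data, 0.0)
        return initialization

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict] = None) -> Dict[str, str]:
        return {'shortname': 'KaimingInit', 'name': 'Kaiming Initialization'}


# Registration routes the class through ThirdPartyComponents so that
# NetworkInitializerChoice can offer it alongside the built-in strategies.
add_network_initializer(KaimingInit)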
+ + Args: + X (np.ndarray): input features + + Returns: + np.ndarray: Transformed features + """ + return X + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + min_mlp_layers: int = 1, + max_mlp_layers: int = 15, + dropout: bool = True, + min_num_units: int = 10, + max_num_units: int = 1024, + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + + # The strategy for bias initializations + bias_strategy = CategoricalHyperparameter( + "bias_strategy", choices=['Zero', 'Normal']) + cs.add_hyperparameters([bias_strategy]) + return cs + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('strategy', None) + info.pop('random_state', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamOptimizer.py new file mode 100644 index 000000000..1293444ad --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamOptimizer.py @@ -0,0 +1,96 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, +) + +import numpy as np + +from torch.optim import Adam + +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer import BaseOptimizerComponent + + +class AdamOptimizer(BaseOptimizerComponent): + """ + Implements Adam algorithm. + + Args: + lr (float): learning rate (default: 1e-2) + beta1 (float): coefficients used for computing running averages of gradient + beta2 (float): coefficients used for computing running averages of square + weight_decay (float): weight decay (L2 penalty) + random_state (Optional[np.random.RandomState]): random state + """ + + def __init__( + self, + lr: float, + beta1: float, + beta2: float, + weight_decay: float, + random_state: Optional[np.random.RandomState] = None, + ): + + super().__init__() + self.lr = lr + self.beta1 = beta1 + self.beta2 = beta2 + self.weight_decay = weight_decay + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseOptimizerComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure that input dictionary X has the required + # information to fit this stage + self.check_requirements(X, y) + + self.optimizer = Adam( + params=X['network'].parameters(), + lr=self.lr, + betas=(self.beta1, self.beta2), + weight_decay=self.weight_decay, + ) + + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'Adam', + 'name': 'Adaptive Momentum Optimizer', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + + # The learning rate for the model + lr = UniformFloatHyperparameter('lr', lower=1e-5, upper=1e-1, + default_value=1e-2, log=True) + + beta1 = UniformFloatHyperparameter('beta1', lower=0.85, upper=0.999, + default_value=0.9) + + beta2 = UniformFloatHyperparameter('beta2', lower=0.9, upper=0.9999, + default_value=0.9) + + weight_decay = UniformFloatHyperparameter('weight_decay', lower=0.0, upper=0.1, + default_value=0.0) + + cs.add_hyperparameters([lr, beta1, beta2, weight_decay]) + + return cs diff --git a/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py new file mode 100644 index 000000000..74e0504a7 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/optimizer/AdamWOptimizer.py @@ -0,0 +1,96 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, +) + +import numpy as np + +from torch.optim import AdamW + +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer import BaseOptimizerComponent + + +class AdamWOptimizer(BaseOptimizerComponent): + """ + Implements AdamW algorithm. + + Args: + lr (float): learning rate (default: 1e-2) + beta1 (float): coefficients used for computing running averages of gradient + beta2 (float): coefficients used for computing running averages of square + weight_decay (float): weight decay (L2 penalty) + random_state (Optional[np.random.RandomState]): random state + """ + + def __init__( + self, + lr: float, + beta1: float, + beta2: float, + weight_decay: float, + random_state: Optional[np.random.RandomState] = None, + ): + + super().__init__() + self.lr = lr + self.beta1 = beta1 + self.beta2 = beta2 + self.weight_decay = weight_decay + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseOptimizerComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure that input dictionary X has the required + # information to fit this stage + self.check_requirements(X, y) + + self.optimizer = AdamW( + params=X['network'].parameters(), + lr=self.lr, + betas=(self.beta1, self.beta2), + weight_decay=self.weight_decay, + ) + + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'AdamW', + 'name': 'Adaptive Momentum Optimizer with decouple weight decay', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + + # The learning rate for the model + lr = UniformFloatHyperparameter('lr', lower=1e-5, upper=1e-1, + default_value=1e-2, log=True) + + beta1 = UniformFloatHyperparameter('beta1', lower=0.85, upper=0.999, + default_value=0.9) + + beta2 = UniformFloatHyperparameter('beta2', lower=0.9, upper=0.9999, + default_value=0.9) + + weight_decay = UniformFloatHyperparameter('weight_decay', lower=0.0, upper=0.1, + default_value=0.0) + + cs.add_hyperparameters([lr, beta1, beta2, weight_decay]) + + return cs diff --git a/autoPyTorch/pipeline/components/setup/optimizer/RMSpropOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/RMSpropOptimizer.py new file mode 100644 index 000000000..f589584fb --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/optimizer/RMSpropOptimizer.py @@ -0,0 +1,99 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, +) + +import numpy as np + +from torch.optim import RMSprop + +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer import BaseOptimizerComponent + + +class RMSpropOptimizer(BaseOptimizerComponent): + """ + Implements RMSprop algorithm. + The implementation here takes the square root of the gradient average + before adding epsilon + + Args: + lr (float): learning rate (default: 1e-2) + momentum (float): momentum factor (default: 0) + alpha (float): smoothing constant (default: 0.99) + weight_decay (float): weight decay (L2 penalty) (default: 0) + random_state (Optional[np.random.RandomState]): random state + """ + + def __init__( + self, + lr: float, + momentum: float, + alpha: float, + weight_decay: float, + random_state: Optional[np.random.RandomState] = None, + ): + + super().__init__() + self.lr = lr + self.momentum = momentum + self.alpha = alpha + self.weight_decay = weight_decay + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseOptimizerComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure that input dictionary X has the required + # information to fit this stage + self.check_requirements(X, y) + + self.optimizer = RMSprop( + params=X['network'].parameters(), + lr=self.lr, + alpha=self.alpha, + weight_decay=self.weight_decay, + momentum=self.momentum, + ) + + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'RMSprop', + 'name': 'RMSprop Optimizer', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + + # The learning rate for the model + lr = UniformFloatHyperparameter('lr', lower=1e-5, upper=1e-1, + default_value=1e-2, log=True) + + alpha = UniformFloatHyperparameter('alpha', lower=0.1, upper=0.99, + default_value=0.99) + + weight_decay = UniformFloatHyperparameter('weight_decay', lower=0.0, upper=0.1, + default_value=0.0) + + momentum = UniformFloatHyperparameter('momentum', lower=0.0, upper=0.99, + default_value=0.0) + + cs.add_hyperparameters([lr, alpha, weight_decay, momentum]) + + return cs diff --git a/autoPyTorch/pipeline/components/setup/optimizer/SGDOptimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/SGDOptimizer.py new file mode 100644 index 000000000..831419a39 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/optimizer/SGDOptimizer.py @@ -0,0 +1,90 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, +) + +import numpy as np + +from torch.optim import SGD + +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer import BaseOptimizerComponent + + +class SGDOptimizer(BaseOptimizerComponent): + """ + Implements Stochstic Gradient Descend algorithm. + + Args: + lr (float): learning rate (default: 1e-2) + momentum (float): momentum factor (default: 0) + weight_decay (float): weight decay (L2 penalty) (default: 0) + random_state (Optional[np.random.RandomState]): random state + """ + + def __init__( + self, + lr: float, + momentum: float, + weight_decay: float, + random_state: Optional[np.random.RandomState] = None, + ): + + super().__init__() + self.lr = lr + self.momentum = momentum + self.weight_decay = weight_decay + self.random_state = random_state + + def fit(self, X: Dict[str, Any], y: Any = None) -> BaseOptimizerComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure that input dictionary X has the required + # information to fit this stage + self.check_requirements(X, y) + + self.optimizer = SGD( + params=X['network'].parameters(), + lr=self.lr, + weight_decay=self.weight_decay, + momentum=self.momentum, + ) + + return self + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, Any]] = None) -> Dict[str, str]: + return { + 'shortname': 'SGD', + 'name': 'Stochastic gradient descent (optionally with momentum)', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None, + ) -> ConfigurationSpace: + + cs = ConfigurationSpace() + + # The learning rate for the model + lr = UniformFloatHyperparameter('lr', lower=1e-5, upper=1e-1, + default_value=1e-2, log=True) + + weight_decay = UniformFloatHyperparameter('weight_decay', lower=0.0, upper=0.1, + default_value=0.0) + + momentum = UniformFloatHyperparameter('momentum', lower=0.0, upper=0.99, + default_value=0.0) + + cs.add_hyperparameters([lr, weight_decay, momentum]) + + return cs diff --git a/autoPyTorch/core/presets/image_classification_multiple_datasets/__init__.py b/autoPyTorch/pipeline/components/setup/optimizer/__init__.py similarity index 100% rename from autoPyTorch/core/presets/image_classification_multiple_datasets/__init__.py rename to autoPyTorch/pipeline/components/setup/optimizer/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer.py b/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer.py new file mode 100644 index 000000000..a831e8db8 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer.py @@ -0,0 +1,49 @@ +from typing import Any, Dict, Optional + +import torch +from torch.optim import Optimizer + +from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent +from autoPyTorch.utils.common import FitRequirement + + +class BaseOptimizerComponent(autoPyTorchSetupComponent): + """Provide an abstract interface for Pytorch Optimizers + in Auto-Pytorch""" + + def __init__(self) -> None: + super().__init__() + self.optimizer = None # type: Optional[Optimizer] + self.add_fit_requirements([ + FitRequirement('network', (torch.nn.Module,), user_defined=False, dataset_property=False)]) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """The transform function calls the transform function of the + underlying model and returns the transformed array. + + Args: + X (np.ndarray): input features + + Returns: + np.ndarray: Transformed features + """ + X.update({'optimizer': self.optimizer}) + return X + + def get_optimizer(self) -> Optimizer: + """Return the underlying Optimizer object. 
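As a usage sketch (not part of this diff), an optimizer component is exercised through the fit dictionary. The toy network, the hyperparameter values, and the assumption that the single 'network' requirement is enough to satisfy check_requirements outside a full pipeline run are all illustrative.

import torch

from autoPyTorch.pipeline.components.setup.optimizer.AdamOptimizer import AdamOptimizer

# Toy stand-in for the network normally placed in the fit dictionary
# by the network-building stage of the pipeline.
network = torch.nn.Linear(10, 2)

component = AdamOptimizer(lr=1e-3, beta1=0.9, beta2=0.999, weight_decay=0.0)
X = {'network': network}  # plus whatever else check_requirements expects in a real run

component.fit(X)            # builds torch.optim.Adam over network.parameters()
X = component.transform(X)  # exposes the fitted optimizer to later stages as X['optimizer']
assert isinstance(X['optimizer'], torch.optim.Adam)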
+ Returns: + model : the underlying Optimizer object + """ + assert self.optimizer is not None, "No optimizer was fitted" + return self.optimizer + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.optimizer.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('optimizer', None) + info.pop('random_state', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer_choice.py b/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer_choice.py new file mode 100644 index 000000000..82bacf9d2 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/optimizer/base_optimizer_choice.py @@ -0,0 +1,169 @@ +import os +from collections import OrderedDict +from typing import Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer import BaseOptimizerComponent + +directory = os.path.split(__file__)[0] +_optimizers = find_components(__package__, + directory, + BaseOptimizerComponent) +_addons = ThirdPartyComponents(BaseOptimizerComponent) + + +def add_optimizer(optimizer: BaseOptimizerComponent) -> None: + _addons.add_component(optimizer) + + +class OptimizerChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available optimizer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all BaseOptimizerComponents available + as choices + """ + components = OrderedDict() + components.update(_optimizers) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of Optimizer + components + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == OptimizerChoice or hasattr(entry, 'get_components'): + continue + + # target_type = 
dataset_properties['target_type'] + # Apply some automatic filtering here based on dataset + + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default component to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_optimizers = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_optimizers) == 0: + raise ValueError("No Optimizer found") + + if default is None: + defaults = [ + 'AdamOptimizer', + 'AdamWOptimizer', + 'SGDOptimizer', + 'RMSpropOptimizer' + ] + for default_ in defaults: + if default_ in available_optimizers: + default = default_ + break + + optimizer = CSH.CategoricalHyperparameter( + '__choice__', + list(available_optimizers.keys()), + default_value=default + ) + cs.add_hyperparameter(optimizer) + for name in available_optimizers: + optimizer_configuration_space = available_optimizers[name]. \ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': optimizer, 'value': name} + cs.add_configuration_space( + name, + optimizer_configuration_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) diff --git a/autoPyTorch/data_management/__init__.py b/autoPyTorch/pipeline/components/setup/traditional_ml/__init__.py similarity index 100% rename from autoPyTorch/data_management/__init__.py rename to autoPyTorch/pipeline/components/setup/traditional_ml/__init__.py diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/base_model.py b/autoPyTorch/pipeline/components/setup/traditional_ml/base_model.py new file mode 100644 index 000000000..f9c11a32b --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/base_model.py @@ -0,0 +1,150 @@ +import os +import sys +from abc import abstractmethod +from typing import Any, Dict, Optional, Tuple, Union + +import numpy as np + +import pandas as pd + +import torch + +from autoPyTorch.pipeline.components.setup.base_setup import autoPyTorchSetupComponent +from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models.base_classifier import BaseClassifier +from autoPyTorch.utils.common import FitRequirement + + +# Disable +def blockPrint() -> None: + sys.stdout = open(os.devnull, 'w') + + +# Restore +def enablePrint() -> None: + sys.stdout = sys.__stdout__ + + +class BaseModelComponent(autoPyTorchSetupComponent): + """ + Provide an abstract interface for traditional classification 
methods + in Auto-Pytorch + """ + + def __init__( + self, + random_state: Optional[np.random.RandomState] = None, + model: Optional[BaseClassifier] = None, + device: Optional[torch.device] = None + ) -> None: + super(BaseModelComponent, self).__init__() + self.random_state = random_state + self.fit_output: Dict[str, Any] = dict() + + self.model: Optional[BaseClassifier] = model + + self.add_fit_requirements([ + FitRequirement('X_train', (np.ndarray, list, pd.DataFrame), user_defined=False, dataset_property=False), + FitRequirement('y_train', (np.ndarray, list, pd.Series,), user_defined=False, dataset_property=False), + FitRequirement('train_indices', (np.ndarray, list), user_defined=False, dataset_property=False), + FitRequirement('val_indices', (np.ndarray, list), user_defined=False, dataset_property=False)]) + + def fit(self, X: Dict[str, Any], y: Any = None) -> autoPyTorchSetupComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + # Make sure that input dictionary X has the required + # information to fit this stage + self.check_requirements(X, y) + + if isinstance(X['X_train'], pd.DataFrame): + X['X_train'] = X['X_train'].to_numpy() + + if isinstance(X['y_train'], pd.core.series.Series): + X['y_train'] = X['y_train'].to_numpy() + + input_shape = X['X_train'].shape[1:] + output_shape = X['y_train'].shape + + # instantiate model + self.model = self.build_model(input_shape=input_shape, + output_shape=output_shape) + + # train model + blockPrint() + self.fit_output = self.model.fit(X['X_train'][X['train_indices']], X['y_train'][X['train_indices']], + X['X_train'][X['val_indices']], X['y_train'][X['val_indices']]) + enablePrint() + + # infer + if 'X_test' in X.keys() and X['X_test'] is not None: + if isinstance(X['X_test'], pd.DataFrame): + X['X_test'] = X['X_test'].to_numpy() + test_preds = self.model.predict(X_test=X['X_test'], predict_proba=True) + self.fit_output["test_preds"] = test_preds + return self + + @abstractmethod + def build_model(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> BaseClassifier: + """ + This method returns a pytorch model, that is dynamically built using + a self.config that is model specific, and contains the additional + configuration hyperparameters to build a domain specific model + """ + raise NotImplementedError() + + def predict(self, X_test: Union[pd.DataFrame, np.ndarray]) -> np.ndarray: + assert self.model is not None, "Cant predict without fitting first" + if isinstance(X_test, pd.DataFrame): + X_test = X_test.to_numpy() + return self.model.predict(X_test=X_test).reshape((-1, 1)) + + def predict_proba(self, X_test: Union[pd.DataFrame, np.ndarray]) -> np.ndarray: + assert self.model is not None, "Cant predict without fitting first" + if isinstance(X_test, pd.DataFrame): + X_test = X_test.to_numpy() + return self.model.predict(X_test, predict_proba=True) + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """ + The transform function updates the model in the X dictionary. + """ + X.update({'model': self.model}) + X.update({'results': self.fit_output}) + return X + + def get_model(self) -> BaseClassifier: + """ + Return the underlying model object. 
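For illustration only (not part of this diff), the fit dictionary consumed by BaseModelComponent.fit might look as follows; the shapes and index ranges are arbitrary assumptions.

import numpy as np

# The keys below correspond to the FitRequirements declared above.
X = {
    'X_train': np.random.rand(100, 4),
    'y_train': np.random.randint(0, 2, size=100),
    'train_indices': np.arange(80),       # rows used to fit the classifier
    'val_indices': np.arange(80, 100),    # rows used for validation / early stopping
}
# BaseModelComponent.fit(X) then:
#   1. builds a concrete classifier via build_model(input_shape=(4,), output_shape=(100,)),
#   2. trains it on the train split and evaluates it on the validation split,
#   3. stores the resulting scores and predictions in self.fit_output, and
#   4. transform(X) later publishes them under X['model'] and X['results'].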
+ Returns: + model : the underlying model object + """ + assert self.model is not None, "No model was initialized" + return self.model + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + This common utility makes sure that the input dictionary X, + used to fit a given component class, contains the minimum information + to fit the given component, and it's parents + """ + + # Honor the parent requirements + super().check_requirements(X, y) + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.model.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('model', None) + info.pop('random_state', None) + info.pop('fit_output', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/base_model_choice.py b/autoPyTorch/pipeline/components/setup/traditional_ml/base_model_choice.py new file mode 100644 index 000000000..3512fa6ce --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/base_model_choice.py @@ -0,0 +1,162 @@ +import os +from collections import OrderedDict +from typing import Any, Dict, List, Optional + +import ConfigSpace.hyperparameters as CSH +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.setup.traditional_ml.base_model import BaseModelComponent + + +directory = os.path.split(__file__)[0] +_models = find_components(__package__, + directory, + BaseModelComponent) +_addons = ThirdPartyComponents(BaseModelComponent) + + +def add_model(model: BaseModelComponent) -> None: + _addons.add_component(model) + + +class ModelChoice(autoPyTorchChoice): + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available model components + Args: + None + Returns: + Dict[str, autoPyTorchComponent]: all baseNetwork components available + as choices + """ + components = OrderedDict() + components.update(_models) + components.update(_addons.components) + return components + + def get_available_components( + self, + dataset_properties: Optional[Dict[str, str]] = None, + include: List[str] = None, + exclude: List[str] = None, + ) -> Dict[str, autoPyTorchComponent]: + """Filters out components based on user provided + include/exclude directives, as well as the dataset properties + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + Returns: + Dict[str, autoPyTorchComponent]: A filtered dict of Network + components + """ + if dataset_properties is None: + dataset_properties = {} + + if include is not None and exclude is not None: + raise ValueError( + "The argument include and exclude cannot be used together.") + + available_comp = self.get_components() + + if include is not None: + for incl in include: + if incl not in available_comp: + raise ValueError("Trying to include unknown component: " + "%s" % incl) + + components_dict = OrderedDict() + for name in available_comp: + if include 
is not None and name not in include: + continue + elif exclude is not None and name in exclude: + continue + + entry = available_comp[name] + + # Exclude itself to avoid infinite loop + if entry == ModelChoice or hasattr(entry, 'get_components'): + continue + + # target_type = dataset_properties['target_type'] + # Apply some automatic filtering here based on dataset + components_dict[name] = entry + + return components_dict + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default component to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal preprocessors for this problem + available_models = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_models) == 0: + raise ValueError("No Network found") + + if default is None: + defaults: List[Any] = [] + for default_ in defaults: + if default_ in available_models: + default = default_ + break + + model = CSH.CategoricalHyperparameter( + '__choice__', + list(available_models.keys()), + default_value=default + ) + cs.add_hyperparameter(model) + for name in available_models: + model_configuration_space = available_models[name]. 
\ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': model, 'value': name} + cs.add_configuration_space( + name, + model_configuration_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call transform before the object is initialized" + return self.choice.transform(X) + + def predict_proba(self, X: np.ndarray) -> np.ndarray: + assert self.choice is not None, "Cannot call predict before the object is initialized" + return self.choice.predict_proba(X) diff --git a/autoPyTorch/components/baselines/baseline_configs/catboost.json b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/catboost.json similarity index 100% rename from autoPyTorch/components/baselines/baseline_configs/catboost.json rename to autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/catboost.json diff --git a/autoPyTorch/components/baselines/baseline_configs/extra_trees.json b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/extra_trees.json similarity index 100% rename from autoPyTorch/components/baselines/baseline_configs/extra_trees.json rename to autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/extra_trees.json diff --git a/autoPyTorch/components/baselines/baseline_configs/knn.json b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/knn.json similarity index 100% rename from autoPyTorch/components/baselines/baseline_configs/knn.json rename to autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/knn.json diff --git a/autoPyTorch/components/baselines/baseline_configs/lgb.json b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/lgb.json similarity index 73% rename from autoPyTorch/components/baselines/baseline_configs/lgb.json rename to autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/lgb.json index 58dd09cbb..048fb5962 100644 --- a/autoPyTorch/components/baselines/baseline_configs/lgb.json +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/lgb.json @@ -6,7 +6,5 @@ "feature_fraction" : 0.9, "boosting_type" : "gbdt", "learning_rate" : 0.03, - "objective" : "multiclass", - "metric" : "multi_error,multi_logloss", "num_threads" : -1 } diff --git a/autoPyTorch/components/baselines/baseline_configs/random_forest.json b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/random_forest.json similarity index 100% rename from autoPyTorch/components/baselines/baseline_configs/random_forest.json rename to autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/random_forest.json diff --git a/autoPyTorch/components/baselines/baseline_configs/rotation_forest.json b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/rotation_forest.json similarity index 100% rename from autoPyTorch/components/baselines/baseline_configs/rotation_forest.json rename to autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/rotation_forest.json diff --git a/autoPyTorch/components/baselines/baseline_configs/svm.json b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/svm.json similarity index 100% rename from autoPyTorch/components/baselines/baseline_configs/svm.json rename to 
autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs/svm.json diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/__init__.py b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/__init__.py new file mode 100644 index 000000000..c973fef00 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/__init__.py @@ -0,0 +1,33 @@ +from typing import Any, Dict, Type, Union + +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, +) +from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models.base_classifier import BaseClassifier +from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models.classifiers import ( + CatboostModel, + ExtraTreesModel, + KNNModel, + LGBModel, + RFModel, + SVMModel) + +_classifiers = { + 'catboost': CatboostModel, + 'extra_trees': ExtraTreesModel, + 'knn_classifier': KNNModel, + 'lgb': LGBModel, + 'random_forest': RFModel, + 'svm_classifier': SVMModel +} +_addons = ThirdPartyComponents(BaseClassifier) + + +def add_classifier(classifier: BaseClassifier) -> None: + _addons.add_component(classifier) + + +def get_available_classifiers() -> Dict[str, Union[Type[BaseClassifier], Any]]: + classifiers = dict() + classifiers.update(_classifiers) + return classifiers diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/base_classifier.py b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/base_classifier.py new file mode 100644 index 000000000..67d905aa5 --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/base_classifier.py @@ -0,0 +1,81 @@ +import json +import logging +import os as os +from abc import abstractmethod +from typing import Any, Dict, List, Optional + +import numpy as np + +from autoPyTorch.metrics import accuracy + + +class BaseClassifier(): + """ + Base class for classifiers. + """ + + def __init__(self, name: str = ''): + + self.configure_logging() + + self.name = name + self.config = self.get_config() + + self.categoricals: np.ndarray = np.array(()) + self.all_nan: np.ndarray = np.array(()) + self.encode_dicts: List = [] + self.num_classes: Optional[int] = None + + self.metric = accuracy + + def configure_logging(self) -> None: + """ + Setup self.logger + """ + self.logger = logging.getLogger(__name__) + self.logger.setLevel(logging.INFO) + + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + self.logger.addHandler(ch) + + def get_config(self) -> Dict[str, Any]: + """ + Load the parameters for the classifier model from ../classifier_configs/modelname.json. + """ + dirname = os.path.dirname(os.path.abspath(__file__)) + config_path = os.path.join(dirname, "../classifier_configs", self.name + ".json") + with open(config_path, "r") as f: + config = json.load(f) + for k, v in config.items(): + if v == "True": + config[k] = True + if v == "False": + config[k] = False + return config + + @abstractmethod + def fit(self, + X_train: np.ndarray, + y_train: np.ndarray, + X_val: np.ndarray, + y_val: np.ndarray) -> Dict[str, Any]: + """ + Fit the model (possible using the validation set for early stopping) and + return the results on the training and validation set. + """ + raise NotImplementedError + + @abstractmethod + def score(self, X_test: np.ndarray, y_test: np.ndarray) -> float: + """ + Score the model performance on a test set. 
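A small illustrative snippet (not part of this diff) showing the registry defined above and the naming convention that ties each classifier to its JSON config.

from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models import (
    get_available_classifiers
)

# The name passed to BaseClassifier.__init__ doubles as the config file name:
# e.g. name="lgb" loads classifier_configs/lgb.json, and get_config() converts
# the stringified "True"/"False" values back to booleans.
print(sorted(get_available_classifiers()))
# ['catboost', 'extra_trees', 'knn_classifier', 'lgb', 'random_forest', 'svm_classifier']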
+ """ + raise NotImplementedError + + @abstractmethod + def predict(self, X_test: np.ndarray, predict_proba: bool = False) -> np.ndarray: + """ + predict the model performance on a test set. + """ + raise NotImplementedError diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/classifiers.py b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/classifiers.py new file mode 100644 index 000000000..bb1f6d2ac --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/classifier_models/classifiers.py @@ -0,0 +1,386 @@ +import tempfile +from typing import Any, Dict, List, Optional, Union + +from catboost import CatBoostClassifier, Pool + +from lightgbm import LGBMClassifier + +import numpy as np + +import pandas as pd + +from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier +from sklearn.neighbors import KNeighborsClassifier +from sklearn.svm import SVC + +from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models.base_classifier import BaseClassifier + + +def encode_categoricals(X_train: np.ndarray, + X_val: Optional[np.ndarray] = None, + encode_dicts: Optional[List] = None + ) -> Union[np.ndarray, Optional[np.ndarray], Optional[List]]: + if encode_dicts is None: + encode_dicts = [] + got_encoded_dicts = False + else: + got_encoded_dicts = True + + for ind in range(X_train.shape[1]): + if isinstance(X_train[0, ind], str): + uniques = np.unique(X_train[0, :]) + + if got_encoded_dicts: + cat_to_int_dict = encode_dicts[ind] + else: + cat_to_int_dict = {val: ind for ind, val in enumerate(uniques)} + + converted_column_train = [cat_to_int_dict[v] for v in X_train[0, :]] + X_train[0, :] = converted_column_train + + if X_val is not None: + converted_column_val = [cat_to_int_dict[v] for v in X_val[0, :]] + X_val[0, :] = converted_column_val + + if not got_encoded_dicts: + encode_dicts.append(cat_to_int_dict) + return X_train, X_val, encode_dicts + + +class LGBModel(BaseClassifier): + + def __init__(self) -> None: + super(LGBModel, self).__init__(name="lgb") + + def fit(self, X_train: np.ndarray, + y_train: np.ndarray, + X_val: np.ndarray, + y_val: np.ndarray, + categoricals: np.ndarray = np.array(())) -> Dict[str, Any]: + + results = dict() + + self.num_classes = len(np.unique(y_train)) if len(np.unique(y_train)) != 2 else 1 # this fixes a bug + self.config["num_class"] = self.num_classes + + early_stopping = 150 if X_train.shape[0] > 10000 else max(round(150 * 10000 / X_train.shape[0]), 10) + self.config["early_stopping_rounds"] = early_stopping + + self.all_nan = np.all(pd.isnull(X_train), axis=0) + X_train = X_train[:, ~self.all_nan] + X_val = X_val[:, ~self.all_nan] + + X_train = np.nan_to_num(X_train) + X_val = np.nan_to_num(X_val) + + self.model = LGBMClassifier(**self.config) + self.model.fit(X_train, y_train, eval_set=[(X_val, y_val)]) + + pred_train = self.model.predict_proba(X_train) + pred_val = self.model.predict_proba(X_val) + + results["val_preds"] = pred_val.tolist() + results["labels"] = y_val.tolist() + + pred_train = np.argmax(pred_train, axis=1) + pred_val = np.argmax(pred_val, axis=1) + + results["train_score"] = self.metric(y_train, pred_train) + results["val_score"] = self.metric(y_val, pred_val) + + return results + + def score(self, X_test: np.ndarray, y_test: Union[np.ndarray, List]) -> float: + y_pred = self.predict(X_test) + return self.metric(y_test, y_pred) + + def predict(self, X_test: np.ndarray, predict_proba: bool = False) -> np.ndarray: + X_test = X_test[:, 
~self.all_nan] + X_test = np.nan_to_num(X_test) + if predict_proba: + y_pred_proba = self.model.predict_proba(X_test) + if self.num_classes == 2: + y_pred_proba = y_pred_proba.transpose()[0:len(X_test)] + return y_pred_proba + + y_pred = self.model.predict(X_test) + return y_pred + + +class CatboostModel(BaseClassifier): + + def __init__(self) -> None: + super(CatboostModel, self).__init__(name="catboost") + self.config["train_dir"] = tempfile.gettempdir() + + def fit(self, X_train: np.ndarray, + y_train: np.ndarray, + X_val: np.ndarray, + y_val: np.ndarray, + categoricals: np.ndarray = np.array(())) -> Dict[str, Any]: + + results = dict() + + categoricals = [ind for ind in range(X_train.shape[1]) if isinstance(X_train[0, ind], str)] + + self.all_nan = np.all(pd.isnull(X_train), axis=0) + X_train = X_train[:, ~self.all_nan] + X_val = X_val[:, ~self.all_nan] + + X_train = np.nan_to_num(X_train) + X_val = np.nan_to_num(X_val) + + early_stopping = 150 if X_train.shape[0] > 10000 else max(round(150 * 10000 / X_train.shape[0]), 10) + + X_train_pooled = Pool(data=X_train, label=y_train, cat_features=categoricals) + X_val_pooled = Pool(data=X_val, label=y_val, cat_features=categoricals) + + self.model = CatBoostClassifier(**self.config) + self.model.fit(X_train_pooled, eval_set=X_val_pooled, use_best_model=True, early_stopping_rounds=early_stopping) + + pred_train = self.model.predict_proba(X_train) + pred_val = self.model.predict_proba(X_val) + + results["val_preds"] = pred_val.tolist() + results["labels"] = y_val.tolist() + + try: + pred_train = np.argmax(pred_train, axis=1) + pred_val = np.argmax(pred_val, axis=1) + except ValueError: + self.logger.info("==> No probabilities provided in predictions") + + results["train_score"] = self.metric(y_train, pred_train) + results["val_score"] = self.metric(y_val, pred_val) + + return results + + def score(self, X_test: np.ndarray, y_test: Union[np.ndarray, List]) -> float: + y_pred = self.predict(X_test) + return self.metric(y_test, y_pred) + + def predict(self, X_test: np.ndarray, predict_proba: bool = False) -> np.ndarray: + X_test = X_test[:, ~self.all_nan] + X_test = np.nan_to_num(X_test) + if predict_proba: + return self.model.predict_proba(X_test) + y_pred = self.model.predict(X_test) + return y_pred + + +class RFModel(BaseClassifier): + + def __init__(self) -> None: + super(RFModel, self).__init__(name="random_forest") + + def fit(self, X_train: np.ndarray, + y_train: np.ndarray, + X_val: np.ndarray, + y_val: np.ndarray) -> Dict[str, Any]: + + results = dict() + + self.all_nan = np.all(pd.isnull(X_train), axis=0) + X_train = X_train[:, ~self.all_nan] + X_val = X_val[:, ~self.all_nan] + + X_train = np.nan_to_num(X_train) + X_val = np.nan_to_num(X_val) + + self.config["warm_start"] = False + self.num_classes = len(np.unique(y_train)) + if self.num_classes > 2: + self.logger.info("==> Using warmstarting for multiclass") + final_n_estimators = self.config["n_estimators"] + self.config["n_estimators"] = 8 + self.config["warm_start"] = True + + self.model = RandomForestClassifier(**self.config) + + self.model.fit(X_train, y_train) + if self.config["warm_start"]: + self.model.n_estimators = final_n_estimators + self.model.fit(X_train, y_train) + + pred_val_probas = self.model.predict_proba(X_val) + + pred_train = self.model.predict(X_train) + pred_val = self.model.predict(X_val) + + results["train_score"] = self.metric(y_train, pred_train) + results["val_score"] = self.metric(y_val, pred_val) + results["val_preds"] = pred_val_probas.tolist() + 
results["labels"] = y_val.tolist() + + return results + + def score(self, X_test: np.ndarray, y_test: Union[np.ndarray, List]) -> float: + y_pred = self.predict(X_test) + return self.metric(y_test, y_pred) + + def predict(self, X_test: np.ndarray, predict_proba: bool = False) -> np.ndarray: + X_test = X_test[:, ~self.all_nan] + X_test = np.nan_to_num(X_test) + if predict_proba: + return self.model.predict_proba(X_test) + y_pred = self.model.predict(X_test) + return y_pred + + +class ExtraTreesModel(BaseClassifier): + + def __init__(self) -> None: + super(ExtraTreesModel, self).__init__(name="extra_trees") + + def fit(self, X_train: np.ndarray, + y_train: np.ndarray, + X_val: np.ndarray, + y_val: np.ndarray) -> Dict[str, Any]: + + results = dict() + + self.all_nan = np.all(pd.isnull(X_train), axis=0) + X_train = X_train[:, ~self.all_nan] + X_val = X_val[:, ~self.all_nan] + + X_train = np.nan_to_num(X_train) + X_val = np.nan_to_num(X_val) + + self.config["warm_start"] = False + self.num_classes = len(np.unique(y_train)) + if self.num_classes > 2: + self.logger.info("==> Using warmstarting for multiclass") + final_n_estimators = self.config["n_estimators"] + self.config["n_estimators"] = 8 + self.config["warm_start"] = True + + self.model = ExtraTreesClassifier(**self.config) + + self.model.fit(X_train, y_train) + if self.config["warm_start"]: + self.model.n_estimators = final_n_estimators + self.model.fit(X_train, y_train) + + pred_val_probas = self.model.predict_proba(X_val) + + pred_train = self.model.predict(X_train) + pred_val = self.model.predict(X_val) + + results["train_score"] = self.metric(y_train, pred_train) + results["val_score"] = self.metric(y_val, pred_val) + results["val_preds"] = pred_val_probas.tolist() + results["labels"] = y_val.tolist() + + return results + + def score(self, X_test: np.ndarray, y_test: Union[np.ndarray, List]) -> float: + y_pred = self.predict(X_test) + return self.metric(y_test, y_pred) + + def predict(self, X_test: np.ndarray, predict_proba: bool = False) -> np.ndarray: + X_test = X_test[:, ~self.all_nan] + X_test = np.nan_to_num(X_test) + if predict_proba: + return self.model.predict_proba(X_test) + y_pred = self.model.predict(X_test) + return y_pred + + +class KNNModel(BaseClassifier): + + def __init__(self) -> None: + super(KNNModel, self).__init__(name="knn") + + def fit(self, X_train: np.ndarray, + y_train: np.ndarray, + X_val: np.ndarray, + y_val: np.ndarray) -> Dict[str, Any]: + results = dict() + + self.all_nan = np.all(pd.isnull(X_train), axis=0) + X_train = X_train[:, ~self.all_nan] + X_val = X_val[:, ~self.all_nan] + + X_train = np.nan_to_num(X_train) + X_val = np.nan_to_num(X_val) + + self.categoricals = np.array([isinstance(X_train[0, ind], str) for ind in range(X_train.shape[1])]) + X_train = X_train[:, ~self.categoricals] if self.categoricals is not None else X_train + X_val = X_val[:, ~self.categoricals] if self.categoricals is not None else X_val + + self.num_classes = len(np.unique(y_train)) + + self.model = KNeighborsClassifier(**self.config) + self.model.fit(X_train, y_train) + + pred_val_probas = self.model.predict_proba(X_val) + + pred_train = self.model.predict(X_train) + pred_val = self.model.predict(X_val) + + results["train_score"] = self.metric(y_train, pred_train) + results["val_score"] = self.metric(y_val, pred_val) + results["val_preds"] = pred_val_probas.tolist() + results["labels"] = y_val.tolist() + + return results + + def score(self, X_test: np.ndarray, y_test: Union[np.ndarray, List]) -> float: + y_pred = 
self.predict(X_test) + return self.metric(y_test, y_pred) + + def predict(self, X_test: np.ndarray, predict_proba: bool = False) -> np.ndarray: + X_test = X_test[:, ~self.all_nan] + X_test = np.nan_to_num(X_test) + X_test = X_test[:, ~self.categoricals] if self.categoricals is not None else X_test + if predict_proba: + return self.model.predict_proba(X_test) + y_pred = self.model.predict(X_test) + return y_pred + + +class SVMModel(BaseClassifier): + + def __init__(self) -> None: + super(SVMModel, self).__init__(name="svm") + + def fit(self, X_train: np.ndarray, + y_train: np.ndarray, + X_val: np.ndarray, + y_val: np.ndarray) -> Dict[str, Any]: + results = dict() + + self.all_nan = np.all(pd.isnull(X_train), axis=0) + X_train = X_train[:, ~self.all_nan] + X_val = X_val[:, ~self.all_nan] + + X_train = np.nan_to_num(X_train) + X_val = np.nan_to_num(X_val) + + self.model = SVC(**self.config, probability=True) + + self.model.fit(X_train, y_train) + + pred_val_probas = self.model.predict_proba(X_val) + + pred_train = self.model.predict(X_train) + pred_val = self.model.predict(X_val) + + results["train_score"] = self.metric(y_train, pred_train) + results["val_score"] = self.metric(y_val, pred_val) + results["val_preds"] = pred_val_probas.tolist() + results["labels"] = y_val.tolist() + + return results + + def score(self, X_test: np.ndarray, y_test: Union[np.ndarray, List]) -> float: + y_pred = self.predict(X_test) + return self.metric(y_test, y_pred) + + def predict(self, X_test: np.ndarray, predict_proba: bool = False) -> np.ndarray: + X_test = X_test[:, ~self.all_nan] + X_test = np.nan_to_num(X_test) + if predict_proba: + return self.model.predict_proba(X_test) + y_pred = self.model.predict(X_test) + return y_pred diff --git a/autoPyTorch/pipeline/components/setup/traditional_ml/tabular_classifier.py b/autoPyTorch/pipeline/components/setup/traditional_ml/tabular_classifier.py new file mode 100644 index 000000000..6f46e754c --- /dev/null +++ b/autoPyTorch/pipeline/components/setup/traditional_ml/tabular_classifier.py @@ -0,0 +1,71 @@ +from typing import Any, Dict, Optional, Tuple, Type + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter +) + +import numpy as np + +from autoPyTorch.pipeline.components.setup.traditional_ml.base_model import BaseModelComponent +from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models import ( + BaseClassifier, get_available_classifiers) + + +class TabularClassifier(BaseModelComponent): + """ + Implementation of a dynamic model, that consists of a classifier and a head + """ + + def __init__( + self, + random_state: Optional[np.random.RandomState] = None, + **kwargs: Any + ): + super().__init__( + random_state=random_state, + ) + self.config = kwargs + self._classifiers = get_available_classifiers() + + @staticmethod + def get_properties(dataset_properties: Optional[Dict[str, str]] = None) -> Dict[str, Any]: + return { + "shortname": "TabularClassifier", + "name": "TabularClassifier", + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict[str, str]] = None, + **kwargs: Any) -> ConfigurationSpace: + cs = ConfigurationSpace() + classifiers: Dict[str, Type[BaseClassifier]] = get_available_classifiers() + # Remove knn classifier if data is all categorical + if dataset_properties is not None and len(dataset_properties['numerical_columns']) == 0: + del classifiers['knn_classifier'] + classifier_hp = 
CategoricalHyperparameter("classifier", choices=classifiers.keys()) + cs.add_hyperparameters([classifier_hp]) + + return cs + + def build_model(self, input_shape: Tuple[int, ...], output_shape: Tuple[int, ...]) -> BaseClassifier: + """ + This method returns a classifier, that is dynamically built using + a self.config that is model specific, and contains the additional + configuration hyperparameters to build a domain specific model + """ + classifier_name = self.config["classifier"] + Classifier = self._classifiers[classifier_name] + + classifier = Classifier() + + return classifier + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + info = vars(self) + # Remove unwanted info + info.pop('random_state', None) + info.pop('fit_output', None) + info.pop('config', None) + return f"TabularClassifier: {self.model.name if self.model is not None else None} ({str(info)})" diff --git a/autoPyTorch/pipeline/base/__init__.py b/autoPyTorch/pipeline/components/training/__init__.py similarity index 100% rename from autoPyTorch/pipeline/base/__init__.py rename to autoPyTorch/pipeline/components/training/__init__.py diff --git a/autoPyTorch/pipeline/components/training/base_training.py b/autoPyTorch/pipeline/components/training/base_training.py new file mode 100644 index 000000000..3145d636b --- /dev/null +++ b/autoPyTorch/pipeline/components/training/base_training.py @@ -0,0 +1,38 @@ +from typing import Any, Dict, Optional + +import numpy as np + +from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent + + +class autoPyTorchTrainingComponent(autoPyTorchComponent): + """Provide an abstract interface for training nodes + in Auto-Pytorch""" + + def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None: + super(autoPyTorchTrainingComponent, self).__init__() + + def transform(self, X: np.ndarray) -> np.ndarray: + """The transform function calls the transform function of the + underlying model and returns the transformed array. + + Args: + X (np.ndarray): input features + + Returns: + np.ndarray: Transformed features + """ + raise NotImplementedError() + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. 
It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + pass diff --git a/autoPyTorch/pipeline/nodes/image/__init__.py b/autoPyTorch/pipeline/components/training/data_loader/__init__.py similarity index 100% rename from autoPyTorch/pipeline/nodes/image/__init__.py rename to autoPyTorch/pipeline/components/training/data_loader/__init__.py diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py new file mode 100644 index 000000000..0049d8e38 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py @@ -0,0 +1,270 @@ +from typing import Any, Dict, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformIntegerHyperparameter, +) + +import numpy as np + +import torch + +import torchvision + + +from autoPyTorch.datasets.base_dataset import BaseDataset +from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent +from autoPyTorch.utils.backend import Backend +from autoPyTorch.utils.common import FitRequirement, custom_collate_fn + + +class BaseDataLoaderComponent(autoPyTorchTrainingComponent): + """This class is an interface to the PyTorch Dataloader. + + It gives the possibility to read various types of mapped + datasets as described in: + https://pytorch.org/docs/stable/data.html + + """ + + def __init__(self, batch_size: int = 64) -> None: + super().__init__() + self.batch_size = batch_size + self.train_data_loader = None # type: Optional[torch.utils.data.DataLoader] + self.val_data_loader = None # type: Optional[torch.utils.data.DataLoader] + + # We also support existing datasets! + self.dataset = None + self.vision_datasets = self.get_torchvision_datasets() + + # Save the transformations for reuse + self.train_transform = None # type: Optional[torchvision.transforms.Compose] + + # The only reason we have val/test transform separated is to speed up + # prediction during training. Namely, if is_small_preprocess is set to true + # X_train data will be pre-processed, so we do no need preprocessing in the transform + # Regardless, test/inference always need this transformation + self.val_transform = None # type: Optional[torchvision.transforms.Compose] + self.test_transform = None # type: Optional[torchvision.transforms.Compose] + + # Define fit requirements + self.add_fit_requirements([ + FitRequirement("split_id", (int,), user_defined=True, dataset_property=False), + FitRequirement("Backend", (Backend,), user_defined=True, dataset_property=False), + FitRequirement("is_small_preprocess", (bool,), user_defined=True, dataset_property=True)]) + + def transform(self, X: np.ndarray) -> np.ndarray: + """The transform function calls the transform function of the + underlying model and returns the transformed array. + + Args: + X (np.ndarray): input features + + Returns: + np.ndarray: Transformed features + """ + X.update({'train_data_loader': self.train_data_loader, + 'val_data_loader': self.val_data_loader}) + return X + + def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. 
To comply with sklearn API + + Returns: + A instance of self + """ + + # Make sure there is an optimizer + self.check_requirements(X, y) + + # Incorporate the transform to the dataset + datamanager = X['backend'].load_datamanager() + self.train_transform = self.build_transform(X, mode='train') + self.val_transform = self.build_transform(X, mode='val') + self.test_transform = self.build_transform(X, mode='test') + datamanager.update_transform( + self.train_transform, + train=True, + ) + datamanager.update_transform( + self.val_transform, + train=False, + ) + if X['dataset_properties']["is_small_preprocess"]: + # This parameter indicates that the data has been pre-processed for speed + # Overwrite the datamanager with the pre-processes data + datamanager.replace_data(X['X_train'], X['X_test'] if 'X_test' in X else None) + train_dataset, val_dataset = datamanager.get_dataset_for_training(split_id=X['split_id']) + + self.train_data_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=min(self.batch_size, len(train_dataset)), + shuffle=True, + num_workers=X.get('num_workers', 0), + pin_memory=X.get('pin_memory', True), + drop_last=X.get('drop_last', True), + collate_fn=custom_collate_fn, + ) + + self.val_data_loader = torch.utils.data.DataLoader( + val_dataset, + batch_size=min(self.batch_size, len(val_dataset)), + shuffle=False, + num_workers=X.get('num_workers', 0), + pin_memory=X.get('pin_memory', True), + drop_last=X.get('drop_last', False), + collate_fn=custom_collate_fn, + ) + + return self + + def get_loader(self, X: np.ndarray, y: Optional[np.ndarray] = None, batch_size: int = np.inf, + ) -> torch.utils.data.DataLoader: + """ + Creates a data loader object from the provided data, + applying the transformations meant to validation objects + """ + + dataset = BaseDataset( + train_tensors=(X, y), + # This dataset is used for loading test data in a batched format + train_transforms=self.test_transform, + val_transforms=self.test_transform, + ) + return torch.utils.data.DataLoader( + dataset, + batch_size=min(batch_size, len(dataset)), + shuffle=False, + collate_fn=custom_collate_fn, + ) + + def build_transform(self, X: Dict[str, Any], mode: str) -> torchvision.transforms.Compose: + """ + Method to build a transformation that can pre-process input data + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + mode (str): train/val/test + + Returns: + A composition of transformations + """ + raise NotImplementedError() + + def get_train_data_loader(self) -> torch.utils.data.DataLoader: + """Returns a data loader object for the train data + + Returns: + torch.utils.data.DataLoader: A train data loader + """ + assert self.train_data_loader is not None, "No train data loader fitted" + return self.train_data_loader + + def get_val_data_loader(self) -> torch.utils.data.DataLoader: + """Returns a data loader object for the validation data + + Returns: + torch.utils.data.DataLoader: A validation data loader + """ + assert self.val_data_loader is not None, "No val data loader fitted" + return self.val_data_loader + + def get_test_data_loader(self) -> torch.utils.data.DataLoader: + """Returns a data loader object for the test data + + Returns: + torch.utils.data.DataLoader: A validation data loader + """ + return self.test_data_loader + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level 
requirements + are honored before fit. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + + # make sure the parent requirements are honored + super().check_requirements(X, y) + + # We allow reading data from a user provided dataset + # or from X, Y pairs + if 'split_id' not in X: + raise ValueError("To fit a data loader, expected fit dictionary to have split_id. " + "Currently X={}.".format( + X + ) + ) + if 'backend' not in X: + raise ValueError("backend is needed to load the data from disk") + + if 'is_small_preprocess' not in X['dataset_properties']: + raise ValueError("is_small_pre-process is required to know if the data was preprocessed" + " or if the data-loader should transform it while loading a batch") + + # We expect this class to be a base for image/tabular/time + # And the difference among this data types should be mainly + # in the transform, so we delegate for special transformation checking + # to the below method + self._check_transform_requirements(X, y) + + def _check_transform_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + + Makes sure that the fit dictionary contains the required transformations + that the dataset should go through + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + raise NotImplementedError() + + def get_torchvision_datasets(self) -> Dict[str, torchvision.datasets.VisionDataset]: + """ Returns the supported dataset classes from torchvision + + This is gonna be used to instantiate a dataset object for the dataloader + + Returns: + Dict[str, torchvision.datasets.VisionDataset]: A mapping from dataset name to class + + """ + return { + 'FashionMNIST': torchvision.datasets.FashionMNIST, + 'MNIST': torchvision.datasets.MNIST, + 'CIFAR10': torchvision.datasets.CIFAR10, + 'CIFAR100': torchvision.datasets.CIFAR100, + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: Optional[Dict] = None + ) -> ConfigurationSpace: + batch_size = UniformIntegerHyperparameter( + "batch_size", 32, 320, default_value=64) + cs = ConfigurationSpace() + cs.add_hyperparameters([batch_size]) + return cs + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = self.train_data_loader.__class__.__name__ + info = vars(self) + # Remove unwanted info + info.pop('train_data_loader', None) + info.pop('val_data_loader', None) + info.pop('test_data_loader', None) + info.pop('vision_datasets', None) + info.pop('random_state', None) + string += " (" + str(info) + ")" + return string diff --git a/autoPyTorch/pipeline/components/training/data_loader/feature_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/feature_data_loader.py new file mode 100644 index 000000000..3cde4c3c9 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/data_loader/feature_data_loader.py @@ -0,0 +1,86 @@ +from typing import Any, Callable, Dict, List + +import numpy as np + +from sklearn.utils import check_array + +import torch + +import torchvision + +from autoPyTorch.pipeline.components.training.data_loader.base_data_loader import BaseDataLoaderComponent + + +class ContractTransform(object): + """Reverses the effect of ExpandTransform""" + def 
__call__(self, data: np.ndarray) -> np.ndarray: + # We make sure that the data is converted from + # Object regardless of the configuration space + # (normally the CS will convert a pd.DataFrame->np.array, + # but the current config might be no preprocessing) + # Also, Batch normalization expect a flattened input, so + # we have to squeeze sklearn output which is normally (N, 1) + data = check_array( + data, + force_all_finite=False, + accept_sparse='csr', + ensure_2d=False, + allow_nd=True, + ) + return np.squeeze(data) + + +class FeatureDataLoader(BaseDataLoaderComponent): + """This class is an interface to the PyTorch Dataloader. + + Particularly, this data loader builds transformations for + tabular data. + + """ + + def build_transform(self, X: Dict[str, Any], mode: str) -> torchvision.transforms.Compose: + """ + Method to build a transformation that can pre-process input data + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + mode (str): train/val/test + + Returns: + A composition of transformations + """ + + if mode not in ['train', 'val', 'test']: + raise ValueError("Unsupported mode provided {}. ".format(mode)) + + # In the case of feature data, the options currently available + # for transformations are: + # + imputer + # + encoder + # + scaler + # This transformations apply for both train/val/test, so no + # distinction is performed + candidate_transformations = [] # type: List[Callable] + + if 'test' in mode or not X['dataset_properties']['is_small_preprocess']: + candidate_transformations.extend(X['preprocess_transforms']) + candidate_transformations.append((ContractTransform())) + + # Transform to tensor + candidate_transformations.append(torch.from_numpy) + + return torchvision.transforms.Compose(candidate_transformations) + + def _check_transform_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + + Makes sure that the fit dictionary contains the required transformations + that the dataset should go through + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + if not X['dataset_properties']['is_small_preprocess'] and 'preprocess_transforms' not in X: + raise ValueError("Cannot find the preprocess_transforms in the fit dictionary") diff --git a/autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py new file mode 100644 index 000000000..21cc05447 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py @@ -0,0 +1,67 @@ +from typing import Any, Dict + +import torchvision + +from autoPyTorch.pipeline.components.training.data_loader.base_data_loader import BaseDataLoaderComponent + + +class ImageDataLoader(BaseDataLoaderComponent): + """This class is an interface to the PyTorch Dataloader. + + Particularly, this data loader builds transformations for + image data. + + """ + + def build_transform(self, X: Dict[str, Any], mode: str) -> torchvision.transforms.Compose: + """ + Method to build a transformation that can pre-process input data + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + mode (str): train/val/test + + Returns: + A composition of transformations + """ + + if mode not in ['train', 'val', 'test']: + raise ValueError("Unsupported mode provided {}. 
".format(mode)) + + transformations = [] + + if 'train' in mode: + transformations.append(X['image_augmenter']) + # In the case of image data, the options currently available + # for preprocessors are: + # + normalise + # These can apply for both train/val/test, so no + # distinction is performed + + # check if data set is small enough to be preprocessed. + # If it is, then no need to add preprocess_transforms to + # the data loader as the data is already preprocessed + if 'test' in mode or not X['dataset_properties']['is_small_preprocess']: + transformations.append(X['preprocess_transforms']) + + # Transform to tensor + transformations.append(torchvision.transforms.ToTensor()) + + return torchvision.transforms.Compose(transformations) + + def _check_transform_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + + Makes sure that the fit dictionary contains the required transformations + that the dataset should go through + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + if not X['image_augmenter'] and 'image_augmenter' not in X: + raise ValueError("Cannot find the image_augmenter in the fit dictionary") + + if not X['dataset_properties']['is_small_preprocess'] and 'preprocess_transforms' not in X: + raise ValueError("Cannot find the preprocess_transforms in the fit dictionary") diff --git a/autoPyTorch/pipeline/components/training/losses.py b/autoPyTorch/pipeline/components/training/losses.py new file mode 100644 index 000000000..705246310 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/losses.py @@ -0,0 +1,71 @@ +from typing import Any, Dict, Optional, Type + +from torch.nn.modules.loss import ( + BCEWithLogitsLoss, + CrossEntropyLoss, + L1Loss, + MSELoss +) +from torch.nn.modules.loss import _Loss as Loss + +from autoPyTorch.constants import BINARY, CLASSIFICATION_TASKS, CONTINUOUS, MULTICLASS, REGRESSION_TASKS, \ + STRING_TO_OUTPUT_TYPES, STRING_TO_TASK_TYPES, TASK_TYPES_TO_STRING + +losses = dict(classification=dict( + CrossEntropyLoss=dict( + module=CrossEntropyLoss, supported_output_type=MULTICLASS), + BCEWithLogitsLoss=dict( + module=BCEWithLogitsLoss, supported_output_type=BINARY)), + regression=dict( + MSELoss=dict( + module=MSELoss, supported_output_type=CONTINUOUS), + L1Loss=dict( + module=L1Loss, supported_output_type=CONTINUOUS))) + +default_losses = dict(classification=CrossEntropyLoss, regression=MSELoss) + + +def get_default(task: int) -> Type[Loss]: + if task in CLASSIFICATION_TASKS: + return default_losses['classification'] + elif task in REGRESSION_TASKS: + return default_losses['regression'] + else: + raise ValueError("Invalid task type {}".format(TASK_TYPES_TO_STRING[task])) + + +def get_supported_losses(task: int, output_type: int) -> Dict[str, Type[Loss]]: + supported_losses = dict() + if task in CLASSIFICATION_TASKS: + for key, value in losses['classification'].items(): + if output_type == value['supported_output_type']: + supported_losses[key] = value['module'] + elif task in REGRESSION_TASKS: + for key, value in losses['regression'].items(): + if output_type == value['supported_output_type']: + supported_losses[key] = value['module'] + return supported_losses + + +def get_loss_instance(dataset_properties: Dict[str, Any], name: Optional[str] = None) -> Loss: + assert 'task_type' in dataset_properties, \ + "Expected dataset_properties to have task_type got 
{}".format(dataset_properties.keys()) + assert 'output_type' in dataset_properties, \ + "Expected dataset_properties to have output_type got {}".format(dataset_properties.keys()) + + task = STRING_TO_TASK_TYPES[dataset_properties['task_type']] + output_type = STRING_TO_OUTPUT_TYPES[dataset_properties['output_type']] + supported_losses = get_supported_losses(task, output_type) + + if name is not None: + if name not in supported_losses.keys(): + raise ValueError("Invalid name entered for task {}, and output type {} currently supported losses" + " for task include {}".format(dataset_properties['task_type'], + dataset_properties['output_type'], + list(supported_losses.keys()))) + else: + loss = supported_losses[name] + else: + loss = get_default(task) + + return loss() diff --git a/autoPyTorch/utils/benchmarking/__init__.py b/autoPyTorch/pipeline/components/training/metrics/__init__.py similarity index 100% rename from autoPyTorch/utils/benchmarking/__init__.py rename to autoPyTorch/pipeline/components/training/metrics/__init__.py diff --git a/autoPyTorch/pipeline/components/training/metrics/base.py b/autoPyTorch/pipeline/components/training/metrics/base.py new file mode 100644 index 000000000..5c68a1f00 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/metrics/base.py @@ -0,0 +1,227 @@ +from abc import ABCMeta +from typing import Any, Callable, List, Optional + +import numpy as np + +import sklearn.metrics +from sklearn.utils.multiclass import type_of_target + + +class autoPyTorchMetric(object, metaclass=ABCMeta): + + def __init__(self, + name: str, + score_func: Callable, + optimum: float, + worst_possible_result: float, + sign: float, + kwargs: Any) -> None: + self.name = name + self._kwargs = kwargs + self._metric_func = score_func + self._optimum = optimum + self._worst_possible_result = worst_possible_result + self._sign = sign + + def __call__(self, + y_true: np.ndarray, + y_pred: np.ndarray, + sample_weight: Optional[List[float]] = None + ) -> float: + raise NotImplementedError() + + def get_metric_func(self) -> Callable: + return self._metric_func + + def __repr__(self) -> str: + return self.name + + +class _PredictMetric(autoPyTorchMetric): + def __call__( + self, + y_true: np.ndarray, + y_pred: np.ndarray, + sample_weight: Optional[List[float]] = None + ) -> float: + """Evaluate predicted target values for X relative to y_true. + Parameters + ---------- + y_true : array-like + Gold standard target values for X. + y_pred : array-like, [n_samples x n_classes] + Model predictions + sample_weight : array-like, optional (default=None) + Sample weights. + Returns + ------- + score : float + Score function applied to prediction of estimator on X. 
+ """ + type_true = type_of_target(y_true) + if type_true == 'binary' and type_of_target(y_pred) == 'continuous' and \ + len(y_pred.shape) == 1: + # For a pred autoPyTorchMetric, no threshold, nor probability is required + # If y_true is binary, and y_pred is continuous + # it means that a rounding is necessary to obtain the binary class + y_pred = np.around(y_pred, decimals=0) + elif len(y_pred.shape) == 1 or y_pred.shape[1] == 1 or \ + type_true == 'continuous': + # must be regression, all other task types would return at least + # two probabilities + pass + elif type_true in ['binary', 'multiclass']: + y_pred = np.argmax(y_pred, axis=1) + elif type_true == 'multilabel-indicator': + y_pred[y_pred > 0.5] = 1.0 + y_pred[y_pred <= 0.5] = 0.0 + elif type_true == 'continuous-multioutput': + pass + else: + raise ValueError(type_true) + + if sample_weight is not None: + return self._sign * self._metric_func(y_true, y_pred, + sample_weight=sample_weight, + **self._kwargs) + else: + return self._sign * self._metric_func(y_true, y_pred, + **self._kwargs) + + +class _ProbaMetric(autoPyTorchMetric): + def __call__( + self, + y_true: np.ndarray, + y_pred: np.ndarray, + sample_weight: Optional[List[float]] = None + ) -> float: + """Evaluate predicted probabilities for X relative to y_true. + Parameters + ---------- + y_true : array-like + Gold standard target values for X. These must be class labels, + not probabilities. + y_pred : array-like, [n_samples x n_classes] + Model predictions + sample_weight : array-like, optional (default=None) + Sample weights. + Returns + ------- + score : float + Score function applied to prediction of estimator on X. + """ + + if self._metric_func is sklearn.metrics.log_loss: + n_labels_pred = np.array(y_pred).reshape((len(y_pred), -1)).shape[1] + n_labels_test = len(np.unique(y_true)) + if n_labels_pred != n_labels_test: + labels = list(range(n_labels_pred)) + if sample_weight is not None: + return self._sign * self._metric_func(y_true, y_pred, + sample_weight=sample_weight, + labels=labels, + **self._kwargs) + else: + return self._sign * self._metric_func(y_true, y_pred, + labels=labels, **self._kwargs) + + if sample_weight is not None: + return self._sign * self._metric_func(y_true, y_pred, + sample_weight=sample_weight, + **self._kwargs) + else: + return self._sign * self._metric_func(y_true, y_pred, + **self._kwargs) + + +class _ThresholdMetric(autoPyTorchMetric): + def __call__( + self, + y_true: np.ndarray, + y_pred: np.ndarray, + sample_weight: Optional[List[float]] = None + ) -> float: + """Evaluate decision function output for X relative to y_true. + Parameters + ---------- + y_true : array-like + Gold standard target values for X. These must be class labels, + not probabilities. + y_pred : array-like, [n_samples x n_classes] + Model predictions + sample_weight : array-like, optional (default=None) + Sample weights. + Returns + ------- + score : float + Score function applied to prediction of estimator on X. 
+ """ + y_type = type_of_target(y_true) + if y_type not in ("binary", "multilabel-indicator"): + raise ValueError("{0} format is not supported".format(y_type)) + + if y_type == "binary": + if y_pred.ndim > 1: + y_pred = y_pred[:, 1] + elif isinstance(y_pred, list): + y_pred = np.vstack([p[:, -1] for p in y_pred]).T + + if sample_weight is not None: + return self._sign * self._metric_func(y_true, y_pred, + sample_weight=sample_weight, + **self._kwargs) + else: + return self._sign * self._metric_func(y_true, y_pred, **self._kwargs) + + +def make_metric( + name: str, + score_func: Callable, + optimum: float = 1.0, + worst_possible_result: float = 0.0, + greater_is_better: bool = True, + needs_proba: bool = False, + needs_threshold: bool = False, + **kwargs: Any +) -> autoPyTorchMetric: + """Make a autoPyTorchMetric from a performance metric or loss function. + Factory inspired by scikit-learn which wraps scikit-learn scoring functions + to be used in auto-sklearn. + Parameters + ---------- + + Returns + ------- + autoPyTorchMetric : callable + Callable object that returns a scalar score; greater is better. + + Args: + name : str + Name of the metric + score_func : callable + Score function (or loss function) with signature + ``score_func(y, y_pred, **kwargs)``. + optimum : int or float, default=1 + The best score achievable by the score function, i.e. maximum in case of + metric function and minimum in case of loss function. + greater_is_better : boolean, default=True + Whether score_func is a score function (default), meaning high is good, + or a loss function, meaning low is good. In the latter case, the + autoPyTorchMetric object will sign-flip the outcome of the score_func. + needs_proba : boolean, default=False + Whether score_func requires predict_proba to get probability estimates + out of a classifier. + needs_threshold : boolean, default=False + Whether score_func takes a continuous decision certainty. + This only works for binary classification. + **kwargs : additional arguments + Additional parameters to be passed to score_func. 
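+    Example (illustrative; the metric name and keyword settings are arbitrary):
+
+        macro_f1 = make_metric('macro_f1', sklearn.metrics.f1_score, average='macro')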
+ """ + sign = 1 if greater_is_better else -1 + if needs_proba: + return _ProbaMetric(name, score_func, optimum, worst_possible_result, sign, kwargs) + elif needs_threshold: + return _ThresholdMetric(name, score_func, optimum, worst_possible_result, sign, kwargs) + else: + return _PredictMetric(name, score_func, optimum, worst_possible_result, sign, kwargs) diff --git a/autoPyTorch/pipeline/components/training/metrics/metrics.py b/autoPyTorch/pipeline/components/training/metrics/metrics.py new file mode 100644 index 000000000..b669e4ede --- /dev/null +++ b/autoPyTorch/pipeline/components/training/metrics/metrics.py @@ -0,0 +1,89 @@ +from functools import partial + + +import sklearn.metrics + +from smac.utils.constants import MAXINT + +from autoPyTorch.pipeline.components.training.metrics.base import make_metric + +# Standard regression scores +mean_absolute_error = make_metric('mean_absolute_error', + sklearn.metrics.mean_absolute_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False) +mean_squared_error = make_metric('mean_squared_error', + sklearn.metrics.mean_squared_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, + squared=True) +root_mean_squared_error = make_metric('root_mean_squared_error', + sklearn.metrics.mean_squared_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, + squared=False) +mean_squared_log_error = make_metric('mean_squared_log_error', + sklearn.metrics.mean_squared_log_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, ) +median_absolute_error = make_metric('median_absolute_error', + sklearn.metrics.median_absolute_error, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False) +r2 = make_metric('r2', + sklearn.metrics.r2_score) + +# Standard Classification Scores +accuracy = make_metric('accuracy', + sklearn.metrics.accuracy_score) +balanced_accuracy = make_metric('balanced_accuracy', + sklearn.metrics.balanced_accuracy_score) +f1 = make_metric('f1', + sklearn.metrics.f1_score) + +# Score functions that need decision values +roc_auc = make_metric('roc_auc', sklearn.metrics.roc_auc_score, needs_threshold=True) +average_precision = make_metric('average_precision', + sklearn.metrics.average_precision_score, + needs_threshold=True) +precision = make_metric('precision', + sklearn.metrics.precision_score) +recall = make_metric('recall', + sklearn.metrics.recall_score) + +# Score function for probabilistic classification +log_loss = make_metric('log_loss', + sklearn.metrics.log_loss, + optimum=0, + worst_possible_result=MAXINT, + greater_is_better=False, + needs_proba=True) + +REGRESSION_METRICS = dict() +for scorer in [mean_absolute_error, mean_squared_error, root_mean_squared_error, + mean_squared_log_error, median_absolute_error, r2]: + REGRESSION_METRICS[scorer.name] = scorer + +CLASSIFICATION_METRICS = dict() + +for scorer in [accuracy, balanced_accuracy, roc_auc, average_precision, + log_loss]: + CLASSIFICATION_METRICS[scorer.name] = scorer + +for name, metric in [('precision', sklearn.metrics.precision_score), + ('recall', sklearn.metrics.recall_score), + ('f1', sklearn.metrics.f1_score)]: + globals()[name] = make_metric(name, metric) + CLASSIFICATION_METRICS[name] = globals()[name] + for average in ['macro', 'micro', 'samples', 'weighted']: + qualified_name = '{0}_{1}'.format(name, average) + globals()[qualified_name] = make_metric(qualified_name, + partial(metric, + pos_label=None, + average=average)) + 
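        # e.g. 'f1_macro' or 'precision_weighted': register the averaged variant both as
+        # a module-level name and as an entry in CLASSIFICATION_METRICS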
CLASSIFICATION_METRICS[qualified_name] = globals()[qualified_name] diff --git a/autoPyTorch/pipeline/components/training/metrics/utils.py b/autoPyTorch/pipeline/components/training/metrics/utils.py new file mode 100644 index 000000000..f245d5f39 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/metrics/utils.py @@ -0,0 +1,141 @@ +import warnings +from typing import Any, Dict, Iterable, List, Optional + +import numpy as np + +from autoPyTorch.constants import ( + CLASSIFICATION_TASKS, + REGRESSION_TASKS, + STRING_TO_TASK_TYPES, + TASK_TYPES, +) +from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric +from autoPyTorch.pipeline.components.training.metrics.metrics import CLASSIFICATION_METRICS, REGRESSION_METRICS + + +def sanitize_array(array: np.ndarray) -> np.ndarray: + """ + Replace NaN and Inf (there should not be any!) + :param array: + :return: + """ + a = np.ravel(array) + maxi = np.nanmax(a[np.isfinite(a)]) + mini = np.nanmin(a[np.isfinite(a)]) + array[array == float('inf')] = maxi + array[array == float('-inf')] = mini + mid = (maxi + mini) / 2 + array[np.isnan(array)] = mid + return array + + +def get_supported_metrics(dataset_properties: Dict[str, Any]) -> Dict[str, autoPyTorchMetric]: + task_type = dataset_properties['task_type'] + + if STRING_TO_TASK_TYPES[task_type] in REGRESSION_TASKS: + return REGRESSION_METRICS + elif STRING_TO_TASK_TYPES[task_type] in CLASSIFICATION_TASKS: + return CLASSIFICATION_METRICS + else: + raise NotImplementedError(task_type) + + +def get_metrics(dataset_properties: Dict[str, Any], + names: Optional[Iterable[str]] = None, + all_supported_metrics: bool = False, + ) -> List[autoPyTorchMetric]: + """ + Returns metrics for current task_type, if names is None and + all_supported_metrics is False, returns preset default for + given task + + Args: + dataset_properties: Dict[str, Any] + contains information about the dataset and task type + names: Optional[Iterable[str]] + names of metrics to return + all_supported_metrics: bool + if true, returns all metrics that are relevant to task_type + + Returns: + + """ + assert 'task_type' in dataset_properties, \ + "Expected dataset_properties to have task_type got {}".format(dataset_properties.keys()) + assert 'output_type' in dataset_properties, \ + "Expected dataset_properties to have output_type got {}".format(dataset_properties.keys()) + if all_supported_metrics: + assert names is None, "Can't pass names when all_supported_metrics are true" + + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] not in TASK_TYPES: + raise NotImplementedError(dataset_properties['task_type']) + + default_metrics = dict(classification=dict({'multiclass': 'accuracy', + 'binary': 'accuracy', + 'multiclass-multioutput': 'f1'}), + regression=dict({'continuous': 'r2', + 'continuous-multioutput': 'r2'})) + + supported_metrics = get_supported_metrics(dataset_properties) + metrics = list() # type: List[autoPyTorchMetric] + if names is not None: + for name in names: + if name not in supported_metrics.keys(): + raise ValueError("Invalid name entered for task {}, currently " + "supported metrics for task include {}".format(dataset_properties['task_type'], + list(supported_metrics.keys()))) + else: + metric = supported_metrics[name] + metrics.append(metric) + else: + if all_supported_metrics: + metrics.extend(list(supported_metrics.values())) + else: + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in CLASSIFICATION_TASKS: + 
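                # no metric names were requested: fall back to the preset default for the
+                # task's output type (e.g. accuracy for binary/multiclass classification)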
metrics.append(supported_metrics[default_metrics['classification'][dataset_properties['output_type']]]) + if STRING_TO_TASK_TYPES[dataset_properties['task_type']] in REGRESSION_TASKS: + metrics.append(supported_metrics[default_metrics['regression'][dataset_properties['output_type']]]) + + return metrics + + +def calculate_score( + target: np.ndarray, + prediction: np.ndarray, + task_type: int, + metrics: Iterable[autoPyTorchMetric], +) -> Dict[str, float]: + score_dict = dict() + if task_type in REGRESSION_TASKS: + cprediction = sanitize_array(prediction) + for metric_ in metrics: + try: + score_dict[metric_.name] = metric_(target, cprediction) + except ValueError as e: + warnings.warn(f"{e} {e.args[0]}") + if e.args[0] == "Mean Squared Logarithmic Error cannot be used when " \ + "targets contain negative values.": + continue + else: + raise e + + else: + for metric_ in metrics: + try: + score_dict[metric_.name] = metric_(target, prediction) + except ValueError as e: + if e.args[0] == 'multiclass format is not supported': + continue + elif e.args[0] == "Samplewise metrics are not available " \ + "outside of multilabel classification.": + continue + elif e.args[0] == "Target is multiclass but " \ + "average='binary'. Please choose another average " \ + "setting, one of [None, 'micro', 'macro', 'weighted'].": + continue + elif e.args[0] == "The labels array needs to contain at " \ + "least two labels for log_loss, got [0].": + continue + else: + raise e + return score_dict diff --git a/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py b/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py new file mode 100644 index 000000000..b391b7d59 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/trainer/MixUpTrainer.py @@ -0,0 +1,70 @@ +import typing + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + UniformFloatHyperparameter, +) + +import numpy as np + +import torch + +from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent + + +class MixUpTrainer(BaseTrainerComponent): + def __init__(self, alpha: float, random_state: typing.Optional[np.random.RandomState] = None): + """ + This class handles the training of a network for a single given epoch. + + Args: + alpha (float): the mixup ratio + + """ + super().__init__(random_state=random_state) + self.alpha = alpha + + def data_preparation(self, X: np.ndarray, y: np.ndarray, + ) -> typing.Tuple[np.ndarray, typing.Dict[str, np.ndarray]]: + """ + Depending on the trainer choice, data fed to the network might be pre-processed + on a different way. That is, in standard training we provide the data to the + network as we receive it to the loader. Some regularization techniques, like mixup + alter the data. + + Args: + X (np.ndarray): The batch training features + y (np.ndarray): The batch training labels + + Returns: + np.ndarray: that processes data + typing.Dict[str, np.ndarray]: arguments to the criterion function + """ + lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0. else 1. 
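+        # mixup: build a convex combination of each sample with a randomly permuted
+        # partner, x_tilde = lam * x_i + (1 - lam) * x_j; both labels are returned so
+        # the criterion can be mixed with the same lam in criterion_preparation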
+ batch_size = X.size()[0] + index = torch.randperm(batch_size).cuda() if X.is_cuda else torch.randperm(batch_size) + + mixed_x = lam * X + (1 - lam) * X[index, :] + y_a, y_b = y, y[index] + return mixed_x, {'y_a': y_a, 'y_b': y_b, 'lam': lam} + + def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 + ) -> typing.Callable: + return lambda criterion, pred: lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b) + + @staticmethod + def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.Any]] = None + ) -> typing.Dict[str, str]: + return { + 'shortname': 'MixUpTrainer', + 'name': 'MixUp Regularized Trainer', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: typing.Optional[typing.Dict] = None + ) -> ConfigurationSpace: + alpha = UniformFloatHyperparameter( + "alpha", 0, 1, default_value=0.2) + cs = ConfigurationSpace() + cs.add_hyperparameters([alpha]) + return cs diff --git a/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py new file mode 100644 index 000000000..4509c17f6 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/trainer/StandardTrainer.py @@ -0,0 +1,45 @@ +import typing + +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from autoPyTorch.pipeline.components.training.trainer.base_trainer import BaseTrainerComponent + + +class StandardTrainer(BaseTrainerComponent): + def data_preparation(self, X: np.ndarray, y: np.ndarray, + ) -> typing.Tuple[np.ndarray, typing.Dict[str, np.ndarray]]: + """ + Depending on the trainer choice, data fed to the network might be pre-processed + on a different way. That is, in standard training we provide the data to the + network as we receive it to the loader. Some regularization techniques, like mixup + alter the data. 
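+        The standard trainer leaves the batch untouched: X is forwarded as-is and the
+        labels are passed through as 'y_a'.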
+ + Args: + X (np.ndarray): The batch training features + y (np.ndarray): The batch training labels + + Returns: + np.ndarray: that processes data + typing.Dict[str, np.ndarray]: arguments to the criterion function + """ + return X, {'y_a': y} + + def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: float = 1.0 + ) -> typing.Callable: + return lambda criterion, pred: criterion(pred, y_a) + + @staticmethod + def get_properties(dataset_properties: typing.Optional[typing.Dict[str, typing.Any]] = None + ) -> typing.Dict[str, str]: + return { + 'shortname': 'StandardTrainer', + 'name': 'StandardTrainer', + } + + @staticmethod + def get_hyperparameter_search_space(dataset_properties: typing.Optional[typing.Dict] = None + ) -> ConfigurationSpace: + cs = ConfigurationSpace() + return cs diff --git a/autoPyTorch/utils/config/__init__.py b/autoPyTorch/pipeline/components/training/trainer/__init__.py old mode 100644 new mode 100755 similarity index 100% rename from autoPyTorch/utils/config/__init__.py rename to autoPyTorch/pipeline/components/training/trainer/__init__.py diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py new file mode 100644 index 000000000..6c26df225 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py @@ -0,0 +1,393 @@ +import time +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import numpy as np + +import torch +from torch.autograd import Variable +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler +from torch.utils.tensorboard.writer import SummaryWriter + +from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent +from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score +from autoPyTorch.utils.logging_ import PicklableClientLogger + + +class BudgetTracker(object): + def __init__(self, + budget_type: str, + max_epochs: Optional[int] = None, + max_runtime: Optional[int] = None, + ): + """ + An object for tracking when to stop the network training. + It handles epoch based criteria as well as training based criteria. + + It also allows to define a 'epoch_or_time' budget type, which means, + the first of them both which is exhausted, is honored + + In case use_pynisher is set to false, this function allows to + still terminate the task with a time domain consideration + """ + self.start_time = time.time() + self.budget_type = budget_type + self.max_epochs = max_epochs + self.max_runtime = max_runtime + + def is_max_epoch_reached(self, epoch: int) -> bool: + + # Make None a method to run without this constrain + if self.max_epochs is None: + return False + if self.budget_type in ['epochs', 'epoch_or_time'] and epoch > self.max_epochs: + return True + return False + + def is_max_time_reached(self) -> bool: + # Make None a method to run without this constrain + if self.max_runtime is None: + return False + elapsed_time = time.time() - self.start_time + if self.budget_type in ['runtime', 'epoch_or_time'] and elapsed_time > self.max_runtime: + return True + return False + + +class RunSummary(object): + def __init__( + self, + total_parameter_count: float, + trainable_parameter_count: float, + ): + """ + A useful object to track performance per epoch. + + It allows to track train, validation and test information not only for + debug, but for research purposes (Like understanding overfit). 
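+        The tracked values can later be used, for example, to plot learning curves of
+        individual runs.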
+ + It does so by tracking a metric/loss at the end of each epoch. + """ + self.performance_tracker = { + 'start_time': {}, + 'end_time': {}, + } # type: Dict[str, Dict] + + self.total_parameter_count = total_parameter_count + self.trainable_parameter_count = trainable_parameter_count + + # Allow to track the training performance + self.performance_tracker['train_loss'] = {} + + # Allow to track the val performance + self.performance_tracker['val_loss'] = {} + + # Allow to track the test performance + self.performance_tracker['test_loss'] = {} + + # Allow to track the metrics performance + for metric in ['train_metrics', 'val_metrics', 'test_metrics']: + self.performance_tracker[metric] = {} + + def add_performance(self, + epoch: int, + start_time: float, + end_time: float, + train_loss: float, + train_metrics: Dict[str, float], + val_metrics: Dict[str, float] = {}, + test_metrics: Dict[str, float] = {}, + val_loss: Optional[float] = None, + test_loss: Optional[float] = None, + ) -> None: + """ + Tracks performance information about the run, useful for + plotting individual runs + """ + self.performance_tracker['train_loss'][epoch] = train_loss + self.performance_tracker['val_loss'][epoch] = val_loss + self.performance_tracker['test_loss'][epoch] = test_loss + self.performance_tracker['start_time'][epoch] = start_time + self.performance_tracker['end_time'][epoch] = end_time + self.performance_tracker['train_metrics'][epoch] = train_metrics + self.performance_tracker['val_metrics'][epoch] = val_metrics + self.performance_tracker['test_metrics'][epoch] = test_metrics + + def get_best_epoch(self, loss_type: str = 'val_loss') -> int: + return np.argmin( + [self.performance_tracker[loss_type][e] for e in range(1, len( + self.performance_tracker[loss_type]) + 1 + )] + ) + + def get_last_epoch(self) -> int: + if 'train_loss' not in self.performance_tracker: + return 0 + else: + return max(self.performance_tracker['train_loss'].keys()) + + def repr_last_epoch(self) -> str: + """ + For debug purposes, returns a nice representation of last epoch + performance + + Returns: + str: A nice representation of the last epoch + """ + last_epoch = len(self.performance_tracker['train_loss']) + string = "\n" + string += '=' * 40 + string += f"\n\t\tEpoch {last_epoch}\n" + string += '=' * 40 + string += "\n" + for key, value in sorted(self.performance_tracker.items()): + if isinstance(value[last_epoch], dict): + # Several metrics can be passed + string += "\t{}:\n".format( + key, + ) + for sub_key, sub_value in sorted(value[last_epoch].items()): + string += "\t\t{}: {}\n".format( + sub_key, + sub_value, + ) + else: + string += "\t{}: {}\n".format( + key, + value[last_epoch], + ) + string += '=' * 40 + return string + + +class BaseTrainerComponent(autoPyTorchTrainingComponent): + + def __init__(self, random_state: Optional[Union[np.random.RandomState, int]] = None) -> None: + super().__init__() + self.random_state = random_state + + def prepare( + self, + metrics: List[Any], + model: torch.nn.Module, + criterion: torch.nn.Module, + budget_tracker: BudgetTracker, + optimizer: Optimizer, + device: torch.device, + metrics_during_training: bool, + scheduler: _LRScheduler, + task_type: int + ) -> None: + + # Save the device to be used + self.device = device + + # Setup the metrics + self.metrics = metrics + + # Setup the loss function + self.criterion = criterion.to(device) + + # setup the model + self.model = model.to(device) + + # setup the optimizers + self.optimizer = optimizer + + # The budget tracker + 
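        # (decides when training must stop: after max_epochs, max_runtime, or whichever
+        # of the two is exhausted first, depending on the configured budget type)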
self.budget_tracker = budget_tracker + + # For best performance, we allow option to prevent comparing metrics every time + self.metrics_during_training = metrics_during_training + + # Scheduler + self.scheduler = scheduler + + # task type (used for calculating metrics) + self.task_type = task_type + + def on_epoch_start(self, X: Dict[str, Any], epoch: int) -> None: + """ + Optional place holder for AutoPytorch Extensions. + + An user can define what happens on every epoch start or every epoch end. + """ + pass + + def on_epoch_end(self, X: Dict[str, Any], epoch: int) -> bool: + """ + Optional place holder for AutoPytorch Extensions. + An user can define what happens on every epoch start or every epoch end. + If returns True, the training is stopped + + """ + return False + + def train_epoch(self, train_loader: torch.utils.data.DataLoader, epoch: int, + logger: PicklableClientLogger, writer: Optional[SummaryWriter], + ) -> Tuple[float, Dict[str, float]]: + ''' + Trains the model for a single epoch. + + Args: + train_loader (torch.utils.data.DataLoader): generator of features/label + epoch (int): The current epoch used solely for tracking purposes + + Returns: + float: training loss + Dict[str, float]: scores for each desired metric + ''' + + loss_sum = 0.0 + N = 0 + self.model.train() + outputs_data = list() + targets_data = list() + + for step, (data, targets) in enumerate(train_loader): + + if self.budget_tracker.is_max_time_reached(): + logger.info("Stopping training as max time reached") + break + + loss, outputs = self.train_step(data, targets) + + # save for metric evaluation + outputs_data.append(outputs.detach()) + targets_data.append(targets.detach()) + + batch_size = data.size(0) + loss_sum += loss * batch_size + N += batch_size + + if writer: + writer.add_scalar( + 'Train/loss', + loss, + epoch * len(train_loader) + step, + ) + + if self.metrics_during_training: + return loss_sum / N, self.compute_metrics(outputs_data, targets_data) + else: + return loss_sum / N, {} + + def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torch.Tensor]: + """ + Allows to train 1 step of gradient descent, given a batch of train/labels + + Args: + data (np.ndarray): input features to the network + targets (np.ndarray): ground truth to calculate loss + + Returns: + torch.Tensor: The predictions of the network + float: the loss incurred in the prediction + """ + # prepare + data = data.float().to(self.device) + targets = targets.long().to(self.device) + + data, criterion_kwargs = self.data_preparation(data, targets) + data = Variable(data) + + # training + self.optimizer.zero_grad() + outputs = self.model(data) + loss_func = self.criterion_preparation(**criterion_kwargs) + loss = loss_func(self.criterion, outputs) + loss.backward() + self.optimizer.step() + if self.scheduler: + if 'ReduceLROnPlateau' in self.scheduler.__class__.__name__: + self.scheduler.step(loss) + else: + self.scheduler.step() + + return loss.item(), outputs + + def evaluate(self, test_loader: torch.utils.data.DataLoader, epoch: int, + writer: Optional[SummaryWriter], + ) -> Tuple[float, Dict[str, float]]: + ''' + Evaluates the model in both metrics and criterion + + Args: + test_loader (torch.utils.data.DataLoader): generator of features/label + epoch (int): the current epoch for tracking purposes + + Returns: + float: test loss + Dict[str, float]: scores for each desired metric + ''' + self.model.eval() + + loss_sum = 0.0 + N = 0 + outputs_data = list() + targets_data = list() + + with torch.no_grad(): + 
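            # accumulate a running loss over all batches and collect outputs/targets so
+            # the metrics can be computed once over the whole evaluation set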
+            for step, (data, targets) in enumerate(test_loader):
+
+                batch_size = data.shape[0]
+
+                data = data.float().to(self.device)
+                targets = targets.long().to(self.device)
+
+                outputs = self.model(data)
+                loss = self.criterion(outputs, targets)
+                loss_sum += loss.item() * batch_size
+                N += batch_size
+
+                outputs_data.append(outputs.detach())
+                targets_data.append(targets.detach())
+
+                if writer:
+                    writer.add_scalar(
+                        'Val/loss',
+                        loss.item(),
+                        epoch * len(test_loader) + step,
+                    )
+
+        self.model.train()
+        return loss_sum / N, self.compute_metrics(outputs_data, targets_data)
+
+    def compute_metrics(self, outputs_data: np.ndarray, targets_data: np.ndarray
+                        ) -> Dict[str, float]:
+        # TODO: change once Ravin provides the PR
+        outputs_data = torch.cat(outputs_data, dim=0)
+        targets_data = torch.cat(targets_data, dim=0)
+        return calculate_score(targets_data, outputs_data, self.task_type, self.metrics)
+
+    def data_preparation(self, X: np.ndarray, y: np.ndarray,
+                         ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
+        """
+        Depending on the trainer choice, data fed to the network might be pre-processed
+        in a different way. That is, in standard training we provide the data to the
+        network as we receive it from the loader. Some regularization techniques, like
+        mixup, alter the data.
+
+        Args:
+            X (np.ndarray): The batch training features
+            y (np.ndarray): The batch training labels
+
+        Returns:
+            np.ndarray: the processed data
+            Dict[str, np.ndarray]: arguments to the criterion function
+        """
+        raise NotImplementedError()
+
+    def criterion_preparation(self, y_a: np.ndarray, y_b: Optional[np.ndarray] = None, lam: float = 1.0
+                              ) -> Callable:  # type: ignore
+        """
+        Depending on the trainer choice, the criterion is not directly applied to the
+        traditional y_pred/y_ground_truth pairs, but rather it might have a slight transformation.
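+        Instead of a loss value, this method returns a callable that encodes how the
+        criterion should be applied for the given trainer.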
+ For example, in the case of mixup training, we need to account for the lambda mixup + + Args: + kwargs (Dict): an expanded dictionary with modifiers to the + criterion calculation + + Returns: + Callable: a lambda that contains the new criterion calculation recipe + """ + raise NotImplementedError() diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py new file mode 100755 index 000000000..e65086cb6 --- /dev/null +++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer_choice.py @@ -0,0 +1,509 @@ +import collections +import logging.handlers +import os +import time +from typing import Any, Dict, List, Optional, Tuple, cast + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.hyperparameters import ( + CategoricalHyperparameter, +) + +import numpy as np + +import pynisher + +import torch +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler +from torch.utils.tensorboard.writer import SummaryWriter + +from autoPyTorch.constants import STRING_TO_TASK_TYPES +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import ( + ThirdPartyComponents, + autoPyTorchComponent, + find_components, +) +from autoPyTorch.pipeline.components.training.losses import get_loss_instance +from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics +from autoPyTorch.pipeline.components.training.trainer.base_trainer import ( + BaseTrainerComponent, + BudgetTracker, + RunSummary, +) +from autoPyTorch.utils.common import FitRequirement +from autoPyTorch.utils.logging_ import get_named_client_logger + +trainer_directory = os.path.split(__file__)[0] +_trainers = find_components(__package__, + trainer_directory, + BaseTrainerComponent) +_addons = ThirdPartyComponents(BaseTrainerComponent) + + +def add_trainer(trainer: BaseTrainerComponent) -> None: + _addons.add_component(trainer) + + +class TrainerChoice(autoPyTorchChoice): + """This class is an interface to the PyTorch trainer. + + + To map to pipeline terminology, a choice component will implement the epoch + loop through fit, whereas the component who is chosen will dictate how a single + epoch happens, that is, how batches of data are fed and used to train the network. 
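+    Trainer implementations (e.g. StandardTrainer and MixUpTrainer, plus any third-party
+    component registered through add_trainer) are discovered dynamically and exposed to
+    the optimizer via the '__choice__' hyperparameter.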
+ + """ + def __init__(self, + dataset_properties: Dict[str, Any], + random_state: Optional[np.random.RandomState] = None + ): + + super().__init__(dataset_properties=dataset_properties, + random_state=random_state) + self.run_summary = None # type: Optional[RunSummary] + self.writer = None # type: Optional[SummaryWriter] + self._fit_requirements: Optional[List[FitRequirement]] = [ + FitRequirement("lr_scheduler", (_LRScheduler,), user_defined=False, dataset_property=False), + FitRequirement("job_id", (str,), user_defined=False, dataset_property=False), + FitRequirement("network", (torch.nn.Sequential,), user_defined=False, dataset_property=False), + FitRequirement( + "optimizer", (Optimizer,), user_defined=False, dataset_property=False), + FitRequirement("train_data_loader", + (torch.utils.data.DataLoader,), + user_defined=False, dataset_property=False), + FitRequirement("val_data_loader", + (torch.utils.data.DataLoader,), + user_defined=False, dataset_property=False)] + + def get_fit_requirements(self) -> Optional[List[FitRequirement]]: + return self._fit_requirements + + def get_components(self) -> Dict[str, autoPyTorchComponent]: + """Returns the available trainer components + + Args: + None + + Returns: + Dict[str, autoPyTorchComponent]: all components available + as choices for learning rate scheduling + """ + components = collections.OrderedDict() # type: Dict[str, autoPyTorchComponent] + components.update(_trainers) + components.update(_addons.components) + return components + + def get_hyperparameter_search_space( + self, + dataset_properties: Optional[Dict[str, str]] = None, + default: Optional[str] = None, + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None, + ) -> ConfigurationSpace: + """Returns the configuration space of the current chosen components + + Args: + dataset_properties (Optional[Dict[str, str]]): Describes the dataset to work on + default (Optional[str]): Default scheduler to use + include: Optional[Dict[str, Any]]: what components to include. It is an exhaustive + list, and will exclusively use this components. + exclude: Optional[Dict[str, Any]]: which components to skip + + Returns: + ConfigurationSpace: the configuration space of the hyper-parameters of the + chosen component + """ + cs = ConfigurationSpace() + + if dataset_properties is None: + dataset_properties = {} + + # Compile a list of legal trainers for this problem + available_trainers = self.get_available_components( + dataset_properties=dataset_properties, + include=include, exclude=exclude) + + if len(available_trainers) == 0: + raise ValueError("No trainer found") + + if default is None: + defaults = ['StandardTrainer', + ] + for default_ in defaults: + if default_ in available_trainers: + default = default_ + break + + trainer = CategoricalHyperparameter( + '__choice__', + list(available_trainers.keys()), + default_value=default + ) + cs.add_hyperparameter(trainer) + for name in available_trainers: + trainer_configuration_space = available_trainers[name]. \ + get_hyperparameter_search_space(dataset_properties) + parent_hyperparameter = {'parent': trainer, 'value': name} + cs.add_configuration_space( + name, + trainer_configuration_space, + parent_hyperparameter=parent_hyperparameter + ) + + self.configuration_space_ = cs + self.dataset_properties_ = dataset_properties + return cs + + def transform(self, X: Dict[str, Any]) -> Dict[str, Any]: + """The transform function calls the transform function of the + underlying model and returns the transformed array. 
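+        In this component the transformation only attaches the run summary collected
+        during fit to the fit dictionary.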
+ + Args: + X (np.ndarray): input features + + Returns: + np.ndarray: Transformed features + """ + X.update({'run_summary': self.run_summary}) + return X + + def fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> autoPyTorchComponent: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + # Make sure that the prerequisites are there + self.check_requirements(X, y) + + # Setup the logger + self.logger = get_named_client_logger( + name=X['job_id'], + # Log to a user provided port else to the default logging port + port=X['logger_port' + ] if 'logger_port' in X else logging.handlers.DEFAULT_TCP_LOGGING_PORT, + ) + + fit_function = self._fit + if X['use_pynisher']: + wall_time_in_s = X['runtime'] if 'runtime' in X else None + memory_limit = X['cpu_memory_limit'] if 'cpu_memory_limit' in X else None + fit_function = pynisher.enforce_limits( + wall_time_in_s=wall_time_in_s, + mem_in_mb=memory_limit, + logger=self.logger + )(self._fit) + + # Call the actual fit function. + state_dict = fit_function( + X=X, + y=y, + **kwargs + ) + + if X['use_pynisher']: + # Normally the X[network] is a pointer to the object, so at the + # end, when we train using X, the pipeline network is updated for free + # If we do multiprocessing (because of pynisher) we have to update + # X[network] manually. we do so in a way that every pipeline component + # can see this new network -- via an update, not overwrite of the pointer + state_dict = state_dict.result + X['network'].load_state_dict(state_dict) + + # TODO: when have the optimizer code, the pynisher object might have failed + # We should process this function as Failure if so trough fit_function.exit_status + return cast(autoPyTorchComponent, self.choice) + + def _fit(self, X: Dict[str, Any], y: Any = None, **kwargs: Any) -> torch.nn.Module: + """ + Fits a component by using an input dictionary with pre-requisites + + Args: + X (X: Dict[str, Any]): Dependencies needed by current component to perform fit + y (Any): not used. To comply with sklearn API + + Returns: + A instance of self + """ + + # Comply with mypy + # Notice that choice here stands for the component choice framework, + # where we dynamically build the configuration space by selecting the available + # component choices. 
In this case, is what trainer choices are available + assert self.choice is not None + + # Setup a Logger and other logging support + # Writer is not pickable -- make sure it is not saved in self + writer = None + if 'use_tensorboard_logger' in X and X['use_tensorboard_logger']: + writer = SummaryWriter(log_dir=X['backend'].temporary_directory) + + if X["torch_num_threads"] > 0: + torch.set_num_threads(X["torch_num_threads"]) + + budget_tracker = BudgetTracker( + budget_type=X['budget_type'], + max_runtime=X['runtime'] if 'runtime' in X else None, + max_epochs=X['epochs'] if 'epochs' in X else None, + ) + + # Support additional user metrics + additional_metrics = X['additional_metrics'] if 'additional_metrics' in X else None + additional_losses = X['additional_losses'] if 'additional_losses' in X else None + self.choice.prepare( + model=X['network'], + metrics=get_metrics(dataset_properties=X['dataset_properties'], + names=additional_metrics), + criterion=get_loss_instance(X['dataset_properties'], + name=additional_losses), + budget_tracker=budget_tracker, + optimizer=X['optimizer'], + device=self.get_device(X), + metrics_during_training=X['metrics_during_training'], + scheduler=X['lr_scheduler'], + task_type=STRING_TO_TASK_TYPES[X['dataset_properties']['task_type']] + ) + total_parameter_count, trainable_parameter_count = self.count_parameters(X['network']) + self.run_summary = RunSummary( + total_parameter_count, + trainable_parameter_count, + ) + + epoch = 1 + + while True: + + # prepare epoch + start_time = time.time() + + self.choice.on_epoch_start(X=X, epoch=epoch) + + # training + train_loss, train_metrics = self.choice.train_epoch( + train_loader=X['train_data_loader'], + epoch=epoch, + logger=self.logger, + writer=writer, + ) + + val_loss, val_metrics, test_loss, test_metrics = None, {}, None, {} + if self.eval_valid_each_epoch(X): + val_loss, val_metrics = self.choice.evaluate(X['val_data_loader'], epoch, writer) + if 'test_data_loader' in X and X['test_data_loader']: + test_loss, test_metrics = self.choice.evaluate(X['test_data_loader'], epoch, writer) + + # Save training information + self.run_summary.add_performance( + epoch=epoch, + start_time=start_time, + end_time=time.time(), + train_loss=train_loss, + val_loss=val_loss, + test_loss=test_loss, + train_metrics=train_metrics, + val_metrics=val_metrics, + test_metrics=test_metrics, + ) + + # Save the weights of the best model and, if patience + # exhausted break training + if self.early_stop_handler(X): + break + + if self.choice.on_epoch_end(X=X, epoch=epoch): + break + + self.logger.debug(self.run_summary.repr_last_epoch()) + + # Reached max epoch on next iter, don't even go there + if budget_tracker.is_max_epoch_reached(epoch + 1): + break + + epoch += 1 + + torch.cuda.empty_cache() + + # wrap up -- add score if not evaluating every epoch + if not self.eval_valid_each_epoch(X): + val_loss, val_metrics = self.choice.evaluate(X['val_data_loader']) + if 'test_data_loader' in X and X['val_data_loader']: + test_loss, test_metrics = self.choice.evaluate(X['test_data_loader']) + self.run_summary.add_performance( + epoch=epoch, + start_time=start_time, + end_time=time.time(), + train_loss=train_loss, + val_loss=val_loss, + test_loss=test_loss, + train_metrics=train_metrics, + val_metrics=val_metrics, + test_metrics=test_metrics, + ) + self.logger.debug(self.run_summary.repr_last_epoch()) + self.save_model_for_ensemble() + + self.logger.info(f"Finished training with {self.run_summary.repr_last_epoch()}") + + # Tag as fitted + 
self.fitted_ = True + + return X['network'].state_dict() + + def early_stop_handler(self, X: Dict[str, Any]) -> bool: + """ + If early stopping is enabled, this procedure stops the training after a + given patience + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + + Returns: + bool: If true, training should be stopped + """ + assert self.run_summary is not None + epochs_since_best = self.run_summary.get_best_epoch() - self.run_summary.get_last_epoch() + if epochs_since_best > X['early_stopping']: + return True + + return False + + def eval_valid_each_epoch(self, X: Dict[str, Any]) -> bool: + """ + Returns true if we are supposed to evaluate the model on every epoch, + on the validation data. Usually, we only validate the data at the end, + but in the case of early stopping, is appealing to evaluate each epoch. + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + + Returns: + bool: if True, the model is evaluated in every epoch + + """ + if 'early_stopping' in X and X['early_stopping']: + return True + + # We need to know if we should reduce the rate based on val loss + if 'ReduceLROnPlateau' in X['lr_scheduler'].__class__.__name__: + return True + + return False + + def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None: + """ + A mechanism in code to ensure the correctness of the fit dictionary + It recursively makes sure that the children and parent level requirements + are honored before fit. + + Args: + X (Dict[str, Any]): Dictionary with fitted parameters. 
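To make the patience logic of early_stop_handler above concrete, here is a small standalone sketch of early stopping on a validation-loss history; the numbers are invented and the helper does not claim to mirror RunSummary.

def should_stop(val_losses, patience):
    """Stop once the best epoch lies more than `patience` epochs in the past."""
    best_epoch = min(range(len(val_losses)), key=lambda i: val_losses[i])
    last_epoch = len(val_losses) - 1
    return (last_epoch - best_epoch) > patience


history = [0.90, 0.70, 0.65, 0.66, 0.67, 0.68, 0.69]
print(should_stop(history, patience=3))  # True: the best epoch lies 4 epochs back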
It is a message passing + mechanism, in which during a transform, a components adds relevant information + so that further stages can be properly fitted + """ + + # make sure the parent requirements are honored + super().check_requirements(X, y) + + # We need a working dir in where to put our data + if 'backend' not in X: + raise ValueError('Need a backend to provide the working directory, ' + "yet 'backend' was not found in the fit dictionary") + + # For resource allocation, we need to know if pynisher is enabled + if 'use_pynisher' not in X: + raise ValueError('To fit a Trainer, expected fit dictionary to have use_pynisher') + + # Whether we should evaluate metrics during training or no + if 'metrics_during_training' not in X: + raise ValueError('Missing metrics_during_training in the fit dictionary') + + # Setup Components + if 'lr_scheduler' not in X: + raise ValueError("Learning rate scheduler not found in the fit dictionary!") + + if 'network' not in X: + raise ValueError("Network not found in the fit dictionary!") + + if 'optimizer' not in X: + raise ValueError("Optimizer not found in the fit dictionary!") + + # Training Components + if 'train_data_loader' not in X: + raise ValueError("train_data_loader not found in the fit dictionary!") + + if 'val_data_loader' not in X: + raise ValueError("val_data_loader not found in the fit dictionary!") + + if 'budget_type' not in X: + raise ValueError("Budget type not found in the fit dictionary!") + else: + if 'epochs' not in X or 'runtime' not in X or 'epoch_or_time' not in X: + if X['budget_type'] in ['epochs', 'epoch_or_time'] and 'epochs' not in X: + raise ValueError("Budget type is epochs but " + "no epochs was not found in the fit dictionary!") + elif X['budget_type'] in ['runtime', 'epoch_or_time'] and 'runtime' not in X: + raise ValueError("Budget type is runtime but " + "no maximum number of seconds was provided!") + else: + raise ValueError("Unsupported budget type provided: {}".format( + X['budget_type'] + )) + + if 'job_id' not in X: + raise ValueError('To fit a trainer, expected fit dictionary to have a job_id') + + for config_option in ["torch_num_threads", 'device']: + if config_option not in X: + raise ValueError("To fit a trainer, expected fit dictionary to have a {}".format( + config_option + )) + + def get_device(self, X: Dict[str, Any]) -> torch.device: + """ + Returns the device to do torch operations + + Args: + X (Dict[str, Any]): A fit dictionary to control how the pipeline + is fitted + + Returns: + torch.device: the device in which to compute operations. 
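Putting the checks above together, a fit dictionary for this component would carry at least entries like the following; every value below is a placeholder, since the real objects are produced by earlier pipeline stages.

# Placeholder fit dictionary illustrating the keys check_requirements() looks for
X = {
    'backend': None,                 # backend object providing the working directory
    'use_pynisher': False,           # whether _fit is wrapped with resource limits
    'metrics_during_training': True,
    'lr_scheduler': None,            # torch.optim.lr_scheduler instance
    'network': None,                 # torch.nn.Sequential
    'optimizer': None,               # torch.optim.Optimizer instance
    'train_data_loader': None,       # torch.utils.data.DataLoader
    'val_data_loader': None,         # torch.utils.data.DataLoader
    'budget_type': 'epochs',
    'epochs': 50,                    # required because budget_type is 'epochs'
    'job_id': 'example_run_0',
    'torch_num_threads': 1,
    'device': 'cpu',
}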
Cuda/cpu + """ + if not torch.cuda.is_available(): + return torch.device('cpu') + return torch.device(X['device']) + + @staticmethod + def count_parameters(model: torch.nn.Module) -> Tuple[int, int]: + """ + A method to get the total/trainable parameter count from the model + + Args: + model (torch.nn.Module): the module from which to count parameters + + Returns: + total_parameter_count: the total number of parameters of the model + trainable_parameter_count: only the parameters being optimized + """ + total_parameter_count = sum( + p.numel() for p in model.parameters()) + trainable_parameter_count = sum( + p.numel() for p in model.parameters() if p.requires_grad) + return total_parameter_count, trainable_parameter_count + + def save_model_for_ensemble(self) -> str: + raise NotImplementedError() + + def __str__(self) -> str: + """ Allow a nice understanding of what components where used """ + string = str(self.run_summary) + return string diff --git a/autoPyTorch/pipeline/create_searchspace_util.py b/autoPyTorch/pipeline/create_searchspace_util.py new file mode 100644 index 000000000..ca5710498 --- /dev/null +++ b/autoPyTorch/pipeline/create_searchspace_util.py @@ -0,0 +1,225 @@ +import itertools +from typing import Any, Dict, List, Optional, Set, Tuple, Union + +from ConfigSpace.configuration_space import ConfigurationSpace +from ConfigSpace.forbidden import ForbiddenAndConjunction +from ConfigSpace.forbidden import ForbiddenEqualsClause + +import numpy as np + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent + + +def get_match_array( + pipeline: List[Tuple[str, autoPyTorchChoice]], + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None +) -> np.ndarray: + + # Duck typing, not sure if it's good... + node_i_is_choice = [] + node_i_choices = [] # type: List[List[Union[autoPyTorchComponent, autoPyTorchChoice]]] + node_i_choices_names = [] + all_nodes = [] + for node_name, node in pipeline: + all_nodes.append(node) + is_choice = hasattr(node, "get_available_components") + node_i_is_choice.append(is_choice) + + node_include = include.get( + node_name) if include is not None else None + node_exclude = exclude.get( + node_name) if exclude is not None else None + + if is_choice: + node_i_choices_names.append(list(node.get_available_components( + dataset_properties, include=node_include, + exclude=node_exclude).keys())) + node_i_choices.append(list(node.get_available_components( + dataset_properties, include=node_include, + exclude=node_exclude).values())) + + else: + node_i_choices.append([node]) + + matches_dimensions = [len(choices) for choices in node_i_choices] + # Start by allowing every combination of nodes. Go through all + # combinations/pipelines and erase the illegal ones + matches = np.ones(matches_dimensions, dtype=int) + + # TODO: Check if we need this, like are there combinations from the + # pipeline we should dynamically avoid? 
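As a toy illustration of the match array built above (the node count, shape and disabled combination are invented): each axis corresponds to one choice node, the array starts as all ones, zeroing entries disables combinations, and summing a slice tells whether a component still appears in any legal pipeline.

import numpy as np

# Two choice nodes with 2 and 3 components respectively
matches = np.ones((2, 3), dtype=int)

# Suppose the third component of node 1 is illegal with every choice of node 0
matches[:, 2] = 0

# A component of node 1 remains an active choice if any combination with it survives
for c_idx in range(matches.shape[1]):
    slices = tuple(slice(None) if axis != 1 else slice(c_idx, c_idx + 1)
                   for axis in range(matches.ndim))
    print(c_idx, 'active' if matches[slices].sum() > 0 else 'inactive')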
+ return matches + + +def find_active_choices( + matches: np.ndarray, + node: Union[autoPyTorchComponent, autoPyTorchChoice], + node_idx: int, + dataset_properties: Dict[str, Any], + include: Optional[List[str]] = None, + exclude: Optional[List[str]] = None +) -> List[str]: + if not hasattr(node, "get_available_components"): + raise ValueError() + available_components = node.get_available_components(dataset_properties, + include=include, + exclude=exclude) + assert matches.shape[node_idx] == len(available_components), \ + (matches.shape[node_idx], len(available_components)) + + choices = [] + for c_idx, component in enumerate(available_components): + slices = tuple(slice(None) if idx != node_idx else slice(c_idx, c_idx + 1) + for idx in range(len(matches.shape))) + + if np.sum(matches[slices]) > 0: + choices.append(component) + return choices + + +def add_forbidden( + conf_space: ConfigurationSpace, + pipeline: List[Tuple[str, autoPyTorchChoice]], + matches: np.ndarray, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None +) -> ConfigurationSpace: + # Not sure if this works for 3D + node_i_is_choice = [] + node_i_choices_names = [] # type: List[List[str]] + node_i_choices = [] # type: List[List[Union[autoPyTorchComponent, autoPyTorchChoice]]] + all_nodes = [] + for node_name, node in pipeline: + all_nodes.append(node) + is_choice = hasattr(node, "get_available_components") + node_i_is_choice.append(is_choice) + + node_include = include.get( + node_name) if include is not None else None + node_exclude = exclude.get( + node_name) if exclude is not None else None + + if is_choice: + node_i_choices_names.append( + [str(element) for element in + node.get_available_components( + dataset_properties, include=node_include, + exclude=node_exclude).keys()] + + ) + node_i_choices.append( + list(node.get_available_components( + dataset_properties, include=node_include, + exclude=node_exclude + ).values())) + + else: + node_i_choices_names.append([node_name]) + node_i_choices.append([node]) + + # Find out all chains of choices. 
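And this, in isolation, is what one generated forbidden clause amounts to in ConfigSpace (component names invented): a ForbiddenAndConjunction of ForbiddenEqualsClause terms bans one concrete combination of choices, so sampling never returns it.

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.forbidden import ForbiddenAndConjunction, ForbiddenEqualsClause
from ConfigSpace.hyperparameters import CategoricalHyperparameter

cs = ConfigurationSpace()
prep = CategoricalHyperparameter('preprocessor:__choice__', ['sparse_ok', 'dense_only'])
net = CategoricalHyperparameter('network:__choice__', ['mlp', 'conv_net'])
cs.add_hyperparameters([prep, net])

# Ban the single combination 'dense_only' + 'conv_net'
cs.add_forbidden_clause(ForbiddenAndConjunction(
    ForbiddenEqualsClause(prep, 'dense_only'),
    ForbiddenEqualsClause(net, 'conv_net'),
))
print(cs.sample_configuration())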
Only in such a chain its possible to + # have several forbidden constraints + choices_chains = [] + idx = 0 + while idx < len(pipeline): + if node_i_is_choice[idx]: + chain_start = idx + idx += 1 + while idx < len(pipeline) and node_i_is_choice[idx]: + idx += 1 + chain_stop = idx + choices_chains.append((chain_start, chain_stop)) + idx += 1 + + for choices_chain in choices_chains: + constraints = set() # type: Set[Tuple] + + chain_start = choices_chain[0] + chain_stop = choices_chain[1] + chain_length = chain_stop - chain_start + + # Add one to have also have chain_length in the range + for sub_chain_length in range(2, chain_length + 1): + for start_idx in range(chain_start, chain_stop - sub_chain_length + 1): + indices = range(start_idx, start_idx + sub_chain_length) + node_names = [pipeline[idx][0] for idx in indices] + + num_node_choices = [] + node_choice_names = [] + skip_array_shape = [] + + for idx in indices: + node = all_nodes[idx] + available_components = node.get_available_components( + dataset_properties, + include=node_i_choices_names[idx]) + assert len(available_components) > 0, len(available_components) + skip_array_shape.append(len(available_components)) + num_node_choices.append(range(len(available_components))) + node_choice_names.append([name for name in available_components]) + + # Figure out which choices were already abandoned + skip_array = np.zeros(skip_array_shape) + for product in itertools.product(*num_node_choices): + for node_idx, choice_idx in enumerate(product): + node_idx += start_idx + slices_ = tuple( + slice(None) if idx != node_idx else + slice(choice_idx, choice_idx + 1) for idx in + range(len(matches.shape))) + + if np.sum(matches[slices_]) == 0: + skip_array[product] = 1 + + for product in itertools.product(*num_node_choices): + if skip_array[product]: + continue + + slices = tuple( + slice(None) if idx not in indices else + slice(product[idx - start_idx], + product[idx - start_idx] + 1) for idx in + range(len(matches.shape))) + + # This prints the affected nodes + # print [node_choice_names[i][product[i]] + # for i in range(len(product))], \ + # np.sum(matches[slices]) + + if np.sum(matches[slices]) == 0: + constraint = tuple([(node_names[i], + node_choice_names[i][product[i]]) + for i in range(len(product))]) + + # Check if a more general constraint/forbidden clause + # was already added + continue_ = False + for constraint_length in range(2, len(constraint)): + constr_starts = len(constraint) - constraint_length + 1 + for constraint_start_idx in range(constr_starts): + constraint_end_idx = constraint_start_idx + constraint_length + sub_constraint = constraint[constraint_start_idx:constraint_end_idx] + if sub_constraint in constraints: + continue_ = True + break + if continue_: + break + if continue_: + continue + + constraints.add(constraint) + + forbiddens = [] + for i in range(len(product)): + forbiddens.append( + ForbiddenEqualsClause(conf_space.get_hyperparameter( + node_names[i] + ":__choice__"), + node_choice_names[i][product[i]])) + forbidden = ForbiddenAndConjunction(*forbiddens) + conf_space.add_forbidden_clause(forbidden) + + return conf_space diff --git a/autoPyTorch/pipeline/image_classification.py b/autoPyTorch/pipeline/image_classification.py new file mode 100644 index 000000000..04e70c3d5 --- /dev/null +++ b/autoPyTorch/pipeline/image_classification.py @@ -0,0 +1,206 @@ +from typing import Any, Dict, List, Optional, Tuple + +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace + +import numpy as np + +from 
sklearn.base import ClassifierMixin + +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer_choice import ( + NormalizerChoice +) +from autoPyTorch.pipeline.components.setup.augmentation.image.ImageAugmenter import ImageAugmenter +from autoPyTorch.pipeline.components.setup.early_preprocessor.EarlyPreprocessing import EarlyPreprocessing +# from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice import SchedulerChoice +# from autoPyTorch.pipeline.components.setup.network.base_network_choice import NetworkChoice +# from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice import OptimizerChoice +# from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import ( +# NetworkInitializerChoice +# ) + + +class ImageClassificationPipeline(ClassifierMixin, BasePipeline): + """This class is a proof of concept to integrate AutoSklearn Components + + It implements a pipeline, which includes as steps: + + ->One preprocessing step + ->One neural network + + Contrary to the sklearn API it is not possible to enumerate the + possible parameters in the __init__ function because we only know the + available classifiers at runtime. For this reason the user must + specifiy the parameters by passing an instance of + ConfigSpace.configuration_space.Configuration. + + + Args: + config (Configuration) + The configuration to evaluate. + random_state (Optional[RandomState): random_state is the random number generator + + Attributes: + Examples + """ + + def __init__( + self, + config: Optional[Configuration] = None, + steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, + dataset_properties: Optional[Dict[str, Any]] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict[str, Any]] = None + ): + super().__init__( + config, steps, dataset_properties, include, exclude, + random_state, init_params) + + def fit_transformer( + self, + X: np.ndarray, + y: np.ndarray, + fit_params: Optional[Dict[str, Any]] = None + ) -> Tuple[np.ndarray, Optional[Dict[str, Any]]]: + """Fits the pipeline given a training (X,y) pair + + Args: + X (np.ndarray): features from which to guess targets + y (np.ndarray): classification targets for this task + fit_params (Optional[Dict[str, Any]]]): handy communication dictionary, + so that inter-stages of the pipeline can share information + + Returns: + np.ndarray: the transformed features + Optional[Dict[str, Any]]]: A dictionary to share fit informations + within the pipeline stages + """ + + if fit_params is None: + fit_params = {} + + X, fit_params = super().fit_transformer( + X, y, fit_params=fit_params) + + return X, fit_params + + def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: + """predict_proba. + + Args: + X (np.ndarray): input to the pipeline, from which to guess targets + batch_size (Optional[int]): batch_size controls whether the pipeline + will be called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. 
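The chunking this docstring describes can be pictured with a plain function and a dummy predictor (both invented for illustration); the implementation below does the same thing, only recursively on the pipeline itself.

import numpy as np


def dummy_predict_proba(X: np.ndarray) -> np.ndarray:
    # Stand-in for the pipeline: two-class probabilities
    p = np.random.rand(X.shape[0], 1)
    return np.hstack([p, 1.0 - p])


def predict_proba_in_chunks(X: np.ndarray, batch_size: int) -> np.ndarray:
    # Probe a tiny slice to learn the output width, then fill a preallocated array
    target = dummy_predict_proba(X[0:2])
    y = np.zeros((X.shape[0], target.shape[1]), dtype=np.float32)
    for k in range(max(1, int(np.ceil(X.shape[0] / batch_size)))):
        batch_from = k * batch_size
        batch_to = min((k + 1) * batch_size, X.shape[0])
        y[batch_from:batch_to] = dummy_predict_proba(X[batch_from:batch_to]).astype(np.float32)
    return y


print(predict_proba_in_chunks(np.random.rand(10, 4), batch_size=3).shape)  # (10, 2)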
+ Returns: + np.ndarray: Probabilities of the target being certain class + """ + if batch_size is None: + return super().predict_proba(X) + + else: + if not isinstance(batch_size, int): + raise ValueError("Argument 'batch_size' must be of type int, " + "but is '%s'" % type(batch_size)) + if batch_size <= 0: + raise ValueError("Argument 'batch_size' must be positive, " + "but is %d" % batch_size) + + else: + # Probe for the target array dimensions + target = self.predict_proba(X[0:2].copy()) + + y = np.zeros((X.shape[0], target.shape[1]), + dtype=np.float32) + + for k in range(max(1, int(np.ceil(float(X.shape[0]) / batch_size)))): + batch_from = k * batch_size + batch_to = min([(k + 1) * batch_size, X.shape[0]]) + pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None) + y[batch_from:batch_to] = pred_prob.astype(np.float32) + + return y + + def _get_hyperparameter_search_space(self, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + ) -> ConfigurationSpace: + """Create the hyperparameter configuration space. + + For the given steps, and the Choices within that steps, + this procedure returns a configuration space object to + explore. + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Caracteristics + of the dataset to guide the pipeline choices of components + + Returns: + cs (Configuration): The configuration space describing + the SimpleRegressionClassifier. + """ + cs = ConfigurationSpace() + + if dataset_properties is None or not isinstance(dataset_properties, dict): + dataset_properties = dict() + if 'target_type' not in dataset_properties: + dataset_properties['target_type'] = 'image_classification' + if dataset_properties['target_type'] != 'image_classification': + dataset_properties['target_type'] = 'image_classification' + # get the base search space given this + # dataset properties. Then overwrite with custom + # classification requirements + cs = self._get_base_search_space( + cs=cs, dataset_properties=dataset_properties, + exclude=exclude, include=include, pipeline=self.steps) + + # Here we add custom code, like this with this + # is not a valid configuration + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], + ) -> List[Tuple[str, autoPyTorchChoice]]: + """ + Defines what steps a pipeline should follow. + The step itself has choices given via autoPyTorchChoice. + + Returns: + List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised + by the pipeline. 
+ """ + steps = [] # type: List[Tuple[str, autoPyTorchChoice]] + + default_dataset_properties = {'target_type': 'image_classification'} + if dataset_properties is not None: + default_dataset_properties.update(dataset_properties) + + steps.extend([ + ("normalizer", NormalizerChoice(default_dataset_properties)), + ("preprocessing", EarlyPreprocessing()), + ("image_augmenter", ImageAugmenter()) + # ("network", NetworkChoice(default_dataset_properties)), + # ("network_init", NetworkInitializerChoice(default_dataset_properties)), + # ("optimizer", OptimizerChoice(default_dataset_properties)), + # ("lr_scheduler", SchedulerChoice(default_dataset_properties)), + ]) + return steps + + def _get_estimator_hyperparameter_name(self) -> str: + """ + Returns the name of the current estimator. + + Returns: + str: name of the pipeline type + """ + return "image_classifier" diff --git a/autoPyTorch/pipeline/nodes/__init__.py b/autoPyTorch/pipeline/nodes/__init__.py deleted file mode 100644 index 03fbf0e51..000000000 --- a/autoPyTorch/pipeline/nodes/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation -from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector -from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector -from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector -from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector -from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector -from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm -from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector -from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector -from autoPyTorch.pipeline.nodes.train_node import TrainNode -from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings -from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector -from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector -from autoPyTorch.pipeline.nodes.imputation import Imputation -from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding -from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector -from autoPyTorch.pipeline.nodes.initialization_selector import InitializationSelector -from autoPyTorch.pipeline.nodes.ensemble import EnableComputePredictionsForEnsemble, SavePredictionsForEnsemble, BuildEnsemble, EnsembleServer -from autoPyTorch.pipeline.nodes.create_dataloader import CreateDataLoader -from autoPyTorch.pipeline.nodes.create_dataset_info import CreateDatasetInfo -from autoPyTorch.pipeline.nodes.baseline_trainer import BaselineTrainer diff --git a/autoPyTorch/pipeline/nodes/autonet_settings.py b/autoPyTorch/pipeline/nodes/autonet_settings.py deleted file mode 100644 index 6f44ba662..000000000 --- a/autoPyTorch/pipeline/nodes/autonet_settings.py +++ /dev/null @@ -1,51 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import logging -import numpy as np -import sys - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.hyperparameter_search_space_update import parse_hyperparameter_search_space_updates - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from hpbandster.core.result import json_result_logger - -class 
AutoNetSettings(PipelineNode): - def __init__(self): - super(AutoNetSettings, self).__init__() - - self.logger_settings = dict() - self.logger_settings['debug'] = logging.DEBUG - self.logger_settings['info'] = logging.INFO - self.logger_settings['warning'] = logging.WARNING - self.logger_settings['error'] = logging.ERROR - self.logger_settings['critical'] = logging.CRITICAL - - - def fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid, refit=False): - - autonet_logger = logging.getLogger('autonet') - hpbandster_logger = logging.getLogger('hpbandster') - - level = self.logger_settings[pipeline_config['log_level']] - autonet_logger.setLevel(level) - hpbandster_logger.setLevel(level) - - autonet_logger.info("Start autonet with config:\n" + str(pipeline_config)) - result_logger = [] - if not refit: - result_logger = json_result_logger(directory=pipeline_config["result_logger_dir"], overwrite=True) - return { 'X_train': X_train, 'Y_train': Y_train, 'X_valid': X_valid, 'Y_valid': Y_valid, - 'result_loggers': [result_logger], 'shutdownables': []} - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name='log_level', default='warning', type=str, choices=list(self.logger_settings.keys())), - ConfigOption(name='random_seed', default=lambda c: abs(hash(c["run_id"])) % (2 ** 32), type=int, depends=True, info="Make sure to specify the same seed for all workers."), - ConfigOption(name='hyperparameter_search_space_updates', default=None, type=["directory", parse_hyperparameter_search_space_updates], - info="object of type HyperparameterSearchSpaceUpdates"), - ConfigOption("result_logger_dir", default=".", type="directory") - ] - return options \ No newline at end of file diff --git a/autoPyTorch/pipeline/nodes/baseline_trainer.py b/autoPyTorch/pipeline/nodes/baseline_trainer.py deleted file mode 100644 index e093ff281..000000000 --- a/autoPyTorch/pipeline/nodes/baseline_trainer.py +++ /dev/null @@ -1,118 +0,0 @@ -import torch -import os as os -import json -import pickle -import numpy as np -import scipy.sparse -import logging -import collections - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.ensemble import read_ensemble_prediction_file, combine_predictions, combine_test_predictions, ensemble_logger - -from autoPyTorch.components.baselines import baselines - -def get_dimensions(a): - if isinstance(a, list): - return len(np.array(a).shape) - return len(a.shape) - -class BaselineTrainer(PipelineNode): - - #TODO: Order - models = collections.OrderedDict({ - "random_forest" : baselines.RFBaseline, - "extra_trees" : baselines.ExtraTreesBaseline, - "lgb" : baselines.LGBBaseline, - "catboost" : baselines.CatboostBaseline, - #"rotation_forest" : baselines.RotationForestBaseline, - "knn" : baselines.KNNBaseline}) - - identifiers = { - "random_forest": (-6, 0, 0, 0.0), - "extra_trees": (-5, 0, 0, 0.0), - "lgb": (-4, 0, 0, 0.0), - "catboost": (-3, 0, 0, 0.0), - #"rotation_forest": (-2, 0, 0, 0.0), - "knn": (-1, 0, 0, 0.0)} - - identifiers_ens = { - -6: baselines.RFBaseline, - -5: baselines.ExtraTreesBaseline, - -4: baselines.LGBBaseline, - -3: baselines.CatboostBaseline, - #-2: baselines.RotationForestBaseline, - -1: baselines.KNNBaseline} - - def __init__(self): - super(BaselineTrainer, self).__init__() - - self.X_test = None - - def 
add_test_data(self, X_test): - self.X_test = X_test - - def fit(self, pipeline_config, X, Y, train_indices, valid_indices, refit): - - baseline_name = self.get_baseline_to_train(pipeline_config) - - if baseline_name is not None: - baseline_model = BaselineTrainer.models[baseline_name]() - else: - return {"baseline_id": None, "baseline_predictions_for_ensemble": None} - - # Fit - fit_output = baseline_model.fit(X[train_indices], Y[train_indices], X[valid_indices], Y[valid_indices]) - baseline_preds = np.array(fit_output["val_preds"]) - - # Test data - if self.X_test is not None: - test_preds = baseline_model.predict(X_test=self.X_test, predict_proba=True) - test_preds = np.array(test_preds) - else: - test_preds = None - - # Save model - identifier = BaselineTrainer.identifiers[baseline_name] - model_savedir = os.path.join(pipeline_config["result_logger_dir"], "models", str(identifier) + ".pkl") - info_savedir = os.path.join(pipeline_config["result_logger_dir"], "models", str(identifier) + "_info.pkl") - os.makedirs(os.path.dirname(model_savedir), exist_ok=True) - - baseline_model.save(model_path=model_savedir, info_path=info_savedir) - - - return {"baseline_id": identifier, "baseline_predictions_for_ensemble": baseline_preds, "baseline_test_predictions_for_ensemble": test_preds} - - def get_baseline_to_train(self, pipeline_config): - trained_baseline_logdir = os.path.join(pipeline_config["result_logger_dir"], "trained_baselines.txt") - - baselines = pipeline_config["baseline_models"] - - trained_baselines = [] - if os.path.isfile(trained_baseline_logdir): - with open(trained_baseline_logdir, "r") as f: - for l in f: - trained_baselines.append(l.replace("\n","")) - - for baseline in baselines: - if baseline not in trained_baselines: - with open(trained_baseline_logdir, "a+") as f: - f.write(baseline+"\n") - return baseline - return None - - - def predict(self, X): - return { 'X': X } - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name='baseline_models', default=list(BaselineTrainer.models.keys()), type=str, list=True, choices=list(BaselineTrainer.models.keys())) - ] - return options diff --git a/autoPyTorch/pipeline/nodes/create_dataloader.py b/autoPyTorch/pipeline/nodes/create_dataloader.py deleted file mode 100644 index 187332c9f..000000000 --- a/autoPyTorch/pipeline/nodes/create_dataloader.py +++ /dev/null @@ -1,74 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import inspect -import logging -import numpy as np - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config_space_hyperparameter import get_hyperparameter, add_hyperparameter - -import torch -import scipy.sparse -from torch.utils.data import DataLoader, TensorDataset -from torch.utils.data.dataset import Subset -from torch.utils.data.sampler import SubsetRandomSampler - - -class CreateDataLoader(PipelineNode): - - def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices): - - torch.manual_seed(pipeline_config["random_seed"]) - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - # prepare data - drop_last = hyperparameter_config['batch_size'] < train_indices.shape[0] - X, Y = to_dense(X), to_dense(Y) - X, Y = torch.from_numpy(X).float(), torch.from_numpy(Y) - - train_dataset = TensorDataset(X, Y) - train_loader = DataLoader( - dataset=train_dataset, - 
batch_size=hyperparameter_config['batch_size'], - sampler=SubsetRandomSampler(train_indices), - shuffle=False, - drop_last=drop_last) - - valid_loader = None - if valid_indices is not None: - valid_loader = DataLoader( - dataset=Subset(train_dataset, valid_indices), - batch_size=hyperparameter_config['batch_size'], - shuffle=False, - drop_last=False) - - return {'train_loader': train_loader, 'valid_loader': valid_loader, 'batch_size': hyperparameter_config['batch_size']} - - def predict(self, pipeline_config, X, batch_size): - X = torch.from_numpy(to_dense(X)).float() - y_placeholder = torch.zeros(X.size()[0]) - - predict_loader = DataLoader(TensorDataset(X.float(), y_placeholder), batch_size) - - return {'predict_loader': predict_loader} - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - import ConfigSpace - import ConfigSpace.hyperparameters as CSH - - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - batch_size_range = self._get_search_space_updates().get('batch_size', ((32, 500), True)) - add_hyperparameter(cs, CSH.UniformIntegerHyperparameter, 'batch_size', batch_size_range) - self._check_search_space_updates('batch_size') - return cs - - -def to_dense(matrix): - if (matrix is not None and scipy.sparse.issparse(matrix)): - return matrix.todense() - return matrix diff --git a/autoPyTorch/pipeline/nodes/create_dataset_info.py b/autoPyTorch/pipeline/nodes/create_dataset_info.py deleted file mode 100644 index 4f3e9dabb..000000000 --- a/autoPyTorch/pipeline/nodes/create_dataset_info.py +++ /dev/null @@ -1,60 +0,0 @@ -__author__ = "Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os -import numpy as np -import scipy.sparse -from torchvision import datasets - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser - - -class DataSetInfo(): - def __init__(self): - self.categorical_features = [] - self.x_shape = [] - self.y_shape = [] - self.x_min_value = None - self.x_max_value = None - self.is_sparse = False - self.name = None - -class CreateDatasetInfo(PipelineNode): - - def fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid): - info = DataSetInfo() - info.is_sparse = scipy.sparse.issparse(X_train) - - info.x_shape = X_train.shape - info.y_shape = Y_train.shape - - info.x_min_value = X_train.min() - info.x_max_value = X_train.max() - - if 'categorical_features' in pipeline_config and pipeline_config['categorical_features']: - info.categorical_features = pipeline_config['categorical_features'] - else: - info.categorical_features = [False] * info.x_shape[1] - - if 'dataset_name' in pipeline_config and pipeline_config['dataset_name']: - info.name = pipeline_config['dataset_name'] - - return {'X_train' : X_train, 'Y_train' : Y_train, 'X_valid' : X_valid, 'Y_valid' : Y_valid, 'dataset_info' : info} - - - def predict(self, pipeline_config, X_train, Y_train, X_valid, Y_valid): - return self.fit(pipeline_config, X_train, Y_train, X_valid, Y_valid) - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name='categorical_features', default=None, type=to_bool, list=True, - info='List of booleans that specifies for each feature whether it is categorical.'), - ConfigOption(name='dataset_name', default=None, type=str) - ] - return options - - diff --git 
a/autoPyTorch/pipeline/nodes/cross_validation.py b/autoPyTorch/pipeline/nodes/cross_validation.py deleted file mode 100644 index 74033518b..000000000 --- a/autoPyTorch/pipeline/nodes/cross_validation.py +++ /dev/null @@ -1,359 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import torch -import logging -import scipy.sparse -import numpy as np -import pandas as pd -import signal -import time -import math -import inspect -import sys -from copy import deepcopy - -from sklearn.model_selection import BaseCrossValidator -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode -from autoPyTorch.pipeline.base.pipeline import Pipeline - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool, to_dict -from autoPyTorch.components.training.budget_types import BudgetTypeTime - -import time - -class CrossValidation(SubPipelineNode): - def __init__(self, train_pipeline_nodes): - """CrossValidation pipeline node. - It will run the train_pipeline by providing different train and validation datasets given the cv_split value defined in the config. - Cross validation can be disabled by setting cv_splits to <= 1 in the config - This enables the validation_split config parameter which, if no validation data is provided, will split the train dataset according its value (percent of train dataset) - - Train: - The train_pipeline will receive the following inputs: - {hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, training_techniques, fit_start_time, categorical_features} - - Prediction: - The train_pipeline will receive the following inputs: - {pipeline_config, X} - - Arguments: - train_pipeline {Pipeline} -- training pipeline that will be computed cv_split times - train_result_node {PipelineNode} -- pipeline node that provides the results of the train_pipeline - """ - - super(CrossValidation, self).__init__(train_pipeline_nodes) - - self.cross_validators = {'none': None} - self.cross_validators_adjust_y = dict() - - - def fit(self, hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type, optimize_start_time, - refit, rescore, dataset_info, hyperparameter_config_id): - """Perform cross validation. - - Arguments: - hyperparameter_config {dict} -- The sampled hyperparameter config - pipeline_config {dict} -- The user specified configuration of the pipeline - X_train {data} -- The data. Cross Validation might split the data, - Y_train {data} -- The data. Cross Validation might split the data, - X_valid {data} -- The data. Cross Validation might split the data, - Y_valid {data} -- The data. Cross Validation might split the data, - budget {float} -- The budget for training. - budget_type {BaseTrainingTechnique} -- The type of budget. - optimize_start_time {float} -- Time when optimization has been started. - refit {bool} -- Whether we refit currently or not. - rescore {bool} -- Whether we refit in order to get the exact score of a hp-config during training. - dataset_info {DatasetInfo} -- Object containing information about the dataset. - - Raises: - Exception: Not a single CV split could be finished. - - Returns: - dict -- loss, info and additional results. 
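The aggregation this docstring refers to boils down to averaging the per-split info dictionaries, which is done with pandas right after the CV loop; a worked example with invented values:

import pandas as pd

# One info dictionary per finished CV split
infos = [
    {'val_accuracy': 0.81, 'train_loss': 0.42},
    {'val_accuracy': 0.79, 'train_loss': 0.45},
    {'val_accuracy': 0.83, 'train_loss': 0.40},
]

# Column-wise mean across splits
info = dict(pd.DataFrame(infos).mean())
print(info)  # {'val_accuracy': 0.81, 'train_loss': ~0.4233}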
- """ - logger = logging.getLogger('autonet') - loss = 0 - infos = [] - X, Y, num_cv_splits, cv_splits, loss_penalty, budget = self.initialize_cross_validation( - pipeline_config=pipeline_config, budget=budget, X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - dataset_info=dataset_info, refit=(refit and not rescore), logger=logger) - - # adjust budget in case of budget type time - cv_start_time = time.time() - if budget_type == BudgetTypeTime: - budget = budget - (cv_start_time - optimize_start_time) - - # start cross validation - logger.debug("Took " + str(time.time() - optimize_start_time) + " s to initialize optimization.") - all_sub_pipeline_kwargs = dict() - additional_results = dict() - for i, split_indices in enumerate(cv_splits): - logger.info("[AutoNet] CV split " + str(i) + " of " + str(num_cv_splits)) - - # fit training pipeline - cur_budget = self.get_current_budget(cv_index=i, budget=budget, budget_type=budget_type, - cv_start_time=cv_start_time, num_cv_splits=num_cv_splits, logger=logger) - sub_pipeline_kwargs = { - "hyperparameter_config": hyperparameter_config, "pipeline_config": pipeline_config, - "budget": cur_budget, "training_techniques": [budget_type()], - "fit_start_time": time.time(), - "train_indices": split_indices[0], - "valid_indices": split_indices[1], - "dataset_info": deepcopy(dataset_info), - "refit": refit, - "loss_penalty": loss_penalty, - "hyperparameter_config_id": hyperparameter_config_id} - all_sub_pipeline_kwargs[i] = deepcopy(sub_pipeline_kwargs) - result = self.sub_pipeline.fit_pipeline(X=X, Y=Y, **sub_pipeline_kwargs) - logger.info("[AutoNet] Done with current split!") - - if result is not None: - loss += result['loss'] - infos.append(result['info']) - additional_results[i] = {key: value for key, value in result.items() if key not in ["loss", "info"]} - - if (len(infos) == 0): - raise Exception("Could not finish a single cv split due to memory or time limitation") - - # aggregate logs - logger.info("Aggregate the results across the splits") - df = pd.DataFrame(infos) - info = dict(df.mean()) - additional_results = self.process_additional_results(additional_results=additional_results, all_sub_pipeline_kwargs=all_sub_pipeline_kwargs, - X=X, Y=Y, logger=logger) - loss = loss / num_cv_splits + loss_penalty - logger.debug("Send additional results %s to master" % str(additional_results)) - return dict({'loss': loss, 'info': info}, **additional_results) - - def predict(self, pipeline_config, X): - - result = self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X) - - return {'Y': result['Y']} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("validation_split", default=0.3, type=float, choices=[0, 1], - info='In range [0, 1). Part of train dataset used for validation. Ignored in fit if cross validator or valid data given.'), - ConfigOption("refit_validation_split", default=0.0, type=float, choices=[0, 1], - info='In range [0, 1). Part of train dataset used for validation in refit.'), - ConfigOption("cross_validator", default="none", type=str, choices=self.cross_validators.keys(), - info='Class inheriting from sklearn.model_selection.BaseCrossValidator. Ignored if validation data is given.'), - ConfigOption("cross_validator_args", default=dict(), type=to_dict, - info="Args of cross validator. 
\n\t\tNote that random_state and shuffle are set by " + - "pipeline config options random_seed and shuffle, if not specified here."), - ConfigOption("min_budget_for_cv", default=0, type=float, - info='Specify minimum budget for cv. If budget is smaller use specified validation split.'), - ConfigOption('shuffle', default=True, type=to_bool, choices=[True, False], - info='Shuffle train and validation set'), - ] - return options - - def clean_fit_data(self): - super(CrossValidation, self).clean_fit_data() - self.sub_pipeline.root.clean_fit_data() - - def initialize_cross_validation(self, pipeline_config, budget, X_train, Y_train, X_valid, Y_valid, dataset_info, refit, logger): - """Initialize CV by computing split indices, - - Arguments: - pipeline_config {dict} -- User-defined configuration of the pipeline. - budget {float} -- The current budget. - X_train {array} -- The data - Y_train {array} -- The data - X_valid {array} -- The data - Y_valid {array} -- The data - dataset_info {DatasetInfo} -- Object describing the dataset - refit {bool} -- Wether we currently perform a refit. - logger {Logger} -- Logger to log stuff on the console. - - Returns: - tuple -- X, Y, number of splits, split indices, a penalty added to the loss, the budget for each cv split - """ - budget_too_low_for_cv = budget < pipeline_config['min_budget_for_cv'] - val_split = max(0, min(1, pipeline_config['validation_split'])) - if refit: - val_split = max(0, min(1, pipeline_config['refit_validation_split'])) - - # validation set given. cv ignored. - if X_valid is not None and Y_valid is not None: - if pipeline_config['cross_validator'] != "none": - logger.warning('Cross validator ' + pipeline_config['cross_validator'] + ' given and validation set is specified, ' + - 'autonet will ignore cv splits and evaluate on given validation set') - if val_split > 0.0: - logger.warning('Validation split is set to ' + str(val_split) + ' and validation set is specified, ' + - 'autonet will ignore split and evaluate on given validation set') - - X, Y, indices = self.get_validation_set_split_indices(pipeline_config, - X_train=X_train, X_valid=X_valid, Y_train=Y_train, Y_valid=Y_valid) - - logger.info("[AutoNet] Validation set given. Continue with validation set (no cross validation).") - return X, Y, 1, [indices], 0, budget - - # no cv, split train data - if pipeline_config['cross_validator'] == "none" or budget_too_low_for_cv: - logger.debug("[AutoNet] No validation set given and either no cross validator given or budget too low for CV." 
+ - " Continue by splitting " + str(val_split) + " of training data.") - indices = self.shuffle_indices(np.array(list(range(dataset_info.x_shape[0]))), pipeline_config['shuffle'], pipeline_config["random_seed"]) - split = int(len(indices) * (1-val_split)) - train_indices, valid_indices = indices[:split], indices[split:] - valid_indices = None if val_split == 0 else valid_indices - return X_train, Y_train, 1, [(train_indices, valid_indices)], (1000 if budget_too_low_for_cv else 0), budget - - # cross validation - logger.warning('Validation split is set to ' + str(val_split) + ' and cross validator specified, autonet will ignore validation split') - cross_validator_class = self.cross_validators[pipeline_config['cross_validator']] - adjust_y = self.cross_validators_adjust_y[pipeline_config['cross_validator']] - available_cross_validator_args = inspect.getfullargspec(cross_validator_class.__init__)[0] - cross_validator_args = pipeline_config['cross_validator_args'] - - if "shuffle" not in cross_validator_args and "shuffle" in available_cross_validator_args: - cross_validator_args["shuffle"] = pipeline_config["shuffle"] - if "random_state" not in cross_validator_args and "random_state" in available_cross_validator_args: - cross_validator_args["random_state"] = pipeline_config["random_seed"] - - cross_validator = cross_validator_class(**cross_validator_args) - num_cv_splits = cross_validator.get_n_splits(X_train, adjust_y(Y_train)) - cv_splits = cross_validator.split(X_train, adjust_y(Y_train)) - if not refit: - logger.info("[Autonet] Continue with cross validation using " + str(pipeline_config['cross_validator'])) - return X_train, Y_train, num_cv_splits, cv_splits, 0, budget - - # refit - indices = self.shuffle_indices(np.array(list(range(dataset_info.x_shape[0]))), pipeline_config['shuffle'], pipeline_config["random_seed"]) - split = int(len(indices) * (1-val_split)) - train_indices, valid_indices = indices[:split], indices[split:] - valid_indices = None if val_split == 0 else valid_indices - logger.info("[Autonet] No cross validation when refitting! Continue by splitting " + str(val_split) + " of training data.") - return X_train, Y_train, 1, [(train_indices, valid_indices)], 0, budget / num_cv_splits - - def add_cross_validator(self, name, cross_validator, adjust_y=None): - self.cross_validators[name] = cross_validator - self.cross_validators_adjust_y[name] = adjust_y if adjust_y is not None else identity - - def remove_cross_validator(self, name): - del self.cross_validators[name] - del self.cross_validators_adjust_y[name] - - def get_current_budget(self, cv_index, budget, budget_type, cv_start_time, num_cv_splits, logger): - """Get the budget for the current CV split. - - Arguments: - cv_index {int} -- The index of the current cv split. - budget {float} -- The current budget. - budget_type {BaseTrainingTechnique} -- The type of budget. - cv_start_time {float} -- Start time of cross validation. - num_cv_splits {int} -- total number of cv splits. - logger {Logger} -- A logger to log stuff on the console. 
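For a time budget, the per-split budget that get_current_budget computes below is simple bookkeeping; a worked example with invented numbers:

# Invented numbers: total time budget 300 s, 3 CV splits, about to start split index 1,
# and 130 s have already elapsed since cross validation started.
budget, num_cv_splits, cv_index, elapsed = 300.0, 3, 1, 130.0

remaining_budget = budget - elapsed                                 # 170 s actually left
should_be_remaining = budget - cv_index * budget / num_cv_splits    # 200 s if splits kept to plan
compensate = max(10, should_be_remaining - remaining_budget)        # 30 s lost to the previous split
cur_budget = remaining_budget / (num_cv_splits - cv_index)          # 85 s for each remaining split
print(cur_budget, compensate)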
- - Returns: - float -- The budget of the current - """ - # adjust budget in case of budget type time - if budget_type == BudgetTypeTime: - remaining_budget = budget - (time.time() - cv_start_time) - should_be_remaining_budget = (budget - cv_index * budget / num_cv_splits) - budget_type.compensate = max(10, should_be_remaining_budget - remaining_budget) - cur_budget = remaining_budget / (num_cv_splits - cv_index) - logger.info("Reduced initial budget " + str(budget / num_cv_splits) + " to cv budget " + - str(cur_budget) + " compensate for " + str(should_be_remaining_budget - remaining_budget)) - else: - cur_budget = budget / num_cv_splits - return cur_budget - - def process_additional_results(self, additional_results, all_sub_pipeline_kwargs, X, Y, logger): - """Process additional results, like predictions for ensemble for example. - The data of additional results will be combined across the splits. - - Arguments: - additional_results {dict} -- Mapping from cv index to additional_results organized in a dictionary. This dictionary has the following structure: - {name1: {data1: ..., combinator1: }, name2: {data2: ..., combinator2: ...}, ...} - for each name, the given combinator should be identical across the splits. - for each name, the given combinator is called with a dictionary from split index to data - all_sub_pipeline_kwargs {dict} -- Mapping from cv index to kwargs with which the subpipeline has been called. - X {array} -- The full data, concatenation of training and validation. - Y {array} -- The target full data, concatenation of training and validation. - logger {Logger} -- a logger to print stuff on the console - - Returns: - dict -- mapping from name to combined data - """ - combinators = dict() - data = dict() - result = dict() - logger.info("Process %s additional result(s)" % len(additional_results)) - for split in additional_results.keys(): - for name in additional_results[split].keys(): - combinators[name] = additional_results[split][name]["combinator"] - if name not in data: - data[name] = dict() - data[name][split] = additional_results[split][name]["data"] - for name in data.keys(): - result[name] = combinators[name](data=data[name], pipeline_kwargs=all_sub_pipeline_kwargs, X=X, Y=Y) - return result - - @staticmethod - def concat(upper, lower): - """Concatenate training and validation data - - Arguments: - upper {array} -- upper part of concatenated array - lower {array} -- lower part of concatenated value - - Returns: - array -- concatenated array - """ - if (scipy.sparse.issparse(upper)): - return scipy.sparse.vstack([upper, lower]) - else: - return np.concatenate([upper, lower]) - - @staticmethod - def shuffle_indices(indices, shuffle=True, seed=42): - """Shuffle the indices - - Arguments: - indices {array} -- The indices to shuffle - - Keyword Arguments: - shuffle {bool} -- Whether the indices should be shuffled (default: {True}) - seed {int} -- A random seed (default: {42}) - - Returns: - array -- Shuffled indices - """ - rng = np.random.RandomState(42) - if shuffle: - rng.shuffle(indices) - return indices - - @staticmethod - def get_validation_set_split_indices(pipeline_config, X_train, X_valid, Y_train, Y_valid, allow_shuffle=True): - """Get the indices for cv. 
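The index handling used throughout this node (shuffle once, then cut at the split point) reduces to a few numpy lines; the sample count, split fraction and seed below are invented:

import numpy as np

n_samples, val_split, seed = 10, 0.3, 42

rng = np.random.RandomState(seed)
indices = np.arange(n_samples)
rng.shuffle(indices)

split = int(len(indices) * (1 - val_split))
train_indices, valid_indices = indices[:split], indices[split:]
print(train_indices, valid_indices)  # 7 training indices, 3 validation indices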
- - Arguments: - pipeline_config {dict} -- The user specified configuration of the pipeline - X_train {array} -- The data - X_valid {array} -- The data - Y_train {array} -- The data - Y_valid {array} -- The data - - Keyword Arguments: - allow_shuffle {bool} -- shuffle data indices if it is specified in pipeline config and allow_shuffle is True (default: {True}) - - Returns: - tuple -- The concatenated data and the indices - """ - train_indices = CrossValidation.shuffle_indices(np.array(list(range(X_train.shape[0]))), - pipeline_config['shuffle'] and allow_shuffle, pipeline_config['random_seed']) - valid_indices = CrossValidation.shuffle_indices(np.array(list(range(X_train.shape[0], X_train.shape[0] + X_valid.shape[0]))), - pipeline_config['shuffle'] and allow_shuffle, pipeline_config['random_seed']) - - X = CrossValidation.concat(X_train, X_valid) - Y = CrossValidation.concat(Y_train, Y_valid) - return X, Y, (train_indices, valid_indices) - -def identity(x): - return x diff --git a/autoPyTorch/pipeline/nodes/embedding_selector.py b/autoPyTorch/pipeline/nodes/embedding_selector.py deleted file mode 100644 index 71d24a6f5..000000000 --- a/autoPyTorch/pipeline/nodes/embedding_selector.py +++ /dev/null @@ -1,99 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import torch.nn as nn -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector - -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption - -from autoPyTorch.components.networks.feature.embedding import NoEmbedding - -class EmbeddingSelector(PipelineNode): - def __init__(self): - """ Embedding selector. """ - - super(EmbeddingSelector, self).__init__() - - self.embedding_modules = dict() - self.add_embedding_module('none', NoEmbedding) - - def fit(self, hyperparameter_config, pipeline_config, X, one_hot_encoder): - - if not one_hot_encoder or not one_hot_encoder.categories_: - # no categorical features -> no embedding - return {'embedding': nn.Sequential()} - - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - embedding_name = hyperparameter_config['embedding'] if 'embedding' in hyperparameter_config else 'none' - embedding_type = self.embedding_modules[embedding_name] - embedding_config = ConfigWrapper(embedding_name, hyperparameter_config) - - return {'embedding': embedding_type(embedding_config, X.shape[1], one_hot_encoder)} - - - def add_embedding_module(self, name, embedding_module): - """Add embedding module. - Will be created with (hyperparameter_config, in_features, categorical_embedding). 
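A module satisfying the registration contract described here could look roughly like this sketch (the hyperparameter name and range are invented; the constructor signature follows the docstring above):

import ConfigSpace
import ConfigSpace.hyperparameters as CSH
import torch.nn as nn


class MyEmbedding(nn.Module):
    """Toy embedding, constructed with (hyperparameter_config, in_features, one_hot_encoder)."""

    def __init__(self, hyperparameter_config, in_features, one_hot_encoder):
        super().__init__()
        self.layer = nn.Linear(in_features, hyperparameter_config['embedding_size'])

    def forward(self, x):
        return self.layer(x)

    @staticmethod
    def get_config_space(categorical_features=None, **kwargs):
        cs = ConfigSpace.ConfigurationSpace()
        cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('embedding_size', 8, 256))
        return cs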
- - Arguments: - name {string} -- name of embedding - embedding_module {nn.Module} -- embedding module type has to inherit from nn.Module and provide static 'get_config_space' function - """ - - - if (not issubclass(embedding_module, nn.Module)): - raise ValueError("Specified embedding module has to inherit from nn.Module") - if (not hasattr(embedding_module, 'get_config_space')): - raise ValueError("Specified embedding module has to implement get_config_space function") - self.embedding_modules[name] = embedding_module - - - def remove_log_function(self, name): - del self.embedding_modules[name] - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="embeddings", default=list(self.embedding_modules.keys()), type=str, list=True, choices=list(self.embedding_modules.keys())), - ] - return options - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - if pipeline_config['categorical_features'] is None or not any(pipeline_config['categorical_features']) or 'none' not in pipeline_config['preprocessors']: - # no categorical features -> no embedding - return cs - - possible_embeddings = set(pipeline_config["embeddings"]).intersection(self.embedding_modules.keys()) - selector = cs.add_hyperparameter(CSH.CategoricalHyperparameter("embedding", sorted(possible_embeddings), default_value="none")) - - for embedding_name, embedding_type in self.embedding_modules.items(): - if (embedding_name not in possible_embeddings): - continue - embedding_cs = embedding_type.get_config_space(pipeline_config['categorical_features'], - **self._get_search_space_updates(prefix=embedding_name)) - cs.add_configuration_space(prefix=embedding_name, configuration_space=embedding_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector, 'value': embedding_name}) - - self._check_search_space_updates((possible_embeddings, "*")) - return cs - - def insert_inter_node_hyperparameter_dependencies(self, config_space, dataset_info=None, **pipeline_config): - if pipeline_config['categorical_features'] is None or not any(pipeline_config['categorical_features']) or 'none' not in pipeline_config['preprocessors']: - # no categorical features -> no embedding - return config_space - embedding_hyperparameter = config_space.get_hyperparameter(EmbeddingSelector.get_name() + ConfigWrapper.delimiter + "embedding") - preprocessor_hyperparameter = config_space.get_hyperparameter(PreprocessorSelector.get_name() + ConfigWrapper.delimiter + "preprocessor") - - condition = ConfigSpace.EqualsCondition(embedding_hyperparameter, preprocessor_hyperparameter, "none") - - config_space.add_condition(condition) - return config_space diff --git a/autoPyTorch/pipeline/nodes/ensemble.py b/autoPyTorch/pipeline/nodes/ensemble.py deleted file mode 100644 index 6ba9acd41..000000000 --- a/autoPyTorch/pipeline/nodes/ensemble.py +++ /dev/null @@ -1,171 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector, AutoNetMetric, no_transform -from autoPyTorch.pipeline.nodes import OneHotEncoding, OptimizationAlgorithm -from autoPyTorch.pipeline.nodes.metric_selector import AutoNetMetric -from 
autoPyTorch.utils.ensemble import build_ensemble, read_ensemble_prediction_file, combine_predictions, combine_test_predictions, \ - ensemble_logger, start_server -from hpbandster.core.result import logged_results_to_HBS_result -import json -import asyncio -from hpbandster.core.nameserver import nic_name_to_host -import time -import numpy as np -import logging - - -def predictions_for_ensemble(y_true, y_pred): - return y_pred - -class EnableComputePredictionsForEnsemble(PipelineNode): - """Put this Node in the training pipeline after the metric selector node""" - def fit(self, pipeline_config, additional_metrics, refit, loss_penalty): - if refit or pipeline_config["ensemble_size"] == 0 or loss_penalty > 0: - return dict() - return {'additional_metrics': additional_metrics + [ - AutoNetMetric(name="predictions_for_ensemble", - metric=predictions_for_ensemble, - loss_transform=no_transform, - ohe_transform=no_transform)]} - - -class SavePredictionsForEnsemble(PipelineNode): - """Put this Node in the training pipeline after the training node""" - def fit(self, pipeline_config, loss, info, refit, loss_penalty, baseline_predictions_for_ensemble=None, baseline_id=None, baseline_test_predictions_for_ensemble=None): - if refit or pipeline_config["ensemble_size"] == 0 or loss_penalty > 0: - return {"loss": loss, "info": info} - - if "val_predictions_for_ensemble" in info: - predictions = info["val_predictions_for_ensemble"] - del info["val_predictions_for_ensemble"] - else: - raise ValueError("You need to specify some kind of validation for ensemble building") - del info["train_predictions_for_ensemble"] - - combinator = { - "combinator": combine_predictions, - "data": predictions - } - - #logging.info("Val: ", type(predictions), len(predictions)) - #logging.info("Val: ", type(predictions[0]), type(predictions[1])) - #logging.info("Val: ", predictions[0].shape, predictions[1].shape) - - # has to be int or float to be passed to logger - info["baseline_id"] = baseline_id[0] if baseline_id is not None else None - - if baseline_predictions_for_ensemble is not None: - baseline_predictions = baseline_predictions_for_ensemble - - baseline_combinator = { - "combinator": combine_predictions, - "data": baseline_predictions - } - else: - baseline_combinator = None - - - if not "test_predictions_for_ensemble" in info: - if baseline_combinator is not None: - return {"loss": loss, "info": info, "predictions_for_ensemble": combinator, "baseline_predictions_for_ensemble": baseline_combinator} - else: - return {"loss": loss, "info": info, "predictions_for_ensemble": combinator} - - #logging.info("Test: ", type(info["test_predictions_for_ensemble"]), len(info["test_predictions_for_ensemble"])) - #logging.info("Test: ", type(info["test_predictions_for_ensemble"][0]), type(info["test_predictions_for_ensemble"][1])) - #logging.info("Test: ", info["test_predictions_for_ensemble"][0].shape, info["test_predictions_for_ensemble"][1].shape) - - test_combinator = { - "combinator": combine_test_predictions, - "data": info["test_predictions_for_ensemble"] - } - del info["test_predictions_for_ensemble"] - - if baseline_test_predictions_for_ensemble is not None: - baseline_test_combinator = { - "combinator" : combine_test_predictions, - "data" : (baseline_test_predictions_for_ensemble, np.argmax(baseline_test_predictions_for_ensemble, axis=1)) - } - else: - baseline_test_combinator = None - - #logging.info("Baseline test: ", type(baseline_test_predictions_for_ensemble), np.array(baseline_test_predictions_for_ensemble).shape) - - 
return_dict = {"loss": loss, "info": info, "predictions_for_ensemble": combinator, "test_predictions_for_ensemble": test_combinator} - - if baseline_combinator is not None: - return_dict["baseline_predictions_for_ensemble"] = baseline_combinator - if baseline_test_combinator is not None: - return_dict["baseline_test_predictions_for_ensemble"] = baseline_test_combinator - - return return_dict - - def predict(self, Y): - return {"Y": Y} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("ensemble_server_credentials", default=None) - ] - return options - - -class BuildEnsemble(PipelineNode): - """Put this node after the optimization algorithm node""" - def fit(self, pipeline_config, optimized_hyperparameter_config, budget, loss, info, refit=None): - if refit or pipeline_config["ensemble_size"] == 0 or pipeline_config["task_id"] not in [-1, 1]: - return {"optimized_hyperparameter_config": optimized_hyperparameter_config, "budget": budget} - - filename = os.path.join(pipeline_config["result_logger_dir"], 'predictions_for_ensemble.npy') - optimize_metric = self.pipeline[MetricSelector.get_name()].metrics[pipeline_config["optimize_metric"]] - y_transform = self.pipeline[OneHotEncoding.get_name()].complete_y_tranformation - result = logged_results_to_HBS_result(pipeline_config["result_logger_dir"]) - - all_predictions, labels, model_identifiers, _ = read_ensemble_prediction_file(filename=filename, y_transform=y_transform) - ensemble_selection, ensemble_configs = build_ensemble(result=result, - optimize_metric=optimize_metric, ensemble_size=pipeline_config["ensemble_size"], - all_predictions=all_predictions, labels=labels, model_identifiers=model_identifiers, - only_consider_n_best=pipeline_config["ensemble_only_consider_n_best"], - sorted_initialization_n_best=pipeline_config["ensemble_sorted_initialization_n_best"]) - - return {"optimized_hyperparameter_config": optimized_hyperparameter_config, "budget": budget, - "ensemble": ensemble_selection, - "ensemble_configs": ensemble_configs, - "loss": loss, - "info": info - } - - def predict(self, Y): - return {"Y": Y} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("ensemble_size", default=50, type=int, info="Build a ensemble of well performing autonet configurations. 
0 to disable."), - ConfigOption("ensemble_only_consider_n_best", default=30, type=int, info="Only consider the n best models for ensemble building."), - ConfigOption("ensemble_sorted_initialization_n_best", default=0, type=int, info="Initialize ensemble with n best models.") - ] - return options - -class EnsembleServer(PipelineNode): - """Put this node in front of the optimization algorithm node""" - - def fit(self, pipeline_config, result_loggers, shutdownables, refit=False): - if refit or pipeline_config["ensemble_size"] == 0: - return dict() - es_credentials_file = os.path.join(pipeline_config["working_dir"], "es_credentials_%s.json" % pipeline_config["run_id"]) - - # start server - if pipeline_config["task_id"] != 1 or pipeline_config["run_worker_on_master_node"]: - host = nic_name_to_host(OptimizationAlgorithm.get_nic_name(pipeline_config)) - host, port, process = start_server(host) - pipeline_config["ensemble_server_credentials"] = (host, port) - shutdownables = shutdownables + [process] - - result_loggers = [ensemble_logger(directory=pipeline_config["result_logger_dir"], overwrite=True)] + result_loggers - return {"result_loggers": result_loggers, "shutdownables": shutdownables} diff --git a/autoPyTorch/pipeline/nodes/image/autonet_settings_no_shuffle.py b/autoPyTorch/pipeline/nodes/image/autonet_settings_no_shuffle.py deleted file mode 100644 index 476fec091..000000000 --- a/autoPyTorch/pipeline/nodes/image/autonet_settings_no_shuffle.py +++ /dev/null @@ -1,71 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import logging -import numpy as np -import sys, os -import pprint - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.hyperparameter_search_space_update import parse_hyperparameter_search_space_updates - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool - -import random, torch - -class AutoNetSettingsNoShuffle(PipelineNode): - def __init__(self): - super(AutoNetSettingsNoShuffle, self).__init__() - - self.logger_settings = dict() - self.logger_settings['debug'] = logging.DEBUG - self.logger_settings['info'] = logging.INFO - self.logger_settings['warning'] = logging.WARNING - self.logger_settings['error'] = logging.ERROR - self.logger_settings['critical'] = logging.CRITICAL - - - def fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid): - - autonet_logger = logging.getLogger('autonet') - hpbandster_logger = logging.getLogger('hpbandster') - - level = self.logger_settings[pipeline_config['log_level']] - autonet_logger.setLevel(level) - hpbandster_logger.setLevel(level) - - random.seed(pipeline_config['random_seed']) - torch.manual_seed(pipeline_config['random_seed']) - np.random.seed(pipeline_config['random_seed']) - - if 'result_logger_dir' in pipeline_config: - directory = os.path.join(pipeline_config['result_logger_dir'], "worker_logs_" + str(pipeline_config['task_id'])) - os.makedirs(directory, exist_ok=True) - - if level == logging.DEBUG: - self.addHandler([autonet_logger, hpbandster_logger], level, os.path.join(directory, 'autonet_debug.log')) - self.addHandler([autonet_logger, hpbandster_logger], logging.INFO, os.path.join(directory, 'autonet_info.log')) - else: - self.addHandler([autonet_logger, hpbandster_logger], level, os.path.join(directory, 'autonet.log')) - - autonet_logger.info("Start autonet with config:\n" + str(pprint.pformat(pipeline_config))) - - return { 'X_train': X_train, 'Y_train': Y_train, 'X_valid': X_valid, 'Y_valid': Y_valid } - - def 
get_pipeline_config_options(self): - options = [ - ConfigOption(name='log_level', default='warning', type=str, choices=list(self.logger_settings.keys())), - ConfigOption(name='random_seed', default=lambda c: abs(hash(c["run_id"])) % (2 ** 32), type=int, depends=True, info="Make sure to specify the same seed for all workers."), - ConfigOption(name='hyperparameter_search_space_updates', default=None, type=["directory", parse_hyperparameter_search_space_updates], - info="object of type HyperparameterSearchSpaceUpdates"), - ] - return options - - def addHandler(self, loggers, level, path): - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - fh = logging.FileHandler(path) - fh.setLevel(level) - fh.setFormatter(formatter) - for logger in loggers: - logger.addHandler(fh) diff --git a/autoPyTorch/pipeline/nodes/image/create_dataset_info.py b/autoPyTorch/pipeline/nodes/image/create_dataset_info.py deleted file mode 100644 index 8c88e8875..000000000 --- a/autoPyTorch/pipeline/nodes/image/create_dataset_info.py +++ /dev/null @@ -1,154 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import os -import numpy as np -import scipy.sparse - -from torchvision import datasets - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser - - -class DataSetInfo(): - def __init__(self): - self.categorical_features = [] - self.x_shape = [] - self.y_shape = [] - self.is_sparse = False - self.default_dataset = None # could be set to CIFAR to download official CIFAR dataset from pytorch - -class CreateDatasetInfo(PipelineNode): - - default_datasets = { - # NAME # dataset # shape # classes - 'CIFAR10' : (datasets.CIFAR10, [50000, 3, 32, 32], 10), - 'CIFAR100' : (datasets.CIFAR100, [50000, 3, 32, 32], 10), - 'SVHN' : (datasets.SVHN, [70000, 3, 32, 32], 10), - 'MNIST' : (datasets.MNIST, [60000, 28, 28], 10), - } - - - def fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid, dataset_path): - info = DataSetInfo() - info.is_sparse = scipy.sparse.issparse(X_train) - info.path = dataset_path - - if X_train[0] in self.default_datasets: - dataset_type, shape, classes = self.default_datasets[X_train[0]] - info.default_dataset = dataset_type - info.x_shape = shape - info.y_shape = [shape[0], classes] - X_train = np.array([X_train[0]]) - Y_train = np.array([]) - - elif len(X_train.shape) == 1: - if 'max_class_size' not in pipeline_config.keys(): - pipeline_config['max_class_size'] = None # backwards compatibility - - if "file_extensions" not in pipeline_config.keys(): - pipeline_config['file_extensions'] = ['.png', '.jpg', '.JPEG', '.pgm'] - - X_train, Y_train = self.add_subpaths(X_train, Y_train, - pipeline_config['images_root_folders'], pipeline_config['file_extensions'], pipeline_config['max_class_size'] or float('inf')) - X_valid, Y_valid = self.add_subpaths(X_valid, Y_valid, - pipeline_config['images_root_folders'], pipeline_config['file_extensions'], pipeline_config['max_class_size'] or float('inf')) - - info.x_shape = [X_train.shape[0]] + pipeline_config['images_shape'] - info.y_shape = Y_train.shape - - if len(info.y_shape) == 1 or info.y_shape[1] == 1: - info.y_shape = (info.y_shape[0], len(np.unique(Y_train))) - else: - info.x_shape = X_train.shape - info.y_shape = Y_train.shape - - return {'X_train' : X_train, 'Y_train' : Y_train, 'X_valid' : X_valid, 'Y_valid' : Y_valid, 
'dataset_info' : info} - - - def predict(self, pipeline_config, X): - fit_res = self.fit(pipeline_config, X, np.zeros(X.shape[0]), None, None, pipeline_config) - return { 'X': fit_res['X_train'], 'dataset_info': fit_res['dataset_info'] } - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="file_extensions", default=['.png', '.jpg', '.JPEG', '.pgm'], type=str, list=True), - ConfigOption(name="images_shape", default=[3, 32, 32], type=int, list=True, info="Image size input to the networks, images will be rescaled to this."), - ConfigOption(name="images_root_folders", default=[ConfigFileParser.get_autonet_home()], type='directory', list=True, info="Directory relative to which image paths are given."), - ConfigOption(name="max_class_size", default=None, type=int), - ] - return options - - def add_subpaths(self, X, Y, root_folders, extensions, max_class_size): - if X is None or Y is None: - return None, None - - new_X, new_Y = [], [] - #for i, path in enumerate(X): - # for root in root_folders: - # tmp = os.path.join(root, path) - # if os.path.exists(tmp): - # path = tmp - # break - # if "."+path.split(".")[1] in extensions: - # new_X.append(X) - # new_Y = Y - # continue - # if not os.path.exists(path): - # print(path) - # raise Exception('Invalid path: ' + str(root_folders) + str(path)) - # if os.path.isfile(path) and os.path.splitext(path)[1] == '.h5': - # import h5py - # return h5py.File(path, 'r')['x'].value, h5py.File(os.path.join(root, Y[i]), 'r')['y'].value.squeeze() - # self.add_path(path, Y[i], new_X, new_Y, extensions, max_class_size) - - for i, path in enumerate(X): - for root in root_folders: - tmp = os.path.join(root, path) - if os.path.exists(tmp): - path = tmp - break - if not os.path.exists(path): - raise Exception('Invalid path: ' + str(root_folders) + str(path)) - if os.path.isfile(path) and os.path.splitext(path)[1] == '.h5': - import h5py - return h5py.File(path, 'r')['x'].value, h5py.File(os.path.join(root, Y[i]), 'r')['y'].value.squeeze() - self.add_path(path, Y[i], new_X, new_Y, extensions, max_class_size) - - if len(new_X) == 0: - raise Exception('Could not find any images in ' + str(root_folders) + '...' + str(extensions)) - return np.array(new_X), np.array(new_Y) - - def add_path(self, cur_X, cur_Y, new_X, new_Y, extensions, max_class_size): - is_file, max_class_size = self.add_file(cur_X, cur_Y, new_X, new_Y, extensions, max_class_size) - if is_file: - return - - for sub in os.listdir(cur_X): - if max_class_size <= 0: - return max_class_size - path = os.path.join(cur_X, sub) - is_file, max_class_size = self.add_file(path, cur_Y, new_X, new_Y, extensions, max_class_size) - - if not is_file: - max_class_size = self.add_path(path, cur_Y, new_X, new_Y, extensions, max_class_size) - - def add_file(self, cur_X, cur_Y, new_X, new_Y, extensions, max_class_size): - if not os.path.isfile(cur_X): - return False, max_class_size - if not os.path.splitext(cur_X)[1] in extensions: - return True, max_class_size - if os.path.getsize(cur_X) > 0: - new_X.append(cur_X) - new_Y.append(cur_Y) - max_class_size -= 1 - return True, max_class_size - 1 - else: - import logging - logging.getLogger('autonet').debug('Image is invalid! 
- size == 0:' + str(cur_X)) - return True, max_class_size - diff --git a/autoPyTorch/pipeline/nodes/image/create_image_dataloader.py b/autoPyTorch/pipeline/nodes/image/create_image_dataloader.py deleted file mode 100644 index cda010164..000000000 --- a/autoPyTorch/pipeline/nodes/image/create_image_dataloader.py +++ /dev/null @@ -1,101 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import inspect -import logging -import numpy as np - -from autoPyTorch.pipeline.nodes.create_dataloader import CreateDataLoader -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.utils.config.config_option import ConfigOption - -import torch -import scipy.sparse -from torch.utils.data import DataLoader, TensorDataset, Dataset -from autoPyTorch.data_management.image_loader import ImageFilelist, XYDataset -from torch.utils.data.sampler import SubsetRandomSampler -from torchvision import datasets, models, transforms - -from autoPyTorch.utils.transforms import transform_int64 - - -class CreateImageDataLoader(CreateDataLoader): - - def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices, train_transform, valid_transform, dataset_info): - - # if len(X.shape) > 1: - # return super(CreateImageDataLoader, self).fit(pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices) - - torch.manual_seed(pipeline_config["random_seed"]) - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - to_int64 = transform_int64 - - if dataset_info.default_dataset: - train_dataset = dataset_info.default_dataset(root=pipeline_config['default_dataset_download_dir'], train=True, download=True, transform=train_transform) - if valid_indices is not None: - valid_dataset = dataset_info.default_dataset(root=pipeline_config['default_dataset_download_dir'], train=True, download=True, transform=valid_transform) - elif len(X.shape) > 1: - train_dataset = XYDataset(X, Y, transform=train_transform, target_transform=to_int64) - valid_dataset = XYDataset(X, Y, transform=valid_transform, target_transform=to_int64) - else: - train_dataset = ImageFilelist(X, Y, transform=train_transform, target_transform=to_int64, cache_size=pipeline_config['dataloader_cache_size_mb'] * 1000, image_size=dataset_info.x_shape[2:]) - if valid_indices is not None: - valid_dataset = ImageFilelist(X, Y, transform=valid_transform, target_transform=to_int64, cache_size=0, image_size=dataset_info.x_shape[2:]) - valid_dataset.cache = train_dataset.cache - - train_loader = DataLoader( - dataset=train_dataset, - batch_size=int(hyperparameter_config['batch_size']), - sampler=SubsetRandomSampler(train_indices), - drop_last=True, - pin_memory=True, - num_workers=pipeline_config['dataloader_worker']) - - valid_loader = None - if valid_indices is not None: - valid_loader = DataLoader( - dataset=valid_dataset, - batch_size=int(hyperparameter_config['batch_size']), - sampler=SubsetRandomSampler(valid_indices), - drop_last=False, - pin_memory=True, - num_workers=pipeline_config['dataloader_worker']) - - return {'train_loader': train_loader, 'valid_loader': valid_loader, 'batch_size': hyperparameter_config['batch_size']} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("default_dataset_download_dir", default=ConfigFileParser.get_autonet_home(), type='directory', info="Directory default datasets will be downloaded to."), - ConfigOption("dataloader_worker", 
default=1, type=int), - ConfigOption("dataloader_cache_size_mb", default=0, type=int) - ] - return options - - def predict(self, pipeline_config, X, batch_size, predict_transform, dataset_info): - - if len(X.shape) > 1: - return super(CreateImageDataLoader, self).predict(pipeline_config, X, batch_size) - - - if dataset_info.default_dataset: - predict_dataset = dataset_info.default_dataset(root=pipeline_config['default_dataset_download_dir'], train=False, download=True, transform=predict_transform) - else: - try: - y_placeholder = torch.zeros(X.size()[0]) - except: - y_placeholder = torch.zeros(len(X)) - predict_dataset = ImageFilelist(X, y_placeholder, transform=predict_transform) - - predict_loader = DataLoader( - dataset=predict_dataset, - batch_size=int(batch_size), - shuffle=False, - pin_memory=True, - num_workers=pipeline_config['dataloader_worker']) - - return {'predict_loader': predict_loader} - - - diff --git a/autoPyTorch/pipeline/nodes/image/cross_validation_indices.py b/autoPyTorch/pipeline/nodes/image/cross_validation_indices.py deleted file mode 100644 index d96493bf3..000000000 --- a/autoPyTorch/pipeline/nodes/image/cross_validation_indices.py +++ /dev/null @@ -1,223 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import torch -import logging -import scipy.sparse -import numpy as np -import pandas as pd -import signal -import time -import math -import copy - -from sklearn.model_selection import StratifiedKFold -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode -from autoPyTorch.pipeline.base.pipeline import Pipeline - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.components.training.image.budget_types import BudgetTypeTime -from sklearn.model_selection import StratifiedShuffleSplit - -import time - -class CrossValidationIndices(SubPipelineNode): - def __init__(self, train_pipeline_nodes): - """CrossValidation pipeline node. - It will run the train_pipeline by providing different train and validation datasets given the cv_split value defined in the config. 
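The loader pattern the CreateImageDataLoader node above relies on, one underlying dataset sampled through two disjoint SubsetRandomSamplers, can be reproduced in isolation. A minimal sketch with fabricated tensors; sizes and batch size are arbitrary:

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.sampler import SubsetRandomSampler

X = torch.randn(100, 3, 32, 32)      # fake image data
Y = torch.randint(0, 10, (100,))     # fake labels
dataset = TensorDataset(X, Y)

train_indices = list(range(80))
valid_indices = list(range(80, 100))

train_loader = DataLoader(dataset, batch_size=16,
                          sampler=SubsetRandomSampler(train_indices),
                          drop_last=True)    # as in the node: drop incomplete train batches
valid_loader = DataLoader(dataset, batch_size=16,
                          sampler=SubsetRandomSampler(valid_indices),
                          drop_last=False)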
- Cross validation can be disabled by setting cv_splits to <= 1 in the config - This enables the validation_split config parameter which, if no validation data is provided, will split the train dataset according its value (percent of train dataset) - - Train: - The train_pipeline will receive the following inputs: - {hyperparameter_config, pipeline_config, X, Y, train_sampler, valid_sampler, budget, training_techniques, fit_start_time, categorical_features} - - Prediction: - The train_pipeline will receive the following inputs: - {pipeline_config, X} - - Arguments: - train_pipeline {Pipeline} -- training pipeline that will be computed cv_split times - train_result_node {PipelineNode} -- pipeline node that provides the results of the train_pipeline - """ - - super(CrossValidationIndices, self).__init__(train_pipeline_nodes) - - self.use_stratified_cv_split_default = False - self.logger = logging.getLogger('autonet') - - - def fit(self, hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type, dataset_info, config_id, working_directory): - - cv_splits = max(1, pipeline_config['cv_splits']) - val_split = max(0, min(1, pipeline_config['validation_split'])) - - budget_too_low_for_cv, cv_splits, loss_penalty = self.incorporate_num_cv_splits_in_budget(budget, pipeline_config, cv_splits) - - loss = 0 - infos = [] - - np.random.seed(pipeline_config['random_seed']) - - split_indices = [] - X = X_train - Y = Y_train - - if X_valid is not None and Y_valid is not None: - if cv_splits > 1: - self.logger.warning('CV splits are set to ' + str(cv_splits) + ' and validation set is specified, autonet will ignore cv splits and evaluate on given validation set') - if val_split > 0.0: - self.logger.warning('Validation split is set to ' + str(val_split) + ' and validation set is specified, autonet will ignore split and evaluate on given validation set') - - train_indices = self.shuffle_indices(list(range(X_train.shape[0])), pipeline_config['shuffle']) - valid_indices = self.shuffle_indices(list(range(X_train.shape[0], X_train.shape[0] + X_valid.shape[0])), pipeline_config['shuffle']) - - X = self.concat(X_train, X_valid) - Y = self.concat(Y_train, Y_valid) - - split_indices.append([train_indices, valid_indices]) - - elif cv_splits > 1: - if val_split > 0.0: - self.logger.warning('Validation split is set to ' + str(val_split) + ' and cv splits are specified, autonet will ignore validation split and evaluate on ' + str(cv_splits) + ' cv splits') - - if pipeline_config['use_stratified_cv_split'] and Y.shape[0] == dataset_info.x_shape[0]: - assert len(Y.shape) == 1 or Y.shape[1] == 1, "Y is in wrong shape for stratified CV split" - skf = StratifiedKFold(n_splits=cv_splits, shuffle=pipeline_config['shuffle']) - split_indices = list(skf.split(np.zeros(dataset_info.x_shape[0]), Y.reshape((-1, )))) - else: - indices = self.shuffle_indices(list(range(dataset_info.x_shape[0])), pipeline_config['shuffle']) - split_size = len(indices) / cv_splits - for split in range(cv_splits): - i1 = int(split*split_size) - i2 = int((split+1)*split_size) - train_indices, valid_indices = indices[:i1] + indices[i2:], indices[i1:i2] - split_indices.append([train_indices, valid_indices]) - - elif val_split > 0.0: - if pipeline_config['use_stratified_cv_split'] and Y.shape[0] == dataset_info.x_shape[0] and (len(Y.shape) == 1 or Y.shape[1] == 1): - sss = StratifiedShuffleSplit(n_splits=1, test_size=val_split, random_state=pipeline_config['random_seed']) - train, valid = 
list(sss.split(np.zeros(dataset_info.x_shape[0]), Y.reshape((-1, ))))[0] - split_indices.append([train.tolist(), valid.tolist()]) - - # samples = dataset_info.x_shape[0] - # skf = StratifiedKFold(n_splits=math.ceil(samples / (samples * val_split)), shuffle=pipeline_config['shuffle']) - # split_indices = [list(skf.split(np.zeros(dataset_info.x_shape[0]), Y.reshape((-1, ))))[0]] - else: - indices = self.shuffle_indices(list(range(dataset_info.x_shape[0])), pipeline_config['shuffle']) - split = int(len(indices) * (1-val_split)) - - train_indices, valid_indices = indices[:split], indices[split:] - split_indices.append([train_indices, valid_indices]) - else: - train_indices = self.shuffle_indices(list(range(dataset_info.x_shape[0])), pipeline_config['shuffle']) - split_indices.append([train_indices, []]) - - - - - if 'categorical_features' in pipeline_config and pipeline_config['categorical_features']: - categorical_features = pipeline_config['categorical_features'] - else: - categorical_features = [False] * dataset_info.x_shape[1] - - for i, split in enumerate(split_indices): - - self.logger.debug("CV split " + str(i)) - - train_indices = split[0] - valid_indices = split[1] if len(split[1]) > 0 else None - - if budget_too_low_for_cv: - cv_splits = 1 - - cur_budget = budget/cv_splits - - result = self.sub_pipeline.fit_pipeline( - hyperparameter_config=hyperparameter_config, pipeline_config=pipeline_config, - X=X, Y=Y, dataset_info=dataset_info, - train_indices=train_indices, valid_indices=valid_indices, - budget=cur_budget, budget_type=budget_type, - categorical_features=categorical_features, - config_id=config_id, - working_directory=working_directory) - - if result is not None: - loss += result['loss'] - infos.append(result['info']) - - if budget_too_low_for_cv: - break - - if (len(infos) == 0): - raise Exception("Could not finish a single cv split due to memory or time limitation") - - if len(infos) == 1: - info = infos[0] - else: - df = pd.DataFrame(infos) - info = dict(df.mean()) - - loss = loss / cv_splits + loss_penalty - - return {'loss': loss, 'info': info} - - def predict(self, pipeline_config, X, dataset_info): - return self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X, dataset_info=dataset_info) - - def get_pipeline_config_options(self): - options = [ - # percent/100 of train dataset used for validation if no validation and cv_splits == 1 - ConfigOption("validation_split", default=0.0, type=float, choices=[0, 1]), - # number of cross validation splits 1 -> no cv - ConfigOption("cv_splits", default=1, type=int), - ConfigOption("use_stratified_cv_split", default=self.use_stratified_cv_split_default, type=to_bool, choices=[True, False]), - # specify minimum budget for cv. If budget is smaller only evaluate a single fold. - ConfigOption("min_budget_for_cv", default=0, type=float), - # incorporate number of cv splits in budget: Use half the number of specified cv splits below given budget. 
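The split generation in CrossValidationIndices.fit above boils down to stratified K-fold indices when a flat label vector is available, and equal-sized chunks of a (possibly shuffled) index list otherwise. A self-contained sketch of the stratified branch, with fabricated labels:

import numpy as np
from sklearn.model_selection import StratifiedKFold

Y = np.array([0, 1] * 50)        # 100 samples, two balanced classes
cv_splits = 5

skf = StratifiedKFold(n_splits=cv_splits, shuffle=True, random_state=1)
split_indices = list(skf.split(np.zeros(len(Y)), Y))

for train_idx, valid_idx in split_indices:
    # each fold: 80 train / 20 validation indices, class-balanced
    assert len(train_idx) == 80 and len(valid_idx) == 20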
- ConfigOption("half_num_cv_splits_below_budget", default=0, type=float), - # shuffle train and validation set - ConfigOption('shuffle', default=True, type=to_bool, choices=[True, False]), - ] - return options - - def split_cv(self, X_shape, split, max_splits): - split_size = X_shape[0] / max_splits - i1 = int(split*split_size) - i2 = int((split+1)*split_size) - - train_indices = list(range(0, i1)) + list(range(i2, X_shape[0])) - valid_indices = list(range(i1, i2)) - - return train_indices, valid_indices - - def concat(self, upper, lower): - if (scipy.sparse.issparse(upper)): - return scipy.sparse.vstack([upper, lower]) - else: - return np.concatenate([upper, lower]) - - def shuffle_indices(self, indices, shuffle): - if shuffle: - np.random.shuffle(indices) - return indices - - - def clean_fit_data(self): - super(CrossValidationIndices, self).clean_fit_data() - self.sub_pipeline.root.clean_fit_data() - - def incorporate_num_cv_splits_in_budget(self, budget, pipeline_config, cv_splits): - budget_too_low_for_cv = budget < pipeline_config["min_budget_for_cv"] and cv_splits > 1 - half_num_cv_splits = not budget_too_low_for_cv and budget < pipeline_config["half_num_cv_splits_below_budget"] and cv_splits > 1 - - if budget_too_low_for_cv: - self.logger.debug("Only evaluate a single fold of CV, since the budget is lower than the min_budget for cv") - return True, cv_splits, 1000 - - if half_num_cv_splits: - self.logger.debug("Using half number of cv splits since budget is lower than the budget you specified for half number of cv splits") - return False, int(math.ceil(cv_splits / 2)), 1000 - - return False, cv_splits, 0 diff --git a/autoPyTorch/pipeline/nodes/image/image_augmentation.py b/autoPyTorch/pipeline/nodes/image/image_augmentation.py deleted file mode 100644 index 33612237f..000000000 --- a/autoPyTorch/pipeline/nodes/image/image_augmentation.py +++ /dev/null @@ -1,223 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import inspect -import logging -import numpy as np - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper - -import torch -from torchvision import datasets, models, transforms -from autoPyTorch.components.preprocessing.image_preprocessing.transforms import Cutout, AutoAugment, FastAutoAugment - - -import time -from autoPyTorch.data_management.image_loader import ThreadCounter -class TimeCompose(object): - """Composes several transforms together. - Args: - transforms (list of ``Transform`` objects): list of transforms to compose. 
- Example: - >>> transforms.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.ToTensor(), - >>> ]) - """ - - def __init__(self, transforms): - self.transforms = transforms - self.counters = [ThreadCounter() for _ in transforms] - - def __call__(self, img): - for i, t in enumerate(self.transforms): - start_time = time.time() - img = t(img) - self.counters[i].add(time.time() - start_time) - return img - - def get_times(self): - return {str(t): self.counters[i].value() for i, t in enumerate(self.transforms) } - - def __repr__(self): - format_string = self.__class__.__name__ + '(' - for t in self.transforms: - format_string += '\n' - format_string += ' {0}'.format(t) - format_string += '\n)' - return format_string - -class ImageAugmentation(PipelineNode): - def __init__(self): - super(ImageAugmentation, self).__init__() - self.mean_std_cache = dict() - - def fit(self, pipeline_config, hyperparameter_config, dataset_info, X, Y, train_indices, valid_indices): - mean, std = self.compute_mean_std(pipeline_config, hyperparameter_config, X, Y, train_indices, dataset_info) #dataset_info.mean, dataset_info.std - - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - transform_list = [] - image_size = min(dataset_info.x_shape[-2], dataset_info.x_shape[-1]) - - if len(X.shape) > 1: - transform_list.append(transforms.ToPILImage()) - - if hyperparameter_config['augment']: - if hyperparameter_config['fastautoaugment'] and hyperparameter_config['autoaugment']: - # fast autoaugment and autoaugment - transform_list.extend([ - FastAutoAugment(), - AutoAugment(), - transforms.Resize(image_size), - transforms.RandomCrop(image_size, padding=4), - transforms.RandomHorizontalFlip() - ]) - elif hyperparameter_config['fastautoaugment']: - # fast autoaugment - transform_list.extend([ - FastAutoAugment(), - transforms.Resize(image_size), - transforms.RandomCrop(image_size, padding=4), - transforms.RandomHorizontalFlip() - ]) - elif hyperparameter_config['autoaugment']: - # autoaugment - transform_list.extend([ - AutoAugment(), - transforms.Resize(image_size), - transforms.RandomCrop(image_size, padding=4), - transforms.RandomHorizontalFlip() - ]) - else: - # default augment color, rotation, size - transform_list.extend([ - transforms.ColorJitter(brightness=0.196, saturation=0.196, hue=0.141), - transforms.RandomAffine(degrees=10, shear=0.1, fillcolor=127), - transforms.RandomResizedCrop(image_size, scale=(0.533, 1), ratio=(0.75, 1.25)), - transforms.RandomHorizontalFlip() - ]) - else: - transform_list.extend([ - transforms.Resize(image_size), - transforms.CenterCrop(image_size), - ]) - - - # grayscale if only one channel - if dataset_info.x_shape[1] == 1: - transform_list.append(transforms.Grayscale(1)) - - # normalize - transform_list.append(transforms.ToTensor()) - transform_list.append(transforms.Normalize(mean, std)) - - # cutout - if hyperparameter_config['cutout']: - n_holes = hyperparameter_config['cutout_holes'] - transform_list.append(Cutout(n_holes=1, length=hyperparameter_config['length'], probability=1)) - - - train_transform = transforms.Compose(transform_list) - - transform_list = [] - if len(X.shape) > 1: - transform_list.append(transforms.ToPILImage()) - - transform_list.extend([ - transforms.Resize(image_size), - transforms.CenterCrop(image_size), - transforms.ToTensor(), - transforms.Normalize(mean, std), - ]) - valid_transform = transforms.Compose([transforms.Grayscale(1)] + transform_list if dataset_info.x_shape[1] == 1 else transform_list) - - return { 
'train_transform': train_transform, 'valid_transform': valid_transform, 'mean': mean, 'std': std } - - def predict(self, pipeline_config, mean, std): - - predict_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize(mean, std), - ]) - - return {'predict_transform': predict_transform} - - def get_hyperparameter_search_space(self, **pipeline_config): - import ConfigSpace as CS - import ConfigSpace.hyperparameters as CSH - cs = CS.ConfigurationSpace() - - augment = cs.add_hyperparameter(CSH.CategoricalHyperparameter('augment', [True, False])) - autoaugment = cs.add_hyperparameter(CSH.CategoricalHyperparameter('autoaugment', [True, False])) - fastautoaugment = cs.add_hyperparameter(CSH.CategoricalHyperparameter('fastautoaugment', [True, False])) - - cutout = cs.add_hyperparameter(CSH.CategoricalHyperparameter('cutout', [True, False])) - cutout_length = cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('length', lower=0, upper=20, log=False)) - cutout_holes = cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('cutout_holes', lower=1, upper=3, log=False)) - - cs.add_condition(CS.EqualsCondition(cutout_length, cutout, True)) - cs.add_condition(CS.EqualsCondition(cutout_holes, cutout, True)) - - cs.add_condition(CS.EqualsCondition(autoaugment, augment, True)) - cs.add_condition(CS.EqualsCondition(fastautoaugment, augment, True)) - - return cs - - def compute_mean_std(self, pipeline_config, hyperparameter_config, X, Y, train_indices, dataset_info): - log = logging.getLogger('autonet') - - if dataset_info.path in self.mean_std_cache: - mean, std = self.mean_std_cache[dataset_info.path] - log.debug('CACHED: MEAN: ' + str(mean) + ' -- STD: ' + str(std)) - return mean, std - - from autoPyTorch.pipeline.nodes.image.create_image_dataloader import CreateImageDataLoader - loader = CreateImageDataLoader() - - image_size = min(dataset_info.x_shape[-2], dataset_info.x_shape[-1]) - transform_list = [] - if len(X.shape) > 1: - transform_list.append(transforms.ToPILImage()) - transform_list.append(transforms.Resize(image_size)) - transform_list.append(transforms.CenterCrop(image_size)) - if dataset_info.x_shape[1] == 1: - transform_list.append(transforms.Grayscale(1)) - transform_list.append(transforms.ToTensor()) - train_transform = transforms.Compose(transform_list) - - cache_size = pipeline_config['dataloader_cache_size_mb'] - pipeline_config['dataloader_cache_size_mb'] = 0 - train_loader = loader.fit(pipeline_config, hyperparameter_config, X, Y, train_indices, None, train_transform, None, dataset_info)['train_loader'] - pipeline_config['dataloader_cache_size_mb'] = cache_size - - mean = 0. - std = 0. - nb_samples = 0. 
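The per-channel normalisation statistics computed by the loop that follows can be written as a standalone helper: each batch is flattened to (batch, channels, pixels), per-image channel means and stds are summed, and the totals are divided by the number of images. This is a sketch of the same accumulation, not a drop-in replacement for the node; loader is any iterable yielding (data, target) batches.

import torch

def channel_mean_std(loader):
    mean, std, n_samples = 0.0, 0.0, 0
    with torch.no_grad():
        for data, _ in loader:
            batch = data.size(0)
            flat = data.view(batch, data.size(1), -1)  # (batch, channels, pixels)
            mean = mean + flat.mean(2).sum(0)          # sum of per-image channel means
            std = std + flat.std(2).sum(0)             # sum of per-image channel stds
            n_samples += batch
    # caller should guard against an empty loader (n_samples == 0)
    return (mean / n_samples).tolist(), (std / n_samples).tolist()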
- - with torch.no_grad(): - for data, _ in train_loader: - - # import matplotlib.pyplot as plt - # img = plt.imshow(data.numpy()[0,1,:]) - # plt.show() - - batch_samples = data.size(0) - data = data.view(batch_samples, data.size(1), -1) - mean = mean + data.mean(2).sum(0) - std = std + data.std(2).sum(0) - nb_samples += batch_samples - - if nb_samples > 0.: - mean /= nb_samples - std /= nb_samples - mean, std = mean.numpy().tolist(), std.numpy().tolist() - else: - mean, std = [mean], [std] - - log.debug('MEAN: ' + str(mean) + ' -- STD: ' + str(std)) - - self.mean_std_cache[dataset_info.path] = [mean, std] - return mean, std diff --git a/autoPyTorch/pipeline/nodes/image/image_dataset_reader.py b/autoPyTorch/pipeline/nodes/image/image_dataset_reader.py deleted file mode 100644 index 85156d36a..000000000 --- a/autoPyTorch/pipeline/nodes/image/image_dataset_reader.py +++ /dev/null @@ -1,57 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import os -import numpy as np -import math - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser - -from autoPyTorch.data_management.data_manager import ImageManager - -class ImageDatasetReader(PipelineNode): - def __init__(self): - super(ImageDatasetReader, self).__init__() - - def fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid): - - if len(X_train.shape)==1 and len(X_train)==1: - X_train = X_train[0] - Y_train = 0 - - if X_valid is not None: - if len(X_valid.shape)==1 and len(X_valid)==1: - X_valid = X_valid[0] - Y_valid = None - - X_train, Y_train, path = self.read_data(X_train, Y_train) - X_valid, Y_valid, _ = self.read_data(X_valid, Y_valid) - - return { 'X_train': X_train, 'Y_train': Y_train, 'X_valid': X_valid, 'Y_valid': Y_valid, 'dataset_path': path } - - def get_pipeline_config_options(self): - options = [ - ] - return options - - def read_data(self, path, y): - if path is None: - return None, None, None - - if not isinstance(path, str): - return path, y, str(path)[0:300] - - if not os.path.isabs(path): - path = os.path.abspath(os.path.join(ConfigFileParser.get_autonet_home(), path)) - - if not os.path.exists(path): - raise ValueError('Path ' + str(path) + ' is not a valid path.') - - im = ImageManager() - im.read_data(path, is_classification=True) - - return im.X_train, im.Y_train, path diff --git a/autoPyTorch/pipeline/nodes/image/loss_module_selector_indices.py b/autoPyTorch/pipeline/nodes/image/loss_module_selector_indices.py deleted file mode 100644 index be9741cb0..000000000 --- a/autoPyTorch/pipeline/nodes/image/loss_module_selector_indices.py +++ /dev/null @@ -1,35 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import inspect -import numpy as np - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption - - -class LossModuleSelectorIndices(LossModuleSelector): - def fit(self, hyperparameter_config, pipeline_config, X, Y, train_indices, dataset_info): - - if Y.shape[0] == dataset_info.y_shape[0]: - return super(LossModuleSelectorIndices, self).fit(hyperparameter_config, pipeline_config, X=np.zeros((Y.shape[0], 1)), Y=Y, train_indices=train_indices) - - print(Y.shape[0], 
dataset_info.y_shape[0]) - - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - loss_module_name = hyperparameter_config["loss_module"] - loss_module = self.loss_modules[loss_module_name] - loss = loss_module.module - if inspect.isclass(loss): - loss = loss() - loss_module.set_loss_function(loss) - - return {'loss_function': loss_module} - - diff --git a/autoPyTorch/pipeline/nodes/image/multiple_datasets.py b/autoPyTorch/pipeline/nodes/image/multiple_datasets.py deleted file mode 100644 index 1b1adaf74..000000000 --- a/autoPyTorch/pipeline/nodes/image/multiple_datasets.py +++ /dev/null @@ -1,115 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - -import os -import numpy as np -import math -import time -import pandas as pd -import logging -import random -import torch - -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser - -class MultipleDatasets(SubPipelineNode): - - def __init__(self, sub_pipeline_nodes): - super(MultipleDatasets, self).__init__(sub_pipeline_nodes) - - self.logger = logging.getLogger('autonet') - - - def fit(self, hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type, config_id, working_directory): - if len(X_train.shape) > 1: - return self.sub_pipeline.fit_pipeline( hyperparameter_config=hyperparameter_config, - pipeline_config=pipeline_config, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - budget=budget, budget_type=budget_type, config_id=config_id, working_directory=working_directory) - - - max_datasets = X_train.shape[0] - max_steps = math.floor((math.log(pipeline_config['max_budget']) - math.log(pipeline_config['min_budget'])) / math.log(pipeline_config['eta'])) - current_step = max_steps - math.floor((math.log(pipeline_config['max_budget']) - math.log(budget)) / math.log(pipeline_config['eta'])) if budget > 1e-10 else 0 - n_datasets = math.floor(math.pow(max_datasets, current_step/max(1, max_steps)) + 1e-10) - - # refit can cause issues with different budget - if max_steps == 0 or n_datasets > max_datasets or not pipeline_config['increase_number_of_trained_datasets']: - n_datasets = max_datasets - - if X_valid is None or Y_valid is None: - X_valid = [None] * n_datasets - Y_valid = [None] * n_datasets - - if 'use_tensorboard_logger' in pipeline_config and pipeline_config['use_tensorboard_logger']: - import tensorboard_logger as tl - tl.log_value('Train/datasets', float(n_datasets), int(time.time())) - - infos = [] - loss = 0 - losses = [] - - self.logger.debug('Start fitting ' + str(n_datasets) + ' dataset(s). 
Current budget: ' + str(budget) + ' - Step: ' + str(current_step) + '/' + str(max_steps)) - - #dataset_order = list(range(n_datasets)) - #random.shuffle(dataset_order) - #if pipeline_config['dataset_order'] and len(pipeline_config['dataset_order']) == n_datasets: - # dataset_order = pipeline_config['dataset_order'] - # dataset_order = [i for i in dataset_order if i < n_datasets] - #X_train = X_train[dataset_order] - if np.any(pipeline_config['dataset_order']): - dataset_order = pipeline_config['dataset_order'] - else: - dataset_order = list(range(n_datasets)) - X_train = X_train[dataset_order] - - for dataset in range(n_datasets): - self.logger.info('Fit dataset (' + str(dataset+1) + '/' + str(n_datasets) + '): ' + str(X_train[dataset]) + ' for ' + str(round(budget / n_datasets)) + 's') - - result = self.sub_pipeline.fit_pipeline(hyperparameter_config=hyperparameter_config, - pipeline_config=pipeline_config, - X_train=X_train[dataset], Y_train=Y_train[dataset], X_valid=X_valid[dataset], Y_valid=Y_valid[dataset], - budget=budget / n_datasets, budget_type=budget_type, config_id=config_id, working_directory=working_directory) - - # copy/rename checkpoint - save one checkpoint for each trained dataset - if 'checkpoint' in result['info']: - src = result['info']['checkpoint'] - folder, file = os.path.split(src) - dest = os.path.join(folder, os.path.splitext(file)[0] + '_' + str(dataset) + '.pt') - import shutil - if dataset < n_datasets - 1: - shutil.copy(src, dest) - else: - os.rename(src, dest) - result['info']['checkpoint'] = dest - - result['info']['dataset_path'] = str(X_train[dataset]) - result['info']['dataset_id'] = dataset_order[dataset] - - infos.append(result['info']) - loss += result['loss'] - losses.append(result['loss']) - - if 'use_tensorboard_logger' in pipeline_config and pipeline_config['use_tensorboard_logger']: - import tensorboard_logger as tl - tl.log_value('Train/datasets', float(n_datasets), int(time.time())) - - loss = loss / n_datasets - - return {'loss': loss, 'losses': losses, 'info': infos} - - def predict(self, pipeline_config, X): - return self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X) - - def get_pipeline_config_options(self): - options = [ - ConfigOption('dataset_order', default=None, type=int, list=True, info="Order in which datasets are considered."), - - #autonet.refit sets this to false to avoid refit budget issues - ConfigOption('increase_number_of_trained_datasets', default=True, type=to_bool, info="Wether to increase the number of considered datasets with each successive halfing iteration.") - ] - return options diff --git a/autoPyTorch/pipeline/nodes/image/network_selector_datasetinfo.py b/autoPyTorch/pipeline/nodes/image/network_selector_datasetinfo.py deleted file mode 100644 index 4adf11c4b..000000000 --- a/autoPyTorch/pipeline/nodes/image/network_selector_datasetinfo.py +++ /dev/null @@ -1,35 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector -from autoPyTorch.components.networks.base_net import BaseNet - -import torch.nn as nn -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption -import torchvision.models as models - -class NetworkSelectorDatasetInfo(NetworkSelector): - def fit(self, hyperparameter_config, pipeline_config, dataset_info): - config = ConfigWrapper(self.get_name(), 
hyperparameter_config) - network_name = config['network'] - - network_type = self.networks[network_name] - network_config = ConfigWrapper(network_name, config) - activation = self.final_activations[pipeline_config["final_activation"]] - - in_features = dataset_info.x_shape[1:] - if len(in_features) == 1: - # feature data - otherwise image data (channels, width, height) - in_features = in_features[0] - - network = network_type( config=network_config, - in_features=in_features, out_features=dataset_info.y_shape[1], - final_activation=activation) - - # self.logger.debug('NETWORK:\n' + str(network)) - return {'network': network} diff --git a/autoPyTorch/pipeline/nodes/image/optimization_algorithm_no_timelimit.py b/autoPyTorch/pipeline/nodes/image/optimization_algorithm_no_timelimit.py deleted file mode 100644 index cf98bcead..000000000 --- a/autoPyTorch/pipeline/nodes/image/optimization_algorithm_no_timelimit.py +++ /dev/null @@ -1,359 +0,0 @@ -import numpy as np -import os -import time -import shutil -import netifaces -import traceback -import logging -import itertools -import random - -import autoPyTorch.utils.thread_read_write as thread_read_write -import datetime - -from hpbandster.core.nameserver import NameServer, nic_name_to_host -from hpbandster.core.result import (json_result_logger, - logged_results_to_HBS_result) - -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool - -from autoPyTorch.core.hpbandster_extensions.bohb_ext import BOHBExt -from autoPyTorch.core.hpbandster_extensions.hyperband_ext import HyperBandExt -from autoPyTorch.core.worker_no_timelimit import ModuleWorkerNoTimeLimit - -from autoPyTorch.components.training.image.budget_types import BudgetTypeTime, BudgetTypeEpochs -import copy - -from autoPyTorch.utils.modify_config_space import remove_constant_hyperparameter - -from autoPyTorch.utils.loggers import combined_logger, bohb_logger, tensorboard_logger - -import pprint - -tensorboard_logger_configured = False - -class OptimizationAlgorithmNoTimeLimit(SubPipelineNode): - def __init__(self, optimization_pipeline_nodes): - """OptimizationAlgorithm pipeline node. 
- It will run either the optimization algorithm (BOHB, Hyperband - defined in config) or start workers - Each worker will run the provided optimization_pipeline and will return the output - of the pipeline_result_node to the optimization algorithm - - Train: - The optimization_pipeline will get the following inputs: - {hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type} - The pipeline_result_node has to provide the following outputs: - - 'loss': the optimization value (minimize) - - 'info': dict containing info for the respective training process - - Predict: - The optimization_pipeline will get the following inputs: - {pipeline_config, X} - The pipeline_result_node has to provide the following outputs: - - 'Y': result of prediction for 'X' - Note: predict will not call the optimization algorithm - - Arguments: - optimization_pipeline {Pipeline} -- pipeline that will be optimized (hyperparamter) - pipeline_result_node {PipelineNode} -- pipeline node that provides the results of the optimization_pieline - """ - - super(OptimizationAlgorithmNoTimeLimit, self).__init__(optimization_pipeline_nodes) - - self.algorithms = dict() - self.algorithms["bohb"] = BOHBExt - self.algorithms["hyperband"] = HyperBandExt - - self.logger = logging.getLogger('autonet') - - self.n_datasets=1 - - def fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid, refit=None): - res = None - - config_space = self.pipeline.get_hyperparameter_search_space(**pipeline_config) - config_space, constants = remove_constant_hyperparameter(config_space) - config_space.seed(pipeline_config['random_seed']) - - self.n_datasets = X_train.shape[0] if X_train.shape[0]<10 else 1 - - #Get number of budgets - max_budget = pipeline_config["max_budget"] - min_budget = pipeline_config["min_budget"] - eta = pipeline_config["eta"] - max_SH_iter = -int(np.log(min_budget/max_budget)/np.log(eta)) + 1 - budgets = max_budget * np.power(eta, -np.linspace(max_SH_iter-1, 0, max_SH_iter)) - n_budgets = len(budgets) - - # Get permutations - self.permutations = self.get_permutations(n_budgets) - - self.logger.debug('BOHB-ConfigSpace:\n' + str(config_space)) - self.logger.debug('Constant Hyperparameter:\n' + str(pprint.pformat(constants))) - - run_id, task_id = pipeline_config['run_id'], pipeline_config['task_id'] - - - global tensorboard_logger_configured - if pipeline_config['use_tensorboard_logger'] and not tensorboard_logger_configured: - import tensorboard_logger as tl - directory = os.path.join(pipeline_config['result_logger_dir'], "worker_logs_" + str(task_id)) - os.makedirs(directory, exist_ok=True) - tl.configure(directory, flush_secs=60) - tensorboard_logger_configured = True - - if (refit is not None): - return self.run_refit(pipeline_config, refit, constants, X_train, Y_train, X_valid, Y_valid) - - try: - ns_credentials_dir, tmp_models_dir, network_interface_name = self.prepare_environment(pipeline_config) - - # start nameserver if not on cluster or on master node in cluster - if task_id in [1, -1]: - NS = self.get_nameserver(run_id, task_id, ns_credentials_dir, network_interface_name) - ns_host, ns_port = NS.start() - - self.run_worker(pipeline_config=pipeline_config, run_id=run_id, task_id=task_id, ns_credentials_dir=ns_credentials_dir, - network_interface_name=network_interface_name, X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - constant_hyperparameter=constants) - - # start BOHB if not on cluster or on master node in cluster - if task_id in [1, -1]: - 
self.run_optimization_algorithm(pipeline_config, config_space, constants, run_id, ns_host, ns_port, NS, task_id) - - res = self.parse_results(pipeline_config["result_logger_dir"]) - - except Exception as e: - print(e) - traceback.print_exc() - finally: - self.clean_up(pipeline_config, ns_credentials_dir, tmp_models_dir) - - if (res): - return {'loss': res[0], 'optimized_hyperparameter_config': res[1], 'budget': res[2], 'info': dict()} - else: - return {'optimized_hyperparameter_config': dict(), 'budget': 0, 'loss': float('inf'), 'info': dict()} - - def predict(self, pipeline_config, X): - return self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X) - - def get_pipeline_config_options(self): - options = [ - ConfigOption("run_id", default="0", type=str, info="Unique id for each run."), - ConfigOption("task_id", default=-1, type=int, info="ID for each worker, if you run AutoNet on a cluster. Set to -1, if you run it locally. "), - ConfigOption("algorithm", default="bohb", type=str, choices=list(self.algorithms.keys())), - ConfigOption("budget_type", default="time", type=str, choices=['time', 'epochs']), - ConfigOption("min_budget", default=lambda c: 120 if c['budget_type'] == 'time' else 5, type=float, depends=True, info="Min budget for fitting configurations."), - ConfigOption("max_budget", default=lambda c: 6000 if c['budget_type'] == 'time' else 150, type=float, depends=True, info="Max budget for fitting configurations."), - ConfigOption("max_runtime", - default=lambda c: ((-int(np.log(c["min_budget"] / c["max_budget"]) / np.log(c["eta"])) + 1) * c["max_budget"]) - if c["budget_type"] == "time" else float("inf"), - type=float, depends=True, info="Total time for the run."), - ConfigOption("num_iterations", - default=lambda c: (-int(np.log(c["min_budget"] / c["max_budget"]) / np.log(c["eta"])) + 1) - if c["budget_type"] == "epochs" else float("inf"), - type=float, depends=True, info="Number of successive halving iterations"), - ConfigOption("eta", default=3, type=float, info='eta parameter of Hyperband.'), - ConfigOption("min_workers", default=1, type=int), - ConfigOption("working_dir", default=".", type="directory"), - ConfigOption("network_interface_name", default=self.get_default_network_interface_name(), type=str), - ConfigOption("memory_limit_mb", default=1000000, type=int), - ConfigOption("result_logger_dir", default=".", type="directory"), - ConfigOption("use_tensorboard_logger", default=False, type=to_bool), - ConfigOption("keep_only_incumbent_checkpoints", default=True, type=to_bool), - ConfigOption("global_results_dir", default=None, type='directory'), - ] - return options - - def get_default_network_interface_name(self): - try: - return netifaces.gateways()['default'][netifaces.AF_INET][1] - except: - return 'lo' - - def prepare_environment(self, pipeline_config): - if not os.path.exists(pipeline_config["working_dir"]) and pipeline_config['task_id'] in [1, -1]: - try: - os.mkdir(pipeline_config["working_dir"]) - except: - pass - tmp_models_dir = os.path.join(pipeline_config["working_dir"], "tmp_models_" + str(pipeline_config['run_id'])) - ns_credentials_dir = os.path.abspath(os.path.join(pipeline_config["working_dir"], "ns_credentials_" + str(pipeline_config['run_id']))) - network_interface_name = pipeline_config["network_interface_name"] or (netifaces.interfaces()[1] if len(netifaces.interfaces()) > 1 else "lo") - - if os.path.exists(tmp_models_dir) and pipeline_config['task_id'] in [1, -1]: - shutil.rmtree(tmp_models_dir) - if 
os.path.exists(ns_credentials_dir) and pipeline_config['task_id'] in [1, -1]: - shutil.rmtree(ns_credentials_dir) - return ns_credentials_dir, tmp_models_dir, network_interface_name - - def clean_up(self, pipeline_config, tmp_models_dir, ns_credentials_dir): - if pipeline_config['task_id'] in [1, -1]: - # Delete temporary files - if os.path.exists(tmp_models_dir): - shutil.rmtree(tmp_models_dir) - if os.path.exists(ns_credentials_dir): - shutil.rmtree(ns_credentials_dir) - - def get_nameserver(self, run_id, task_id, ns_credentials_dir, network_interface_name): - if not os.path.isdir(ns_credentials_dir): - try: - os.mkdir(ns_credentials_dir) - except: - pass - return NameServer(run_id=run_id, nic_name=network_interface_name, working_directory=ns_credentials_dir) - - def get_optimization_algorithm_instance(self, config_space, run_id, pipeline_config, ns_host, ns_port, result_logger, previous_result=None): - optimization_algorithm = self.algorithms[pipeline_config["algorithm"]] - - if pipeline_config["algorithm"]=="bohb_multi_kde": - hb = optimization_algorithm(configspace=config_space, run_id = run_id, - eta=pipeline_config["eta"], min_budget=pipeline_config["min_budget"], max_budget=pipeline_config["max_budget"], - host=ns_host, nameserver=ns_host, nameserver_port=ns_port, - result_logger=result_logger, - ping_interval=10**6, - working_directory=pipeline_config["working_dir"], - previous_result=previous_result, - n_kdes=self.n_datasets, - permutations=self.permutations) - else: - hb = optimization_algorithm(configspace=config_space, run_id = run_id, - eta=pipeline_config["eta"], min_budget=pipeline_config["min_budget"], max_budget=pipeline_config["max_budget"], - host=ns_host, nameserver=ns_host, nameserver_port=ns_port, - result_logger=result_logger, - ping_interval=10**6, - working_directory=pipeline_config["working_dir"], - previous_result=previous_result) - return hb - - - def parse_results(self, result_logger_dir): - res = logged_results_to_HBS_result(result_logger_dir) - id2config = res.get_id2config_mapping() - incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False, non_decreasing_budget=False) - - if (len(incumbent_trajectory['config_ids']) == 0): - return dict() - - final_config_id = incumbent_trajectory['config_ids'][-1] - return incumbent_trajectory['losses'][-1], id2config[final_config_id]['config'], incumbent_trajectory['budgets'][-1] - - - def run_worker(self, pipeline_config, constant_hyperparameter, run_id, task_id, ns_credentials_dir, network_interface_name, - X_train, Y_train, X_valid, Y_valid): - if not task_id == -1: - time.sleep(5) - while not os.path.isdir(ns_credentials_dir): - time.sleep(5) - host = nic_name_to_host(network_interface_name) - - worker = ModuleWorkerNoTimeLimit( pipeline=self.sub_pipeline, pipeline_config=pipeline_config, - constant_hyperparameter=constant_hyperparameter, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - budget_type=pipeline_config['budget_type'], - max_budget=pipeline_config["max_budget"], - host=host, run_id=run_id, - id=task_id, - working_directory=pipeline_config["result_logger_dir"], - permutations=self.permutations) - worker.load_nameserver_credentials(ns_credentials_dir) - # run in background if not on cluster - worker.run(background=(task_id <= 1)) - - - def run_optimization_algorithm(self, pipeline_config, config_space, constant_hyperparameter, run_id, ns_host, ns_port, nameserver, task_id): - self.logger.info("[AutoNet] Start " + pipeline_config["algorithm"]) - - # initialize 
optimization algorithm - - result_logger = self.get_result_logger(pipeline_config, constant_hyperparameter) - HB = self.get_optimization_algorithm_instance(config_space=config_space, run_id=run_id, - pipeline_config=pipeline_config, ns_host=ns_host, ns_port=ns_port, result_logger=result_logger) - - # start algorithm - min_num_workers = pipeline_config["min_workers"] if task_id != -1 else 1 - - reduce_runtime = pipeline_config["max_budget"] if pipeline_config["budget_type"] == "time" else 0 - - HB.wait_for_workers(min_num_workers) - self.logger.debug('Workers are ready!') - - thread_read_write.append('runs.log', "{0}: {1} | {2}-{3}\n".format( - str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - run_id, - pipeline_config['min_budget'], - pipeline_config['max_budget'])) - - HB.run_until(runtime=(pipeline_config["max_runtime"] - reduce_runtime), - n_iterations=pipeline_config["num_iterations"], - min_n_workers=min_num_workers) - - HB.shutdown(shutdown_workers=True) - nameserver.shutdown() - - - def clean_fit_data(self): - super(OptimizationAlgorithmNoTimeLimit, self).clean_fit_data() - self.sub_pipeline.root.clean_fit_data() - - def run_refit(self, pipeline_config, refit, constants, X_train, Y_train, X_valid, Y_valid): - start_time = time.time() - - result_logger = self.get_result_logger(pipeline_config, constants) - result_logger.new_config((0, 0, 0), refit["hyperparameter_config"], {'model_based_pick': False}) - - full_config = dict() - full_config.update(constants) - full_config.update(refit["hyperparameter_config"]) - - self.logger.debug('Refit-Config:\n' + str(pprint.pformat(full_config))) - - class Job(): - pass - job = Job() - job.id = (0, 0, 0) - job.kwargs = { - 'budget': refit['budget'], - 'config': refit["hyperparameter_config"], - } - - try: - res = self.sub_pipeline.fit_pipeline( - hyperparameter_config=full_config, pipeline_config=pipeline_config, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - budget=refit["budget"], budget_type=pipeline_config['budget_type'], config_id='refit', working_directory=pipeline_config['result_logger_dir']) - job.exception = None - except Exception as e: - self.logger.exception('Exception during refit') - res = None - job.exception = str(e) - - end_time = time.time() - - job.timestamps = {'submitted': start_time, 'started': start_time, 'finished': end_time} - job.result = res - - result_logger(job) - - return {'loss': res['loss'] if res else float('inf'), - 'optimized_hyperparameter_config': full_config, - 'budget': refit['budget'], - 'info': res['info'] if res else dict()} - - def get_result_logger(self, pipeline_config, constant_hyperparameter): - loggers = [bohb_logger(constant_hyperparameter=constant_hyperparameter, directory=pipeline_config["result_logger_dir"], overwrite=True)] - if pipeline_config['use_tensorboard_logger']: - loggers.append(tensorboard_logger(pipeline_config, constant_hyperparameter, pipeline_config['global_results_dir'])) - return combined_logger(*loggers) - - def get_permutations(self, n_budgets=1): - # Get permutations, since HB fits like this: b1 - b2 -b3 - b2 -b3, repeat them accordingly - idx = [i for i in range(self.n_datasets)] - permutations = np.array(list(itertools.permutations(idx))) - ret = [] - for perm in permutations: - for ind in range(n_budgets): - ret.append(perm) - return np.array(ret) diff --git a/autoPyTorch/pipeline/nodes/image/simple_scheduler_selector.py b/autoPyTorch/pipeline/nodes/image/simple_scheduler_selector.py deleted file mode 100644 index 59de24aa1..000000000 
--- a/autoPyTorch/pipeline/nodes/image/simple_scheduler_selector.py +++ /dev/null @@ -1,26 +0,0 @@ -__author__ = "Michael Burkart" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption - -class SimpleLearningrateSchedulerSelector(LearningrateSchedulerSelector): - - def fit(self, hyperparameter_config, pipeline_config, optimizer): - config = ConfigWrapper(self.get_name(), hyperparameter_config) - - scheduler_name = config['lr_scheduler'] - - lr_scheduler_type = self.lr_scheduler[scheduler_name] - lr_scheduler_config = ConfigWrapper(scheduler_name, config) - lr_scheduler = lr_scheduler_type(optimizer, lr_scheduler_config) - - return {'lr_scheduler': lr_scheduler} \ No newline at end of file diff --git a/autoPyTorch/pipeline/nodes/image/simple_train_node.py b/autoPyTorch/pipeline/nodes/image/simple_train_node.py deleted file mode 100644 index 8b3796d64..000000000 --- a/autoPyTorch/pipeline/nodes/image/simple_train_node.py +++ /dev/null @@ -1,348 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import torch -import time -import logging - -import os, pprint -import scipy.sparse -import numpy as np -import torch.nn as nn -from torch.autograd import Variable -from torch.utils.data import DataLoader, TensorDataset - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.components.training.image.base_training import BaseTrainingTechnique, BaseBatchLossComputationTechnique - -from autoPyTorch.components.training.image.trainer import Trainer -from autoPyTorch.components.training.image.checkpoints.save_load import save_checkpoint, load_checkpoint, get_checkpoint_dir -from autoPyTorch.components.training.image.checkpoints.load_specific import load_model #, load_optimizer, load_scheduler - -torch.backends.cudnn.benchmark = True - -import signal - -class SimpleTrainNode(PipelineNode): - def __init__(self): - super(SimpleTrainNode, self).__init__() - self.default_minimize_value = True - self.logger = logging.getLogger('autonet') - self.training_techniques = dict() - self.batch_loss_computation_techniques = dict() - self.add_batch_loss_computation_technique("standard", BaseBatchLossComputationTechnique) - - def fit(self, hyperparameter_config, pipeline_config, - train_loader, valid_loader, - network, optimizer, lr_scheduler, - optimize_metric, additional_metrics, - log_functions, - budget, - loss_function, - budget_type, - config_id, working_directory, - train_indices, valid_indices): - - - if budget < 1e-5: - return {'loss': float('inf') if pipeline_config["minimize"] else -float('inf'), 'info': dict()} - - training_start_time = time.time() - # prepare - if not torch.cuda.is_available(): - pipeline_config["cuda"] = False - - device = torch.device('cuda' if pipeline_config['cuda'] else 'cpu') - - checkpoint_path = get_checkpoint_dir(working_directory) - checkpoint = None - if pipeline_config['save_checkpoints']: - checkpoint = load_checkpoint(checkpoint_path, 
config_id, budget) - - network = load_model(network, checkpoint) - - tensorboard_logging = 'use_tensorboard_logger' in pipeline_config and pipeline_config['use_tensorboard_logger'] - - # from torch.optim import SGD - # optimizer = SGD(network.parameters(), lr=0.3) - - # optimizer = load_optimizer(optimizer, checkpoint, device) - # lr_scheduler = load_scheduler(lr_scheduler, checkpoint) - - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - batch_loss_name = hyperparameter_config["batch_loss_computation_technique"] if "batch_loss_computation_technique" in hyperparameter_config else pipeline_config["batch_loss_computation_techniques"][0] - - batch_loss_computation_technique = self.batch_loss_computation_techniques[batch_loss_name]() - batch_loss_computation_technique.set_up( - pipeline_config, ConfigWrapper(batch_loss_name, hyperparameter_config), self.logger) - - - # Training loop - logs = [] - epoch = 0 - - optimize_metrics = [] - val_metrics = [optimize_metric] + additional_metrics - if pipeline_config['evaluate_on_train_data']: - optimize_metrics = val_metrics - elif valid_loader is None: - self.logger.warning('No valid data specified and train process should not evaluate on train data! Will ignore \"evaluate_on_train_data\" and evaluate on train data!') - optimize_metrics = val_metrics - - trainer = Trainer( - model=network, - loss_computation=batch_loss_computation_technique, - criterion=loss_function, - budget=budget, - optimizer=optimizer, - scheduler=lr_scheduler, - budget_type=budget_type, - device=device, - config_id=config_id, - checkpoint_path=checkpoint_path if pipeline_config['save_checkpoints'] else None, - images_to_plot=tensorboard_logging * pipeline_config['tensorboard_images_count']) - - model_params = self.count_parameters(network) - - start_up = time.time() - training_start_time - epoch_train_time = 0 - val_time = 0 - log_time = 0 - - # tmp = time.time() - # for _ in range(100): - # for _ in train_loader: - # pass - # time_used = time.time() - tmp - # self.logger.debug("Test time: " + str(time_used) + "s : \n" + str(pprint.pformat(train_loader.dataset.get_times('train_')))) - - self.logger.debug("Start train. Budget: " + str(budget)) - - last_log_time = time.time() - while True: - # prepare epoch - log = dict() - - # train - tmp = time.time() - optimize_metric_results, train_loss, stop_training = trainer.train(epoch + 1, train_loader, optimize_metrics) - - log['train_loss'] = train_loss - for i, metric in enumerate(optimize_metrics): - log['train_' + metric.name] = optimize_metric_results[i] - epoch_train_time += time.time() - tmp - - # evaluate - tmp = time.time() - if valid_loader is not None: - valid_metric_results = trainer.evaluate(valid_loader, val_metrics, epoch=epoch + 1) - - for i, metric in enumerate(val_metrics): - log['val_' + metric.name] = valid_metric_results[i] - val_time += time.time() - tmp - - # additional los - e.g. 
test evaluation - tmp = time.time() - for func in log_functions: - log[func.name] = func(network, epoch + 1) - log_time += time.time() - tmp - - log['epochs'] = epoch + 1 - log['model_parameters'] = model_params - log['learning_rate'] = optimizer.param_groups[0]['lr'] - - # log.update(train_loader.dataset.get_times('train_')) - # log.update(valid_loader.dataset.get_times('val_')) - - logs.append(log) - - epoch += 1 - - self.logger.debug("Epoch: " + str(epoch) + " : " + str(log)) - - if budget_type == 'epochs' and epoch + 1 >= budget: - break - - if stop_training: - break - - if tensorboard_logging and time.time() - last_log_time >= pipeline_config['tensorboard_min_log_interval']: - import tensorboard_logger as tl - worker_path = 'Train/' - tl.log_value(worker_path + 'budget', float(budget), epoch) - for name, value in log.items(): - tl.log_value(worker_path + name, float(value), epoch) - last_log_time = time.time() - - - # wrap up - wrap_up_start_time = time.time() - - self.logger.debug("Finished Training") - - opt_metric_name = 'train_' + optimize_metric.name - if valid_loader is not None: - opt_metric_name = 'val_' + optimize_metric.name - - if pipeline_config["minimize"]: - final_log = min(logs, key=lambda x:x[opt_metric_name]) - else: - final_log = max(logs, key=lambda x:x[opt_metric_name]) - - if tensorboard_logging: - import tensorboard_logger as tl - worker_path = 'Train/' - tl.log_value(worker_path + 'budget', float(budget), epoch) - for name, value in final_log.items(): - tl.log_value(worker_path + name, float(value), epoch) - - if trainer.latest_checkpoint: - final_log['checkpoint'] = trainer.latest_checkpoint - elif pipeline_config['save_checkpoints']: - path = save_checkpoint(checkpoint_path, config_id, budget, network, optimizer, lr_scheduler) - final_log['checkpoint'] = path - - final_log['train_datapoints'] = len(train_indices) - if valid_loader is not None: - final_log['val_datapoints'] = len(valid_indices) - - loss = final_log[opt_metric_name] * (1 if pipeline_config["minimize"] else -1) - - self.logger.info("Finished train with budget " + str(budget) + - "s, Training took " + str(int(wrap_up_start_time - training_start_time)) + - "s, Wrap up took " + str(int(time.time() - wrap_up_start_time)) + - "s, Init took " + str(int(start_up)) + - "s, Train took " + str(int(epoch_train_time)) + - "s, Validation took " + str(int(val_time)) + - "s, Log functions took " + str(int(log_time)) + - "s, Cumulative time " + str(int(trainer.cumulative_time)) + - "s.\nTotal time consumption in s: " + str(int(time.time() - training_start_time))) - - return {'loss': loss, 'info': final_log} - - def get_dataloader_times(self, dataloader): - read = dataloader.dataset.readTime.value() - read_avg = dataloader.dataset.readTime.avg() - augment = dataloader.dataset.augmentTime.value() - augment_avg = dataloader.dataset.augmentTime.avg() - return read, read_avg, augment, augment_avg - - @staticmethod - def count_parameters(model): - return sum(p.numel() for p in model.parameters() if p.requires_grad) - - def predict(self, pipeline_config, network, predict_loader, dataset_info, optimize_metric): - - if not torch.cuda.is_available(): - pipeline_config["cuda"] = False - else: - pipeline_config["cuda"] = True - - device = torch.device('cuda:0' if pipeline_config['cuda'] else 'cpu') - - if dataset_info.default_dataset: - metric_results = Trainer(None, network, None, None, None, None, None, device).evaluate(predict_loader, [optimize_metric]) - return { 'score': metric_results[0] } - else: - Y = 
predict(network, predict_loader, None, device) - return { 'Y': Y.detach().cpu().numpy() } - - def add_training_technique(self, name, training_technique): - if (not issubclass(training_technique, BaseTrainingTechnique)): - raise ValueError("training_technique type has to inherit from BaseTrainingTechnique") - self.training_techniques[name] = training_technique - - def remove_training_technique(self, name): - del self.training_techniques[name] - - def add_batch_loss_computation_technique(self, name, batch_loss_computation_technique): - if (not issubclass(batch_loss_computation_technique, BaseBatchLossComputationTechnique)): - raise ValueError("batch_loss_computation_technique type has to inherit from BaseBatchLossComputationTechnique, got " + str(batch_loss_computation_technique)) - self.batch_loss_computation_techniques[name] = batch_loss_computation_technique - - def remove_batch_loss_computation_technique(self, name, batch_loss_computation_technique): - del self.batch_loss_computation_techniques[name] - - def get_hyperparameter_search_space(self, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - hp_batch_loss_computation = cs.add_hyperparameter(CSH.CategoricalHyperparameter("batch_loss_computation_technique", sorted(self.batch_loss_computation_techniques.keys()))) - - for name, technique in self.batch_loss_computation_techniques.items(): - parent = {'parent': hp_batch_loss_computation, 'value': name} if hp_batch_loss_computation is not None else None - cs.add_configuration_space(prefix=name, configuration_space=technique.get_hyperparameter_search_space(**pipeline_config), - delimiter=ConfigWrapper.delimiter, parent_hyperparameter=parent) - - possible_loss_comps = sorted(list(set(pipeline_config["batch_loss_computation_techniques"]).intersection(self.batch_loss_computation_techniques.keys()))) - - if 'batch_loss_computation_techniques' not in pipeline_config.keys(): - cs.add_hyperparameter(CSH.CategoricalHyperparameter("batch_loss_computation_technique", possible_loss_comps)) - self._check_search_space_updates() - - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="batch_loss_computation_techniques", default=list(self.batch_loss_computation_techniques.keys()), - type=str, list=True, choices=list(self.batch_loss_computation_techniques.keys())), - ConfigOption("minimize", default=self.default_minimize_value, type=to_bool, choices=[True, False]), - ConfigOption("cuda", default=True, type=to_bool, choices=[True, False]), - ConfigOption("save_checkpoints", default=False, type=to_bool, choices=[True, False], info="Wether to save state dicts as checkpoints."), - ConfigOption("tensorboard_min_log_interval", default=30, type=int), - ConfigOption("tensorboard_images_count", default=0, type=int), - ConfigOption("evaluate_on_train_data", default=True, type=to_bool), - ] - for name, technique in self.training_techniques.items(): - options += technique.get_pipeline_config_options() - for name, technique in self.batch_loss_computation_techniques.items(): - options += technique.get_pipeline_config_options() - return options - - -def predict(network, test_loader, metrics, device, move_network=True): - """ predict batchwise """ - # Build DataLoader - if move_network: - if torch.cuda.device_count() > 1: - network = nn.DataParallel(network) - network = network.to(device) - - # Batch prediction - network.eval() - if metrics is not None: - metric_results = [0] * len(metrics) - - N = 0.0 - 
for i, (X_batch, Y_batch) in enumerate(test_loader): - # Predict on batch - X_batch = Variable(X_batch).to(device) - batch_size = X_batch.size(0) - - Y_batch_pred = network(X_batch).detach().cpu() - - if metrics is None: - # Infer prediction shape - if i == 0: - Y_pred = Y_batch_pred - else: - # Add to prediction tensor - Y_pred = torch.cat((Y_pred, Y_batch_pred), 0) - else: - for i, metric in enumerate(metrics): - metric_results[i] += metric(Y_batch, Y_batch_pred) * batch_size - - N += batch_size - - if metrics is None: - return Y_pred - else: - return [res / N for res in metric_results] - diff --git a/autoPyTorch/pipeline/nodes/image/single_dataset.py b/autoPyTorch/pipeline/nodes/image/single_dataset.py deleted file mode 100644 index 0509518e5..000000000 --- a/autoPyTorch/pipeline/nodes/image/single_dataset.py +++ /dev/null @@ -1,35 +0,0 @@ -import logging - -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser - -class SingleDataset(SubPipelineNode): - # Node for compatibility with MultipleDatasets model - - def __init__(self, sub_pipeline_nodes): - super(SingleDataset, self).__init__(sub_pipeline_nodes) - - self.logger = logging.getLogger('autonet') - - - def fit(self, hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type, config_id, working_directory): - return self.sub_pipeline.fit_pipeline(hyperparameter_config=hyperparameter_config, - pipeline_config=pipeline_config, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - budget=budget, budget_type=budget_type, config_id=config_id, working_directory=working_directory) - - - def predict(self, pipeline_config, X): - return self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X) - - def get_pipeline_config_options(self): - options = [ - ConfigOption('dataset_order', default=None, type=int, list=True, info="Only used for multiple datasets."), - - #autonet.refit sets this to false to avoid refit budget issues - ConfigOption('increase_number_of_trained_datasets', default=False, type=to_bool, info="Only used for multiple datasets.") - ] - return options - diff --git a/autoPyTorch/pipeline/nodes/imputation.py b/autoPyTorch/pipeline/nodes/imputation.py deleted file mode 100644 index 2b8c1b5df..000000000 --- a/autoPyTorch/pipeline/nodes/imputation.py +++ /dev/null @@ -1,66 +0,0 @@ -import torch -import numpy as np -import scipy.sparse - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH - -from sklearn.impute import SimpleImputer -from sklearn.compose import ColumnTransformer - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper - -class Imputation(PipelineNode): - - strategies = ["mean", "median", "most_frequent"] - - def fit(self, hyperparameter_config, X, train_indices, dataset_info): - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - if dataset_info.is_sparse: - return {'imputation_preprocessor': None, 'all_nan_columns': None} - - # delete all nan columns - all_nan = np.all(np.isnan(X), axis=0) - X = X[:, ~all_nan] - dataset_info.categorical_features = [dataset_info.categorical_features[i] for i, is_nan in enumerate(all_nan) if not is_nan] - - - strategy = hyperparameter_config['strategy'] - fill_value = 
int(np.nanmax(X)) + 1 if not dataset_info.is_sparse else 0 - numerical_imputer = SimpleImputer(strategy=strategy, copy=False) - categorical_imputer = SimpleImputer(strategy='constant', copy=False, fill_value=fill_value) - transformer = ColumnTransformer( - transformers=[('numerical_imputer', numerical_imputer, [i for i, c in enumerate(dataset_info.categorical_features) if not c]), - ('categorical_imputer', categorical_imputer, [i for i, c in enumerate(dataset_info.categorical_features) if c])]) - transformer.fit(X[train_indices]) - X = transformer.transform(X) - - dataset_info.categorical_features = sorted(dataset_info.categorical_features) - return { 'X': X, 'imputation_preprocessor': transformer, 'dataset_info': dataset_info , 'all_nan_columns': all_nan} - - - def predict(self, X, imputation_preprocessor, all_nan_columns): - if imputation_preprocessor is None: - return dict() - X = X[:, ~all_nan_columns] - X = imputation_preprocessor.transform(X) - return { 'X': X } - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - - possible_strategies = sorted(set(Imputation.strategies).intersection(pipeline_config['imputation_strategies'])) - - cs = ConfigSpace.ConfigurationSpace() - cs.add_hyperparameter(CSH.CategoricalHyperparameter("strategy", sorted(possible_strategies))) - self._check_search_space_updates() - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name='imputation_strategies', default=Imputation.strategies, type=str, list=True, choices=Imputation.strategies) - ] - return options diff --git a/autoPyTorch/pipeline/nodes/initialization_selector.py b/autoPyTorch/pipeline/nodes/initialization_selector.py deleted file mode 100644 index fe7975edb..000000000 --- a/autoPyTorch/pipeline/nodes/initialization_selector.py +++ /dev/null @@ -1,87 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.components.networks.initialization import BaseInitialization, SimpleInitializer - -import torch -import torch.nn as nn -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption - -Initializer = SimpleInitializer - -class InitializationSelector(PipelineNode): - def __init__(self): - super(InitializationSelector, self).__init__() - - self.initialization_methods = { - "default": BaseInitialization - } - - self.initializers = dict() - self.default_initializer = None - - def fit(self, hyperparameter_config, pipeline_config, network): - config = ConfigWrapper(self.get_name(), hyperparameter_config) - - method_type = self.initialization_methods[config["initialization_method"]] - method_config = ConfigWrapper(config["initialization_method"], config) - initializer_type = self.initializers[pipeline_config["initializer"]] - initializer_config = ConfigWrapper("initializer", config) - - torch.manual_seed(pipeline_config["random_seed"]) - initializer = initializer_type(initializer_config) - method = method_type(initializer, method_config) - method.apply(network) - - return dict() - - def add_initialization_method(self, name, initialization_method): - if not issubclass(initialization_method, BaseInitialization): - raise ValueError("initialization has to inherit from BaseInitialization") - self.initialization_methods[name] = initialization_method - - def 
remove_initialization_method(self, name): - del self.initialization_methods[name] - - def add_initializer(self, name, initializer, is_default_initializer=False): - self.initializers[name] = initializer - - if (not self.default_initializer or is_default_initializer): - self.default_initializer = name - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - # add hyperparameters of initialization method - possible_initialization_methods = set(pipeline_config["initialization_methods"]).intersection(self.initialization_methods.keys()) - selector = cs.add_hyperparameter(CSH.CategoricalHyperparameter("initialization_method", sorted(possible_initialization_methods))) - - for method_name, method_type in self.initialization_methods.items(): - if (method_name not in possible_initialization_methods): - continue - method_cs = method_type.get_hyperparameter_search_space( - **self._get_search_space_updates(prefix=method_name)) - cs.add_configuration_space(prefix=method_name, configuration_space=method_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector, 'value': method_name}) - - # add hyperparameter of initializer - initializer = self.initializers[pipeline_config["initializer"]] - initializer_cs = initializer.get_hyperparameter_search_space(**self._get_search_space_updates(prefix="initializer")) - cs.add_configuration_space(prefix="initializer", configuration_space=initializer_cs, delimiter=ConfigWrapper.delimiter) - - self._check_search_space_updates(("initializer", "*"), (possible_initialization_methods, "*")) - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="initialization_methods", default=list(self.initialization_methods.keys()), type=str, list=True, choices=list(self.initialization_methods.keys())), - ConfigOption(name="initializer", default=self.default_initializer, type=str, choices=list(self.initializers.keys())) - ] - return options diff --git a/autoPyTorch/pipeline/nodes/log_functions_selector.py b/autoPyTorch/pipeline/nodes/log_functions_selector.py deleted file mode 100644 index 81e90caab..000000000 --- a/autoPyTorch/pipeline/nodes/log_functions_selector.py +++ /dev/null @@ -1,56 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import inspect -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.pipeline.nodes.metric_selector import default_minimize_transform, no_transform - -from autoPyTorch.utils.config.config_option import ConfigOption - -class LogFunctionsSelector(PipelineNode): - def __init__(self): - super(LogFunctionsSelector, self).__init__() - - self.log_functions = dict() - - def fit(self, pipeline_config): - return {'log_functions': [self.log_functions[log_function] for log_function in pipeline_config["additional_logs"]]} - - def add_log_function(self, name, log_function, loss_transform=False): - """Add a log function, will be called with the current trained network and the current training epoch - - Arguments: - name {string} -- name of log function for definition in config - log_function {function} -- log function called with network and epoch - """ - - if (not hasattr(log_function, '__call__')): - raise ValueError("log function has to be a function") - - if isinstance(loss_transform, bool): - loss_transform = default_minimize_transform if loss_transform else 
no_transform - - self.log_functions[name] = AutoNetLog(name, log_function, loss_transform) - - def remove_log_function(self, name): - del self.log_functions[name] - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="additional_logs", default=[], type=str, list=True, choices=list(self.log_functions.keys())), - ] - return options - - -class AutoNetLog(): - def __init__(self, name, log, loss_transform): - self.loss_transform = loss_transform - self.log = log - self.name = name - - def __call__(self, *args): - return self.log(*args) - - def get_loss_value(self, *args): - return self.loss_transform(self.__call__(*args)) diff --git a/autoPyTorch/pipeline/nodes/loss_module_selector.py b/autoPyTorch/pipeline/nodes/loss_module_selector.py deleted file mode 100644 index 92a887b4a..000000000 --- a/autoPyTorch/pipeline/nodes/loss_module_selector.py +++ /dev/null @@ -1,94 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import inspect -import torch - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from torch.nn.modules.loss import _Loss -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption - - -class LossModuleSelector(PipelineNode): - def __init__(self): - super(LossModuleSelector, self).__init__() - self.loss_modules = dict() - - def fit(self, hyperparameter_config, pipeline_config, X, Y, train_indices): - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - weights = None - loss_module = self.loss_modules[hyperparameter_config["loss_module"]] - if (loss_module.weight_strategy != None): - weights = loss_module.weight_strategy(pipeline_config, X[train_indices], Y[train_indices]) - weights = torch.from_numpy(weights).float() - - # pass weights to loss module - loss = loss_module.module - if "pos_weight" in inspect.getfullargspec(loss)[0] and weights is not None and inspect.isclass(loss): - loss = loss(pos_weight=weights) - elif "weight" in inspect.getfullargspec(loss)[0] and weights is not None and inspect.isclass(loss): - loss = loss(weight=weights) - elif inspect.isclass(loss): - loss = loss() - loss_module.set_loss_function(loss) - return {'loss_function': loss_module} - - def add_loss_module(self, name, loss_module, weight_strategy=None, requires_target_class_labels=False): - """Add a loss module, has to be a pytorch loss module type - - Arguments: - name {string} -- name of loss module for definition in config - loss_module {type} -- a pytorch loss module type - weight_strategy {function} -- callable that computes label weights - """ - - self.loss_modules[name] = AutoNetLossModule(loss_module, weight_strategy, requires_target_class_labels) - - def remove_loss_module(self, name): - del self.loss_modules[name] - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_loss_modules = set(pipeline_config["loss_modules"]).intersection(self.loss_modules.keys()) - cs.add_hyperparameter(CSH.CategoricalHyperparameter('loss_module', sorted(possible_loss_modules))) - self._check_search_space_updates(self.loss_modules.keys(), "*") - return cs - - - def get_pipeline_config_options(self): - loss_module_names = list(self.loss_modules.keys()) - options = [ - 
ConfigOption(name="loss_modules", default=loss_module_names, type=str, list=True, choices=loss_module_names), - ] - return options - -class AutoNetLossModule(): - def __init__(self, module, weight_strategy, requires_target_class_labels): - self.module = module - self.weight_strategy = weight_strategy - self.requires_target_class_labels = requires_target_class_labels - self.function = None - - def set_loss_function(self, function): - self.function = function - - def __call__(self, x, y): - if not self.requires_target_class_labels: - return self.function(x, y) - elif len(y.shape) == 1: - return self.function(x, y) - else: - return self.function(x, y.max(1)[1]) - - def to(self, device): - result = AutoNetLossModule(self.module, self.weight_strategy, self.requires_target_class_labels) - result.set_loss_function(self.function.to(device)) - return result diff --git a/autoPyTorch/pipeline/nodes/lr_scheduler_selector.py b/autoPyTorch/pipeline/nodes/lr_scheduler_selector.py deleted file mode 100644 index 8e1df37bb..000000000 --- a/autoPyTorch/pipeline/nodes/lr_scheduler_selector.py +++ /dev/null @@ -1,73 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.components.lr_scheduler.lr_schedulers import AutoNetLearningRateSchedulerBase - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.components.training.lr_scheduling import LrScheduling - -class LearningrateSchedulerSelector(PipelineNode): - def __init__(self): - super(LearningrateSchedulerSelector, self).__init__() - - self.lr_scheduler = dict() - self.lr_scheduler_settings = dict() - self.num_evals = 0 - - def fit(self, hyperparameter_config, pipeline_config, optimizer, training_techniques): - config = ConfigWrapper(self.get_name(), hyperparameter_config) - - lr_scheduler_type = self.lr_scheduler[config["lr_scheduler"]] - lr_scheduler_config = ConfigWrapper(config["lr_scheduler"], config) - lr_scheduler_settings = self.lr_scheduler_settings[config["lr_scheduler"]] - if lr_scheduler_type=="cosine_annealing" and pipeline_config["algorithm"]=="portfolio_bohb" and self.num_evals<=16: - config["cosine_annealing:T_max"] = 50 - #self.num_evals += 1 - lr_scheduling = LrScheduling(training_components={"lr_scheduler": lr_scheduler_type(optimizer, lr_scheduler_config)}, - **lr_scheduler_settings) - return {'training_techniques': [lr_scheduling] + training_techniques} - - def add_lr_scheduler(self, name, lr_scheduler_type, lr_step_after_batch=False, lr_step_with_time=False, allow_snapshot=True): - if (not issubclass(lr_scheduler_type, AutoNetLearningRateSchedulerBase)): - raise ValueError("learningrate scheduler type has to inherit from AutoNetLearningRateSchedulerBase") - self.lr_scheduler[name] = lr_scheduler_type - self.lr_scheduler_settings[name] = { - "lr_step_after_batch": lr_step_after_batch, - "lr_step_with_time": lr_step_with_time, - "allow_snapshot": allow_snapshot - } - - def remove_lr_scheduler(self, name): - del self.lr_scheduler[name] - del self.lr_scheduler_settings[name] - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_lr_scheduler = 
set(pipeline_config["lr_scheduler"]).intersection(self.lr_scheduler.keys()) - selector = cs.add_hyperparameter(CSH.CategoricalHyperparameter("lr_scheduler", sorted(possible_lr_scheduler))) - - for lr_scheduler_name, lr_scheduler_type in self.lr_scheduler.items(): - if (lr_scheduler_name not in possible_lr_scheduler): - continue - lr_scheduler_cs = lr_scheduler_type.get_config_space( - **self._get_search_space_updates(prefix=lr_scheduler_name)) - cs.add_configuration_space( prefix=lr_scheduler_name, configuration_space=lr_scheduler_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector, 'value': lr_scheduler_name}) - - self._check_search_space_updates((possible_lr_scheduler, "*")) - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="lr_scheduler", default=list(self.lr_scheduler.keys()), type=str, list=True, choices=list(self.lr_scheduler.keys())), - ] - return options diff --git a/autoPyTorch/pipeline/nodes/metric_selector.py b/autoPyTorch/pipeline/nodes/metric_selector.py deleted file mode 100644 index 4338a8d83..000000000 --- a/autoPyTorch/pipeline/nodes/metric_selector.py +++ /dev/null @@ -1,105 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption - -import torch -import numpy as np - -class MetricSelector(PipelineNode): - def __init__(self): - super(MetricSelector, self).__init__() - - self.metrics = dict() - self.default_optimize_metric = None - - def fit(self, pipeline_config): - optimize_metric = self.metrics[pipeline_config["optimize_metric"]] - additional_metrics = [self.metrics[metric] for metric in pipeline_config["additional_metrics"] if metric != pipeline_config["optimize_metric"]] - - return {'optimize_metric': optimize_metric, 'additional_metrics': additional_metrics} - - def predict(self, optimize_metric): - return { 'optimize_metric': optimize_metric } - - def add_metric(self, name, metric, loss_transform=False, - requires_target_class_labels=False, is_default_optimize_metric=False): - """Add a metric, this metric has to be a function that takes to arguments y_true and y_predict - - Arguments: - name {string} -- name of metric for definition in config - loss_transform {callable / boolean} -- transform metric value to minimizable loss. 
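As an illustrative aside: a minimal sketch of how a metric registered through add_metric behaves once wrapped, assuming sklearn's accuracy_score together with the MetricSelector and AutoNetMetric classes removed in this diff; the selector instance, argument values and example arrays below are purely illustrative. Note that default_minimize_transform, defined further down in this file, negates the metric value, so with loss_transform=True the reported loss is -metric rather than 1 - metric.

import numpy as np
from sklearn.metrics import accuracy_score

# Sketch only: MetricSelector / AutoNetMetric come from the removed metric_selector.py.
selector = MetricSelector()
selector.add_metric("accuracy", accuracy_score,
                    loss_transform=True,                 # wrap with default_minimize_transform
                    requires_target_class_labels=True,   # undo one-hot encoding before scoring
                    is_default_optimize_metric=True)

metric = selector.metrics["accuracy"]                    # an AutoNetMetric instance
Y_true = np.array([[1, 0], [0, 1], [0, 1]])              # one-hot targets
Y_pred = np.array([[0.9, 0.1], [0.2, 0.8], [0.7, 0.3]])
score = metric(Y_pred, Y_true)                           # accuracy on class labels -> 2/3
loss = metric.get_loss_value(Y_pred, Y_true)             # -score, the value BOHB minimizes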
If True: loss = 1 - metric_value - metric {function} -- metric function takes y_true and y_pred - is_default_optimize_metric {bool} -- should the given metric be the default train metric if not specified in config - """ - - if (not hasattr(metric, '__call__')): - raise ValueError("Metric has to be a function") - - ohe_transform = undo_ohe if requires_target_class_labels else no_transform - if isinstance(loss_transform, bool): - loss_transform = default_minimize_transform if loss_transform else no_transform - - self.metrics[name] = AutoNetMetric(name=name, - metric=metric, - loss_transform=loss_transform, - ohe_transform=ohe_transform) - - if (not self.default_optimize_metric or is_default_optimize_metric): - self.default_optimize_metric = name - - def remove_metric(self, name): - del self.metrics[name] - if (self.default_optimize_metric == name): - if (len(self.metrics) > 0): - self.default_optimize_metric = list(self.metrics.keys())[0] - else: - self.default_optimize_metric = None - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="optimize_metric", default=self.default_optimize_metric, type=str, choices=list(self.metrics.keys()), - info="This is the meta train metric BOHB will try to optimize."), - ConfigOption(name="additional_metrics", default=[], type=str, list=True, choices=list(self.metrics.keys())) - ] - return options - - -def default_minimize_transform(value): - return -1 * value - -def no_transform(value): - return value - -def ensure_numpy(y): - if type(y)==torch.Tensor: - return y.detach().cpu().numpy() - return y - -def undo_ohe(y): - if len(y.shape) == 1: - return(y) - return np.argmax(y, axis=1) - -class AutoNetMetric(): - def __init__(self, name, metric, loss_transform, ohe_transform): - self.loss_transform = loss_transform - self.metric = metric - self.ohe_transform = ohe_transform - self.name = name - - def __call__(self, Y_pred, Y_true): - - Y_pred = ensure_numpy(Y_pred) - Y_true = ensure_numpy(Y_true) - - if len(Y_pred.shape) != len(Y_true.shape): - Y_pred = undo_ohe(Y_pred) - Y_true = undo_ohe(Y_true) - return self.metric(self.ohe_transform(Y_true), self.ohe_transform(Y_pred)) - - def get_loss_value(self, Y_pred, Y_true): - return self.loss_transform(self.__call__(Y_pred, Y_true)) diff --git a/autoPyTorch/pipeline/nodes/network_selector.py b/autoPyTorch/pipeline/nodes/network_selector.py deleted file mode 100644 index 84460c845..000000000 --- a/autoPyTorch/pipeline/nodes/network_selector.py +++ /dev/null @@ -1,99 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.components.networks.base_net import BaseNet - -import torch -import torch.nn as nn -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption - -class NetworkSelector(PipelineNode): - def __init__(self): - super(NetworkSelector, self).__init__() - - self.networks = dict() - - self.final_activations = dict() - self.default_final_activation = None - - def fit(self, hyperparameter_config, pipeline_config, X, Y, embedding): - config = ConfigWrapper(self.get_name(), hyperparameter_config) - - network_type = self.networks[config["network"]] - network_config = ConfigWrapper(config["network"], config) - activation = self.final_activations[pipeline_config["final_activation"]] - - in_features = X.shape[1:] if not 
embedding else (embedding.num_out_feats, ) - if len(in_features) == 1: - # feature data - in_features = in_features[0] - - torch.manual_seed(pipeline_config["random_seed"]) - network = network_type( config=network_config, - in_features=in_features, out_features=Y.shape[1], - embedding=embedding, final_activation=activation) - return {'network': network} - - def predict(self, network): - return {'network': network} - - def add_network(self, name, network_type): - if (not issubclass(network_type, BaseNet)): - raise ValueError("network type has to inherit from BaseNet") - if (not hasattr(network_type, "get_config_space")): - raise ValueError("network type has to implement the function get_config_space") - - self.networks[name] = network_type - - def remove_network(self, name): - del self.networks[name] - - def add_final_activation(self, name, activation, is_default_final_activation=False): - """Add possible final activation layer. - One can be specified in config and will be used as a final network layer. - - Arguments: - name {string} -- name of final activation, can be used to specify in the config file - activation {nn.Module} -- final activation layer - - Keyword Arguments: - is_default_final_activation {bool} -- should the given activation be the default case (default: {False}) - """ - - self.final_activations[name] = activation - - if (not self.default_final_activation or is_default_final_activation): - self.default_final_activation = name - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_networks = set(pipeline_config["networks"]).intersection(self.networks.keys()) - selector = cs.add_hyperparameter(CSH.CategoricalHyperparameter("network", sorted(possible_networks))) - - network_list = list() - for network_name, network_type in self.networks.items(): - if (network_name not in possible_networks): - continue - network_list.append(network_name) - network_cs = network_type.get_config_space( - **self._get_search_space_updates(prefix=network_name)) - cs.add_configuration_space(prefix=network_name, configuration_space=network_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector, 'value': network_name}) - self._check_search_space_updates((possible_networks, "*")) - - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="networks", default=list(self.networks.keys()), type=str, list=True, choices=list(self.networks.keys())), - ConfigOption(name="final_activation", default=self.default_final_activation, type=str, choices=list(self.final_activations.keys())) - ] - return options diff --git a/autoPyTorch/pipeline/nodes/normalization_strategy_selector.py b/autoPyTorch/pipeline/nodes/normalization_strategy_selector.py deleted file mode 100644 index 472357d49..000000000 --- a/autoPyTorch/pipeline/nodes/normalization_strategy_selector.py +++ /dev/null @@ -1,81 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from sklearn.compose import ColumnTransformer -from sklearn.base 
import BaseEstimator, TransformerMixin -from scipy.sparse.csr import csr_matrix - -class NormalizationStrategySelector(PipelineNode): - def __init__(self): - super(NormalizationStrategySelector, self).__init__() - - self.normalization_strategies = {'none': None} - - def fit(self, hyperparameter_config, X, train_indices, dataset_info): - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - normalizer_name = hyperparameter_config['normalization_strategy'] - - if normalizer_name == 'none': - return {'normalizer': None} - - if isinstance(X, csr_matrix): - normalizer = self.normalization_strategies[normalizer_name](with_mean=False) - else: - normalizer = self.normalization_strategies[normalizer_name]() - - transformer = ColumnTransformer(transformers=[("normalize", normalizer, [i for i, c in enumerate(dataset_info.categorical_features) if not c])], - remainder='passthrough') - - transformer.fit(X[train_indices]) - - X = transformer.transform(X) - - dataset_info.categorical_features = sorted(dataset_info.categorical_features) - - return {'X': X, 'normalizer': transformer, 'dataset_info': dataset_info} - - def predict(self, X, normalizer): - if normalizer is None: - return {'X': X} - return {'X': normalizer.transform(X)} - - def add_normalization_strategy(self, name, normalization_type, is_default_normalization_strategy=False): - """Add a normalization strategy. - Will be called with {pipeline_config, X, Y} - - Arguments: - name {string} -- name of normalization strategy for definition in config - normalization_strategy {function} -- callable with {pipeline_config, X} - is_default_normalization_strategy {bool} -- should the given normalization_strategy be the default normalization_strategy if not specified in config - """ - - if (not issubclass(normalization_type, BaseEstimator) and not issubclass(normalization_type, TransformerMixin)): - raise ValueError("normalization_type must be subclass of BaseEstimator") - self.normalization_strategies[name] = normalization_type - - def remove_normalization_strategy(self, name): - del self.normalization_strategies[name] - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="normalization_strategies", default=list(self.normalization_strategies.keys()), type=str, list=True, choices=list(self.normalization_strategies.keys())), - ] - return options - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_normalization_strategies = set(pipeline_config["normalization_strategies"]).intersection(self.normalization_strategies.keys()) - cs.add_hyperparameter(CSH.CategoricalHyperparameter("normalization_strategy", sorted(possible_normalization_strategies))) - - self._check_search_space_updates() - return cs diff --git a/autoPyTorch/pipeline/nodes/one_hot_encoding.py b/autoPyTorch/pipeline/nodes/one_hot_encoding.py deleted file mode 100644 index 23d6737f2..000000000 --- a/autoPyTorch/pipeline/nodes/one_hot_encoding.py +++ /dev/null @@ -1,63 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from sklearn.preprocessing import OneHotEncoder -from sklearn.compose import ColumnTransformer -import numpy as np -import scipy.sparse - -class OneHotEncoding(PipelineNode): - 
def __init__(self): - super(OneHotEncoding, self).__init__() - self.encode_Y = False - - def fit(self, pipeline_config, X, Y, dataset_info): - categorical_features = dataset_info.categorical_features - ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore") - encoder = ColumnTransformer(transformers=[("ohe", ohe, [i for i, f in enumerate(categorical_features) if f])], remainder="passthrough") - encoder.categories_ = np.array([]) - encoder.categorical_features = categorical_features - - if any(categorical_features) and not dataset_info.is_sparse: - # encode X - X = encoder.fit_transform(X) - encoder.categories_ = encoder.transformers_[0][1].categories_ - - # Y to matrix - Y, y_encoder = self.complete_y_tranformation(Y) - - dataset_info.categorical_features = None - return {'X': X, 'one_hot_encoder': encoder, 'Y': Y, 'y_one_hot_encoder': y_encoder, 'dataset_info': dataset_info} - - def predict(self, pipeline_config, X, one_hot_encoder): - categorical_features = pipeline_config["categorical_features"] - if categorical_features and any(categorical_features) and not scipy.sparse.issparse(X): - X = one_hot_encoder.transform(X) - return {'X': X, 'one_hot_encoder': one_hot_encoder} - - def reverse_transform_y(self, Y, y_one_hot_encoder): - if y_one_hot_encoder is None: - return Y - return y_one_hot_encoder.categories_[0][np.argmax(Y, axis=1)].reshape(-1, 1) - - def transform_y(self, Y, y_one_hot_encoder): - if y_one_hot_encoder is None: - return Y - return y_one_hot_encoder.transform(Y.reshape(-1, 1)) - - def complete_y_tranformation(self, Y): - # Y to matrix - y_encoder = None - Y = Y.astype(np.float32) - if len(Y.shape) == 1: - Y = Y.reshape(-1, 1) - - # encode Y - if self.encode_Y: - y_encoder = OneHotEncoder(sparse=False, categories="auto", handle_unknown='ignore') - y_encoder.categories_ = np.array([]) - Y = y_encoder.fit_transform(Y) - return Y, y_encoder \ No newline at end of file diff --git a/autoPyTorch/pipeline/nodes/optimization_algorithm.py b/autoPyTorch/pipeline/nodes/optimization_algorithm.py deleted file mode 100644 index ea1c1a243..000000000 --- a/autoPyTorch/pipeline/nodes/optimization_algorithm.py +++ /dev/null @@ -1,452 +0,0 @@ - -import numpy as np -import os -import time -import shutil -import netifaces -import traceback -import logging - -from hpbandster.core.nameserver import NameServer, nic_name_to_host -from hpbandster.core.result import logged_results_to_HBS_result - -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes import MetricSelector -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_condition import ConfigCondition - -from autoPyTorch.core.hpbandster_extensions.bohb_ext import BOHBExt -from autoPyTorch.core.hpbandster_extensions.hyperband_ext import HyperBandExt -from autoPyTorch.core.hpbandster_extensions.portfolio_bohb_ext import PortfolioBOHBExt -from autoPyTorch.core.worker import AutoNetWorker - -from autoPyTorch.components.training.budget_types import BudgetTypeTime, BudgetTypeEpochs, BudgetTypeTrainingTime -import copy - -class OptimizationAlgorithm(SubPipelineNode): - def __init__(self, optimization_pipeline_nodes): - """OptimizationAlgorithm pipeline node. 
- It will run either the optimization algorithm (BOHB, Hyperband - defined in config) or start workers - Each worker will run the provided optimization_pipeline and will return the output - of the pipeline_result_node to the optimization algorithm - - Train: - The optimization_pipeline will get the following inputs: - {hyperparameter_config, pipeline_config, X_train, Y_train, X_valid, Y_valid, budget, budget_type} - The pipeline_result_node has to provide the following outputs: - - 'loss': the optimization value (minimize) - - 'info': dict containing info for the respective training process - - Predict: - The optimization_pipeline will get the following inputs: - {pipeline_config, X} - The pipeline_result_node has to provide the following outputs: - - 'Y': result of prediction for 'X' - Note: predict will not call the optimization algorithm - - Arguments: - optimization_pipeline {Pipeline} -- pipeline that will be optimized (hyperparamter) - pipeline_result_node {PipelineNode} -- pipeline node that provides the results of the optimization_pieline - """ - - super(OptimizationAlgorithm, self).__init__(optimization_pipeline_nodes) - - self.algorithms = {"bohb": BOHBExt, - "hyperband": HyperBandExt, - "portfolio_bohb": PortfolioBOHBExt} - - self.budget_types = dict() - self.budget_types["time"] = BudgetTypeTime - self.budget_types["epochs"] = BudgetTypeEpochs - self.budget_types["training_time"] = BudgetTypeTrainingTime - - def fit(self, pipeline_config, X_train, Y_train, X_valid, Y_valid, result_loggers, dataset_info, shutdownables, refit=None): - """Run the optimization algorithm. - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline. - X_train {array} -- The data - Y_train {array} -- The data - X_valid {array} -- The data - Y_valid {array} -- The data - result_loggers {list} -- List of loggers that log the result - dataset_info {DatasetInfo} -- Object with information about the dataset - shutdownables {list} -- List of objects that need to shutdown when optimization is finished. - - Keyword Arguments: - refit {dict} -- dict containing information for refitting. None if optimization run should be started. (default: {None}) - - Returns: - dict -- Summary of optimization run. 
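As an illustrative aside: a minimal sketch of a pipeline result node satisfying the contract described in the docstring above, namely that fit returns the loss to minimize plus an info dict and predict returns 'Y'; the class name DummyResultNode and the toy loss are hypothetical, the PipelineNode base class is the one from the removed modules, and real result nodes receive additional keyword arguments.

import numpy as np
from autoPyTorch.pipeline.base.pipeline_node import PipelineNode

# Hypothetical node: returns the minimizable loss and an info dict, as required.
class DummyResultNode(PipelineNode):
    def fit(self, hyperparameter_config, pipeline_config,
            X_train, Y_train, X_valid, Y_valid, budget, budget_type):
        loss = 1.0 / (1.0 + budget)           # toy objective: more budget, lower loss
        return {'loss': loss, 'info': {'budget': budget}}

    def predict(self, pipeline_config, X):
        return {'Y': np.zeros((len(X), 1))}   # dummy predictions for 'X'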
- """ - logger = logging.getLogger('autonet') - res = None - - run_id, task_id = pipeline_config['run_id'], pipeline_config['task_id'] - - # Use tensorboard logger - if pipeline_config['use_tensorboard_logger'] and not refit: - import tensorboard_logger as tl - directory = os.path.join(pipeline_config['result_logger_dir'], "worker_logs_" + str(task_id)) - os.makedirs(directory, exist_ok=True) - tl.configure(directory, flush_secs=5) - - # Only do refitting - if (refit is not None): - logger.info("Start Refitting") - - loss_info_dict = self.sub_pipeline.fit_pipeline( - hyperparameter_config=refit["hyperparameter_config"], pipeline_config=pipeline_config, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - budget=refit["budget"], rescore=refit["rescore"], budget_type=self.budget_types[pipeline_config['budget_type']], - optimize_start_time=time.time(), refit=True, hyperparameter_config_id=None, dataset_info=dataset_info) - logger.info("Done Refitting") - - return {'optimized_hyperparameter_config': refit["hyperparameter_config"], - 'budget': refit['budget'], - 'loss': loss_info_dict['loss'], - 'info': loss_info_dict['info']} - - # Start Optimization Algorithm - try: - ns_credentials_dir, tmp_models_dir, network_interface_name = self.prepare_environment(pipeline_config) - - # start nameserver if not on cluster or on master node in cluster - if task_id in [1, -1]: - NS = self.get_nameserver(run_id, task_id, ns_credentials_dir, network_interface_name) - ns_host, ns_port = NS.start() - - if task_id != 1 or pipeline_config["run_worker_on_master_node"]: - self.run_worker(pipeline_config=pipeline_config, run_id=run_id, task_id=task_id, ns_credentials_dir=ns_credentials_dir, - network_interface_name=network_interface_name, X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, - dataset_info=dataset_info, shutdownables=shutdownables) - - # start BOHB if not on cluster or on master node in cluster - res = None - if task_id in [1, -1]: - self.run_optimization_algorithm(pipeline_config=pipeline_config, run_id=run_id, ns_host=ns_host, - ns_port=ns_port, nameserver=NS, task_id=task_id, result_loggers=result_loggers, - dataset_info=dataset_info, logger=logger) - - - res = self.parse_results(pipeline_config) - - except Exception as e: - print(e) - traceback.print_exc() - finally: - self.clean_up(pipeline_config, ns_credentials_dir, tmp_models_dir) - - if res: - return res - return {'optimized_hyperparameter_config': dict(), 'budget': 0, 'loss': float('inf'), 'info': dict()} - - def predict(self, pipeline_config, X): - """Run the predict pipeline. - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline - X {array} -- The data - - Returns: - dict -- The predicted values in a dictionary - """ - result = self.sub_pipeline.predict_pipeline(pipeline_config=pipeline_config, X=X) - return {'Y': result['Y']} - - # OVERRIDE - def get_pipeline_config_options(self): - options = [ - ConfigOption("run_id", default="0", type=str, info="Unique id for each run."), - ConfigOption("task_id", default=-1, type=int, info="ID for each worker, if you run AutoNet on a cluster. 
Set to -1, if you run it locally."), - ConfigOption("algorithm", default="bohb", type=str, choices=list(self.algorithms.keys()), info="Algorithm to use for config sampling."), - ConfigOption("portfolio_type", default="greedy", type=str, choices=["greedy", "simple"]), - ConfigOption("budget_type", default="time", type=str, choices=list(self.budget_types.keys())), - ConfigOption("min_budget", default=lambda c: self.budget_types[c["budget_type"]].default_min_budget, type=float, depends=True, info="Min budget for fitting configurations."), - ConfigOption("max_budget", default=lambda c: self.budget_types[c["budget_type"]].default_max_budget, type=float, depends=True, info="Max budget for fitting configurations."), - ConfigOption("max_runtime", - default=lambda c: ((-int(np.log(c["min_budget"] / c["max_budget"]) / np.log(c["eta"])) + 1) * c["max_budget"]) - if c["budget_type"] == "time" else float("inf"), - type=float, depends=True, info="Total time for the run."), - ConfigOption("num_iterations", - default=lambda c: (-int(np.log(c["min_budget"] / c["max_budget"]) / np.log(c["eta"])) + 1) - if c["budget_type"] == "epochs" else float("inf"), - type=float, depends=True, info="Number of successive halving iterations."), - ConfigOption("eta", default=3, type=float, info='eta parameter of Hyperband.'), - ConfigOption("min_workers", default=1, type=int), - ConfigOption("working_dir", default=".", type="directory"), - ConfigOption("network_interface_name", default=self.get_default_network_interface_name(), type=str), - ConfigOption("memory_limit_mb", default=1000000, type=int), - ConfigOption("use_tensorboard_logger", default=False, type=to_bool), - ConfigOption("run_worker_on_master_node", default=True, type=to_bool), - ConfigOption("use_pynisher", default=True, type=to_bool) - ] - return options - - # OVERRIDE - def get_pipeline_config_conditions(self): - def check_runtime(pipeline_config): - return pipeline_config["budget_type"] != "time" or pipeline_config["max_runtime"] >= pipeline_config["max_budget"] - - return [ - ConfigCondition.get_larger_equals_condition("max budget must be greater than or equal to min budget", "max_budget", "min_budget"), - ConfigCondition("When time is used as budget, the max_runtime must be larger than the max_budget", check_runtime) - ] - - - def get_default_network_interface_name(self): - """Get the default network interface name - - Returns: - str -- The default network interface name - """ - try: - return netifaces.gateways()['default'][netifaces.AF_INET][1] - except: - return 'lo' - - def prepare_environment(self, pipeline_config): - """Create necessary folders and get network interface name - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline - - Returns: - tuple -- path to created directories and network interface namei - """ - if not os.path.exists(pipeline_config["working_dir"]) and pipeline_config['task_id'] in [1, -1]: - try: - os.mkdir(pipeline_config["working_dir"]) - except: - pass - tmp_models_dir = os.path.join(pipeline_config["working_dir"], "tmp_models_" + str(pipeline_config['run_id'])) - ns_credentials_dir = os.path.abspath(os.path.join(pipeline_config["working_dir"], "ns_credentials_" + str(pipeline_config['run_id']))) - network_interface_name = self.get_nic_name(pipeline_config) - - if os.path.exists(tmp_models_dir) and pipeline_config['task_id'] in [1, -1]: - shutil.rmtree(tmp_models_dir) # not used right now - if os.path.exists(ns_credentials_dir) and pipeline_config['task_id'] in [1, -1]: - shutil.rmtree(ns_credentials_dir) 
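As an illustrative aside: a worked example of the max_runtime and num_iterations defaults above, assuming eta=3 with the min_budget=120 and max_budget=6000 time-budget defaults of the removed no-time-limit node; the formula counts the successive halving iterations Hyperband runs for the given budget ratio, and the total runtime default allows max_budget seconds per iteration.

import numpy as np

# eta, min_budget, max_budget as in the time-budget defaults of the removed node.
eta, min_budget, max_budget = 3, 120, 6000
num_iterations = -int(np.log(min_budget / max_budget) / np.log(eta)) + 1
max_runtime = num_iterations * max_budget
print(num_iterations, max_runtime)   # 4 iterations, 24000 seconds total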
- return ns_credentials_dir, tmp_models_dir, network_interface_name - - def clean_up(self, pipeline_config, tmp_models_dir, ns_credentials_dir): - """Remove created folders - - Arguments: - pipeline_config {dict} -- The pipeline config - tmp_models_dir {[type]} -- The path to the temporary models (not used right now) - ns_credentials_dir {[type]} -- The path to the nameserver credentials - """ - if pipeline_config['task_id'] in [1, -1]: - # Delete temporary files - if os.path.exists(tmp_models_dir): - shutil.rmtree(tmp_models_dir) - if os.path.exists(ns_credentials_dir): - shutil.rmtree(ns_credentials_dir) - - def get_nameserver(self, run_id, task_id, ns_credentials_dir, network_interface_name): - """Get the namesever object - - Arguments: - run_id {str} -- The id of the run - task_id {int} -- An id for the worker - ns_credentials_dir {str} -- Path to ns credentials - network_interface_name {str} -- The network interface name - - Returns: - NameServer -- The NameServer object - """ - if not os.path.isdir(ns_credentials_dir): - try: - os.mkdir(ns_credentials_dir) - except: - pass - return NameServer(run_id=run_id, nic_name=network_interface_name, working_directory=ns_credentials_dir) - - def get_optimization_algorithm_instance(self, config_space, run_id, pipeline_config, ns_host, ns_port, loggers, previous_result=None): - """Get an instance of the optimization algorithm - - Arguments: - config_space {ConfigurationSpace} -- The config space to optimize. - run_id {str} -- An Id for the current run. - pipeline_config {dict} -- The configuration of the pipeline. - ns_host {str} -- Nameserver host. - ns_port {int} -- Nameserver port. - loggers {list} -- Loggers to log the results. - - Keyword Arguments: - previous_result {Result} -- A previous result to warmstart the search (default: {None}) - - Returns: - Master -- An optimization algorithm. - """ - optimization_algorithm = self.algorithms[pipeline_config["algorithm"]] - kwargs = {"configspace": config_space, "run_id": run_id, - "eta": pipeline_config["eta"], "min_budget": pipeline_config["min_budget"], "max_budget": pipeline_config["max_budget"], - "host": ns_host, "nameserver": ns_host, "nameserver_port": ns_port, - "result_logger": combined_logger(*loggers), - "ping_interval": 10**6, - "working_directory": pipeline_config["working_dir"], - "previous_result": previous_result} - - if "portfolio" in pipeline_config["algorithm"]: - kwargs["portfolio_type"] = pipeline_config["portfolio_type"] - - hb = optimization_algorithm(**kwargs) - return hb - - - def parse_results(self, pipeline_config): - """Parse the results of the optimization run - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline. - - Raises: - RuntimeError: An Error occurred when parsing the results. - - Returns: - dict -- Dictionary summarizing the results - """ - try: - res = logged_results_to_HBS_result(pipeline_config["result_logger_dir"]) - id2config = res.get_id2config_mapping() - incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False, non_decreasing_budget=False) - except Exception as e: - raise RuntimeError("Error parsing results. Check results.json and output for more details. 
An empty results.json is usually caused by a misconfiguration of AutoNet.") - - if (len(incumbent_trajectory['config_ids']) == 0): - return dict() - - final_config_id = incumbent_trajectory['config_ids'][-1] - final_budget = incumbent_trajectory['budgets'][-1] - best_run = [r for r in res.get_runs_by_id(final_config_id) if r.budget == final_budget][0] - return {'optimized_hyperparameter_config': id2config[final_config_id]['config'], - 'budget': final_budget, - 'loss': best_run.loss, - 'info': best_run.info} - - - def run_worker(self, pipeline_config, run_id, task_id, ns_credentials_dir, network_interface_name, - X_train, Y_train, X_valid, Y_valid, dataset_info, shutdownables): - """ Run the AutoNetWorker - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline - run_id {str} -- An id for the run - task_id {int} -- An id for the worker - ns_credentials_dir {str} -- path to nameserver credentials - network_interface_name {str} -- the name of the network interface - X_train {array} -- The data - Y_train {array} -- The data - X_valid {array} -- The data - Y_valid {array} -- The data - dataset_info {DatasetInfo} -- Object describing the dataset - shutdownables {list} -- A list of objects that need to shutdown when the optimization is finished - """ - if not task_id == -1: - time.sleep(5) - while not os.path.isdir(ns_credentials_dir): - time.sleep(5) - host = nic_name_to_host(network_interface_name) - - worker = AutoNetWorker(pipeline=self.sub_pipeline, pipeline_config=pipeline_config, - X_train=X_train, Y_train=Y_train, X_valid=X_valid, Y_valid=Y_valid, dataset_info=dataset_info, - budget_type=self.budget_types[pipeline_config['budget_type']], - max_budget=pipeline_config["max_budget"], - host=host, run_id=run_id, - id=task_id, shutdownables=shutdownables, - use_pynisher=pipeline_config["use_pynisher"]) - worker.load_nameserver_credentials(ns_credentials_dir) - # run in background if not on cluster - worker.run(background=(task_id <= 1)) - - - def run_optimization_algorithm(self, pipeline_config, run_id, ns_host, ns_port, nameserver, task_id, result_loggers, - dataset_info, logger): - """ - - Arguments: - pipeline_config {dict} -- The configuration of the pipeline - run_id {str} -- An id for the run - ns_host {str} -- Nameserver host. - ns_port {int} -- Nameserver port. - nameserver {[type]} -- The nameserver. - task_id {int} -- An id for the worker - result_loggers {[type]} -- [description] - dataset_info {DatasetInfo} -- Object describing the dataset - logger {list} -- Loggers to log the results. 
- """ - config_space = self.pipeline.get_hyperparameter_search_space(dataset_info=dataset_info, **pipeline_config) - - - logger.info("[AutoNet] Start " + pipeline_config["algorithm"]) - - # initialize optimization algorithm - if pipeline_config['use_tensorboard_logger']: - result_loggers.append(tensorboard_logger()) - - HB = self.get_optimization_algorithm_instance(config_space=config_space, run_id=run_id, - pipeline_config=pipeline_config, ns_host=ns_host, ns_port=ns_port, loggers=result_loggers) - - # start algorithm - min_num_workers = pipeline_config["min_workers"] if task_id != -1 else 1 - - reduce_runtime = pipeline_config["max_budget"] if pipeline_config["budget_type"] == "time" else 0 - HB.run_until(runtime=(pipeline_config["max_runtime"] - reduce_runtime), - n_iterations=pipeline_config["num_iterations"], - min_n_workers=min_num_workers) - - HB.shutdown(shutdown_workers=True) - nameserver.shutdown() - - @staticmethod - def get_nic_name(pipeline_config): - """Get the nic name from the pipeline config""" - return pipeline_config["network_interface_name"] or (netifaces.interfaces()[1] if len(netifaces.interfaces()) > 1 else "lo") - - - def clean_fit_data(self): - super(OptimizationAlgorithm, self).clean_fit_data() - self.sub_pipeline.root.clean_fit_data() - - -class tensorboard_logger(object): - def __init__(self): - self.start_time = time.time() - self.incumbent = float('inf') - - def new_config(self, config_id, config, config_info): - pass - - def __call__(self, job): - import tensorboard_logger as tl - # id = job.id - budget = job.kwargs['budget'] - # config = job.kwargs['config'] - timestamps = job.timestamps - result = job.result - exception = job.exception - - time_step = int(timestamps['finished'] - self.start_time) - - if result is not None: - tl.log_value('BOHB/all_results', result['loss'] * -1, time_step) - if result['loss'] < self.incumbent: - self.incumbent = result['loss'] - tl.log_value('BOHB/incumbent_results', self.incumbent * -1, time_step) - - -class combined_logger(object): - def __init__(self, *loggers): - self.loggers = loggers - - def new_config(self, config_id, config, config_info): - for logger in self.loggers: - logger.new_config(config_id, config, config_info) - - def __call__(self, job): - for logger in self.loggers: - logger(job) - diff --git a/autoPyTorch/pipeline/nodes/optimizer_selector.py b/autoPyTorch/pipeline/nodes/optimizer_selector.py deleted file mode 100644 index 507a8d9ad..000000000 --- a/autoPyTorch/pipeline/nodes/optimizer_selector.py +++ /dev/null @@ -1,60 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.components.optimizer.optimizer import AutoNetOptimizerBase - -import torch.nn as nn -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption - -class OptimizerSelector(PipelineNode): - def __init__(self): - super(OptimizerSelector, self).__init__() - - self.optimizer = dict() - - def fit(self, hyperparameter_config, network): - config = ConfigWrapper(self.get_name(), hyperparameter_config) - - optimizer_type = self.optimizer[config["optimizer"]] - optimizer_config = ConfigWrapper(config["optimizer"], config) - - return {'optimizer': optimizer_type(network.parameters(), optimizer_config)} - - def add_optimizer(self, name, optimizer_type): - if (not 
issubclass(optimizer_type, AutoNetOptimizerBase)): - raise ValueError("optimizer type has to inherit from AutoNetOptimizerBase") - self.optimizer[name] = optimizer_type - - def remove_optimizer(self, name): - del self.optimizer[name] - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_optimizer = set(pipeline_config["optimizer"]).intersection(self.optimizer.keys()) - selector = cs.add_hyperparameter(CSH.CategoricalHyperparameter("optimizer", sorted(possible_optimizer))) - - for optimizer_name, optimizer_type in self.optimizer.items(): - if (optimizer_name not in possible_optimizer): - continue - optimizer_cs = optimizer_type.get_config_space( - **self._get_search_space_updates(prefix=optimizer_name)) - cs.add_configuration_space( prefix=optimizer_name, configuration_space=optimizer_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector, 'value': optimizer_name}) - - self._check_search_space_updates(possible_optimizer, "*") - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="optimizer", default=list(self.optimizer.keys()), type=str, list=True, choices=list(self.optimizer.keys())), - ] - return options diff --git a/autoPyTorch/pipeline/nodes/preprocessor_selector.py b/autoPyTorch/pipeline/nodes/preprocessor_selector.py deleted file mode 100644 index 8338ab2dc..000000000 --- a/autoPyTorch/pipeline/nodes/preprocessor_selector.py +++ /dev/null @@ -1,73 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.components.preprocessing.preprocessor_base import PreprocessorBase - -class PreprocessorSelector(PipelineNode): - def __init__(self): - super(PreprocessorSelector, self).__init__() - self.preprocessors = dict() - self.add_preprocessor('none', PreprocessorBase) - - def fit(self, hyperparameter_config, pipeline_config, X, Y, train_indices, one_hot_encoder): - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - - preprocessor_name = hyperparameter_config['preprocessor'] - preprocessor_type = self.preprocessors[preprocessor_name] - preprocessor_config = ConfigWrapper(preprocessor_name, hyperparameter_config) - preprocessor = preprocessor_type(preprocessor_config) - preprocessor.fit(X[train_indices], Y[train_indices]) - - if preprocessor_name != 'none': - one_hot_encoder = None - - X = preprocessor.transform(X) - - return {'X': X, 'preprocessor': preprocessor, 'one_hot_encoder': one_hot_encoder} - - def predict(self, preprocessor, X): - return { 'X': preprocessor.transform(X) } - - def add_preprocessor(self, name, preprocessor_type): - if (not issubclass(preprocessor_type, PreprocessorBase)): - raise ValueError("preprocessor type has to inherit from PreprocessorBase") - if (not hasattr(preprocessor_type, "get_hyperparameter_search_space")): - raise ValueError("preprocessor type has to implement the function get_hyperparameter_search_space") - - self.preprocessors[name] = preprocessor_type - - def remove_preprocessor(self, name): 
- del self.preprocessors[name] - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_preprocessors = set(pipeline_config["preprocessors"]).intersection(self.preprocessors.keys()) - selector = cs.add_hyperparameter(CSH.CategoricalHyperparameter("preprocessor", sorted(possible_preprocessors))) - - for preprocessor_name, preprocessor_type in self.preprocessors.items(): - if (preprocessor_name not in possible_preprocessors): - continue - preprocessor_cs = preprocessor_type.get_hyperparameter_search_space(dataset_info=dataset_info, - **self._get_search_space_updates(prefix=preprocessor_name)) - cs.add_configuration_space( prefix=preprocessor_name, configuration_space=preprocessor_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector, 'value': preprocessor_name}) - - self._check_search_space_updates((possible_preprocessors, "*")) - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="preprocessors", default=list(self.preprocessors.keys()), type=str, list=True, choices=list(self.preprocessors.keys())), - ] - return options diff --git a/autoPyTorch/pipeline/nodes/resampling_strategy_selector.py b/autoPyTorch/pipeline/nodes/resampling_strategy_selector.py deleted file mode 100644 index fd2181933..000000000 --- a/autoPyTorch/pipeline/nodes/resampling_strategy_selector.py +++ /dev/null @@ -1,153 +0,0 @@ -import numpy as np - - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.components.preprocessing.resampling_base import ResamplingMethodNone, ResamplingMethodBase, TargetSizeStrategyBase -from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation -from sklearn.preprocessing import OneHotEncoder -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -import logging - -class ResamplingStrategySelector(PipelineNode): - def __init__(self): - super(ResamplingStrategySelector, self).__init__() - - self.over_sampling_methods = dict() - self.add_over_sampling_method('none', ResamplingMethodNone) - - self.under_sampling_methods = dict() - self.add_under_sampling_method('none', ResamplingMethodNone) - - self.target_size_strategies = {'none': None} - - def fit(self, pipeline_config, hyperparameter_config, X, Y, train_indices, valid_indices): - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - logger = logging.getLogger('autonet') - - if hyperparameter_config['target_size_strategy'] == 'none': - return dict() - - over_sampling_method = self.over_sampling_methods[hyperparameter_config['over_sampling_method']]( - ConfigWrapper(hyperparameter_config['over_sampling_method'], hyperparameter_config) - ) - under_sampling_method = self.under_sampling_methods[hyperparameter_config['under_sampling_method']]( - ConfigWrapper(hyperparameter_config['under_sampling_method'], hyperparameter_config) - ) - target_size_strategy = self.target_size_strategies[hyperparameter_config['target_size_strategy']]() - - y = np.argmax(Y[train_indices], axis=1).astype(int) - ohe = OneHotEncoder(categories="auto", sparse=False) - ohe.fit(y.reshape((-1, 1))) - - over_sampling_target_size = 
target_size_strategy.over_sample_strategy(y) - under_sampling_target_size = target_size_strategy.under_sample_strategy(y) - - logger.debug("Distribution before resample: " + str(np.unique(y, return_counts=True)[1])) - X_resampled, y_resampled = over_sampling_method.resample(X[train_indices], y, over_sampling_target_size, pipeline_config["random_seed"]) - X_resampled, y_resampled = under_sampling_method.resample(X_resampled, y_resampled, under_sampling_target_size, pipeline_config["random_seed"]) - logger.debug("Distribution after resample: " + str(np.unique(y_resampled, return_counts=True)[1])) - - if valid_indices is None: - return {"X": X_resampled, "Y": ohe.transform(y_resampled.reshape((-1, 1))), "train_indices": np.array(list(range(X_resampled.shape[0])))} - - X, Y, split_indices = CrossValidation.get_validation_set_split_indices(pipeline_config, - X_train=X_resampled, X_valid=X[valid_indices], - Y_train=ohe.transform(y_resampled.reshape((-1, 1))), Y_valid=Y[valid_indices], allow_shuffle=False) - return {"X": X, "Y": Y, "train_indices": split_indices[0], "valid_indices": split_indices[1]} - - def add_over_sampling_method(self, name, resampling_method): - """Add a resampling strategy. - Will be called with {X_train, Y_train} - - Arguments: - name {string} -- name of resampling strategy for definition in config - resampling_strategy {function} -- callable with {pipeline_config, X_train, Y_train} - """ - - if (not issubclass(resampling_method, ResamplingMethodBase)): - raise ValueError("Resampling method must be subclass of ResamplingMethodBase") - - self.over_sampling_methods[name] = resampling_method - - def add_under_sampling_method(self, name, resampling_method): - """Add a resampling strategy. - Will be called with {X_train, Y_train} - - Arguments: - name {string} -- name of resampling strategy for definition in config - resampling_strategy {function} -- callable with {pipeline_config, X_train, Y_train} - """ - - if (not issubclass(resampling_method, ResamplingMethodBase)): - raise ValueError("Resampling method must be subclass of ResamplingMethodBase") - - self.under_sampling_methods[name] = resampling_method - - def add_target_size_strategy(self, name, target_size_strategy): - """Add a resampling strategy. 
- Will be called with {X_train, Y_train} - - Arguments: - name {string} -- name of resampling strategy for definition in config - resampling_strategy {function} -- callable with {pipeline_config, X_train, Y_train} - """ - - if (not issubclass(target_size_strategy, TargetSizeStrategyBase)): - raise ValueError("Resampling method must be subclass of TargetSizeStrategyBase") - - self.target_size_strategies[name] = target_size_strategy - - def remove_over_sampling_method(self, name): - del self.over_sampling_methods[name] - - def remove_under_sampling_method(self, name): - del self.under_sampling_methods[name] - - def remove_target_size_strategy(self, name): - del self.target_size_strategies[name] - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="over_sampling_methods", default=list(self.over_sampling_methods.keys()), type=str, list=True, choices=list(self.over_sampling_methods.keys())), - ConfigOption(name="under_sampling_methods", default=list(self.under_sampling_methods.keys()), type=str, list=True, choices=list(self.under_sampling_methods.keys())), - ConfigOption(name="target_size_strategies", default=list(self.target_size_strategies.keys()), type=str, list=True, choices=list(self.target_size_strategies.keys())), - ] - return options - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_over_sampling_methods = set(pipeline_config["over_sampling_methods"]).intersection(self.over_sampling_methods.keys()) - possible_under_sampling_methods = set(pipeline_config["under_sampling_methods"]).intersection(self.under_sampling_methods.keys()) - possible_target_size_strategies = set(pipeline_config["target_size_strategies"]).intersection(self.target_size_strategies.keys()) - selector_over_sampling = cs.add_hyperparameter(CSH.CategoricalHyperparameter("over_sampling_method", sorted(possible_over_sampling_methods))) - selector_under_sampling = cs.add_hyperparameter(CSH.CategoricalHyperparameter("under_sampling_method", sorted(possible_under_sampling_methods))) - cs.add_hyperparameter(CSH.CategoricalHyperparameter("target_size_strategy", sorted(possible_target_size_strategies))) - - for method_name, method_type in self.over_sampling_methods.items(): - if method_name not in possible_over_sampling_methods: - continue - method_cs = method_type.get_hyperparameter_search_space( - **self._get_search_space_updates(prefix=method_name)) - cs.add_configuration_space( prefix=method_name, configuration_space=method_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector_over_sampling, 'value': method_name}) - - for method_name, method_type in self.under_sampling_methods.items(): - if method_name not in possible_under_sampling_methods: - continue - method_cs = method_type.get_hyperparameter_search_space( - **self._get_search_space_updates(prefix=method_name)) - cs.add_configuration_space( prefix=method_name, configuration_space=method_cs, delimiter=ConfigWrapper.delimiter, - parent_hyperparameter={'parent': selector_under_sampling, 'value': method_name}) - - self._check_search_space_updates((possible_over_sampling_methods, "*"), - (possible_under_sampling_methods, "*")) - return cs diff --git a/autoPyTorch/pipeline/nodes/train_node.py b/autoPyTorch/pipeline/nodes/train_node.py deleted file mode 100644 index 95e23f4db..000000000 --- a/autoPyTorch/pipeline/nodes/train_node.py +++ /dev/null @@ -1,302 +0,0 @@ 
-__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os as os -import torch -import time -import logging - -import scipy.sparse -import numpy as np -import torch.nn as nn -from torch.autograd import Variable -from torch.utils.data import DataLoader, TensorDataset - -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -import ConfigSpace -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.components.training.base_training import BaseTrainingTechnique, BaseBatchLossComputationTechnique -from autoPyTorch.components.training.trainer import Trainer - -import signal - -class TrainNode(PipelineNode): - """Training pipeline node. In this node, the network will be trained.""" - - def __init__(self): - """Construct the node""" - super(TrainNode, self).__init__() - self.default_minimize_value = True - self.training_techniques = dict() - self.batch_loss_computation_techniques = dict() - self.add_batch_loss_computation_technique("standard", BaseBatchLossComputationTechnique) - - def fit(self, hyperparameter_config, pipeline_config, - train_loader, valid_loader, - network, optimizer, - optimize_metric, additional_metrics, - log_functions, - budget, - loss_function, - training_techniques, - fit_start_time, - refit, - hyperparameter_config_id): - """Train the network. - - Arguments: - hyperparameter_config {dict} -- The sampled hyperparameter config. - pipeline_config {dict} -- The user specified configuration of the pipeline - train_loader {DataLoader} -- Data for training. - valid_loader {DataLoader} -- Data for validation. - network {BaseNet} -- The neural network to be trained. - optimizer {AutoNetOptimizerBase} -- The selected optimizer. - optimize_metric {AutoNetMetric} -- The selected metric to optimize - additional_metrics {list} -- List of metrics, that should be logged - log_functions {list} -- List of AutoNetLofFunctions that can log additional stuff like test performance - budget {float} -- The budget for training - loss_function {_Loss} -- The selected PyTorch loss module - training_techniques {list} -- List of objects inheriting from BaseTrainingTechnique. - fit_start_time {float} -- Start time of fit - refit {bool} -- Whether training for refit or not. - - Returns: - dict -- loss and info reported to bohb - """ - self.hyperparameter_config_id = hyperparameter_config_id - self.pipeline_config = pipeline_config - self.budget = budget - hyperparameter_config = ConfigWrapper(self.get_name(), hyperparameter_config) - logger = logging.getLogger('autonet') - logger.debug("Start train. 
Budget: " + str(budget)) - - if pipeline_config["torch_num_threads"] > 0: - torch.set_num_threads(pipeline_config["torch_num_threads"]) - - trainer = Trainer( - model=network, - loss_computation=self.batch_loss_computation_techniques[hyperparameter_config["batch_loss_computation_technique"]](), - metrics=[optimize_metric] + additional_metrics, - log_functions=log_functions, - criterion=loss_function, - budget=budget, - optimizer=optimizer, - training_techniques=training_techniques, - device=Trainer.get_device(pipeline_config), - logger=logger, - full_eval_each_epoch=pipeline_config["full_eval_each_epoch"]) - trainer.prepare(pipeline_config, hyperparameter_config, fit_start_time) - - model_params = self.count_parameters(network) - - logs = trainer.model.logs - epoch = trainer.model.epochs_trained - training_start_time = time.time() - while True: - # prepare epoch - log = dict() - trainer.on_epoch_start(log=log, epoch=epoch) - - # training - optimize_metric_results, train_loss, stop_training = trainer.train(epoch + 1, train_loader) - if valid_loader is not None and trainer.eval_valid_each_epoch: - valid_metric_results = trainer.evaluate(valid_loader) - - # evaluate - log['loss'] = train_loss - log['model_parameters'] = model_params - for i, metric in enumerate(trainer.metrics): - log['train_' + metric.name] = optimize_metric_results[i] - - if valid_loader is not None and trainer.eval_valid_each_epoch: - log['val_' + metric.name] = valid_metric_results[i] - if trainer.eval_additional_logs_each_epoch: - for additional_log in trainer.log_functions: - log[additional_log.name] = additional_log(trainer.model, epoch) - - # wrap up epoch - stop_training = trainer.on_epoch_end(log=log, epoch=epoch) or stop_training - - # handle logs - logs.append(log) - log = {key: value for key, value in log.items() if not isinstance(value, np.ndarray)} - logger.debug("Epoch: " + str(epoch) + " : " + str(log)) - if 'use_tensorboard_logger' in pipeline_config and pipeline_config['use_tensorboard_logger']: - self.tensorboard_log(budget=budget, epoch=epoch, log=log, logdir=pipeline_config["result_logger_dir"]) - - if stop_training: - break - - epoch += 1 - torch.cuda.empty_cache() - - # wrap up - loss, final_log = self.wrap_up_training(trainer=trainer, logs=logs, epoch=epoch, - train_loader=train_loader, valid_loader=valid_loader, budget=budget, training_start_time=training_start_time, fit_start_time=fit_start_time, - best_over_epochs=pipeline_config['best_over_epochs'], refit=refit, logger=logger) - - return {'loss': loss, 'info': final_log} - - - def predict(self, pipeline_config, network, predict_loader): - """Predict using trained neural network - - Arguments: - pipeline_config {dict} -- The user specified configuration of the pipeline - network {BaseNet} -- The trained neural network. - predict_loader {DataLoader} -- The data to predict the labels for. - - Returns: - dict -- The predicted labels in a dict. 
- """ - if pipeline_config["predict_model"] is not None: - network=pipeline_config["predict_model"] - - if pipeline_config["torch_num_threads"] > 0: - torch.set_num_threads(pipeline_config["torch_num_threads"]) - - device = Trainer.get_device(pipeline_config) - - Y = predict(network, predict_loader, device) - return {'Y': Y.detach().cpu().numpy()} - - def add_training_technique(self, name, training_technique): - if (not issubclass(training_technique, BaseTrainingTechnique)): - raise ValueError("training_technique type has to inherit from BaseTrainingTechnique") - self.training_techniques[name] = training_technique - - def remove_training_technique(self, name): - del self.training_techniques[name] - - def add_batch_loss_computation_technique(self, name, batch_loss_computation_technique): - if (not issubclass(batch_loss_computation_technique, BaseBatchLossComputationTechnique)): - raise ValueError("batch_loss_computation_technique type has to inherit from BaseBatchLossComputationTechnique, got " + str(batch_loss_computation_technique)) - self.batch_loss_computation_techniques[name] = batch_loss_computation_technique - - def remove_batch_loss_computation_technique(self, name, batch_loss_computation_technique): - del self.batch_loss_computation_techniques[name] - - def get_hyperparameter_search_space(self, dataset_info=None, **pipeline_config): - pipeline_config = self.pipeline.get_pipeline_config(**pipeline_config) - cs = ConfigSpace.ConfigurationSpace() - - possible_techniques = set(pipeline_config['batch_loss_computation_techniques']).intersection(self.batch_loss_computation_techniques.keys()) - hp_batch_loss_computation = CSH.CategoricalHyperparameter("batch_loss_computation_technique", sorted(possible_techniques)) - cs.add_hyperparameter(hp_batch_loss_computation) - - for name, technique in self.batch_loss_computation_techniques.items(): - if name not in possible_techniques: - continue - technique = self.batch_loss_computation_techniques[name] - - technique_cs = technique.get_hyperparameter_search_space( - **self._get_search_space_updates(prefix=("batch_loss_computation_technique", name))) - cs.add_configuration_space(prefix=name, configuration_space=technique_cs, - delimiter=ConfigWrapper.delimiter, parent_hyperparameter={'parent': hp_batch_loss_computation, 'value': name}) - - self._check_search_space_updates((possible_techniques, "*")) - return cs - - def get_pipeline_config_options(self): - options = [ - ConfigOption(name="batch_loss_computation_techniques", default=list(self.batch_loss_computation_techniques.keys()), - type=str, list=True, choices=list(self.batch_loss_computation_techniques.keys())), - ConfigOption("cuda", default=True, type=to_bool, choices=[True, False]), - ConfigOption("torch_num_threads", default=1, type=int), - ConfigOption("full_eval_each_epoch", default=False, type=to_bool, choices=[True, False], - info="Whether to evaluate everything every epoch. 
Results in more useful output"), - ConfigOption("best_over_epochs", default=False, type=to_bool, choices=[True, False], - info="Whether to report the best performance occurred to BOHB"), - ConfigOption("save_models", default=False, type=to_bool, choices=[True, False]), - ConfigOption("predict_model", default=None, info="Model to use for predicting"), - ] - for name, technique in self.training_techniques.items(): - options += technique.get_pipeline_config_options() - return options - - def tensorboard_log(self, budget, epoch, log, logdir): - import tensorboard_logger as tl - worker_path = 'Train/' - try: - tl.log_value(worker_path + 'budget', float(budget), int(time.time())) - except: - tl.configure(logdir) - tl.log_value(worker_path + 'budget', float(budget), int(time.time())) - tl.log_value(worker_path + 'epoch', float(epoch + 1), int(time.time())) - for name, value in log.items(): - tl.log_value(worker_path + name, float(value), int(time.time())) - - @staticmethod - def count_parameters(model): - return sum(p.numel() for p in model.parameters() if p.requires_grad) - - def wrap_up_training(self, trainer, logs, epoch, train_loader, valid_loader, budget, - training_start_time, fit_start_time, best_over_epochs, refit, logger): - """Wrap up and evaluate the training by computing missing log values - - Arguments: - trainer {Trainer} -- The trainer used for training. - logs {dict} -- The logs of the training - epoch {int} -- Number of Epochs trained - train_loader {DataLoader} -- The data for training - valid_loader {DataLoader} -- The data for validation - budget {float} -- Budget of training - training_start_time {float} -- Start time of training - fit_start_time {float} -- Start time of fit - best_over_epochs {bool} -- Whether best validation data over epochs should be used - refit {bool} -- Whether training was for refitting - logger {Logger} -- Logger for logging stuff to the console - - Returns: - tuple -- loss and selected final loss - """ - wrap_up_start_time = time.time() - trainer.model.epochs_trained = epoch - trainer.model.logs = logs - optimize_metric = trainer.metrics[0] - opt_metric_name = 'train_' + optimize_metric.name - if valid_loader is not None: - opt_metric_name = 'val_' + optimize_metric.name - - final_log = trainer.final_eval(opt_metric_name=opt_metric_name, - logs=logs, train_loader=train_loader, valid_loader=valid_loader, best_over_epochs=best_over_epochs, refit=refit) - loss = trainer.metrics[0].loss_transform(final_log[opt_metric_name]) - - # Save for ensembles - if self.pipeline_config["save_models"]: - identifier = self.hyperparameter_config_id + (self.budget,) - save_dir = os.path.join(self.pipeline_config["result_logger_dir"], "models", str(identifier) + ".torch") - os.makedirs(os.path.dirname(save_dir), exist_ok=True) - torch.save(trainer.model, save_dir) - - logger.info("Finished train with budget " + str(budget) + - ": Preprocessing took " + str(int(training_start_time - fit_start_time)) + - "s, Training took " + str(int(wrap_up_start_time - training_start_time)) + - "s, Wrap up took " + str(int(time.time() - wrap_up_start_time)) + - "s. 
Total time consumption in s: " + str(int(time.time() - fit_start_time))) - return loss, final_log - - -def predict(network, test_loader, device, move_network=True): - """ predict batchwise """ - # Build DataLoader - if move_network: - network = network.to(device) - - # Batch prediction - network.eval() - Y_batch_preds = list() - - for i, (X_batch, Y_batch) in enumerate(test_loader): - # Predict on batch - X_batch = Variable(X_batch).to(device) - batch_size = X_batch.size(0) - - Y_batch_pred = network(X_batch).detach().cpu() - Y_batch_preds.append(Y_batch_pred) - - return torch.cat(Y_batch_preds, 0) diff --git a/autoPyTorch/pipeline/tabular_classification.py b/autoPyTorch/pipeline/tabular_classification.py new file mode 100644 index 000000000..fa8947f62 --- /dev/null +++ b/autoPyTorch/pipeline/tabular_classification.py @@ -0,0 +1,256 @@ +import warnings +from typing import Any, Dict, List, Optional, Tuple + +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace + +import numpy as np + +import sklearn.preprocessing +from sklearn.base import ClassifierMixin + +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import ( + TabularColumnTransformer +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import ( + EncoderChoice +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice +from autoPyTorch.pipeline.components.setup.early_preprocessor.EarlyPreprocessing import EarlyPreprocessing +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice import SchedulerChoice +from autoPyTorch.pipeline.components.setup.network.base_network_choice import NetworkChoice +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import ( + NetworkInitializerChoice +) +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice import OptimizerChoice +from autoPyTorch.pipeline.components.training.data_loader.feature_data_loader import FeatureDataLoader +from autoPyTorch.pipeline.components.training.trainer.base_trainer_choice import ( + TrainerChoice +) + + +class TabularClassificationPipeline(ClassifierMixin, BasePipeline): + """This class is a proof of concept to integrate AutoSklearn Components + + It implements a pipeline, which includes as steps: + + ->One preprocessing step + ->One neural network + + Contrary to the sklearn API it is not possible to enumerate the + possible parameters in the __init__ function because we only know the + available classifiers at runtime. For this reason the user must + specifiy the parameters by passing an instance of + ConfigSpace.configuration_space.Configuration. + + + Args: + config (Configuration) + The configuration to evaluate. 
+ random_state (Optional[RandomState): random_state is the random number generator + + Attributes: + Examples + """ + + def __init__( + self, + config: Optional[Configuration] = None, + steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, + dataset_properties: Optional[Dict[str, Any]] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict[str, Any]] = None + ): + super().__init__( + config, steps, dataset_properties, include, exclude, + random_state, init_params) + + def fit_transformer( + self, + X: np.ndarray, + y: np.ndarray, + fit_params: Optional[Dict[str, Any]] = None + ) -> Tuple[np.ndarray, Optional[Dict[str, Any]]]: + """Fits the pipeline given a training (X,y) pair + + Args: + X (np.ndarray): features from which to guess targets + y (np.ndarray): classification targets for this task + fit_params (Optional[Dict[str, Any]]]): handy communication dictionary, + so that inter-stages of the pipeline can share information + + Returns: + np.ndarray: the transformed features + Optional[Dict[str, Any]]]: A dictionary to share fit informations + within the pipeline stages + """ + + if fit_params is None: + fit_params = {} + + X, fit_params = super().fit_transformer( + X, y, fit_params=fit_params) + + return X, fit_params + + def _predict_proba(self, X: np.ndarray) -> np.ndarray: + # Pre-process X + loader = self.named_steps['data_loader'].get_loader(X=X) + pred = self.named_steps['network'].predict(loader) + if self.dataset_properties['output_shape'] == 1: + proba = pred[:, :self.dataset_properties['num_classes']] + normalizer = proba.sum(axis=1)[:, np.newaxis] + normalizer[normalizer == 0.0] = 1.0 + proba /= normalizer + + return proba + + else: + all_proba = [] + + for k in range(self.dataset_properties['output_shape']): + proba_k = pred[:, k, :self.dataset_properties['num_classes'][k]] + normalizer = proba_k.sum(axis=1)[:, np.newaxis] + normalizer[normalizer == 0.0] = 1.0 + proba_k /= normalizer + all_proba.append(proba_k) + + return all_proba + + def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: + """predict_proba. + + Args: + X (np.ndarray): input to the pipeline, from which to guess targets + batch_size (Optional[int]): batch_size controls whether the pipeline + will be called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. + Returns: + np.ndarray: Probabilities of the target being certain class + """ + if batch_size is None: + y = self._predict_proba(X) + + else: + if not isinstance(batch_size, int): + raise ValueError("Argument 'batch_size' must be of type int, " + "but is '%s'" % type(batch_size)) + if batch_size <= 0: + raise ValueError("Argument 'batch_size' must be positive, " + "but is %d" % batch_size) + + else: + # Probe for the target array dimensions + target = self.predict_proba(X[0:2].copy()) + + y = np.zeros((X.shape[0], target.shape[1]), + dtype=np.float32) + + for k in range(max(1, int(np.ceil(float(X.shape[0]) / batch_size)))): + batch_from = k * batch_size + batch_to = min([(k + 1) * batch_size, X.shape[0]]) + pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None) + y[batch_from:batch_to] = pred_prob.astype(np.float32) + + # Neural networks might not be fit to produce a [0-1] output + # For instance, after small number of epochs. 
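To make the clipping and row-wise re-normalisation performed right after this step concrete, here is a minimal sketch with made-up numbers (not part of the diff):

import numpy as np
import sklearn.preprocessing

# A raw network output that is not yet a valid probability distribution.
y = np.array([[1.2, -0.1, 0.4]], dtype=np.float32)
y = np.clip(y, 0, 1)                                       # -> [[1.0, 0.0, 0.4]]
y = sklearn.preprocessing.normalize(y, axis=1, norm='l1')  # rows sum to 1 -> approx. [[0.714, 0.0, 0.286]]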
+ y = np.clip(y, 0, 1) + y = sklearn.preprocessing.normalize(y, axis=1, norm='l1') + + return y + + def _get_hyperparameter_search_space( + self, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + ) -> ConfigurationSpace: + """Create the hyperparameter configuration space. + + For the given steps, and the Choices within that steps, + this procedure returns a configuration space object to + explore. + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Characteristics + of the dataset to guide the pipeline choices of components + + Returns: + cs (Configuration): The configuration space describing + the SimpleRegressionClassifier. + """ + cs = ConfigurationSpace() + + if dataset_properties is None or not isinstance(dataset_properties, dict): + if not isinstance(dataset_properties, dict): + warnings.warn('The given dataset_properties argument contains an illegal value.' + 'Proceeding with the default value') + dataset_properties = dict() + + if 'target_type' not in dataset_properties: + dataset_properties['target_type'] = 'tabular_classification' + if dataset_properties['target_type'] != 'tabular_classification': + warnings.warn('Tabular classification is being used, however the target_type' + 'is not given as "tabular_classification". Overriding it.') + dataset_properties['target_type'] = 'tabular_classification' + # get the base search space given this + # dataset properties. Then overwrite with custom + # classification requirements + cs = self._get_base_search_space( + cs=cs, dataset_properties=dataset_properties, + exclude=exclude, include=include, pipeline=self.steps) + + # Here we add custom code, like this with this + # is not a valid configuration + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], + ) -> List[Tuple[str, autoPyTorchChoice]]: + """ + Defines what steps a pipeline should follow. + The step itself has choices given via autoPyTorchChoice. + + Returns: + List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised + by the pipeline. + """ + steps = [] # type: List[Tuple[str, autoPyTorchChoice]] + + default_dataset_properties = {'target_type': 'tabular_classification'} + if dataset_properties is not None: + default_dataset_properties.update(dataset_properties) + + steps.extend([ + ("imputer", SimpleImputer()), + ("encoder", EncoderChoice(default_dataset_properties)), + ("scaler", ScalerChoice(default_dataset_properties)), + ("tabular_transformer", TabularColumnTransformer()), + ("preprocessing", EarlyPreprocessing()), + ("network", NetworkChoice(default_dataset_properties)), + ("network_init", NetworkInitializerChoice(default_dataset_properties)), + ("optimizer", OptimizerChoice(default_dataset_properties)), + ("lr_scheduler", SchedulerChoice(default_dataset_properties)), + ("data_loader", FeatureDataLoader()), + ("trainer", TrainerChoice(default_dataset_properties)), + ]) + return steps + + def _get_estimator_hyperparameter_name(self) -> str: + """ + Returns the name of the current estimator. 
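For intuition on the chunking used by the batched predict_proba above, a small sketch with hypothetical sizes (not taken from the diff):

import numpy as np

n_samples, batch_size = 10, 4  # hypothetical sizes
n_batches = max(1, int(np.ceil(float(n_samples) / batch_size)))  # -> 3
slices = [(k * batch_size, min((k + 1) * batch_size, n_samples)) for k in range(n_batches)]
# -> [(0, 4), (4, 8), (8, 10)]; each slice is predicted with batch_size=None and written into y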
+ + Returns: + str: name of the pipeline type + """ + return "tabular_classifier" diff --git a/autoPyTorch/pipeline/tabular_regression.py b/autoPyTorch/pipeline/tabular_regression.py new file mode 100644 index 000000000..3909e3484 --- /dev/null +++ b/autoPyTorch/pipeline/tabular_regression.py @@ -0,0 +1,208 @@ +import warnings +from typing import Any, Dict, List, Optional, Tuple + +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace + +import numpy as np + +from sklearn.base import RegressorMixin + +from autoPyTorch.constants import STRING_TO_TASK_TYPES +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import ( + TabularColumnTransformer +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import ( + EncoderChoice +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice +from autoPyTorch.pipeline.components.setup.early_preprocessor.EarlyPreprocessing import EarlyPreprocessing +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice import SchedulerChoice +from autoPyTorch.pipeline.components.setup.network.base_network_choice import NetworkChoice +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import ( + NetworkInitializerChoice +) +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice import OptimizerChoice +from autoPyTorch.pipeline.components.training.data_loader.feature_data_loader import FeatureDataLoader +from autoPyTorch.pipeline.components.training.trainer.base_trainer_choice import ( + TrainerChoice +) + + +class TabularRegressionPipeline(RegressorMixin, BasePipeline): + """This class is a proof of concept to integrate AutoSklearn Components + + It implements a pipeline, which includes as steps: + + ->One preprocessing step + ->One neural network + + Contrary to the sklearn API it is not possible to enumerate the + possible parameters in the __init__ function because we only know the + available classifiers at runtime. For this reason the user must + specifiy the parameters by passing an instance of + ConfigSpace.configuration_space.Configuration. + + + Args: + config (Configuration) + The configuration to evaluate. 
+ random_state (Optional[RandomState): random_state is the random number generator + + Attributes: + Examples + """ + + def __init__( + self, + config: Optional[Configuration] = None, + steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, + dataset_properties: Optional[Dict[str, Any]] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict[str, Any]] = None + ): + super().__init__( + config, steps, dataset_properties, include, exclude, + random_state, init_params) + + def fit_transformer( + self, + X: np.ndarray, + y: np.ndarray, + fit_params: Optional[Dict[str, Any]] = None + ) -> Tuple[np.ndarray, Optional[Dict[str, Any]]]: + """Fits the pipeline given a training (X,y) pair + + Args: + X (np.ndarray): features from which to guess targets + y (np.ndarray): classification targets for this task + fit_params (Optional[Dict[str, Any]]]): handy communication dictionary, + so that inter-stages of the pipeline can share information + + Returns: + np.ndarray: the transformed features + Optional[Dict[str, Any]]]: A dictionary to share fit informations + within the pipeline stages + """ + + if fit_params is None: + fit_params = {} + + X, fit_params = super().fit_transformer( + X, y, fit_params=fit_params) + + return X, fit_params + + def score(self, X: np.ndarray, y: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: + """score. + + Args: + X (np.ndarray): input to the pipeline, from which to guess targets + batch_size (Optional[int]): batch_size controls whether the pipeline + will be called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. + Returns: + np.ndarray: coefficient of determination R^2 of the prediction + """ + from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics, calculate_score + metrics = get_metrics(self.dataset_properties, ['r2']) + y_pred = self.predict(X, batch_size=batch_size) + r2 = calculate_score(y, y_pred, task_type=STRING_TO_TASK_TYPES[self.dataset_properties['task_type']], + metrics=metrics)['r2'] + return r2 + + def _get_hyperparameter_search_space( + self, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + ) -> ConfigurationSpace: + """Create the hyperparameter configuration space. + + For the given steps, and the Choices within that steps, + this procedure returns a configuration space object to + explore. + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Characteristics + of the dataset to guide the pipeline choices of components + + Returns: + cs (Configuration): The configuration space describing + the SimpleRegressionClassifier. + """ + cs = ConfigurationSpace() + + if dataset_properties is None or not isinstance(dataset_properties, dict): + if not isinstance(dataset_properties, dict): + warnings.warn('The given dataset_properties argument contains an illegal value.' 
+ 'Proceeding with the default value') + dataset_properties = dict() + + if 'target_type' not in dataset_properties: + dataset_properties['target_type'] = 'tabular_regression' + if dataset_properties['target_type'] != 'tabular_regression': + warnings.warn('Tabular regression is being used, however the target_type' + 'is not given as "tabular_regression". Overriding it.') + dataset_properties['target_type'] = 'tabular_regression' + # get the base search space given this + # dataset properties. Then overwrite with custom + # regression requirements + cs = self._get_base_search_space( + cs=cs, dataset_properties=dataset_properties, + exclude=exclude, include=include, pipeline=self.steps) + + # Here we add custom code, like this with this + # is not a valid configuration + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], + ) -> List[Tuple[str, autoPyTorchChoice]]: + """ + Defines what steps a pipeline should follow. + The step itself has choices given via autoPyTorchChoice. + + Returns: + List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised + by the pipeline. + """ + steps = [] # type: List[Tuple[str, autoPyTorchChoice]] + + default_dataset_properties = {'target_type': 'tabular_regression'} + if dataset_properties is not None: + default_dataset_properties.update(dataset_properties) + + steps.extend([ + ("imputer", SimpleImputer()), + ("encoder", EncoderChoice(default_dataset_properties)), + ("scaler", ScalerChoice(default_dataset_properties)), + ("tabular_transformer", TabularColumnTransformer()), + ("preprocessing", EarlyPreprocessing()), + ("network", NetworkChoice(default_dataset_properties)), + ("network_init", NetworkInitializerChoice(default_dataset_properties)), + ("optimizer", OptimizerChoice(default_dataset_properties)), + ("lr_scheduler", SchedulerChoice(default_dataset_properties)), + ("data_loader", FeatureDataLoader()), + ("trainer", TrainerChoice(default_dataset_properties)), + ]) + return steps + + def _get_estimator_hyperparameter_name(self) -> str: + """ + Returns the name of the current estimator. + + Returns: + str: name of the pipeline type + """ + return "tabular_regressor" diff --git a/autoPyTorch/pipeline/traditional_tabular_classification.py b/autoPyTorch/pipeline/traditional_tabular_classification.py new file mode 100644 index 000000000..3ac29efc1 --- /dev/null +++ b/autoPyTorch/pipeline/traditional_tabular_classification.py @@ -0,0 +1,227 @@ +import warnings +from typing import Any, Dict, List, Optional, Tuple + +from ConfigSpace.configuration_space import Configuration, ConfigurationSpace + +import numpy as np + +from sklearn.base import ClassifierMixin + +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice + + +class TraditionalTabularClassificationPipeline(ClassifierMixin, BasePipeline): + """ + A pipeline that contains steps to fit traditional ML methods for tabular classification. + + Args: + config (Configuration) + The configuration to evaluate. 
+ random_state (Optional[RandomState): random_state is the random number generator + + Attributes: + """ + + def __init__( + self, + config: Optional[Configuration] = None, + steps: Optional[List[Tuple[str, autoPyTorchChoice]]] = None, + dataset_properties: Optional[Dict[str, Any]] = None, + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + random_state: Optional[np.random.RandomState] = None, + init_params: Optional[Dict[str, Any]] = None + ): + super().__init__( + config, steps, dataset_properties, include, exclude, + random_state, init_params) + + def fit_transformer( + self, + X: np.ndarray, + y: np.ndarray, + fit_params: Optional[Dict[str, Any]] = None + ) -> Tuple[np.ndarray, Optional[Dict[str, Any]]]: + """Fits the pipeline given a training (X,y) pair + + Args: + X (np.ndarray): features from which to guess targets + y (np.ndarray): classification targets for this task + fit_params (Optional[Dict[str, Any]]]): handy communication dictionary, + so that inter-stages of the pipeline can share information + + Returns: + np.ndarray: the transformed features + Optional[Dict[str, Any]]]: A dictionary to share fit informations + within the pipeline stages + """ + + if fit_params is None: + fit_params = {} + + X, fit_params = super().fit_transformer( + X, y, fit_params=fit_params) + + return X, fit_params + + def predict(self, X: np.ndarray, batch_size: Optional[int] = None + ) -> np.ndarray: + """Predict the output using the selected model. + + Args: + X (np.ndarray): input data to the array + batch_size (Optional[int]): batch_size controls whether the pipeline will be + called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. + + Returns: + np.ndarray: the predicted values given input X + """ + + if batch_size is None: + return self.named_steps['model_trainer'].predict(X) + + else: + if not isinstance(batch_size, int): + raise ValueError("Argument 'batch_size' must be of type int, " + "but is '%s'" % type(batch_size)) + if batch_size <= 0: + raise ValueError("Argument 'batch_size' must be positive, " + "but is %d" % batch_size) + + else: + # Probe for the target array dimensions + target = self.predict(X[0:2].copy()) + if len(target.shape) == 1: + target = target.reshape((-1, 1)) + y = np.zeros((X.shape[0], target.shape[1]), + dtype=np.float32) + + for k in range(max(1, int(np.ceil(float(X.shape[0]) / batch_size)))): + batch_from = k * batch_size + batch_to = min([(k + 1) * batch_size, X.shape[0]]) + pred_prob = self.predict(X[batch_from:batch_to], batch_size=None) + y[batch_from:batch_to] = pred_prob.astype(np.float32) + + return y + + def predict_proba(self, X: np.ndarray, batch_size: Optional[int] = None) -> np.ndarray: + """predict_proba. + + Args: + X (np.ndarray): input to the pipeline, from which to guess targets + batch_size (Optional[int]): batch_size controls whether the pipeline + will be called on small chunks of the data. Useful when calling the + predict method on the whole array X results in a MemoryError. 
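The probe-and-reshape step in predict above guards against models whose predict returns a 1-D array; a minimal sketch with made-up shapes (not part of the diff):

import numpy as np

target = np.array([0.0, 1.0])        # 1-D probe prediction for the two probe samples
if len(target.shape) == 1:
    target = target.reshape((-1, 1)) # -> shape (2, 1), so y can be allocated as (n_samples, 1)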
+ Returns: + np.ndarray: Probabilities of the target being certain class + """ + if batch_size is None: + return self.named_steps['model_trainer'].predict_proba(X) + + else: + if not isinstance(batch_size, int): + raise ValueError("Argument 'batch_size' must be of type int, " + "but is '%s'" % type(batch_size)) + if batch_size <= 0: + raise ValueError("Argument 'batch_size' must be positive, " + "but is %d" % batch_size) + + else: + # Probe for the target array dimensions + target = self.predict_proba(X[0:2].copy()) + + y = np.zeros((X.shape[0], target.shape[1]), + dtype=np.float32) + + for k in range(max(1, int(np.ceil(float(X.shape[0]) / batch_size)))): + batch_from = k * batch_size + batch_to = min([(k + 1) * batch_size, X.shape[0]]) + pred_prob = self.predict_proba(X[batch_from:batch_to], batch_size=None) + y[batch_from:batch_to] = pred_prob.astype(np.float32) + + return y + + def _get_hyperparameter_search_space( + self, + dataset_properties: Dict[str, Any], + include: Optional[Dict[str, Any]] = None, + exclude: Optional[Dict[str, Any]] = None, + ) -> ConfigurationSpace: + """Create the hyperparameter configuration space. + + For the given steps, and the Choices within that steps, + this procedure returns a configuration space object to + explore. + + Args: + include (Optional[Dict[str, Any]]): what hyper-parameter configurations + to honor when creating the configuration space + exclude (Optional[Dict[str, Any]]): what hyper-parameter configurations + to remove from the configuration space + dataset_properties (Optional[Dict[str, Union[str, int]]]): Characteristics + of the dataset to guide the pipeline choices of components + + Returns: + cs (Configuration): The configuration space describing + the SimpleRegressionClassifier. + """ + cs = ConfigurationSpace() + + if dataset_properties is None or not isinstance(dataset_properties, dict): + if not isinstance(dataset_properties, dict): + warnings.warn('The given dataset_properties argument contains an illegal value.' + 'Proceeding with the default value') + dataset_properties = dict() + + if 'target_type' not in dataset_properties: + dataset_properties['target_type'] = 'tabular_classification' + if dataset_properties['target_type'] != 'tabular_classification': + warnings.warn('Tabular classification is being used, however the target_type' + 'is not given as "tabular_classification". Overriding it.') + dataset_properties['target_type'] = 'tabular_classification' + # get the base search space given this + # dataset properties. Then overwrite with custom + # classification requirements + cs = self._get_base_search_space( + cs=cs, dataset_properties=dataset_properties, + exclude=exclude, include=include, pipeline=self.steps) + + # Here we add custom code, like this with this + # is not a valid configuration + + self.configuration_space = cs + self.dataset_properties = dataset_properties + return cs + + def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], + ) -> List[Tuple[str, autoPyTorchChoice]]: + """ + Defines what steps a pipeline should follow. + The step itself has choices given via autoPyTorchChoice. + + Returns: + List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised + by the pipeline. 
+        """
+        steps = []  # type: List[Tuple[str, autoPyTorchChoice]]
+
+        default_dataset_properties = {'target_type': 'tabular_classification'}
+        if dataset_properties is not None:
+            default_dataset_properties.update(dataset_properties)
+
+        steps.extend([
+            ("model_trainer", ModelChoice(default_dataset_properties)),
+        ])
+        return steps
+
+    def _get_estimator_hyperparameter_name(self) -> str:
+        """
+        Returns the name of the current estimator.
+
+        Returns:
+            str: name of the pipeline type
+        """
+        return "tabular_classifier"
diff --git a/examples/__init__.py b/autoPyTorch/search_space/__init__.py
similarity index 100%
rename from examples/__init__.py
rename to autoPyTorch/search_space/__init__.py
diff --git a/autoPyTorch/search_space/search_space.py b/autoPyTorch/search_space/search_space.py
new file mode 100644
index 000000000..5587eff15
--- /dev/null
+++ b/autoPyTorch/search_space/search_space.py
@@ -0,0 +1,153 @@
+import typing
+from typing import Optional
+
+import ConfigSpace as cs
+
+
+class SearchSpace:
+
+    hyperparameter_types = {
+        'categorical': cs.CategoricalHyperparameter,
+        'integer': cs.UniformIntegerHyperparameter,
+        'float': cs.UniformFloatHyperparameter,
+        'constant': cs.Constant,
+    }
+
+    @typing.no_type_check
+    def __init__(
+        self,
+        cs_name: str = 'Default Hyperparameter Config',
+        seed: int = 11,
+    ):
+        """Initialise the underlying hyperparameter configuration space.
+
+        Args:
+            cs_name (str): The name of the configuration space.
+            seed (int): Seed value used for the configuration space.
+
+        Returns:
+        """
+        self._hp_search_space = cs.ConfigurationSpace(
+            name=cs_name,
+            seed=seed,
+        )
+
+    @typing.no_type_check
+    def add_hyperparameter(
+        self,
+        name: str,
+        hyperparameter_type: str,
+        **kwargs,
+    ):
+        """Add a new hyperparameter to the configuration space.
+
+        Args:
+            name (str): The name of the hyperparameter to be added.
+            hyperparameter_type (str): The type of the hyperparameter to be added.
+
+        Returns:
+            hyperparameter (cs.Hyperparameter): The hyperparameter that was added
+                to the hyperparameter search space.
+        """
+        missing_arg = SearchSpace._assert_necessary_arguments_given(
+            hyperparameter_type,
+            **kwargs,
+        )
+
+        if missing_arg is not None:
+            raise TypeError(f'A {hyperparameter_type} must have a value for {missing_arg}')
+        else:
+            hyperparameter = SearchSpace.hyperparameter_types[hyperparameter_type](
+                name=name,
+                **kwargs,
+            )
+            self._hp_search_space.add_hyperparameter(
+                hyperparameter
+            )
+
+        return hyperparameter
+
+    @staticmethod
+    @typing.no_type_check
+    def _assert_necessary_arguments_given(
+        hyperparameter_type: str,
+        **kwargs,
+    ) -> Optional[str]:
+        """Assert that given a particular hyperparameter type, all the
+        necessary arguments are given to create the hyperparameter.
+
+        Args:
+            hyperparameter_type (str): The type of the hyperparameter to be added.
+
+        Returns:
+            missing_argument (str|None): The argument that is missing
+                to create the given hyperparameter.
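+
+        A minimal illustrative sketch of the intended behaviour (argument names
+        follow the table of required arguments defined right below; an
+        assumption-laden example, not a doctest)::
+
+            # 'lower' is not provided for an 'integer' hyperparameter,
+            # so 'lower' is returned as the missing argument
+            SearchSpace._assert_necessary_arguments_given(
+                'integer', upper=10, default=5, log=False)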
+        """
+        necessary_args = {
+            'categorical': {'choices', 'default_value'},
+            'integer': {'lower', 'upper', 'default', 'log'},
+            'float': {'lower', 'upper', 'default', 'log'},
+            'constant': {'value'},
+        }
+
+        hp_necessary_args = necessary_args[hyperparameter_type]
+        for hp_necessary_arg in hp_necessary_args:
+            if hp_necessary_arg not in kwargs:
+                return hp_necessary_arg
+
+        return None
+
+    @typing.no_type_check
+    def set_parent_hyperperparameter(
+        self,
+        child_hp,
+        parent_hp,
+        parent_value,
+    ):
+        """Activate the child hyperparameter on the search space only if the
+        parent hyperparameter takes a particular value.
+
+        Args:
+            child_hp (cs.Hyperparameter): The child hyperparameter to be added.
+            parent_hp (cs.Hyperparameter): The parent hyperparameter to be considered.
+            parent_value (str|float|int): The value of the parent hyperparameter for when the
+                child hyperparameter will be added to the search space.
+
+        Returns:
+        """
+        self._hp_search_space.add_condition(
+            cs.EqualsCondition(
+                child_hp,
+                parent_hp,
+                parent_value,
+            )
+        )
+
+    @typing.no_type_check
+    def add_configspace_condition(
+        self,
+        child_hp,
+        parent_hp,
+        configspace_condition,
+        value,
+    ):
+        """Add a condition on the child hyperparameter, so that it only becomes
+        active when the given condition on the parent hyperparameter holds.
+
+        Args:
+            child_hp (cs.Hyperparameter): The child hyperparameter to be added.
+            parent_hp (cs.Hyperparameter): The parent hyperparameter to be considered.
+            configspace_condition (cs.AbstractCondition): The condition to be fulfilled
+                by the parent hyperparameter. A list of all the possible conditions can be
+                found at ConfigSpace/conditions.py.
+            value (str|float|int|list): The value of the parent hyperparameter to be matched
+                in the condition. value needs to be a list only for the InCondition.
+
+        Returns:
+        """
+        self._hp_search_space.add_condition(
+            configspace_condition(
+                child_hp,
+                parent_hp,
+                value,
+            )
+        )
diff --git a/autoPyTorch/utils/backend.py b/autoPyTorch/utils/backend.py
new file mode 100644
index 000000000..dd24c2340
--- /dev/null
+++ b/autoPyTorch/utils/backend.py
@@ -0,0 +1,512 @@
+import glob
+import os
+import pickle
+import shutil
+import tempfile
+import time
+import uuid
+import warnings
+from typing import Dict, List, Optional, Tuple, Union
+
+import lockfile
+
+import numpy as np
+
+from autoPyTorch.datasets.base_dataset import BaseDataset
+from autoPyTorch.ensemble.abstract_ensemble import AbstractEnsemble
+from autoPyTorch.pipeline.base_pipeline import BasePipeline
+from autoPyTorch.utils.logging_ import PicklableClientLogger, get_named_client_logger
+
+__all__ = [
+    'Backend'
+]
+
+
+def create(
+    temporary_directory: Optional[str],
+    output_directory: Optional[str],
+    delete_tmp_folder_after_terminate: bool = True,
+    delete_output_folder_after_terminate: bool = True,
+) -> 'Backend':
+    """
+    Creates a backend object that manages disk related transactions
+
+    Args:
+        temporary_directory (str): where all temporary data is to be dumped
+        output_directory (str): where all predictions are to be output
+        delete_tmp_folder_after_terminate (bool): whether to delete the
+            temporary directory when the run completes
+        delete_output_folder_after_terminate (bool): whether to delete
+            the output directory when the run completes
+
+    Returns:
+        Backend object
+    """
+    context = BackendContext(temporary_directory, output_directory,
+                             delete_tmp_folder_after_terminate,
+                             delete_output_folder_after_terminate,
+                             )
+    backend = Backend(context)
+
+    return backend
+
+
+def get_randomized_directory_name(temporary_directory: Optional[str] = None) -> str:
+    uuid_str = str(uuid.uuid1(clock_seq=os.getpid()))
+
+    temporary_directory = (
+        temporary_directory
+        if temporary_directory
+        else os.path.join(
+            tempfile.gettempdir(),
+            "autoPyTorch_tmp_{}".format(
+                uuid_str,
+            ),
+        )
+    )
+
+    return temporary_directory
+
+
+class BackendContext(object):
+
+    def __init__(self,
+                 temporary_directory: Optional[str],
+                 output_directory: Optional[str],
+                 delete_tmp_folder_after_terminate: bool,
+                 delete_output_folder_after_terminate: bool,
+                 ):
+
+        # Check that the names of tmp_dir and output_dir are not the same.
+        if temporary_directory == output_directory and temporary_directory is not None:
+            raise ValueError("The temporary and the output directory "
+                             "must be different.")
+
+        self.delete_tmp_folder_after_terminate = delete_tmp_folder_after_terminate
+        self.delete_output_folder_after_terminate = delete_output_folder_after_terminate
+        # attributes to check that directories were created by autoPyTorch
+        self._tmp_dir_created = False
+        self._output_dir_created = False
+
+        self._temporary_directory = (
+            get_randomized_directory_name(
+                temporary_directory=temporary_directory,
+            )
+        )
+        self._output_directory = output_directory
+        self.create_directories()
+        self._logger = None  # type: Optional[PicklableClientLogger]
+
+    @property
+    def output_directory(self) -> Optional[str]:
+        if self._output_directory is not None:
+            # make sure that tilde does not appear on the path.
+            return os.path.expanduser(os.path.expandvars(self._output_directory))
+        else:
+            return None
+
+    @property
+    def temporary_directory(self) -> str:
+        # make sure that tilde does not appear on the path.
+        return os.path.expanduser(os.path.expandvars(self._temporary_directory))
+
+    def create_directories(self) -> None:
+        # Exception is raised if self.temporary_directory already exists.
+        os.makedirs(self.temporary_directory)
+        self._tmp_dir_created = True
+
+        # Exception is raised if self.output_directory already exists.
+        if self.output_directory is not None:
+            os.makedirs(self.output_directory)
+            self._output_dir_created = True
+
+    def delete_directories(self, force: bool = True) -> None:
+        if self.output_directory and (self.delete_output_folder_after_terminate or force):
+            if self._output_dir_created is False:
+                raise ValueError("Failed to delete output dir: %s because autoPyTorch did not "
+                                 "create it. Please make sure that the specified output dir does "
+                                 "not exist when instantiating autoPyTorch."
+                                 % self.output_directory)
+            try:
+                shutil.rmtree(self.output_directory)
+            except Exception:
+                try:
+                    if self._logger is not None:
+                        self._logger.warning("Could not delete output dir: %s" %
+                                             self.output_directory)
+                    else:
+                        warnings.warn("Could not delete output dir: %s" % self.output_directory)
+                except Exception:
+                    warnings.warn("Could not delete output dir: %s" % self.output_directory)
+
+        if self.delete_tmp_folder_after_terminate or force:
+            if self._tmp_dir_created is False:
+                raise ValueError("Failed to delete tmp dir: %s because autoPyTorch did not "
+                                 "create it. Please make sure that the specified tmp dir does not "
+                                 "exist when instantiating autoPyTorch."
+                                 % self.temporary_directory)
+            try:
+                shutil.rmtree(self.temporary_directory)
+            except Exception:
+                try:
+                    if self._logger is not None:
+                        self._logger.warning(
+                            "Could not delete tmp dir: %s" % self.temporary_directory)
+                    else:
+                        warnings.warn("Could not delete tmp dir: %s" % self.temporary_directory)
+                except Exception:
+                    warnings.warn("Could not delete tmp dir: %s" % self.temporary_directory)
+
+
+class Backend(object):
+    """Utility class to load and save all objects to be persisted.
+    These are:
+    * start time of auto-pytorch
+    * true targets of the ensemble
+    """
+
+    def __init__(self, context: BackendContext):
+        self._logger = None  # type: Optional[PicklableClientLogger]
+        self.context = context
+
+        # Create the temporary directory if it does not yet exist
+        try:
+            os.makedirs(self.temporary_directory)
+        except Exception:
+            pass
+        # This does not have to exist or be specified
+        if self.output_directory is not None:
+            if not os.path.exists(self.output_directory):
+                raise ValueError("Output directory %s does not exist." % self.output_directory)
+
+        self.internals_directory = os.path.join(self.temporary_directory, ".autoPyTorch")
+        self._make_internals_directory()
+
+    def setup_logger(self, name: str, port: int) -> None:
+        self._logger = get_named_client_logger(
+            name=name,
+            port=port,
+        )
+        self.context._logger = self._logger
+        return
+
+    @property
+    def output_directory(self) -> Optional[str]:
+        return self.context.output_directory
+
+    @property
+    def temporary_directory(self) -> str:
+        return self.context.temporary_directory
+
+    def _make_internals_directory(self) -> None:
+        try:
+            os.makedirs(self.internals_directory)
+        except Exception as e:
+            if self._logger is not None:
+                self._logger.debug("_make_internals_directory: %s" % e)
+        try:
+            os.makedirs(self.get_runs_directory())
+        except Exception as e:
+            if self._logger is not None:
+                self._logger.debug("_make_internals_directory: %s" % e)
+
+    def _get_start_time_filename(self, seed: Union[str, int]) -> str:
+        if isinstance(seed, str):
+            seed = int(seed)
+        return os.path.join(self.internals_directory, "start_time_%d" % seed)
+
+    def save_start_time(self, seed: str) -> str:
+        self._make_internals_directory()
+        start_time = time.time()
+
+        filepath = self._get_start_time_filename(seed)
+
+        if not isinstance(start_time, float):
+            raise ValueError("Start time must be a float, but is %s." % type(start_time))
+
+        if os.path.exists(filepath):
+            raise ValueError(
+                f"{filepath} already exists. Different seeds should be provided for different jobs."
+ ) + + with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(filepath), delete=False) as fh: + fh.write(str(start_time)) + tempname = fh.name + os.rename(tempname, filepath) + + return filepath + + def load_start_time(self, seed: int) -> float: + with open(self._get_start_time_filename(seed), 'r') as fh: + start_time = float(fh.read()) + return start_time + + def get_smac_output_directory(self) -> str: + return os.path.join(self.temporary_directory, 'smac3-output') + + def get_smac_output_directory_for_run(self, seed: int) -> str: + return os.path.join( + self.temporary_directory, + 'smac3-output', + 'run_%d' % seed + ) + + def _get_targets_ensemble_filename(self) -> str: + return os.path.join(self.internals_directory, + "true_targets_ensemble.npy") + + def save_targets_ensemble(self, targets: np.ndarray) -> str: + self._make_internals_directory() + if not isinstance(targets, np.ndarray): + raise ValueError('Targets must be of type np.ndarray, but is %s' % + type(targets)) + + filepath = self._get_targets_ensemble_filename() + + # Try to open the file without locking it, this will reduce the + # number of times where we erroneously keep a lock on the ensemble + # targets file although the process already was killed + try: + existing_targets = np.load(filepath, allow_pickle=True) + if existing_targets.shape[0] > targets.shape[0] or ( + existing_targets.shape == targets.shape and np.allclose(existing_targets, targets)): + return filepath + except Exception: + pass + + with lockfile.LockFile(filepath): + if os.path.exists(filepath): + with open(filepath, 'rb') as fh: + existing_targets = np.load(fh, allow_pickle=True) + if existing_targets.shape[0] > targets.shape[0] or ( + existing_targets.shape == targets.shape and np.allclose(existing_targets, targets)): + return filepath + + with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname( + filepath), delete=False) as fh_w: + np.save(fh_w, targets.astype(np.float32)) + tempname = fh_w.name + + os.rename(tempname, filepath) + + return filepath + + def load_targets_ensemble(self) -> np.ndarray: + filepath = self._get_targets_ensemble_filename() + + with lockfile.LockFile(filepath): + with open(filepath, 'rb') as fh: + targets = np.load(fh, allow_pickle=True) + + return targets + + def _get_datamanager_pickle_filename(self) -> str: + return os.path.join(self.internals_directory, 'datamanager.pkl') + + def save_datamanager(self, datamanager: BaseDataset) -> str: + self._make_internals_directory() + filepath = self._get_datamanager_pickle_filename() + + with lockfile.LockFile(filepath): + if not os.path.exists(filepath): + with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname( + filepath), delete=False) as fh: + pickle.dump(datamanager, fh, -1) + tempname = fh.name + os.rename(tempname, filepath) + + return filepath + + def load_datamanager(self) -> BaseDataset: + filepath = self._get_datamanager_pickle_filename() + with lockfile.LockFile(filepath): + with open(filepath, 'rb') as fh: + return pickle.load(fh) + + def get_runs_directory(self) -> str: + return os.path.join(self.internals_directory, 'runs') + + def get_numrun_directory(self, seed: int, num_run: int, budget: float) -> str: + return os.path.join(self.internals_directory, 'runs', '%d_%d_%s' % (seed, num_run, budget)) + + def get_model_filename(self, seed: int, idx: int, budget: float) -> str: + return '%s.%s.%s.model' % (seed, idx, budget) + + def get_cv_model_filename(self, seed: int, idx: int, budget: float) -> str: + return '%s.%s.%s.cv_model' % (seed, idx, budget) + + def 
list_all_models(self, seed: int) -> List[str]: + runs_directory = self.get_runs_directory() + model_files = glob.glob( + os.path.join(glob.escape(runs_directory), '%d_*' % seed, '%s.*.*.model' % seed) + ) + return model_files + + def load_models_by_identifiers(self, identifiers: List[Tuple[int, int, float]] + ) -> Dict: + models = dict() + + for identifier in identifiers: + seed, idx, budget = identifier + models[identifier] = self.load_model_by_seed_and_id_and_budget( + seed, idx, budget) + + return models + + def load_model_by_seed_and_id_and_budget(self, seed: int, + idx: int, + budget: float + ) -> BasePipeline: + model_directory = self.get_numrun_directory(seed, idx, budget) + + model_file_name = '%s.%s.%s.model' % (seed, idx, budget) + model_file_path = os.path.join(model_directory, model_file_name) + with open(model_file_path, 'rb') as fh: + return pickle.load(fh) + + def load_cv_models_by_identifiers(self, identifiers: List[Tuple[int, int, float]] + ) -> Dict: + models = dict() + + for identifier in identifiers: + seed, idx, budget = identifier + models[identifier] = self.load_cv_model_by_seed_and_id_and_budget( + seed, idx, budget) + + return models + + def load_cv_model_by_seed_and_id_and_budget(self, + seed: int, + idx: int, + budget: float + ) -> BasePipeline: + model_directory = self.get_numrun_directory(seed, idx, budget) + + model_file_name = '%s.%s.%s.cv_model' % (seed, idx, budget) + model_file_path = os.path.join(model_directory, model_file_name) + with open(model_file_path, 'rb') as fh: + return pickle.load(fh) + + def save_numrun_to_dir( + self, seed: int, idx: int, budget: float, model: Optional[BasePipeline], + cv_model: Optional[BasePipeline], ensemble_predictions: Optional[np.ndarray], + valid_predictions: Optional[np.ndarray], test_predictions: Optional[np.ndarray], + ) -> None: + runs_directory = self.get_runs_directory() + tmpdir = tempfile.mkdtemp(dir=runs_directory) + if model is not None: + file_path = os.path.join(tmpdir, self.get_model_filename(seed, idx, budget)) + with open(file_path, 'wb') as fh: + pickle.dump(model, fh, -1) + + if cv_model is not None: + file_path = os.path.join(tmpdir, self.get_cv_model_filename(seed, idx, budget)) + with open(file_path, 'wb') as fh: + pickle.dump(cv_model, fh, -1) + + for preds, subset in ( + (ensemble_predictions, 'ensemble'), + (valid_predictions, 'valid'), + (test_predictions, 'test') + ): + if preds is not None: + file_path = os.path.join( + tmpdir, + self.get_prediction_filename(subset, seed, idx, budget) + ) + with open(file_path, 'wb') as fh: + pickle.dump(preds.astype(np.float32), fh, -1) + try: + os.rename(tmpdir, self.get_numrun_directory(seed, idx, budget)) + except OSError: + if os.path.exists(self.get_numrun_directory(seed, idx, budget)): + os.rename(self.get_numrun_directory(seed, idx, budget), + os.path.join(runs_directory, tmpdir + '.old')) + os.rename(tmpdir, self.get_numrun_directory(seed, idx, budget)) + shutil.rmtree(os.path.join(runs_directory, tmpdir + '.old')) + + def get_ensemble_dir(self) -> str: + return os.path.join(self.internals_directory, 'ensembles') + + def load_ensemble(self, seed: int) -> Optional[AbstractEnsemble]: + ensemble_dir = self.get_ensemble_dir() + + if not os.path.exists(ensemble_dir): + if self._logger is not None: + self._logger.warning('Directory %s does not exist' % ensemble_dir) + else: + warnings.warn('Directory %s does not exist' % ensemble_dir) + return None + + if seed >= 0: + indices_files = glob.glob( + os.path.join(glob.escape(ensemble_dir), '%s.*.ensemble' % 
seed) + ) + indices_files.sort() + else: + indices_files = os.listdir(ensemble_dir) + indices_files = [os.path.join(ensemble_dir, f) for f in indices_files] + indices_files.sort(key=lambda f: time.ctime(os.path.getmtime(f))) + + with open(indices_files[-1], 'rb') as fh: + ensemble_members_run_numbers = pickle.load(fh) + + return ensemble_members_run_numbers + + def save_ensemble(self, ensemble: AbstractEnsemble, idx: int, seed: int) -> None: + try: + os.makedirs(self.get_ensemble_dir()) + except Exception: + pass + + filepath = os.path.join( + self.get_ensemble_dir(), + '%s.%s.ensemble' % (str(seed), str(idx).zfill(10)) + ) + with tempfile.NamedTemporaryFile('wb', dir=os.path.dirname( + filepath), delete=False) as fh: + pickle.dump(ensemble, fh) + tempname = fh.name + os.rename(tempname, filepath) + + def get_prediction_filename(self, subset: str, + automl_seed: Union[str, int], + idx: int, + budget: float + ) -> str: + return 'predictions_%s_%s_%s_%s.npy' % (subset, automl_seed, idx, budget) + + def save_predictions_as_txt(self, + predictions: np.ndarray, + subset: str, + idx: int, precision: int, + prefix: Optional[str] = None) -> None: + if not self.output_directory: + return + # Write prediction scores in prescribed format + filepath = os.path.join( + self.output_directory, + ('%s_' % prefix if prefix else '') + '%s_%s.predict' % (subset, str(idx)), + ) + + format_string = '{:.%dg} ' % precision + with tempfile.NamedTemporaryFile('w', dir=os.path.dirname( + filepath), delete=False) as output_file: + for row in predictions: + if not isinstance(row, np.ndarray) and not isinstance(row, list): + row = [row] + for val in row: + output_file.write(format_string.format(float(val))) + output_file.write('\n') + tempname = output_file.name + os.rename(tempname, filepath) + + def write_txt_file(self, filepath: str, data: str, name: str) -> None: + with lockfile.LockFile(filepath): + with tempfile.NamedTemporaryFile('w', dir=os.path.dirname( + filepath), delete=False) as fh: + fh.write(data) + tempname = fh.name + os.rename(tempname, filepath) + if self._logger is not None: + self._logger.debug('Created %s file %s' % (name, filepath)) diff --git a/autoPyTorch/utils/benchmarking/benchmark.py b/autoPyTorch/utils/benchmarking/benchmark.py deleted file mode 100644 index 271e4ce62..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark.py +++ /dev/null @@ -1,98 +0,0 @@ -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.utils.benchmarking.benchmark_pipeline import (BenchmarkSettings, - CreateAutoNet, - FitAutoNet, - ForAutoNetConfig, - ForInstance, ForRun, - PrepareResultFolder, - ReadInstanceData, - SaveResults, - SetAutoNetConfig, - ApplyUserUpdates, - SaveEnsembleLogs, - SetEnsembleConfig) -from autoPyTorch.utils.benchmarking.visualization_pipeline import (CollectAutoNetConfigTrajectories, - CollectRunTrajectories, - CollectInstanceTrajectories, - GetRunTrajectories, - PlotTrajectories, - ReadInstanceInfo, - VisualizationSettings, - GetEnsembleTrajectories, - PlotSummary, - GetAdditionalTrajectories) -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser - -class Benchmark(): - def __init__(self): - self.benchmark_pipeline = self.get_benchmark_pipeline() - self.visualization_pipeline = self.get_visualization_pipeline() - self.compute_ensemble_performance_pipeline = self.get_ensemble_performance_pipeline() - - def get_benchmark_config_file_parser(self): - return ConfigFileParser(self.benchmark_pipeline.get_pipeline_config_options()) - - def 
run_benchmark(self, **benchmark_config): - config = self.benchmark_pipeline.get_pipeline_config(**benchmark_config) - self.benchmark_pipeline.fit_pipeline(pipeline_config=config) - - def visualize_benchmark(self, **benchmark_config): - config = self.visualization_pipeline.get_pipeline_config(throw_error_if_invalid=False, **benchmark_config) - self.visualization_pipeline.fit_pipeline(pipeline_config=config) - - def compute_ensemble_performance(self, **benchmark_config): - config = self.compute_ensemble_performance_pipeline.get_pipeline_config(throw_error_if_invalid=False, **benchmark_config) - self.compute_ensemble_performance_pipeline.fit_pipeline(pipeline_config=config) - - def get_benchmark_pipeline(self): - return Pipeline([ - BenchmarkSettings(), - ForInstance([ # loop through instance files - ReadInstanceData(), # get test_split, is_classification, instance - CreateAutoNet(), - #ApplyUserUpdates(), - ForAutoNetConfig([ # loop through autonet_config_file - SetAutoNetConfig(), # use_dataset_metric, use_dataset_max_runtime - ForRun([ # loop through num_runs, run_ids - PrepareResultFolder(), - FitAutoNet(), - SaveResults(), - SaveEnsembleLogs() - ]) - ]) - ]) - ]) - - def get_visualization_pipeline(self): - return Pipeline([ - VisualizationSettings(), - CollectInstanceTrajectories([ - CollectAutoNetConfigTrajectories([ - CollectRunTrajectories([ - ReadInstanceInfo(), - CreateAutoNet(), - GetRunTrajectories(), - GetEnsembleTrajectories() - ]) - ]), - GetAdditionalTrajectories(), - PlotTrajectories() - ]), - PlotSummary() - ]) - - def get_ensemble_performance_pipeline(self): - return Pipeline([ - VisualizationSettings(), - CollectInstanceTrajectories([ - CollectAutoNetConfigTrajectories([ - CollectRunTrajectories([ - ReadInstanceInfo(), - CreateAutoNet(), - SetEnsembleConfig(), - SaveEnsembleLogs(), - GetEnsembleTrajectories() - ]) - ]) - ]) - ]) diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/__init__.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/__init__.py deleted file mode 100644 index c2975a56c..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from autoPyTorch.utils.benchmarking.benchmark_pipeline.create_autonet import CreateAutoNet -from autoPyTorch.utils.benchmarking.benchmark_pipeline.fit_autonet import FitAutoNet -from autoPyTorch.utils.benchmarking.benchmark_pipeline.for_autonet_config import ForAutoNetConfig -from autoPyTorch.utils.benchmarking.benchmark_pipeline.for_instance import ForInstance -from autoPyTorch.utils.benchmarking.benchmark_pipeline.for_run import ForRun -from autoPyTorch.utils.benchmarking.benchmark_pipeline.set_autonet_config import SetAutoNetConfig -from autoPyTorch.utils.benchmarking.benchmark_pipeline.prepare_result_folder import PrepareResultFolder -from autoPyTorch.utils.benchmarking.benchmark_pipeline.read_instance_data import ReadInstanceData -from autoPyTorch.utils.benchmarking.benchmark_pipeline.save_results import SaveResults -from autoPyTorch.utils.benchmarking.benchmark_pipeline.benchmark_settings import BenchmarkSettings -from autoPyTorch.utils.benchmarking.benchmark_pipeline.save_ensemble_logs import SaveEnsembleLogs -from autoPyTorch.utils.benchmarking.benchmark_pipeline.set_ensemble_config import SetEnsembleConfig -from autoPyTorch.utils.benchmarking.benchmark_pipeline.apply_user_updates import ApplyUserUpdates diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/apply_user_updates.py 
b/autoPyTorch/utils/benchmarking/benchmark_pipeline/apply_user_updates.py deleted file mode 100644 index 9fce0d4b7..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/apply_user_updates.py +++ /dev/null @@ -1,71 +0,0 @@ - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper - -import re -import os -import pandas as pd -import math -import numpy as np - - -class ApplyUserUpdates(PipelineNode): - - def fit(self, pipeline_config, autonet): - - path = pipeline_config['user_updates_config'] - if path is None: - return dict() - - if not os.path.exists(path): - raise ValueError('Invalid path: ' + path) - - data = np.array(pd.read_csv(path, header=None, sep=';')) - - for row in data: - name, value_range, is_log = row[0].strip(), self.string_to_list(str(row[1])), to_bool(row[2].strip()) - name_split = name.split(ConfigWrapper.delimiter) - autonet.pipeline[name_split[0]]._update_hyperparameter_range(ConfigWrapper.delimiter.join(name_split[1:]), value_range, is_log, check_validity=False) - - # print(autonet.get_hyperparameter_search_space()) - - return { 'autonet': autonet } - - - def get_pipeline_config_options(self): - options = [ - ConfigOption("user_updates_config", default=None, type='directory'), - ] - return options - - def string_to_list(self, string): - pattern = "\[(.*)\]" - match = re.search(pattern, string) - - if match is None: - # no list > make constant range - match = re.search(pattern, '[' + string + ',' + string + ']') - - if match is None: - raise ValueError('No valid range specified got: ' + string) - - lst = map(self.try_convert, match.group(1).split(',')) - return list(lst) - - def try_convert(self, string): - string = string.strip() - try: - return int(string) - except: - try: - return float(string) - except: - if string == 'True': - return True - if string == 'False': - return False - return string - - - diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/benchmark_settings.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/benchmark_settings.py deleted file mode 100644 index 118053abf..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/benchmark_settings.py +++ /dev/null @@ -1,39 +0,0 @@ -import logging - -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -class BenchmarkSettings(PipelineNode): - def __init__(self): - super(BenchmarkSettings, self).__init__() - - self.logger_settings = dict() - self.logger_settings['debug'] = logging.DEBUG - self.logger_settings['info'] = logging.INFO - self.logger_settings['warning'] = logging.WARNING - self.logger_settings['error'] = logging.ERROR - self.logger_settings['critical'] = logging.CRITICAL - - def fit(self, pipeline_config): - logging.getLogger('benchmark').info("Start benchmark") - - logger = logging.getLogger('benchmark') - logger.setLevel(self.logger_settings[pipeline_config['log_level']]) - - # log level for autonet is set in SetAutoNetConfig - - return { 'task_id': pipeline_config['task_id'], 'run_id': pipeline_config['run_id']} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("task_id", default=-1, type=int), - ConfigOption("run_id", default="0", type=str), - ConfigOption("log_level", default="info", type=str, choices=list(self.logger_settings.keys())), - 
ConfigOption("benchmark_name", default=None, type=str, required=True), - - # pseudo options that allow to store host information in host_config... Used in run_benchmark_cluster.py - ConfigOption("memory_per_core", default=float("inf"), type=float), - ConfigOption("time_limit", default=2**32, type=int) - ] - return options diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/create_autonet.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/create_autonet.py deleted file mode 100644 index 00dae6ede..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/create_autonet.py +++ /dev/null @@ -1,41 +0,0 @@ -from autoPyTorch import AutoNetClassification, AutoNetRegression, AutoNetMultilabel, AutoNetEnsemble -from autoPyTorch.utils.ensemble import test_predictions_for_ensemble -import autoPyTorch.pipeline.nodes as autonet_nodes -import autoPyTorch.components.metrics as autonet_metrics -from autoPyTorch.components.metrics.additional_logs import test_result - -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -from autoPyTorch.data_management.data_manager import ProblemType - -class CreateAutoNet(PipelineNode): - - def fit(self, pipeline_config, data_manager): - if (data_manager.problem_type == ProblemType.FeatureRegression): - autonet_type = AutoNetRegression - elif (data_manager.problem_type == ProblemType.FeatureMultilabel): - autonet_type = AutoNetMultilabel - elif (data_manager.problem_type == ProblemType.FeatureClassification): - autonet_type = AutoNetClassification - elif data_manager.problem_type == ProblemType.ImageClassification: - autonet = AutoNetImageClassification() - elif data_manager.problem_type == ProblemType.ImageClassificationMultipleDatasets: - autonet = AutoNetImageClassificationMultipleDatasets() - else: - raise ValueError('Problem type ' + str(data_manager.problem_type) + ' is not defined') - - autonet = autonet_type() if not pipeline_config["enable_ensemble"] else AutoNetEnsemble(autonet_type) - test_logger = test_result if not pipeline_config["enable_ensemble"] else test_predictions_for_ensemble - autonet.pipeline[autonet_nodes.LogFunctionsSelector.get_name()].add_log_function( - name=test_logger.__name__, - log_function=test_logger(autonet, data_manager.X_test, data_manager.Y_test), - loss_transform=(not pipeline_config["enable_ensemble"])) - - return { 'autonet': autonet } - - def get_pipeline_config_options(self): - options = [ - ConfigOption("enable_ensemble", default=False, type=to_bool) - ] - return options diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/fit_autonet.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/fit_autonet.py deleted file mode 100644 index e53694975..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/fit_autonet.py +++ /dev/null @@ -1,136 +0,0 @@ -import time -import logging -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption -import json -import numpy as np - -class FitAutoNet(PipelineNode): - - def __init__(self): - super(FitAutoNet, self).__init__() - - # if we have the required module 'resource' (not available on windows!) 
- self.guarantee_limits = module_exists("resource") and module_exists("pynisher") - - def fit(self, pipeline_config, autonet, data_manager, **kwargs): - - start_time = time.time() - test_score = None - - if pipeline_config['refit_config'] is None: - # Start search - logging.getLogger('benchmark').debug("Fit autonet") - - # Email confirmation - if pipeline_config['confirmation_gmail_user']: - self.send_confirmation_mail(pipeline_config, autonet, data_manager) - - # Run fit - fit_result = self.fit_autonet(autonet, data_manager) - - if pipeline_config['refit_budget'] is not None: - # Refit - import os - import numpy as np - autonet_config = autonet.get_current_autonet_config() - from autoPyTorch.utils.loggers import get_refit_config - refit_config = get_refit_config(autonet_config['result_logger_dir']) - directory = os.path.join(autonet_config['result_logger_dir'], 'refit') - - autonet_config['result_logger_dir'] = directory - autonet_config['save_checkpoints'] = False - pipeline_config['refit_config'] = refit_config - - pipeline_config['refit_budget'] *= len(data_manager.X_train) - job_id = max(autonet_config['task_id'], 1) - if job_id == 1: - self.refit_autonet( - pipeline_config, autonet, autonet_config, - data_manager.X_train, data_manager.Y_train, - data_manager.X_valid, data_manager.Y_valid) - - else: - # Refit - autonet_config= autonet.get_current_autonet_config() - fit_result = self.refit_autonet( - pipeline_config, autonet, autonet_config, - data_manager.X_train, data_manager.Y_train, - data_manager.X_valid, data_manager.Y_valid) - - if data_manager.X_test is not None: - # Score on test set - import numpy as np - test_score = autonet.score(data_manager.X_test, data_manager.Y_test.astype(np.int32)) - - return { 'fit_duration': int(time.time() - start_time), - 'fit_result': fit_result, - 'test_score': test_score} - - def fit_autonet(self, autonet, data_manager): - return autonet.fit( data_manager.X_train, data_manager.Y_train, - data_manager.X_valid, data_manager.Y_valid, - refit=False) - - def refit_autonet(self, pipeline_config, autonet, autonet_config, X_train, Y_train, X_valid, Y_valid): - logging.getLogger('benchmark').debug("Refit autonet") - - import torch - if torch.cuda.is_available(): - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False - - with open(pipeline_config['refit_config'], 'r') as f: - refit_config = json.load(f) - - if 'incumbent_config_path' in refit_config: - # > updates in set_autonet_config - with open(refit_config['incumbent_config_path'], 'r') as f: - config = json.load(f) - autonet_config['random_seed'] = refit_config['seed'] - autonet_config['dataset_order'] = refit_config['dataset_order'] - else: - config = refit_config - - fit_result = autonet.refit( - X_train, Y_train, - X_valid, Y_valid, - autonet_config=autonet_config, - hyperparameter_config=config, - budget=pipeline_config['refit_budget'] or autonet_config['max_budget']) - - logging.getLogger('benchmark').info("Result: " + str(fit_result)) - return fit_result - - def send_confirmation_mail(self, pipeline_config, autonet, data_manager): - user = pipeline_config['confirmation_gmail_user'] - import pprint - message = "\r\n".join(["Autonet run", - "Data:", - "%s", - "", - "Autonet Config:", - "%s" - "", - "", - "%s"]) % (pprint.pformat(data_manager.X_train.tolist()), pprint.pformat(autonet.get_current_autonet_config()), str(autonet.get_hyperparameter_search_space())) - user = user + '+benchmark@gmail.com' - from autoPyTorch.utils.mail import send_mail - send_mail(user, 
'Benchmark Start', message) - - def get_pipeline_config_options(self): - options = [ - ConfigOption("refit_config", default=None, type='directory'), - ConfigOption("refit_budget", default=None, type=int), - ConfigOption("confirmation_gmail_user", default=None, type=str), - ] - return options - - -def module_exists(module_name): - try: - __import__(module_name) - except ImportError: - return False - else: - return True diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_autonet_config.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_autonet_config.py deleted file mode 100644 index 372895ec0..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_autonet_config.py +++ /dev/null @@ -1,56 +0,0 @@ - -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode -import traceback - -class ForAutoNetConfig(SubPipelineNode): - def fit(self, pipeline_config, autonet, instance, data_manager, run_id, task_id): - for config_file in self.get_config_files(pipeline_config): - try: - self.sub_pipeline.fit_pipeline(pipeline_config=pipeline_config, - autonet=autonet, instance=instance, data_manager=data_manager, - autonet_config_file=config_file, run_id=run_id, task_id=task_id) - except Exception as e: - print(e) - traceback.print_exc() - return dict() - - def get_pipeline_config_options(self): - options = [ - ConfigOption("autonet_configs", default=None, type='directory', list=True, required=True), - ConfigOption("autonet_config_root", default=ConfigFileParser.get_autonet_home(), type='directory'), - ConfigOption("autonet_config_slice", default=None, type=str) - ] - return options - - @staticmethod - def get_config_files(pipeline_config, parse_slice=True): - config_files = pipeline_config['autonet_configs'] - if pipeline_config['autonet_config_root'] is not None: - config_files = [os.path.join(pipeline_config['autonet_config_root'], config) if not os.path.isabs(config) else config for config in config_files] - - autonet_config_slice = ForAutoNetConfig.parse_slice(pipeline_config['autonet_config_slice']) - if autonet_config_slice is not None and parse_slice: - return config_files[autonet_config_slice] - - return config_files - - @staticmethod - def parse_slice(splice_string): - if (splice_string is None): - return None - - split = splice_string.split(":") - if len(split) == 1: - start = int(split[0]) if split[0] != "" else 0 - stop = (int(split[0]) + 1) if split[0] != "" else None - step = 1 - elif len(split) == 2: - start = int(split[0]) if split[0] != "" else 0 - stop = int(split[1]) if split[1] != "" else None - step = 1 - elif len(split) == 3: - start = int(split[0]) if split[0] != "" else 0 - stop = int(split[1]) if split[1] != "" else None - step = int(split[2]) if split[2] != "" else 1 - return slice(start, stop, step) diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_instance.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_instance.py deleted file mode 100644 index dd04fea54..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_instance.py +++ /dev/null @@ -1,91 +0,0 @@ -import os -import logging -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode -import traceback - -class ForInstance(SubPipelineNode): - def fit(self, pipeline_config, task_id, run_id): - instances = 
self.get_instances(pipeline_config, instance_slice=self.parse_slice(pipeline_config["instance_slice"])) - for instance in instances: - try: - self.sub_pipeline.fit_pipeline(pipeline_config=pipeline_config, instance=instance, run_id=run_id, task_id=task_id) - except Exception as e: - print(e) - traceback.print_exc() - return dict() - - def get_pipeline_config_options(self): - options = [ - ConfigOption("instances", default=None, type='directory', required=True), - ConfigOption("instance_slice", default=None, type=str), - ConfigOption("dataset_root", default=ConfigFileParser.get_autonet_home(), type='directory'), - ConfigOption("multiple_datasets_indices", default=None, type=int, list=True), - ] - return options - - @staticmethod - def get_instances(benchmark_config, instances_must_exist=True, instance_slice=None): - # get list of instances - instances = [] - if os.path.isfile(benchmark_config["instances"]): - with open(benchmark_config["instances"], "r") as instances_file: - if os.path.splitext(benchmark_config['instances'])[1] == '.json': - import json - datasets = [make_path(path, benchmark_config["dataset_root"]) for path in json.load(instances_file)] - instances.append(datasets if benchmark_config['multiple_datasets_indices'] is None else [datasets[i] for i in benchmark_config['multiple_datasets_indices']]) - else: - for line in instances_file: - if line.strip().startswith("openml"): - instances.append(line.strip()) - continue - - if line.strip().startswith("["): - datasets = [make_path(path, benchmark_config["dataset_root"]) for path in line.strip(' []\n').split(',')] - instances.append(datasets if benchmark_config['multiple_datasets_indices'] is None else [datasets[i] for i in benchmark_config['multiple_datasets_indices']]) - continue - - instance = os.path.abspath(os.path.join(benchmark_config["dataset_root"], line.strip())) - if os.path.isfile(instance) or os.path.isdir(instance): - instances.append(instance) - else: - if not instances_must_exist: - instances.append(instance) - logging.getLogger('benchmark').warning(str(instance) + " does not exist") - elif os.path.isdir(benchmark_config["instances"]): - for root, directories, filenames in os.walk(benchmark_config["instances"]): - for filename in filenames: - instances.append(os.path.join(root,filename)) - if instance_slice is not None: - return instances[instance_slice] - return instances - - @staticmethod - def parse_slice(splice_string): - if (splice_string is None): - return None - - split = splice_string.split(":") - if len(split) == 1: - start = int(split[0]) if split[0] != "" else 0 - stop = (int(split[0]) + 1) if split[0] != "" else None - step = 1 - elif len(split) == 2: - start = int(split[0]) if split[0] != "" else 0 - stop = int(split[1]) if split[1] != "" else None - step = 1 - elif len(split) == 3: - start = int(split[0]) if split[0] != "" else 0 - stop = int(split[1]) if split[1] != "" else None - step = int(split[2]) if split[2] != "" else 1 - return slice(start, stop, step) - - -def make_path(path, root): - path = path.strip() - if not os.path.isabs(path): - path = os.path.join(root, path) - if os.path.exists(path): - return os.path.abspath(path) - return None diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_run.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_run.py deleted file mode 100644 index 69b57e34d..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/for_run.py +++ /dev/null @@ -1,44 +0,0 @@ -import logging -from autoPyTorch.utils.config.config_option import 
ConfigOption -from autoPyTorch.pipeline.base.sub_pipeline_node import SubPipelineNode -import traceback - -class ForRun(SubPipelineNode): - def fit(self, pipeline_config, autonet, data_manager, instance, run_id, task_id): - for run_number in self.parse_range(pipeline_config['run_number_range'], pipeline_config['num_runs']): - try: - logging.getLogger('benchmark').info("Start run " + str(run_id) + "_" + str(run_number)) - self.sub_pipeline.fit_pipeline(pipeline_config=pipeline_config, - data_manager=data_manager, instance=instance, - run_number=run_number, run_id=run_id, task_id=task_id) - except Exception as e: - print(e) - traceback.print_exc() - return dict() - - def get_pipeline_config_options(self): - options = [ - ConfigOption("num_runs", default=1, type=int), - ConfigOption("run_number_range", default=None, type=str) - ] - return options - - @staticmethod - def parse_range(range_string, fallback): - if (range_string is None): - return range(fallback) - - split = range_string.split(":") - if len(split) == 1: - start = int(split[0]) if split[0] != "" else 0 - stop = (int(split[0]) + 1) if split[0] != "" else fallback - step = 1 - elif len(split) == 2: - start = int(split[0]) if split[0] != "" else 0 - stop = int(split[1]) if split[1] != "" else fallback - step = 1 - elif len(split) == 3: - start = int(split[0]) if split[0] != "" else 0 - stop = int(split[1]) if split[1] != "" else fallback - step = int(split[2]) if split[2] != "" else 1 - return range(start, stop, step) diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/prepare_result_folder.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/prepare_result_folder.py deleted file mode 100644 index 27a9ee627..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/prepare_result_folder.py +++ /dev/null @@ -1,117 +0,0 @@ -import os -import logging -from ConfigSpace.read_and_write import json as cs_json, pcs_new as cs_pcs -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates -from autoPyTorch.utils.modify_config_space import remove_constant_hyperparameter - -class PrepareResultFolder(PipelineNode): - - def fit(self, pipeline_config, data_manager, instance, - autonet, run_number, run_id, task_id): - - instance_name, run_name = get_names(instance, run_id, run_number) - run_result_dir = get_run_result_dir(pipeline_config, instance, run_id, run_number, autonet) - instance_run_id = str(run_name) + "-" + str(instance_name) - instance_run_id = '_'.join(instance_run_id.split(':')) - - autonet.autonet_config = None #clean results of last fit - autonet.update_autonet_config(task_id=task_id, run_id=instance_run_id, result_logger_dir=run_result_dir) - - if (task_id not in [-1, 1]): - return { 'result_dir': run_result_dir } - - if not os.path.exists(run_result_dir): - try: - os.makedirs(run_result_dir) - except Exception as e: - print(e) - - - logging.getLogger('benchmark').debug("Create config and info files for current run " + str(run_name)) - - instance_info = dict() - instance_info['path'] = instance - instance_info['is_classification'] = data_manager.is_classification - instance_info['is_multilabel'] = data_manager.is_multilabel - instance_info['instance_shape'] = data_manager.X_train.shape - instance_info['categorical_features'] = data_manager.categorical_features - - autonet_config = autonet.get_current_autonet_config() - if 
autonet_config["hyperparameter_search_space_updates"] is not None: - autonet_config["hyperparameter_search_space_updates"].save_as_file( - os.path.join(run_result_dir, "hyperparameter_search_space_updates.txt")) - - if 'user_updates_config' in pipeline_config: - user_updates_config = pipeline_config['user_updates_config'] - if user_updates_config: - from shutil import copyfile - copyfile(user_updates_config, os.path.join(run_result_dir, 'user_updates_config.csv')) - - self.write_config_to_file(run_result_dir, "instance.info", instance_info) - self.write_config_to_file(run_result_dir, "benchmark.config", pipeline_config) - self.write_config_to_file(run_result_dir, "autonet.config", autonet_config) - - # save refit config - add indent and sort keys - if 'refit_config' in pipeline_config and pipeline_config['refit_config'] is not None: - import json - with open(pipeline_config['refit_config'], 'r') as f: - refit_config = json.loads(f.read()) - with open(os.path.join(run_result_dir, 'refit_config.json'), 'w+') as f: - f.write(json.dumps(refit_config, indent=4, sort_keys=True)) - - # save search space - search_space = autonet.pipeline.get_hyperparameter_search_space(**autonet_config) - with open(os.path.join(run_result_dir, "configspace.json"), "w") as f: - f.write(cs_json.write(search_space)) - - # save search space without constants - used by bohb - as pcs (simple) - simplified_search_space, _ = remove_constant_hyperparameter(search_space) - with open(os.path.join(run_result_dir, "configspace_simple.pcs"), "w") as f: - f.write(cs_pcs.write(simplified_search_space)) - - return { 'result_dir': run_result_dir } - - - def write_config_to_file(self, folder, filename, config): - do_not_write = ["hyperparameter_search_space_updates"] - with open(os.path.join(folder, filename), "w") as f: - f.write("\n".join([(key + '=' + str(value)) for (key, value) in sorted(config.items(), key=lambda x: x[0]) if not key in do_not_write])) - - def get_pipeline_config_options(self): - options = [ - ConfigOption('result_dir', default=None, type='directory', required=True), - ConfigOption('name', default=None, type=str, required=True) - ] - return options - -def get_names(instance, run_id, run_number): - if isinstance(instance, list): - for p in instance: - if not os.path.exists(p): - raise Exception('Invalid path: ' + str(p)) - instance_name = "-".join(sorted([os.path.split(p)[1].split(".")[0] for p in instance])) - if len(instance_name) > 40: - instance_name = "-".join(sorted([os.path.split(q)[1] for q in sorted(set(os.path.split(p)[0] for p in instance))] + [str(len(instance))])) - else: - instance_name = os.path.basename(instance).split(".")[0] - - run_name = "run_" + str(run_id) + "_" + str(run_number) - - return "_".join(instance_name.split(':')), run_name - -def get_run_result_dir(pipeline_config, instance, run_id, run_number, autonet): - instance_name, run_name = get_names(instance, run_id, run_number) - autonet_config = autonet.get_current_autonet_config() - benchmark_name = '_'.join(pipeline_config['name'].split(' ')) - - if 'refit_config' not in pipeline_config or pipeline_config['refit_config'] is None: - benchmark_name += "[{0}_{1}]".format(int(autonet_config['min_budget']), int(autonet_config['max_budget'])) - elif 'refit_budget' not in pipeline_config or pipeline_config['refit_budget'] is None: - benchmark_name += "[refit_{0}]".format(int(autonet_config['max_budget'])) - else: - benchmark_name += "[refit_{0}]".format(int(pipeline_config['refit_budget'])) - - run_result_dir = 
os.path.join(pipeline_config['result_dir'], instance_name, benchmark_name, run_name) - return run_result_dir diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/read_instance_data.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/read_instance_data.py deleted file mode 100644 index 3a74c3fce..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/read_instance_data.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.data_management.data_manager import DataManager, ImageManager - -class ReadInstanceData(PipelineNode): - - def fit(self, pipeline_config, instance): - # Get data manager for train, val, test data - if pipeline_config['problem_type'] in ['feature_classification', 'feature_multilabel', 'feature_regression']: - dm = DataManager(verbose=pipeline_config["data_manager_verbose"]) - if pipeline_config['test_instances'] is not None: - dm_test = DataManager(verbose=pipeline_config["data_manager_verbose"]) - else: - dm = ImageManager(verbose=pipeline_config["data_manager_verbose"]) - if pipeline_config['test_instances'] is not None: - dm_test = ImageManager(verbose=pipeline_config["data_manager_verbose"]) - - # Read data - if pipeline_config['test_instances'] is not None: - # Use given test set - dm.read_data(instance, - is_classification=(pipeline_config["problem_type"] in ['feature_classification', 'feature_multilabel', 'image_classification']), - test_split=0.0) - dm_test.read_data(pipeline_config['test_instances'], - is_classification=(pipeline_config["problem_type"] in ['feature_classification', 'feature_multilabel', 'image_classification']), - test_split=0.0) - dm.X_test, dm.Y_test = dm_test.X_train, dm_test.Y_train.astype(np.int32) - - else: - # Use test split - dm.read_data(instance, - is_classification=(pipeline_config["problem_type"] in ['feature_classification', 'feature_multilabel', 'image_classification']), - test_split=pipeline_config["test_split"]) - - return {"data_manager": dm} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("test_split", default=0.0, type=float), - ConfigOption("problem_type", default='feature_classification', type=str, choices=['feature_classification', 'feature_multilabel', 'feature_regression', 'image_classification']), - ConfigOption("data_manager_verbose", default=False, type=to_bool), - ConfigOption("test_instances", default=None, type=str) - ] - return options diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/save_ensemble_logs.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/save_ensemble_logs.py deleted file mode 100644 index 964039454..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/save_ensemble_logs.py +++ /dev/null @@ -1,126 +0,0 @@ -from hpbandster.core.result import logged_results_to_HBS_result -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool, to_list -from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector -from autoPyTorch.pipeline.nodes import OneHotEncoding -from autoPyTorch.pipeline.nodes.ensemble import build_ensemble, read_ensemble_prediction_file -from hpbandster.core.result import logged_results_to_HBS_result -from autoPyTorch.utils.ensemble import filter_nan_predictions -from copy import copy -import os -import logging -import math -import numpy as np -import json -import traceback 
-import time - -class SaveEnsembleLogs(PipelineNode): - - def fit(self, pipeline_config, autonet, result_dir): - if not pipeline_config["enable_ensemble"]: - return dict() - save_ensemble_logs(pipeline_config, autonet, result_dir) - save_ensemble_logs(pipeline_config, autonet, result_dir, ensemble_size=1, log_filename="test_result.json") - return dict() - - def get_pipeline_config_options(self): - options = [ - ConfigOption('num_ensemble_evaluations', default=100, type=int) - ] - return options - - -def save_ensemble_logs(pipeline_config, autonet, result_dir, ensemble_size=None, log_filename=None): - # prepare some variables - autonet_config = autonet.get_current_autonet_config() - metrics = autonet.pipeline[MetricSelector.get_name()].metrics - optimize_metric = metrics[autonet_config["optimize_metric"]] - y_transform = autonet.pipeline[OneHotEncoding.get_name()].complete_y_tranformation - result = logged_results_to_HBS_result(result_dir) - filename = os.path.join(result_dir, "predictions_for_ensemble.npy") - test_filename = os.path.join(result_dir, "test_predictions_for_ensemble.npy") - ensemble_log_filename = os.path.join(result_dir, log_filename or "ensemble_log.json") - with open(ensemble_log_filename, "w") as f: pass - - # read the predictions - predictions, labels, model_identifiers, timestamps = read_ensemble_prediction_file(filename=filename, y_transform=y_transform) - assert(list(map(lambda x: x["finished"], timestamps)) == sorted(list(map(lambda x: x["finished"], timestamps)))) - test_data_available = False - try: - test_predictions, test_labels, test_model_identifiers, test_timestamps = read_ensemble_prediction_file(filename=test_filename, y_transform=y_transform) - test_predictions = [np.mean(p, axis=0) for p in test_predictions] - assert test_model_identifiers == model_identifiers and test_timestamps == timestamps, "Different model identifiers or timestamps in test file" - predictions, model_identifiers, timestamps, test_predictions = \ - filter_nan_predictions(predictions, model_identifiers, timestamps, test_predictions) - test_data_available = True - except IOError: - logging.getLogger("benchmark").info("No test data available when building ensemble logs.") - predictions, model_identifiers, timestamps = \ - filter_nan_predictions(predictions, model_identifiers, timestamps) - - # compute the prediction subset used to compute performance over time - start_time = min(map(lambda t: t["submitted"], timestamps)) - end_time = max(map(lambda t: t["finished"], timestamps)) - step = math.log(end_time - start_time) / (pipeline_config["num_ensemble_evaluations"] - 1) - steps = start_time + np.exp(np.arange(step, step * (pipeline_config["num_ensemble_evaluations"] + 1), step)) - subset_indices = [np.array([i for i, t in enumerate(timestamps) if t["finished"] < s]) for s in steps] - - # iterate over the subset to compute performance over time - last_finished = 0 - for subset in subset_indices: - if len(subset) == 0: - continue - - finished = max(timestamps[s]["finished"] for s in subset) - if finished == last_finished: - continue - last_finished = finished - subset_predictions = [np.copy(predictions[s]) for s in subset] - subset_model_identifiers = [model_identifiers[s] for s in subset] - - # build an ensemble with current subset and size - ensemble_start_time = time.time() - ensemble, _ = build_ensemble(result=result, - optimize_metric=optimize_metric, ensemble_size=ensemble_size or autonet_config["ensemble_size"], - all_predictions=subset_predictions, labels=labels, 
model_identifiers=subset_model_identifiers, - only_consider_n_best=autonet_config["ensemble_only_consider_n_best"], - sorted_initialization_n_best=autonet_config["ensemble_sorted_initialization_n_best"]) - - # get the ensemble predictions - ensemble_prediction = ensemble.predict(subset_predictions) - if test_data_available: - subset_test_predictions = [np.copy(test_predictions[s]) for s in subset] - test_ensemble_prediction = ensemble.predict(subset_test_predictions) - - # evaluate the metrics - metric_performances = dict() - for metric_name, metric in metrics.items(): - if metric_name != autonet_config["optimize_metric"] and metric_name not in autonet_config["additional_metrics"]: - continue - metric_performances[metric_name] = metric(ensemble_prediction, labels) - if test_data_available: - metric_performances["test_%s" % metric_name] = metric(test_ensemble_prediction, test_labels) - - ensemble_time = time.time() - ensemble_start_time - - # write to log - with open(ensemble_log_filename, "a") as f: - print(json.dumps([ - finished + ensemble_time, - metric_performances, - sorted([(identifier, weight) for identifier, weight in zip(ensemble.identifiers_, ensemble.weights_) if weight > 0], - key=lambda x: -x[1]), - [ensemble.identifiers_[i] for i in ensemble.indices_], - { - "ensemble_size": ensemble.ensemble_size, - "metric": autonet_config["optimize_metric"], - "sorted_initialization_n_best": ensemble.sorted_initialization_n_best, - "only_consider_n_best": ensemble.only_consider_n_best, - "bagging": ensemble.bagging, - "mode": ensemble.mode, - "num_input_models": ensemble.num_input_models_, - "trajectory": ensemble.trajectory_, - "train_score": ensemble.train_score_ - } - ]), file=f) \ No newline at end of file diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/save_results.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/save_results.py deleted file mode 100644 index 7891e80b5..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/save_results.py +++ /dev/null @@ -1,35 +0,0 @@ -import os -import json -import time -import logging - -from hpbandster.core.result import logged_results_to_HBS_result -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - -class SaveResults(PipelineNode): - - def fit(self, result_dir, fit_duration, test_score, fit_result, autonet, task_id): - if (task_id not in [-1, 1]): - time.sleep(60) - return dict() - - logging.getLogger('benchmark').info("Create and save summary") - - summary = { - "incumbent_config": fit_result["optimized_hyperparameter_config"], - "budget": fit_result["budget"], - "loss": fit_result["loss"], - "test_score": test_score, - "incumbent_config" : incumbent_config, - "info": fit_result["info"], - "duration": fit_duration, - } - - if "ensemble_configs" in fit_result: - summary["ensemble_configs"] = list(fit_result["ensemble_configs"].values()) - - # write as json - with open(os.path.join(result_dir, "summary.json"), "w") as f: - json.dump(summary, f) - - return dict() diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/set_autonet_config.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/set_autonet_config.py deleted file mode 100644 index b283bf613..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/set_autonet_config.py +++ /dev/null @@ -1,41 +0,0 @@ -import os -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode - 
-class SetAutoNetConfig(PipelineNode): - - def fit(self, pipeline_config, autonet, autonet_config_file, data_manager, instance): - parser = autonet.get_autonet_config_file_parser() - config = parser.read(autonet_config_file) - - if ('additional_logs' not in config): - config['additional_logs'] = ['test_result' if not pipeline_config['enable_ensemble'] else 'test_predictions_for_ensemble'] - - if (pipeline_config['use_dataset_metric'] and data_manager.metric is not None): - config['optimize_metric'] = data_manager.metric - if (pipeline_config['use_dataset_max_runtime'] and data_manager.max_runtime is not None): - config['max_runtime'] = data_manager.max_runtime - - if (pipeline_config['working_dir'] is not None): - config['working_dir'] = pipeline_config['working_dir'] - if (pipeline_config['network_interface_name'] is not None): - config['network_interface_name'] = pipeline_config['network_interface_name'] - - config['log_level'] = pipeline_config['log_level'] - - if data_manager.categorical_features: - config['categorical_features'] = data_manager.categorical_features - - # Note: PrepareResultFolder will make a small run dependent update of the autonet_config - autonet.update_autonet_config(**config) - return dict() - - def get_pipeline_config_options(self): - options = [ - ConfigOption("use_dataset_metric", default=False, type=to_bool), - ConfigOption("use_dataset_max_runtime", default=False, type=to_bool), - ConfigOption("working_dir", default=None, type='directory'), - ConfigOption("network_interface_name", default=None, type=str) - ] - return options diff --git a/autoPyTorch/utils/benchmarking/benchmark_pipeline/set_ensemble_config.py b/autoPyTorch/utils/benchmarking/benchmark_pipeline/set_ensemble_config.py deleted file mode 100644 index 6c1327cbe..000000000 --- a/autoPyTorch/utils/benchmarking/benchmark_pipeline/set_ensemble_config.py +++ /dev/null @@ -1,36 +0,0 @@ -from hpbandster.core.result import logged_results_to_HBS_result -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.benchmarking.benchmark_pipeline.prepare_result_folder import get_run_result_dir -from copy import copy -import os -import logging - -class SetEnsembleConfig(PipelineNode): - - def fit(self, pipeline_config, autonet, run_result_dir): - parser = autonet.get_autonet_config_file_parser() - autonet_config = parser.read(os.path.join(run_result_dir, "autonet.config")) - - if pipeline_config["ensemble_size"]: - autonet_config["ensemble_size"] = pipeline_config["ensemble_size"] - - if pipeline_config["ensemble_only_consider_n_best"]: - autonet_config["ensemble_only_consider_n_best"] = pipeline_config["ensemble_only_consider_n_best"] - - if pipeline_config["ensemble_sorted_initialization_n_best"]: - autonet_config["ensemble_sorted_initialization_n_best"] = pipeline_config["ensemble_sorted_initialization_n_best"] - - autonet.autonet_config = autonet_config - - return {"result_dir": run_result_dir, - "optimize_metric": autonet_config["optimize_metric"], - "trajectories": []} - - def get_pipeline_config_options(self): - options = [ - ConfigOption('ensemble_size', default=0, type=int), - ConfigOption('ensemble_only_consider_n_best', default=0, type=int), - ConfigOption('ensemble_sorted_initialization_n_best', default=0, type=int) - ] - return options \ No newline at end of file diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/__init__.py 
b/autoPyTorch/utils/benchmarking/visualization_pipeline/__init__.py deleted file mode 100644 index 7bc3ce18b..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from autoPyTorch.utils.benchmarking.visualization_pipeline.collect_trajectories import CollectAutoNetConfigTrajectories, CollectRunTrajectories, CollectInstanceTrajectories -from autoPyTorch.utils.benchmarking.visualization_pipeline.get_run_trajectories import GetRunTrajectories -from autoPyTorch.utils.benchmarking.visualization_pipeline.plot_trajectories import PlotTrajectories -from autoPyTorch.utils.benchmarking.visualization_pipeline.read_instance_info import ReadInstanceInfo -from autoPyTorch.utils.benchmarking.visualization_pipeline.visualization_settings import VisualizationSettings -from autoPyTorch.utils.benchmarking.visualization_pipeline.get_ensemble_trajectories import GetEnsembleTrajectories -from autoPyTorch.utils.benchmarking.visualization_pipeline.plot_summary import PlotSummary -from autoPyTorch.utils.benchmarking.visualization_pipeline.get_additional_trajectories import GetAdditionalTrajectories \ No newline at end of file diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/collect_trajectories.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/collect_trajectories.py deleted file mode 100644 index 8dcfe14c9..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/collect_trajectories.py +++ /dev/null @@ -1,117 +0,0 @@ -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.utils.benchmarking.benchmark_pipeline import ForRun, ForAutoNetConfig, ForInstance -from autoPyTorch.utils.benchmarking.benchmark_pipeline.prepare_result_folder import get_run_result_dir -import os -import logging -import traceback - -class CollectInstanceTrajectories(ForInstance): - def fit(self, pipeline_config, run_id_range): - instances = self.get_instances(pipeline_config, instance_slice=self.parse_slice(pipeline_config["instance_slice"])) - - result_trajectories = dict() - result_optimize_metrics = set() - - for instance in instances: - try: - pipeline_result = self.sub_pipeline.fit_pipeline(pipeline_config=pipeline_config, instance=instance, run_id_range=run_id_range) - - # merge the trajectories into one dict - instance_trajectories = pipeline_result["trajectories"] - optimize_metrics = pipeline_result["optimize_metrics"] - - for metric, config_trajectories in instance_trajectories.items(): - if metric not in result_trajectories: - result_trajectories[metric] = dict() - for config, run_trajectories in config_trajectories.items(): - if config not in result_trajectories[metric]: - result_trajectories[metric][config] = dict() - result_trajectories[metric][config][instance] = run_trajectories - result_optimize_metrics |= optimize_metrics - - except Exception as e: - print(e) - traceback.print_exc() - return {"trajectories": result_trajectories, - "optimize_metrics": result_optimize_metrics} - - -class CollectAutoNetConfigTrajectories(ForAutoNetConfig): - def fit(self, pipeline_config, instance, run_id_range): - logging.getLogger('benchmark').info('Collecting data for dataset ' + instance) - - result_trajectories = dict() - result_optimize_metrics = set() - - # iterate over all configs - for config_file in self.get_config_files(pipeline_config): - autonet_config_name = os.path.basename(config_file).split(".")[0] - pipeline_result = self.sub_pipeline.fit_pipeline(pipeline_config=pipeline_config, - instance=instance, - 
run_id_range=run_id_range, - autonet_config_file=config_file) - - # merge the trajectories into one dict - config_trajectories = pipeline_result["trajectories"] - optimize_metrics = pipeline_result["optimize_metrics"] - - for metric, run_trajectories in config_trajectories.items(): - if metric not in result_trajectories: - result_trajectories[metric] = dict() - result_trajectories[metric][autonet_config_name] = run_trajectories - - result_optimize_metrics |= optimize_metrics - return {"trajectories": result_trajectories, - "optimize_metrics": result_optimize_metrics} - - -class CollectRunTrajectories(ForRun): - def fit(self, pipeline_config, instance, run_id_range, autonet_config_file): - logging.getLogger('benchmark').info('Collecting data for autonet config ' + autonet_config_file) - - result_trajectories = dict() - optimize_metrics = set() - - run_number_range = self.parse_range(pipeline_config['run_number_range'], pipeline_config['num_runs']) - instance_result_dir = os.path.abspath(os.path.join(get_run_result_dir(pipeline_config, instance, autonet_config_file, "0", "0"), "..")) - if not os.path.exists(instance_result_dir): - logging.getLogger('benchmark').warn("Skipping %s because it no results exist" % instance_result_dir) - return {"trajectories": result_trajectories, "optimize_metrics": optimize_metrics} - run_result_dirs = next(os.walk(instance_result_dir))[1] - - # iterate over all run_numbers and run_ids - for run_result_dir in run_result_dirs: - run_id, run_number = parse_run_folder_name(run_result_dir) - run_result_dir = get_run_result_dir(pipeline_config, instance, autonet_config_file, run_id, run_number) - if (run_id_range is not None and run_id not in run_id_range) or run_number not in run_number_range: - continue - - run_result_dir = get_run_result_dir(pipeline_config, instance, autonet_config_file, run_id, run_number) - if not os.path.exists(run_result_dir): - logging.getLogger('benchmark').debug("Skipping " + run_result_dir + "because it does not exist") - continue - pipeline_result = self.sub_pipeline.fit_pipeline(pipeline_config=pipeline_config, - instance=instance, - run_number=run_number, - run_id=run_id, - autonet_config_file=autonet_config_file, - run_result_dir=run_result_dir) - run_trajectories = pipeline_result["trajectories"] - optimize_metric = pipeline_result["optimize_metric"] - - # merge the trajectories into one dict - for metric, trajectory in run_trajectories.items(): - if metric not in result_trajectories: - result_trajectories[metric] = list() - result_trajectories[metric].append(trajectory) - - if optimize_metric is not None: - optimize_metrics |= set([optimize_metric]) - return {"trajectories": result_trajectories, "optimize_metrics": optimize_metrics} - -def parse_run_folder_name(run_folder_name): - assert run_folder_name.startswith("run_") - run_folder_name = run_folder_name[4:].split("_") - run_id = int(run_folder_name[0]) - run_number = int(run_folder_name[1]) - return run_id, run_number \ No newline at end of file diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/get_additional_trajectories.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/get_additional_trajectories.py deleted file mode 100644 index 162537f84..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/get_additional_trajectories.py +++ /dev/null @@ -1,100 +0,0 @@ -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption -import json, os, csv, traceback - - -class 
GetAdditionalTrajectories(PipelineNode): - def fit(self, pipeline_config, trajectories, optimize_metrics, instance): - for additional_trajectory_path in pipeline_config["additional_trajectories"]: - - # open trajectory description file - with open(additional_trajectory_path, "r") as f: - trajectories_description = json.load(f) - config_name = trajectories_description["name"] - file_format = trajectories_description["format"] - assert file_format in trajectory_loaders.keys(), "Unsupported file type %s" % file_format - assert not any(config_name in t.keys() for t in trajectories.values()), "Invalid additional trajectory name %s" % config_name - - if instance not in trajectories_description["instances"]: - continue - - columns_description = trajectories_description["columns"] - - # process all trajectories for current instance - for path in trajectories_description["instances"][instance]: - path = os.path.join(os.path.dirname(additional_trajectory_path), path) - try: - trajectory_loaders[file_format](path, config_name, columns_description, trajectories) - except FileNotFoundError as e: - print("Trajectory could not be loaded: %s. Skipping." % e) - traceback.print_exc() - return {"trajectories": trajectories, - "optimize_metrics": optimize_metrics} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("additional_trajectories", default=list(), type="directory", list=True) - ] - return options - -def csv_trajectory_loader(path, config_name, columns_description, trajectories): - with open(path, "r", newline="") as f: - reader = csv.reader(f, delimiter=",") - next(reader) - - # parse the csv - times_finished = list() - performances = dict() - losses = dict() - for row in reader: - for i, col in enumerate(row): - if i == columns_description["time_column"]: - times_finished.append(max(0, float(col))) - if str(i) in columns_description["metric_columns"].keys(): - log_name = columns_description["metric_columns"][str(i)]["name"] - transform = columns_description["metric_columns"][str(i)]["transform"] \ - if "transform" in columns_description["metric_columns"][str(i)] else "x" - loss_transform = columns_description["metric_columns"][str(i)]["loss_transform"] \ - if "loss_transform" in columns_description["metric_columns"][str(i)] else "x" - - if log_name not in performances: - performances[log_name] = list() - losses[log_name] = list() - - performances[log_name].append(eval_expr(transform.replace("x", col))) - losses[log_name].append(eval_expr(loss_transform.replace("x", col))) - - # add data to the other trajectories - for log_name in performances.keys(): - if log_name not in trajectories: - trajectories[log_name] = dict() - if config_name not in trajectories[log_name]: - trajectories[log_name][config_name] = list() - trajectories[log_name][config_name].append({ - "times_finished": sorted(times_finished), - "values": list(zip(*sorted(zip(times_finished, performances[log_name]))))[1], - "losses": list(zip(*sorted(zip(times_finished, losses[log_name]))))[1] - }) - -trajectory_loaders = {"csv": csv_trajectory_loader} - -import ast -import operator as op - -# supported operators -operators = {ast.Add: op.add, ast.Sub: op.sub, ast.Mult: op.mul, - ast.Div: op.truediv, ast.Pow: op.pow, ast.BitXor: op.xor, - ast.USub: op.neg} - -def eval_expr(expr): - return eval_(ast.parse(expr, mode='eval').body) - -def eval_(node): - if isinstance(node, ast.Num): # - return node.n - elif isinstance(node, ast.BinOp): # - return operators[type(node.op)](eval_(node.left), eval_(node.right)) - elif 
isinstance(node, ast.UnaryOp): # e.g., -1 - return operators[type(node.op)](eval_(node.operand)) - else: - raise TypeError(node) diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/get_ensemble_trajectories.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/get_ensemble_trajectories.py deleted file mode 100644 index 4f1bbd440..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/get_ensemble_trajectories.py +++ /dev/null @@ -1,70 +0,0 @@ -from hpbandster.core.result import logged_results_to_HBS_result -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool, to_list -from autoPyTorch.utils.benchmarking.benchmark_pipeline.prepare_result_folder import get_run_result_dir -from autoPyTorch.pipeline.nodes import OneHotEncoding, MetricSelector -from autoPyTorch.pipeline.nodes.ensemble import read_ensemble_prediction_file -from hpbandster.core.result import logged_results_to_HBS_result -from copy import copy -import os -import logging -import math -import numpy as np -import json - -class GetEnsembleTrajectories(PipelineNode): - - def fit(self, pipeline_config, autonet, run_result_dir, optimize_metric, trajectories): - ensemble_log_file = os.path.join(run_result_dir, "ensemble_log.json") - test_log_file = os.path.join(run_result_dir, "test_result.json") - if not pipeline_config["enable_ensemble"] or optimize_metric is None or \ - (not os.path.exists(ensemble_log_file) and not os.path.exists(test_log_file)): - return {"trajectories": trajectories, "optimize_metric": optimize_metric} - - try: - started = logged_results_to_HBS_result(run_result_dir).HB_config["time_ref"] - except: - return {"trajectories": trajectories, "optimize_metric": optimize_metric} - - metrics = autonet.pipeline[MetricSelector.get_name()].metrics - ensemble_trajectories = dict() - test_trajectories = dict() - if os.path.exists(ensemble_log_file): - ensemble_trajectories = get_ensemble_trajectories(ensemble_log_file, started, metrics) - if os.path.exists(test_log_file): - test_trajectories = get_ensemble_trajectories(test_log_file, started, metrics, prefix="", only_test=True) - - return {"trajectories": dict(trajectories, **ensemble_trajectories, **test_trajectories), "optimize_metric": optimize_metric} - - def get_pipeline_config_options(self): - options = [ - ConfigOption('enable_ensemble', default=False, type=to_bool) - ] - return options - -def get_ensemble_trajectories(ensemble_log_file, started, metrics, prefix="ensemble_", only_test=False): - ensemble_trajectories = dict() - with open(ensemble_log_file) as f: - for line in f: - finished, metric_values, _, _, _ = json.loads(line) - finished = finished["finished"] if isinstance(finished, dict) else finished - - for metric_name, metric_value in metric_values.items(): - if only_test and not metric_name.startswith("test_"): - continue - trajectory_name = prefix + metric_name - metric_obj = metrics[metric_name[5:]] if metric_name.startswith("test_") else metrics[metric_name] - - # save in trajectory - if trajectory_name not in ensemble_trajectories: - ensemble_trajectories[trajectory_name] = {"times_finished": [], "losses": [], "values": []} - ensemble_trajectories[trajectory_name]["times_finished"].append(finished - started) - ensemble_trajectories[trajectory_name]["losses"].append(metric_obj.loss_transform(metric_value)) - ensemble_trajectories[trajectory_name]["values"].append(metric_value) - - for name, trajectory in ensemble_trajectories.items(): - 
for key, value_list in trajectory.items(): - if not isinstance(value_list, (list, tuple)): - continue - trajectory[key] = [value_list[0] if key != "times_finished" else 0] + value_list - return ensemble_trajectories diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/get_run_trajectories.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/get_run_trajectories.py deleted file mode 100644 index c3fae0c7d..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/get_run_trajectories.py +++ /dev/null @@ -1,80 +0,0 @@ -from hpbandster.core.result import logged_results_to_HBS_result -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.utils.benchmarking.benchmark_pipeline.prepare_result_folder import get_run_result_dir -from autoPyTorch.pipeline.nodes import MetricSelector, LogFunctionsSelector -from copy import copy -import os -import logging - -class GetRunTrajectories(PipelineNode): - - def fit(self, pipeline_config, autonet, run_result_dir): - parser = autonet.get_autonet_config_file_parser() - autonet_config = parser.read(os.path.join(run_result_dir, "autonet.config")) - metrics = autonet.pipeline[MetricSelector.get_name()].metrics - log_functions = autonet.pipeline[LogFunctionsSelector.get_name()].log_functions - - if pipeline_config["only_finished_runs"] and not os.path.exists(os.path.join(run_result_dir, "summary.json")): - logging.getLogger('benchmark').info('Skipping ' + run_result_dir + ' because the run is not finished yet') - return {"trajectories": dict(), "optimize_metric": None} - - trajectories = build_run_trajectories(run_result_dir, autonet_config, metrics, log_functions) - if "test_result" in trajectories: - trajectories["test_%s" % autonet_config["optimize_metric"]] = trajectories["test_result"] - return {"trajectories": trajectories, - "optimize_metric": autonet_config["optimize_metric"]} - - def get_pipeline_config_options(self): - options = [ - ConfigOption('only_finished_runs', default=True, type=to_bool), - ConfigOption('result_dir', default=None, type='directory', required=True), - ] - return options - - -def build_run_trajectories(results_folder, autonet_config, metrics, log_functions): - # parse results - try: - res = logged_results_to_HBS_result(results_folder) - incumbent_trajectory = res.get_incumbent_trajectory(bigger_is_better=False, non_decreasing_budget=False) - except: - print("No incumbent trajectory found") - return dict() - - # prepare - metric_name = autonet_config["optimize_metric"] - all_metrics = autonet_config["additional_metrics"] + [metric_name] - additional_metric_names = [("val_" + m, metrics[m]) for m in all_metrics] - additional_metric_names += [("train_" + m, metrics[m]) for m in all_metrics] - additional_metric_names += [(l, log_functions[l]) for l in autonet_config["additional_logs"]] - - # initialize incumbent trajectories - incumbent_trajectories = dict() - - # save incumbent trajectories - for name, obj in additional_metric_names: - tj = copy(incumbent_trajectory) - log_available = [name in run["info"] for config_id, budget in zip(tj["config_ids"], tj["budgets"]) - for run in res.get_runs_by_id(config_id) - if run["budget"] == budget] - tj["values"] = [run["info"][name] for config_id, budget in zip(tj["config_ids"], tj["budgets"]) - for run in res.get_runs_by_id(config_id) - if run["budget"] == budget and name in run["info"]] - tj["losses"] = [obj.loss_transform(x) for x in tj["values"]] - - for key, 
value_list in tj.items(): - if key in ["losses"]: - continue - tj[key] = [value for i, value in enumerate(value_list) if log_available[i]] - if tj["losses"]: - incumbent_trajectories[name] = tj - - # assume first random config has been evaluated already at time 0 - for name, trajectory in incumbent_trajectories.items(): - for key, value_list in trajectory.items(): - if not isinstance(value_list, (list, tuple)): - continue - trajectory[key] = [value_list[0] if key != "times_finished" else 0] + value_list - - return incumbent_trajectories diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/plot_summary.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/plot_summary.py deleted file mode 100644 index 8eb7e6f2d..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/plot_summary.py +++ /dev/null @@ -1,225 +0,0 @@ -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.benchmarking.visualization_pipeline.plot_trajectories import plot, label_rename, process_trajectory -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -import os -import logging -import numpy as np -import random -import heapq - -class PlotSummary(PipelineNode): - def fit(self, pipeline_config, trajectories, optimize_metrics): - if not pipeline_config["skip_ranking_plot"]: - plot(dict(pipeline_config, plot_type="losses", y_scale="linear"), trajectories, optimize_metrics, "ranking", process_summary) - if not pipeline_config["skip_average_plot"]: - plot(dict(pipeline_config, scale_uncertainty=0), trajectories, optimize_metrics, "average", process_summary) - plot(pipeline_config, trajectories, optimize_metrics, "sampled_average", trajectory_sampling) - return dict() - - def get_pipeline_config_options(self): - options = [ - ConfigOption('skip_ranking_plot', default=False, type=to_bool), - ConfigOption('skip_average_plot', default=False, type=to_bool) - ] - return options - - -def get_ranking_plot_values(values, names, agglomeration): - """ values = instance_name --> [((key=prefix + metric), value), ...] """ - keys = {instance: set([key for key, _ in v]) for instance, v in values.items()} - values = {instance: [(key, agglomeration([value for k, value in v if k == key])) for key in keys[instance]] for instance, v in values.items()} - sorted_values = {instance: sorted(map(lambda x: x[1], v)) for instance, v in values.items()} # configs sorted by value - ranks = {instance: {n: [sorted_values[instance].index(value) + 1 for config_name, value in v if config_name == n] for n in names} - for instance, v in values.items()} - ranks = to_dict([(n, r) for rank_dict in ranks.values() for n, r in rank_dict.items()]) - for name in names: - ranks[name] = [i for j in ranks[name] for i in j] # flatten - return ranks - - -def get_average_plot_values(values, names, agglomeration): - """ values = instance_name --> [((key=prefix + metric), value), ...] 
""" - result = dict() - for name in names: # prepare lists - result[name] = list() - for _, v in values.items(): # aggregate over all instances - for name, value in v: # aggregate over all runs - result[name].append(value) - return result - -get_plot_values_funcs = { - "ranking": get_ranking_plot_values, - "average": get_average_plot_values -} - -def process_summary(instance_name, metric_name, prefixes, trajectories, plot_type, agglomeration, scale_uncertainty, value_multiplier, cmap): - assert instance_name in get_plot_values_funcs.keys() - trajectory_names_to_prefix = {(("%s_%s" % (prefix, metric_name)) if prefix else metric_name): prefix - for prefix in prefixes} - trajectory_names = [t for t in trajectory_names_to_prefix.keys() if t in trajectories] - - # save pointers for each trajectory to iterate over them simultaneously - trajectory_pointers = {(config, name): {instance: ([0] * len(run_trajectories)) # name is trajectory name, which consists of prefix and metric - for instance, run_trajectories in instance_trajectories.items()} - for name in trajectory_names - for config, instance_trajectories in trajectories[name].items()} - trajectory_values = {(config, name): {instance: ([None] * len(run_trajectories)) - for instance, run_trajectories in instance_trajectories.items()} - for name in trajectory_names - for config, instance_trajectories in trajectories[name].items()} - heap = [(run_trajectories[j]["times_finished"][0], config, name, instance, j) - for name in trajectory_names - for config, instance_trajectories in trajectories[name].items() - for instance, run_trajectories in instance_trajectories.items() - for j in range(len(run_trajectories))] - heapq.heapify(heap) - - # data to plot - center = {(config, name): [] for name in trajectory_names for config in trajectories[name].keys()} - upper = {(config, name): [] for name in trajectory_names for config in trajectories[name].keys()} - lower = {(config, name): [] for name in trajectory_names for config in trajectories[name].keys()} - finishing_times = [] - plot_empty = True - - # iterate simultaneously over all trajectories with increasing finishing times - while heap: - - # get trajectory with lowest finishing time - times_finished, current_config, current_name, current_instance, trajectory_id = heapq.heappop(heap) - - # update trajectory values and pointers - current_trajectory = trajectories[current_name][current_config][current_instance][trajectory_id] - current_pointer = trajectory_pointers[(current_config, current_name)][current_instance][trajectory_id] - current_value = current_trajectory[plot_type][current_pointer] - - trajectory_values[(current_config, current_name)][current_instance][trajectory_id] = current_value - trajectory_pointers[(current_config, current_name)][current_instance][trajectory_id] += 1 - - if trajectory_pointers[(current_config, current_name)][current_instance][trajectory_id] < len(current_trajectory[plot_type]): - heapq.heappush(heap, - (current_trajectory["times_finished"][trajectory_pointers[(current_config, current_name)][current_instance][trajectory_id]], - current_config, current_name, current_instance, trajectory_id)) - - if any(value is None for _, instance_values in trajectory_values.items() for _, values in instance_values.items() for value in values): - continue - - if finishing_times and np.isclose(times_finished, finishing_times[-1]): - finishing_times.pop() - [x[k].pop() for x in [center, upper, lower] for k in x.keys()] - - # calculate ranks - values = to_dict([(instance, (config, name), 
value * value_multiplier) - for (config, name), instance_values in trajectory_values.items() - for instance, values in instance_values.items() - for value in values if value is not None]) - plot_values = get_plot_values_funcs[instance_name](values, center.keys(), np.median if agglomeration == "median" else np.mean) - - # populate plotting data - for key in center.keys(): - if not plot_values[key]: - center[key].append(float("nan")) - lower[key].append(float("nan")) - upper[key].append(float("nan")) - - center[key].append(np.mean(plot_values[key])) - lower[key].append(-1 * scale_uncertainty * np.std(plot_values[key]) + center[key][-1]) - upper[key].append(scale_uncertainty * np.std(plot_values[key]) + center[key][-1]) - finishing_times.append(times_finished) - plot_empty = False - - # do the plotting - plot_data = dict() - for i, (config, name) in enumerate(center.keys()): - prefix = trajectory_names_to_prefix[name] - label = ("%s: %s" % (prefix, config)) if prefix else config - color = cmap(i / len(center)) - plot_data[label] = { - "individual_trajectory": None, - "individual_times_finished": None, - "color": color, - "linestyle": "-", - "center": center[(config, name)], - "lower": lower[(config, name)], - "upper": upper[(config, name)], - "finishing_times": finishing_times - } - return plot_empty, plot_data - -def to_dict(tuple_list): - result = dict() - for v in tuple_list: - a = v[0] - b = v[1:] - if len(b) == 1: - b = b[0] - if a not in result: - result[a] = list() - result[a].append(b) - return result - - -def trajectory_sampling(instance_name, metric_name, prefixes, trajectories, plot_type, agglomeration, scale_uncertainty, value_multiplier, cmap, num_samples=1000): - averaged_trajectories = dict() - - # sample #num_samples average trajectories - for i in range(num_samples): - sampled_trajectories = dict() - - for p, prefix in enumerate(prefixes): - trajectory_name = ("%s_%s" % (prefix, metric_name)) if prefix else metric_name - config_trajectories = trajectories[trajectory_name] - if trajectory_name not in sampled_trajectories: - sampled_trajectories[trajectory_name] = dict() - - for config, instance_trajectories in config_trajectories.items(): - if config not in sampled_trajectories[trajectory_name]: - sampled_trajectories[trajectory_name][config] = list() - - # for each instance choose a random trajectory over the runs - for instance, run_trajectories in instance_trajectories.items(): - run_trajectory = random.choice(run_trajectories) - sampled_trajectories[trajectory_name][config].append(run_trajectory) - - # compute the average over the instances - plot_empty, plot_data = process_trajectory( - instance_name="prepare_sampled_%s_(%s/%s)" % (instance_name, i, num_samples), - metric_name=metric_name, - prefixes=prefixes, - trajectories=sampled_trajectories, - plot_type=plot_type, - agglomeration=agglomeration, - scale_uncertainty=0, - value_multiplier=value_multiplier, - cmap=cmap - ) - - if plot_empty: - continue - - # save the average trajectories - for label, d in plot_data.items(): - prefix, config = label.split(": ") if ": " in label else ("", label) - trajectory_name = ("%s_%s" % (prefix, metric_name)) if prefix else metric_name - - if trajectory_name not in averaged_trajectories: - averaged_trajectories[trajectory_name] = dict() - if config not in averaged_trajectories[trajectory_name]: - averaged_trajectories[trajectory_name][config] = list() - - averaged_trajectories[trajectory_name][config].append({ - "times_finished": d["finishing_times"], - plot_type: d["center"], - }) - 
- # compute mean and stddev over the averaged trajectories - return process_trajectory( - instance_name=instance_name, - metric_name=metric_name, - prefixes=prefixes, - trajectories=averaged_trajectories, - plot_type=plot_type, - agglomeration="mean", - scale_uncertainty=scale_uncertainty, - value_multiplier=1, - cmap=cmap - ) diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/plot_trajectories.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/plot_trajectories.py deleted file mode 100644 index 90a9f401e..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/plot_trajectories.py +++ /dev/null @@ -1,206 +0,0 @@ -import os -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -import numpy as np -import logging -import json -import heapq - -class PlotTrajectories(PipelineNode): - - def fit(self, pipeline_config, trajectories, optimize_metrics, instance): - if not pipeline_config["skip_dataset_plots"]: - plot(pipeline_config, trajectories, optimize_metrics, instance, process_trajectory) - return {"trajectories": trajectories, "optimize_metrics": optimize_metrics} - - - def get_pipeline_config_options(self): - options = [ - ConfigOption('plot_logs', default=None, type='str', list=True), - ConfigOption('output_folder', default=None, type='directory'), - ConfigOption('agglomeration', default='mean', choices=['mean', 'median']), - ConfigOption('scale_uncertainty', default=1, type=float), - ConfigOption('font_size', default=12, type=int), - ConfigOption('prefixes', default=["val"], list=True, choices=["", "train", "val", "test", "ensemble", "ensemble_test"]), - ConfigOption('label_rename', default=False, type=to_bool), - ConfigOption('skip_dataset_plots', default=False, type=to_bool), - ConfigOption('plot_markers', default=False, type=to_bool), - ConfigOption('plot_individual', default=False, type=to_bool), - ConfigOption('plot_type', default="values", type=str, choices=["values", "losses"]), - ConfigOption('xscale', default='log', type=str), - ConfigOption('yscale', default='linear', type=str), - ConfigOption('xmin', default=None, type=float), - ConfigOption('xmax', default=None, type=float), - ConfigOption('ymin', default=None, type=float), - ConfigOption('ymax', default=None, type=float), - ConfigOption('value_multiplier', default=1, type=float) - ] - return options - - -def plot(pipeline_config, trajectories, optimize_metrics, instance, process_fnc): - import matplotlib.pyplot as plt - from matplotlib.backends.backend_pdf import PdfPages - extension = "pdf" - - plot_logs = pipeline_config['plot_logs'] or optimize_metrics - output_folder = pipeline_config['output_folder'] - instance_name = os.path.basename(instance).split(".")[0] - - if output_folder and not os.path.exists(output_folder): - os.mkdir(output_folder) - - # iterate over all incumbent trajectories for each metric - for i, metric_name in enumerate(plot_logs): - - # prepare pdf - if output_folder is not None: - pdf_destination = os.path.join(output_folder, instance_name + '_' + metric_name + '.' 
+ extension) - pp = PdfPages(pdf_destination) - - # create figure - figure = plt.figure(i) - plot_empty, plot_data = process_fnc(instance_name=instance_name, - metric_name=metric_name, - prefixes=pipeline_config["prefixes"], - trajectories=trajectories, - plot_type=pipeline_config["plot_type"], - agglomeration=pipeline_config["agglomeration"], - scale_uncertainty=pipeline_config['scale_uncertainty'], - value_multiplier=pipeline_config['value_multiplier'], - cmap=plt.get_cmap('jet')) - if plot_empty: - logging.getLogger('benchmark').warn('Not showing empty plot for ' + instance) - plt.close(figure) - continue - - plot_trajectory(plot_data=plot_data, - instance_name=instance_name, - metric_name=metric_name, - font_size=pipeline_config["font_size"], - do_label_rename=pipeline_config['label_rename'], - plt=plt, - plot_individual=pipeline_config["plot_individual"], - plot_markers=pipeline_config["plot_markers"], - plot_type=pipeline_config["plot_type"]) - - plt.xscale(pipeline_config["xscale"]) - plt.yscale(pipeline_config["yscale"]) - plt.xlim((pipeline_config["xmin"], pipeline_config["xmax"])) - plt.ylim((pipeline_config["ymin"], pipeline_config["ymax"])) - - # show or save - if output_folder is None: - logging.getLogger('benchmark').info('Showing plot for ' + instance) - plt.show() - else: - logging.getLogger('benchmark').info('Saving plot for ' + instance + ' at ' + pdf_destination) - pp.savefig(figure) - pp.close() - plt.close(figure) - - -def process_trajectory(instance_name, metric_name, prefixes, trajectories, plot_type, agglomeration, scale_uncertainty, value_multiplier, cmap): - # iterate over the incumbent trajectories of the different runs - linestyles = ['-', '--', '-.', ':'] - plot_empty = True - plot_data = dict() - for p, prefix in enumerate(prefixes): - trajectory_name = ("%s_%s" % (prefix, metric_name)) if prefix else metric_name - linestyle = linestyles[p % len(linestyles)] - if trajectory_name not in trajectories: - continue - - config_trajectories = trajectories[trajectory_name] - for i, (config_name, trajectory) in enumerate(config_trajectories.items()): - color = cmap((i *len(prefixes) + p) / (len(config_trajectories) * len(prefixes))) - - trajectory_pointers = [0] * len(trajectory) # points to current entry of each trajectory - trajectory_values = [None] * len(trajectory) # list of current values of each trajectory - individual_trajectories = [[] for _ in range(len(trajectory))] - individual_times_finished = [[] for _ in range(len(trajectory))] - heap = [(trajectory[j]["times_finished"][0], j) for j in range(len(trajectory))] - heapq.heapify(heap) - # progress = 0 - # total = sum(len(trajectory[j]["times_finished"]) for j in range(len(trajectory))) - - # data to plot - center = [] - lower = [] - upper = [] - finishing_times = [] - # print("Calculate plot data for instance %s and trajectory %s and config %s" % (instance_name, trajectory_name, config_name)) - - # iterate simultaneously over all trajectories with increasing finishing times - while heap: - - # get trajectory with lowest finishing times - times_finished, trajectory_id = heapq.heappop(heap) - current_trajectory = trajectory[trajectory_id] - - # update trajectory values and pointers - trajectory_values[trajectory_id] = current_trajectory[plot_type][trajectory_pointers[trajectory_id]] - individual_trajectories[trajectory_id].append(trajectory_values[trajectory_id]) - individual_times_finished[trajectory_id].append(times_finished) - trajectory_pointers[trajectory_id] += 1 - if trajectory_pointers[trajectory_id] < 
len(current_trajectory[plot_type]): - heapq.heappush(heap, - (trajectory[trajectory_id]["times_finished"][trajectory_pointers[trajectory_id]], trajectory_id) - ) - - # progress += 1 - # print("Progress:", (progress / total) * 100, " " * 20, end="\r" if progress != total else "\n") - - # populate plotting data - if any(v is None for v in trajectory_values): - continue - if finishing_times and np.isclose(times_finished, finishing_times[-1]): - [x.pop() for x in [center, upper, lower, finishing_times]] - values = [v * value_multiplier for v in trajectory_values if v is not None] - if agglomeration == "median": - center.append(np.median(values)) - lower.append(np.percentile(values, int(50 - scale_uncertainty * 25))) - upper.append(np.percentile(values, int(50 + scale_uncertainty * 25))) - elif agglomeration == "mean": - center.append(np.mean(values)) - lower.append(-1 * scale_uncertainty * np.std(values) + center[-1]) - upper.append(scale_uncertainty * np.std(values) + center[-1]) - finishing_times.append(times_finished) - plot_empty = False - label = ("%s: %s" % (prefix, config_name)) if prefix else config_name - - plot_data[label] = { - "individual_trajectory": individual_trajectories, - "individual_times_finished": individual_times_finished, - "color": color, - "linestyle": linestyle, - "center": center, - "lower": lower, - "upper": upper, - "finishing_times": finishing_times - } - return plot_empty, plot_data - -def plot_trajectory(plot_data, instance_name, metric_name, font_size, do_label_rename, plt, plot_individual, plot_markers, plot_type): - for label, d in plot_data.items(): - - if do_label_rename: - label = label_rename(label) - - if plot_individual and d["individual_trajectories"] and d["individual_times_finished"]: - for individual_trajectory, individual_times_finished in zip(d["individual_trajectories"], d["individual_times_finished"]): - plt.step(individual_times_finished, individual_trajectory, color=d["color"], where='post', linestyle=":", marker="x" if plot_markers else None) - - plt.step(d["finishing_times"], d["center"], color=d["color"], label=label, where='post', linestyle=d["linestyle"], marker="o" if plot_markers else None) - plt.fill_between(d["finishing_times"], d["lower"], d["upper"], step="post", color=[(d["color"][0], d["color"][1], d["color"][2], 0.5)]) - plt.xlabel('wall clock time [s]', fontsize=font_size) - plt.ylabel('incumbent %s %s' % (metric_name, plot_type), fontsize=font_size) - plt.legend(loc='best', prop={'size': font_size}) - plt.title(instance_name, fontsize=font_size) - -LABEL_RENAME = dict() -def label_rename(label): - if label not in LABEL_RENAME: - rename = input("Rename label %s to? 
(Leave empty for no rename) " % label) - LABEL_RENAME[label] = rename if rename else label - return LABEL_RENAME[label] diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/read_instance_info.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/read_instance_info.py deleted file mode 100644 index fc6029286..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/read_instance_info.py +++ /dev/null @@ -1,33 +0,0 @@ -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.config.config_option import ConfigOption, to_bool, to_tuple -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.data_management.data_manager import DataManager -from autoPyTorch.utils.benchmarking.benchmark_pipeline.prepare_result_folder import get_run_result_dir -from autoPyTorch.utils.benchmarking.benchmark_pipeline.read_instance_data import ReadInstanceData -from autoPyTorch.data_management.data_manager import ProblemType -import os - -class ReadInstanceInfo(ReadInstanceData): - - def fit(self, pipeline_config, run_result_dir): - - instance_file_config_parser = ConfigFileParser([ - ConfigOption(name='path', type='directory', required=True), - ConfigOption(name='is_classification', type=to_bool, required=True), - ConfigOption(name='is_multilabel', type=to_bool, required=True), - ConfigOption(name='num_features', type=int, required=True), - ConfigOption(name='categorical_features', type=bool, required=True, list=True), - ConfigOption(name='instance_shape', type=to_tuple, required=True) - ]) - instance_info = instance_file_config_parser.read(os.path.join(run_result_dir, 'instance.info')) - instance_info = instance_file_config_parser.set_defaults(instance_info) - - dm = DataManager() - if instance_info["is_multilabel"]: - dm.problem_type = ProblemType.FeatureMultilabel - elif instance_info["is_classification"]: - dm.problem_type = ProblemType.FeatureClassification - else: - dm.problem_type = ProblemType.FeatureClassification - - return {'instance_info': instance_info, 'data_manager': dm} \ No newline at end of file diff --git a/autoPyTorch/utils/benchmarking/visualization_pipeline/visualization_settings.py b/autoPyTorch/utils/benchmarking/visualization_pipeline/visualization_settings.py deleted file mode 100644 index edee7d236..000000000 --- a/autoPyTorch/utils/benchmarking/visualization_pipeline/visualization_settings.py +++ /dev/null @@ -1,25 +0,0 @@ -import logging - -from autoPyTorch.utils.config.config_option import ConfigOption -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.utils.benchmarking.benchmark_pipeline.benchmark_settings import BenchmarkSettings - -class VisualizationSettings(BenchmarkSettings): - def fit(self, pipeline_config): - logging.getLogger('benchmark').info("Start visualization") - - logger = logging.getLogger('benchmark') - logger.setLevel(self.logger_settings[pipeline_config['log_level']]) - - # log level for autonet is set in SetAutoNetConfig - - return { 'run_id_range': pipeline_config['run_id_range']} - - def get_pipeline_config_options(self): - options = [ - ConfigOption("run_id_range", type=str, default=None), - ConfigOption("log_level", default="info", type=str, choices=list(self.logger_settings.keys())), - ConfigOption("benchmark_name", default=None, type=str, required=True) - ] - return options diff --git a/autoPyTorch/utils/common.py b/autoPyTorch/utils/common.py new file mode 
100644 index 000000000..3143ced11 --- /dev/null +++ b/autoPyTorch/utils/common.py @@ -0,0 +1,135 @@ +import hashlib +from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Type, Union + +import numpy as np + +import pandas as pd + +import scipy.sparse + +import torch +from torch.utils.data.dataloader import default_collate + + +class FitRequirement(NamedTuple): + """ + A class that holds inputs required to fit a pipeline. Also indicates whether + requirements have to be user-specified or are generated by the pipeline itself. + + Attributes: + name: The name of the variable expected in the input dictionary + supported_types: An iterable of all types that are supported + user_defined: If false, this requirement does not have to be given to the pipeline + """ + + name: str + supported_types: Iterable[Type] + user_defined: bool + dataset_property: bool + + def __str__(self) -> str: + """ + String representation for the requirements + """ + return "Name: %s | Supported types: %s | User defined: %s | Dataset property: %s" % ( + self.name, self.supported_types, self.user_defined, self.dataset_property) + + +def replace_prefix_in_config_dict(config: Dict[str, Any], prefix: str, replace: str = "") -> Dict[str, Any]: + """ + Replace the prefix in all keys with the specified replacement string (the empty string by + default to remove the prefix from the key). The function makes sure that the prefix is a proper config + prefix by checking if it ends with ":"; if not, it appends ":" to the prefix. + + :param config: config dictionary where the prefix of the keys should be replaced + :param prefix: prefix to be replaced in each key + :param replace: the string to replace the prefix with + :return: updated config dictionary + """ + # make sure that prefix ends with the config separator ":" + if not prefix.endswith(":"): + prefix = prefix + ":" + # only replace first occurrence of the prefix + return {k.replace(prefix, replace, 1): v + for k, v in config.items() if + k.startswith(prefix)} + + +def custom_collate_fn(batch: List) -> List[Optional[torch.Tensor]]: + """ + In the case of not providing a y tensor, in a + dataset of form {X, y}, y would be None. + + This custom collate function allows yielding + None data for functions that require only features, + like predict. + + Args: + batch (List): a batch from a dataset + + Returns: + List[Optional[torch.Tensor]] + """ + + items = list(zip(*batch)) + + # The feature will always be available + items[0] = default_collate(items[0]) + if None in items[1]: + items[1] = list(items[1]) + else: + items[1] = default_collate(items[1]) + return items + + +def replace_string_bool_to_bool(dictionary: Dict[str, Any]) -> Dict[str, Any]: + """ + Utility function to replace string-typed bools with + bools when a dict is read from JSON + + Args: + dictionary (Dict[str, Any]) + Returns: + Dict[str, Any] + """ + for key, item in dictionary.items(): + if isinstance(item, str): + if item.lower() == "true": + dictionary[key] = True + elif item.lower() == "false": + dictionary[key] = False + return dictionary + + +def hash_array_or_matrix(X: Union[np.ndarray, pd.DataFrame]) -> str: + """ + Creates a hash for a given array.
+ Used for dataset name in case none is specified + Args: + X: (Union[np.ndarray, pd.DataFrame]) + data + + Returns: + (str): hash of the data as string + """ + m = hashlib.md5() + + if hasattr(X, "iloc"): + X = X.to_numpy() + + if scipy.sparse.issparse(X): + m.update(X.indices) + m.update(X.indptr) + m.update(X.data) + m.update(str(X.shape).encode('utf8')) + else: + if X.flags['C_CONTIGUOUS']: + m.update(X.data) + m.update(str(X.shape).encode('utf8')) + else: + X_tmp = np.ascontiguousarray(X.T) + m.update(X_tmp.data) + m.update(str(X_tmp.shape).encode('utf8')) + + hash = m.hexdigest() + return hash diff --git a/autoPyTorch/utils/config/config_condition.py b/autoPyTorch/utils/config/config_condition.py deleted file mode 100644 index f9fe7203f..000000000 --- a/autoPyTorch/utils/config/config_condition.py +++ /dev/null @@ -1,27 +0,0 @@ -class ConfigCondition(): - def __init__(self, name, check): - """Initialize the Condition - - Arguments: - name {str} -- Name of the condition. Will be displayed if condition is violated. - check {callable} -- takes a pipeline config and returns False, if config violates the condition, else True. - """ - - self.name = name - self.check = check - - def __call__(self, config): - if not self.check(config): - raise ValueError("Pipeline configuration condition violated: %s" % self.name) - - @staticmethod - def get_larger_condition(name, config_option_name1, config_option_name2): - def check(config): - return config[config_option_name1] > config[config_option_name2] - return ConfigCondition(name, check) - - @staticmethod - def get_larger_equals_condition(name, config_option_name1, config_option_name2): - def check(config): - return config[config_option_name1] >= config[config_option_name2] - return ConfigCondition(name, check) diff --git a/autoPyTorch/utils/config/config_file_parser.py b/autoPyTorch/utils/config/config_file_parser.py deleted file mode 100644 index e7d0acb59..000000000 --- a/autoPyTorch/utils/config/config_file_parser.py +++ /dev/null @@ -1,209 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os - -class ConfigFileParser(): - """ - Class to parse config files - """ - - def __init__(self, config_options=[], verbose=False): - """ - Initialize to ConfigFileParser. - - Parameters: - config_options: A list of ConfigOptions, which the ConfigFileParser should be able to parse - """ - self.config_options = {option.name: option for option in config_options} - self.verbose = verbose - - def add_option(self, option): - """ - Add a ConfigOption to the ConfigParser. - """ - self.config_options[option.name] = option - - @staticmethod - def read_key_values_from_file(filename, delimiter='='): - key_values = dict() - - if (filename is None): - return key_values - - # open the config file - with open(filename, "r") as configfile: - for line in configfile: - key, value = map(lambda x: x.strip(), line.split(delimiter)) - key_values[key] = value - return key_values - - def read(self, filename, key_values_dict=None, silent=False): - """ - Read a config file. - - Parameters: - filename: The file name of the config file. - - Result: - A dictionary containing the read values for the ConfigOptions. 
- """ - # parse benchmark.txt - autonet_home = self.get_autonet_home() - - key_values_dict = key_values_dict or ConfigFileParser.read_key_values_from_file(filename) - config = dict() - - # open the config file - for key, value in key_values_dict.items(): - if (key not in self.config_options): - if silent: - continue - raise ValueError("Config key '" + key + "' is not a valid autonet config option") - - option = self.config_options[key] - - # parse list configs - values = [value] - if option.list: - value = value.strip("[]") - if not value.strip(): - values = [] - else: - values = list(map(lambda x: x.strip("'\" "), value.split(","))) - - # convert the values - converted_values = [] - for value in values: - type_list = option.type if isinstance(option.type, list) else [option.type] - for type_conversion in type_list: - # convert relative directories to absolute ones - if type_conversion == "directory" and value == "None": - value = None - elif type_conversion == "directory" and not os.path.isabs(value): - value = os.path.abspath(os.path.join(autonet_home, value)) - elif isinstance(type_conversion, dict): - value = type_conversion[value] - elif type_conversion != "directory": - value = type_conversion(value) - converted_values.append(value) - config[key] = converted_values if option.list else converted_values[0] - return config - - def check_required(self, config): - """ - Check if the given config is required. - """ - for key, option in self.config_options.items(): - if option.required: - assert key in config, key + " must be specified" - - def set_defaults(self, config, throw_error_if_invalid=True): - """ - Set the default values for the ConfigOptions which are not specified in the given config. - """ - default_depends_configs = [] - for key, option in self.config_options.items(): - if key not in config: - if option.depends: - default_depends_configs.append((key, option.default)) - else: - config[key] = option.default - - # set the value for those configs, that have not been specified and whose default value depends on other values - for key, default in default_depends_configs: - config[key] = default(config) - - try: - self.check_validity(config) - except Exception as e: - print(e) - if throw_error_if_invalid: - raise e - return config - - def check_validity(self, config): - if (len(config) != len(self.config_options)): - additional_keys = set(config.keys()).difference(self.config_options.keys()) - if (len(additional_keys) > 0): - raise ValueError("The following unknown config options have been defined: " + str(additional_keys)) - missing_keys = set(self.config_options.keys()).difference(config.keys()) - if (len(missing_keys) > 0): - raise ValueError("The following config options have not been assigned: " + str(missing_keys)) - raise NotImplementedError() - - for option_name, option in self.config_options.items(): - if (option_name not in config): - raise ValueError("Config option '" + option_name + "' has not been assigned.") - - choices = option.choices - if (choices is None): - continue - - value = config[option_name] - if (option.list): - if (not isinstance(value, list)): - raise ValueError("Config option " + option_name + " has been assigned with value '" + str(value) + "', list required") - diff = set(value).difference(choices) - if (len(diff) > 0): - raise ValueError("Config option " + option_name + " contains following invalid values " + str(diff) + ", chose a subset of " + str(choices)) - else: - if (option.type is int or option.type is float): - if (value < choices[0] or value > 
choices[1]): - raise ValueError("Config option " + option_name + " has been assigned with value '" + str(value) + "' which is not in required interval [" + choices[0] + ", " + choices[1] + "]") - else: - if (value not in choices): - raise ValueError("Config option " + option_name + " has been assigned with value '" + str(value) + "', only values in " + str(choices) + " are allowed") - - def print_help(self, base_config=None, max_column_width=40): - columns = ["name", "default", "choices", "type"] - default = self.set_defaults(base_config or {}) - column_width = {c: len(c) for c in columns} - format_string = dict() - num_lines = dict() - - for option in self.config_options.values(): - num_lines[option] = 1 - for column in columns: - value = getattr(option, column) if column != "default" else default[option.name] - if isinstance(value, list) and len(value) > 0: - column_width[column] = max(column_width[column], - max(map(lambda x: len(str(x)) + 2, value))) - num_lines[option] = max(num_lines[option], len(value)) - elif isinstance(value, list): - column_width[column] = max(column_width[column], 2) - else: - column_width[column] = max(column_width[column], len(str(value))) - format_string[column] = "{0: <" + str(min(max_column_width, column_width[column]) + 1) + "}" - - for column in columns: - print(format_string[column].format(column), end="") - print() - print("=" * sum(map(lambda x: min(x, max_column_width) + 1, column_width.values()))) - - for option in sorted(self.config_options.values(), key=lambda x:x.name): - for i in range(num_lines[option]): - for column in columns: - value = getattr(option, column) if column != "default" else default[option.name] - if isinstance(value, list) and i < len(value): - prefix = "[" if i == 0 else " " - suffix = "]" if i == (len(value) - 1) else "," - print(format_string[column].format(prefix + str(value[i])[:max_column_width-2] + suffix), end="") - elif isinstance(value, list) and i == 0: - print(format_string[column].format("[]"), end="") - elif i == 0: - print(format_string[column].format(str(value)[:max_column_width]), end="") - else: - print(format_string[column].format(""), end="") - print() - if option.info is not None: - print("\tinfo:", option.info) - print("-" * sum(map(lambda x: min(x, max_column_width) + 1, column_width.values()))) - - @staticmethod - def get_autonet_home(): - """ Get the home directory of autonet """ - if "AUTONET_HOME" in os.environ: - return os.environ["AUTONET_HOME"] - return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) diff --git a/autoPyTorch/utils/config/config_option.py b/autoPyTorch/utils/config/config_option.py deleted file mode 100644 index 764de0c47..000000000 --- a/autoPyTorch/utils/config/config_option.py +++ /dev/null @@ -1,59 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import ast - -# TODO use ast per default in ConfigOption -# Transform a config to a bool. -def to_bool(value): - return value.lower() in ["1", "true", "yes", "y"] - -def to_list(value): - r = ast.literal_eval(value) - assert isinstance(r, list) - return r - -def to_tuple(value): - r = ast.literal_eval(value) - assert isinstance(r, tuple) - return r - -def to_dict(value): - r = ast.literal_eval(value) - assert isinstance(r, dict) - return r - -class ConfigOption(): - """ Options in a config file. A config file specifies values for ConfigOptions. 
""" - - def __init__(self, name, default=None, type=str, list=False, depends=False, required=False, choices=None, info=None): - """ - Initialize the ConfigOption. - - Parameters: - name: The name of the option. - default: The default value. - If it depends on the value of other options, you can provide a function, - which maps a dictionary of the other values to the default value. - type: The type of the option. - Might be the string "directory", if the option asks for a directoy. - Might be a dictionary or function, which maps strings to accepted values. - Might be a list, if multiple transformations need to be applied. - list: Whether the option expects a list of values. - depends: Whether the default depends on other values. - required: Whether this option must be set. - choices: possible values if string or bounds for numerical - None => no restrictions - """ - - self.name = name - self.default = default - self.type = type - self.list = list - self.depends = depends - self.required = required - self.choices = choices - self.info = info - - def __str__(self): - return str(self.name) + " \t Default: " + str(self.default) + " \t Choices: " + str(self.choices) + " \t Type: " + str(self.type) \ No newline at end of file diff --git a/autoPyTorch/utils/config_space_hyperparameter.py b/autoPyTorch/utils/config_space_hyperparameter.py deleted file mode 100644 index e3e0b115b..000000000 --- a/autoPyTorch/utils/config_space_hyperparameter.py +++ /dev/null @@ -1,27 +0,0 @@ -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH - - -def get_hyperparameter(hyper_type, name, value_range, log = False): - if isinstance(value_range, tuple) and len(value_range) == 2 and isinstance(value_range[1], bool) and \ - isinstance(value_range[0], (tuple, list)): - value_range, log = value_range - - if len(value_range) == 0: - raise ValueError(name + ': The range has to contain at least one element') - if len(value_range) == 1: - return CSH.Constant(name, int(value_range[0]) if isinstance(value_range[0], bool) else value_range[0]) - if len(value_range) == 2 and value_range[0] == value_range[1]: - return CSH.Constant(name, int(value_range[0]) if isinstance(value_range[0], bool) else value_range[0]) - if hyper_type == CSH.CategoricalHyperparameter: - return CSH.CategoricalHyperparameter(name, value_range) - if hyper_type == CSH.UniformFloatHyperparameter: - assert len(value_range) == 2, "Float HP range update for %s is specified by the two upper and lower values. %s given." %(name, len(value_range)) - return CSH.UniformFloatHyperparameter(name, lower=value_range[0], upper=value_range[1], log=log) - if hyper_type == CSH.UniformIntegerHyperparameter: - assert len(value_range) == 2, "Int HP range update for %s is specified by the two upper and lower values. %s given." 
%(name, len(value_range)) - return CSH.UniformIntegerHyperparameter(name, lower=value_range[0], upper=value_range[1], log=log) - raise ValueError('Unknown type: %s for hp %s' % (hyper_type, name) ) - -def add_hyperparameter(cs, hyper_type, name, value_range, log=False): - return cs.add_hyperparameter(get_hyperparameter(hyper_type, name, value_range, log)) diff --git a/autoPyTorch/utils/configspace_wrapper.py b/autoPyTorch/utils/configspace_wrapper.py deleted file mode 100644 index fc8561f2a..000000000 --- a/autoPyTorch/utils/configspace_wrapper.py +++ /dev/null @@ -1,48 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import pprint - -class ConfigWrapper(object): - delimiter = ':' - - def __init__(self, config_prefix, config): - """A wrapper for hyperparameter configs that are specified with a prefix (add_configspace(prefix=...)). - The wrapper will provide key access without having to know/specify the prefix of the respective hyperparameter. - - Arguments: - config_prefix {string} -- prefix of keys - config {dict} -- hyperparameter config - """ - - self.config_prefix = config_prefix + ConfigWrapper.delimiter - self.config = config - - def __getitem__(self, key): - if ((self.config_prefix + key) not in self.config): - pprint.pprint(self.config) - return self.config[self.config_prefix + key] - - def __iter__(self): - for k in self.config.__iter__(): - if not k.startswith(self.config_prefix): - continue - yield k[len(self.config_prefix):] - - def __str__(self): - return str(self.config) - - def __contains__(self, key): - return (self.config_prefix + key) in self.config - - def update(self, update_dict): - self.config.update({"%s%s" % (self.config_prefix, key) : value for key, value in update_dict.items()}) - - def get_dictionary(self): - result = dict() - config = self.config if isinstance(self.config, dict) else self.config.get_dictionary() - for key, value in config.items(): - if key.startswith(self.config_prefix): - result[key[len(self.config_prefix):]] = value - return result diff --git a/autoPyTorch/utils/ensemble.py b/autoPyTorch/utils/ensemble.py deleted file mode 100644 index 2052bc046..000000000 --- a/autoPyTorch/utils/ensemble.py +++ /dev/null @@ -1,259 +0,0 @@ -import os -import time -import numpy as np -import json -import math -import tempfile -import uuid -import asyncio -import multiprocessing -import signal -import logging -from autoPyTorch.components.ensembles.ensemble_selection import EnsembleSelection - -def build_ensemble(result, optimize_metric, - ensemble_size, all_predictions, labels, model_identifiers, - only_consider_n_best=0, sorted_initialization_n_best=0): - id2config = result.get_id2config_mapping() - ensemble_selection = EnsembleSelection(ensemble_size, optimize_metric, - only_consider_n_best=only_consider_n_best, sorted_initialization_n_best=sorted_initialization_n_best) - - # fit ensemble - ensemble_selection.fit(np.array(all_predictions), labels, model_identifiers) - ensemble_configs = dict() - for identifier in ensemble_selection.get_selected_model_identifiers(): - try: - ensemble_configs[tuple(identifier[:3])] = id2config[tuple(identifier[:3])]["config"] - except: - #TODO: Do this properly (baseline configs are not logged by bohb) - ensemble_configs[tuple(identifier[:3])] = {"model": "baseline"} - return ensemble_selection, ensemble_configs - - -def read_ensemble_prediction_file(filename, y_transform): - all_predictions = list() - all_timestamps = list() - labels = None - model_identifiers 
= list() - with open(filename, "rb") as f: - labels = np.load(f, allow_pickle=True) - labels, _ = y_transform(labels) - - while True: - try: - job_id, budget, timestamps = np.load(f, allow_pickle=True) - predictions = np.load(f, allow_pickle=True) - model_identifiers.append(job_id + (budget, )) - predictions = np.array(predictions) - all_predictions.append(predictions) - all_timestamps.append(timestamps) - except (EOFError, OSError): - break - return all_predictions, labels, model_identifiers, all_timestamps - - -class test_predictions_for_ensemble(): - def __init__(self, autonet, X_test, Y_test): - self.autonet = autonet - self.X_test = X_test - self.Y_test = Y_test - from autoPyTorch.core.api import AutoNet - self.predict = AutoNet.predict - - - def __call__(self, model, epochs): - if self.Y_test is None or self.X_test is None: - return float("nan") - - return self.predict(self.autonet, self.X_test, return_probabilities=True)[1], self.Y_test - -def combine_predictions(data, pipeline_kwargs, X, Y): - all_indices = None - all_predictions = None - for split, predictions in data.items(): - if (np.any(np.isnan(predictions))): - logging.getLogger("autonet").warn("Not saving predictions containing nans") - return None - indices = pipeline_kwargs[split]["valid_indices"] - assert len(predictions) == len(indices), "Different number of predictions and indices:" + str(len(predictions)) + "!=" + str(len(indices)) - all_indices = indices if all_indices is None else np.append(all_indices, indices) - all_predictions = predictions if all_predictions is None else np.vstack((all_predictions, predictions)) - argsort = np.argsort(all_indices) - sorted_predictions = all_predictions[argsort] - sorted_indices = all_indices[argsort] - - unique = uuid.uuid4() - tempfile.gettempdir() - with open(os.path.join(tempfile.gettempdir(), "autonet_ensemble_predictions_%s.npy" % unique), "wb") as f: - np.save(f, sorted_predictions) - with open(os.path.join(tempfile.gettempdir(), "autonet_ensemble_labels_%s.npy" % unique), "wb") as f: - np.save(f, Y[sorted_indices]) - host, port = pipeline_kwargs[0]["pipeline_config"]["ensemble_server_credentials"] - return host, port, unique - -def combine_test_predictions(data, pipeline_kwargs, X, Y): - predictions = [d[0] for d in data.values() if d == d] - labels = [d[1] for d in data.values() if d == d] - assert all(np.all(labels[0] == l) for l in labels[1:]) - assert len(predictions) == len(labels) - if len(predictions) == 0: - return None - - unique = uuid.uuid4() - tempfile.gettempdir() - with open(os.path.join(tempfile.gettempdir(), "autonet_ensemble_predictions_%s.npy" % unique), "wb") as f: - np.save(f, np.stack(predictions)) - with open(os.path.join(tempfile.gettempdir(), "autonet_ensemble_labels_%s.npy" % unique), "wb") as f: - np.save(f, labels[0]) - host, port = pipeline_kwargs[0]["pipeline_config"]["ensemble_server_credentials"] - return host, port, unique - -def filter_nan_predictions(predictions, *args): - nan_predictions = set([i for i, p in enumerate(predictions) if np.any(np.isnan(p))]) - return [ - [x for i, x in enumerate(vector) if i not in nan_predictions] if vector is not None else None - for vector in [predictions, *args] - ] - -async def serve_predictions(reader, writer): - data = await reader.read(1024) - name, unique = data.decode().split("_") - # logging.getLogger("autonet").info("Serve %s %s" % (name, unique)) - - with open(os.path.join(tempfile.gettempdir(), "autonet_ensemble_%s_%s.npy" % (name, unique)), "rb") as f: - while True: - buf = f.read(1024) - if 
not buf: - break - writer.write(buf) - os.remove(os.path.join(tempfile.gettempdir(), "autonet_ensemble_%s_%s.npy" % (name, unique))) - if name == "predictions" and os.path.exists(os.path.join(tempfile.gettempdir(), "autonet_ensemble_labels_%s.npy" % unique)): - os.remove(os.path.join(tempfile.gettempdir(), "autonet_ensemble_labels_%s.npy" % unique)) - await writer.drain() - writer.close() - -def _start_server(host, queue): - def shutdown(signum, stack): - raise KeyboardInterrupt - signal.signal(signal.SIGTERM, shutdown) - loop = asyncio.get_event_loop() - coro = asyncio.start_server(serve_predictions, host, 0, loop=loop) - server = loop.run_until_complete(coro) - host, port = server.sockets[0].getsockname() - queue.put((host, port)) - try: - loop.run_forever() - except KeyboardInterrupt: - pass - server.close() - loop.run_until_complete(server.wait_closed()) - loop.close() - # logging.getLogger("autonet").info("Ensemble Server has been shut down") - -def start_server(host): - queue = multiprocessing.Queue() - p = multiprocessing.Process(target=_start_server, args=(host, queue)) - p.start() - host, port = queue.get() - p.shutdown = p.terminate - return host, port, p - -class ensemble_logger(object): - def __init__(self, directory, overwrite): - self.start_time = time.time() - self.directory = directory - self.overwrite = overwrite - self.labels_written = False - self.test_labels_written = False - - self.file_name = os.path.join(directory, 'predictions_for_ensemble.npy') - self.test_file_name = os.path.join(directory, 'test_predictions_for_ensemble.npy') - - try: - with open(self.file_name, 'x') as fh: pass - except FileExistsError: - if overwrite: - with open(self.file_name, 'w') as fh: pass - else: - raise FileExistsError('The file %s already exists.'%self.file_name) - except: - raise - - try: - with open(self.test_file_name, 'x') as fh: pass - except FileExistsError: - if overwrite: - with open(self.test_file_name, 'w') as fh: pass - else: - raise FileExistsError('The file %s already exists.'%self.test_file_name) - except: - raise - - def new_config(self, *args, **kwargs): - pass - - async def save_remote_data(self, host, port, name, unique, f): - remote_reader, remote_writer = await asyncio.open_connection(host, port) - remote_writer.write(("%s_%s" % (name, unique)).encode()) - while not remote_reader.at_eof(): - f.write(await remote_reader.read(1024)) - remote_writer.close() - - def __call__(self, job): - if job.result is None: - return - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - if "predictions_for_ensemble" in job.result and job.result["predictions_for_ensemble"] is None and \ - "test_predictions_for_ensemble" in job.result and job.result["test_predictions_for_ensemble"] is not None: - host, port, unique = job.result["test_predictions_for_ensemble"] - with open("/dev/null", "wb") as f: - loop.run_until_complete(self.save_remote_data(host, port, "predictions", unique, f)) - - #logging.info(job.result.__repr__()) #TODO: delete - - if "predictions_for_ensemble" in job.result and job.result["predictions_for_ensemble"] is not None: - host, port, unique = job.result["predictions_for_ensemble"] - #logging.info("==> Saving preds...") # #TODO: delete - #if not self.labels_written: - # logging.info("==> (Labels)") #TODO: delete - with open(self.file_name, "ab") as f: - if not self.labels_written: - loop.run_until_complete(self.save_remote_data(host, port, "labels", unique, f)) - self.labels_written = True - np.save(f, np.array([job.id, job.kwargs['budget'], 
job.timestamps], dtype=object)) - loop.run_until_complete(self.save_remote_data(host, port, "predictions", unique, f)) - del job.result["predictions_for_ensemble"] - - if "baseline_predictions_for_ensemble" in job.result and job.result["baseline_predictions_for_ensemble"] is not None: - baseline_id = (int(job.result["info"]["baseline_id"]), 0, 0) - host, port, unique = job.result["baseline_predictions_for_ensemble"] - #logging.info("==> Saving baseline preds...") # #TODO: delete - with open(self.file_name, "ab") as f: - if not self.labels_written: - raise RuntimeError("Baseline predictions found but no labels logged yet.") - np.save(f, np.array([baseline_id, 0., job.timestamps], dtype=object)) - loop.run_until_complete(self.save_remote_data(host, port, "predictions", unique, f)) - del job.result["baseline_predictions_for_ensemble"] - - if "test_predictions_for_ensemble" in job.result and job.result["test_predictions_for_ensemble"] is not None: - host, port, unique = job.result["test_predictions_for_ensemble"] - with open(self.test_file_name, "ab") as f: - if not self.test_labels_written: - loop.run_until_complete(self.save_remote_data(host, port, "labels", unique, f)) - self.test_labels_written = True - np.save(f, np.array([job.id, job.kwargs['budget'], job.timestamps], dtype=object)) - loop.run_until_complete(self.save_remote_data(host, port, "predictions", unique, f)) - del job.result["test_predictions_for_ensemble"] - - if "baseline_test_predictions_for_ensemble" in job.result and job.result["baseline_test_predictions_for_ensemble"] is not None: - host, port, unique = job.result["baseline_test_predictions_for_ensemble"] - logging.info("==> Logging baseline test preds") - with open(self.test_file_name, "ab") as f: - if not self.test_labels_written: - raise RuntimeError("Baseline test predictions found but no labels logged yet.") - np.save(f, np.array([baseline_id, 0., job.timestamps], dtype=object)) - loop.run_until_complete(self.save_remote_data(host, port, "predictions", unique, f)) - del job.result["baseline_test_predictions_for_ensemble"] - loop.close() diff --git a/autoPyTorch/utils/hyperparameter_search_space_update.py b/autoPyTorch/utils/hyperparameter_search_space_update.py deleted file mode 100644 index 52d670945..000000000 --- a/autoPyTorch/utils/hyperparameter_search_space_update.py +++ /dev/null @@ -1,51 +0,0 @@ -import ast -import os - - -class HyperparameterSearchSpaceUpdate(): - def __init__(self, node_name, hyperparameter, value_range, log=False): - self.node_name = node_name - self.hyperparameter = hyperparameter - self.value_range = value_range - self.log = log - - def apply(self, pipeline, pipeline_config): - pipeline[self.node_name]._apply_search_space_update(name=self.hyperparameter, - new_value_range=self.value_range, - log=self.log) - -class HyperparameterSearchSpaceUpdates(): - def __init__(self, updates=[]): - self.updates = updates - - def apply(self, pipeline, pipeline_config): - for update in self.updates: - update.apply(pipeline, pipeline_config) - - def append(self, node_name, hyperparameter, value_range, log=False): - self.updates.append(HyperparameterSearchSpaceUpdate(node_name=node_name, - hyperparameter=hyperparameter, - value_range=value_range, - log=log)) - - def save_as_file(self, path): - with open(path, "w") as f: - for update in self.updates: - print(update.node_name, update.hyperparameter, str(update.value_range) + (" log" if update.log else ""), file=f) - - -def parse_hyperparameter_search_space_updates(updates_file): - if updates_file is None or 
os.path.basename(updates_file) == "None": - return None - with open(updates_file, "r") as f: - result = [] - for line in f: - if line.strip() == "": - continue - line = line.split() - node, hyperparameter, value_range = line[0], line[1], ast.literal_eval(line[2]) - assert isinstance(value_range, list) - log = len(line) == 4 and "log" == line[3] - result.append(HyperparameterSearchSpaceUpdate(node, hyperparameter, value_range, log)) - return HyperparameterSearchSpaceUpdates(result) - diff --git a/autoPyTorch/utils/loggers.py b/autoPyTorch/utils/loggers.py deleted file mode 100644 index b4ed1e65f..000000000 --- a/autoPyTorch/utils/loggers.py +++ /dev/null @@ -1,219 +0,0 @@ -import time, os, shutil -from hpbandster.core.result import json_result_logger - -class bohb_logger(json_result_logger): - def __init__(self, constant_hyperparameter, directory, overwrite=False): - super(bohb_logger, self).__init__(directory, overwrite) - self.constants = constant_hyperparameter - - - def new_config(self, config_id, config, config_info): - import json - if not config_id in self.config_ids: - self.config_ids.add(config_id) - - full_config = dict() - full_config.update(self.constants) - full_config.update(config) - - with open(self.config_fn, 'a') as fh: - fh.write(json.dumps([config_id, full_config, config_info])) - fh.write('\n') - - -class tensorboard_logger(object): - def __init__(self, pipeline_config, constant_hyperparameter, global_results_dir): - self.start_time = time.time() - - b = pipeline_config['max_budget'] - budgets = [] - while b >= pipeline_config['min_budget']: - budgets.append(int(b)) - b /= pipeline_config['eta'] - - self.incumbent_results = {b: 0 for b in budgets} - self.mean_results = {b: [0, 0] for b in budgets} - - self.constants = constant_hyperparameter - self.results_logged = 0 - self.seed = pipeline_config['random_seed'] - self.max_budget = pipeline_config['max_budget'] - self.global_results_dir = global_results_dir - - self.keep_only_incumbent_checkpoints = pipeline_config['keep_only_incumbent_checkpoints'] - - self.incumbent_configs_dir = os.path.join(pipeline_config['result_logger_dir'], 'incumbents') - self.status_dir = pipeline_config['result_logger_dir'] - self.run_name = '-'.join(pipeline_config['run_id'].split('-')[1:]) - os.makedirs(self.incumbent_configs_dir, exist_ok=True) - - - def new_config(self, config_id, config, config_info): - pass - - def __call__(self, job): - import json - import tensorboard_logger as tl - - id = job.id - budget = int(job.kwargs['budget']) - config = job.kwargs['config'] - # timestamps = job.timestamps - result = job.result - # exception = job.exception - - if result is None: - return - - self.results_logged += 1 - - tl.log_value('BOHB/all_results', result['loss'] * -1, self.results_logged) - - if budget not in self.incumbent_results or result['loss'] < self.incumbent_results[budget]: - self.incumbent_results[budget] = result['loss'] - - full_config = dict() - full_config.update(self.constants) - full_config.update(config) - - refit_config = dict() - refit_config['budget'] = budget - refit_config['seed'] = self.seed - - refit_config['incumbent_config_path'] = os.path.join(self.incumbent_configs_dir, 'config_' + str(budget) + '.json') - with open(refit_config['incumbent_config_path'], 'w+') as f: - f.write(json.dumps(full_config, indent=4, sort_keys=True)) - - with open(os.path.join(self.incumbent_configs_dir, 'result_' + str(budget) + '.json'), 'w+') as f: - f.write(json.dumps([job.id, job.kwargs['budget'], job.timestamps, job.result, 
job.exception], indent=4, sort_keys=True)) - - checkpoints, refit_config['dataset_order'] = get_checkpoints(result['info']) or ([],None) - refit_config['incumbent_checkpoint_paths'] = [] - for i, checkpoint in enumerate(checkpoints): - dest = os.path.join(self.incumbent_configs_dir, 'checkpoint_' + str(budget) + '_' + str(i) + '.pt' if len(checkpoints) > 1 else 'checkpoint_' + str(budget) + '.pt') - if os.path.exists(dest): - os.remove(dest) - if self.keep_only_incumbent_checkpoints: - shutil.move(checkpoint, dest) - else: - shutil.copy(checkpoint, dest) - refit_config['incumbent_checkpoint_paths'].append(dest) - - refit_path = os.path.join(self.incumbent_configs_dir, 'refit_config_' + str(budget) + '.json') - with open(refit_path, 'w+') as f: - f.write(json.dumps(refit_config, indent=4, sort_keys=True)) - - if budget >= self.max_budget and self.global_results_dir is not None: - import autoPyTorch.utils.thread_read_write as thread_read_write - import datetime - - dataset_names = sorted([os.path.splitext(os.path.split(info['dataset_path'])[1])[0] for info in result['info']]) - suffix = '' - if len(result['info']) > 1: - suffix += '+[' + ', '.join(dataset_names) + ']' - if budget > self.max_budget: - suffix += '+Refit' - - for info in result['info']: - thread_read_write.update_results(self.global_results_dir, { - 'name': os.path.splitext(os.path.split(info['dataset_path'])[1])[0] + suffix, - 'result': round(info['val_top1'], 2), - 'seed': self.seed, - 'refit_config': refit_path, - 'text': "{0}/{1} -- {2}".format( - round(info['val_datapoints'] * (info['val_top1'] / 100)), - info['val_datapoints'], - round(budget / len(result['info']))) - }) - - if self.keep_only_incumbent_checkpoints and get_checkpoints(result['info']): - for checkpoint in get_checkpoints(result['info'])[0]: - if os.path.exists(checkpoint): - os.remove(checkpoint) - - if budget not in self.mean_results: - self.mean_results[budget] = [result['loss'], 1] - else: - self.mean_results[budget][0] += result['loss'] - self.mean_results[budget][1] += 1 - - for b, loss in self.incumbent_results.items(): - tl.log_value('BOHB/incumbent_results_' + str(b), loss * -1, self.mean_results[b][1]) - - for b, (loss, n) in self.mean_results.items(): - tl.log_value('BOHB/mean_results_' + str(b), loss * -1 / n if n > 0 else 0, n) - - status = dict() - for b, loss in self.incumbent_results.items(): - budget_status = dict() - budget_status['incumbent'] = loss * -1 - mean_res = self.mean_results[b] - budget_status['mean'] = mean_res[0] / mean_res[1] * -1 if mean_res[1] > 0 else 0 - budget_status['configs'] = mean_res[1] - status['budget: ' + str(b)] = budget_status - - import datetime - status["runtime"] = str(datetime.timedelta(seconds=time.time() - self.start_time)) - - with open(os.path.join(self.status_dir, 'bohb_status.json'), 'w+') as f: - f.write(json.dumps(status, indent=4, sort_keys=True)) - - -def get_checkpoints(info): - if not isinstance(info, list): - if 'checkpoint' in info: - return [info['checkpoint']] - return [] - - checkpoints = [] - dataset_order = [] - for subinfo in info: - if 'checkpoint' in subinfo: - checkpoints.append(subinfo['checkpoint']) - dataset_order.append(subinfo['dataset_id']) - return checkpoints, dataset_order - -class combined_logger(object): - def __init__(self, *loggers): - self.loggers = loggers - - def new_config(self, config_id, config, config_info): - for logger in self.loggers: - logger.new_config(config_id, config, config_info) - - def __call__(self, job): - for logger in self.loggers: - logger(job) - 
-def get_incumbents(directory): - - incumbents = os.path.join(directory, 'incumbents') - - if not os.path.exists(incumbents): - return None - - import re - file_re = [ - re.compile('config_([0-9]+).json'), - re.compile('refit_config_([0-9]+).json'), - re.compile('result_([0-9]+).json'), - re.compile('checkpoint_([0-9]+).*.pt'), - ] - - incumbent_files = [[] for _ in range(len(file_re))] - for filename in sorted(os.listdir(incumbents)): - for i, reg in enumerate(file_re): - match = reg.match(filename) - - if match: - budget = int(match.group(1)) - inc_file = os.path.join(incumbents, filename) - incumbent_files[i].append([budget, inc_file]) - - return incumbent_files - - -def get_refit_config(directory): - _, refit_configs, _, _ = get_incumbents(directory) - refit_config = max(refit_configs, key=lambda x: x[0]) #get config of max budget - return refit_config[1] diff --git a/autoPyTorch/utils/logging.yaml b/autoPyTorch/utils/logging.yaml new file mode 100644 index 000000000..3d2c00ad1 --- /dev/null +++ b/autoPyTorch/utils/logging.yaml @@ -0,0 +1,56 @@ +--- +version: 1 +disable_existing_loggers: False +formatters: + simple: + format: '[%(levelname)s] [%(asctime)s:%(name)s] %(message)s' + +handlers: + console: + class: logging.StreamHandler + level: WARNING + formatter: simple + stream: ext://sys.stdout + + file_handler: + class: logging.FileHandler + level: DEBUG + formatter: simple + filename: autoPyTorch.log + + distributed_logfile: + class: logging.FileHandler + level: DEBUG + formatter: simple + filename: distributed.log + +root: + level: DEBUG + handlers: [console, file_handler] + +loggers: + + autoPyTorch.utils.backend: + level: DEBUG + handlers: [file_handler] + propagate: no + + smac.intensification.intensification.Intensifier: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.local_search.LocalSearch: + level: INFO + handlers: [file_handler, console] + + smac.optimizer.smbo.SMBO: + level: INFO + handlers: [file_handler, console] + + EnsembleBuilder: + level: DEBUG + handlers: [file_handler, console] + + distributed: + level: DEBUG + handlers: [distributed_logfile] diff --git a/autoPyTorch/utils/logging_.py b/autoPyTorch/utils/logging_.py new file mode 100644 index 000000000..3d639a47b --- /dev/null +++ b/autoPyTorch/utils/logging_.py @@ -0,0 +1,291 @@ +# -*- encoding: utf-8 -*- +import logging +import logging.config +import logging.handlers +import multiprocessing +import os +import pickle +import random +import select +import socketserver +import struct +import threading +from typing import Any, Dict, Optional, Type + +import yaml + + +def setup_logger( + output_dir: str, + filename: Optional[str] = None, + distributedlog_filename: Optional[str] = None, + logging_config: Optional[Dict] = None, +) -> None: + # logging_config must be a dictionary object specifying the configuration + # for the logging + if logging_config is None: + with open(os.path.join(os.path.dirname(__file__), 'logging.yaml'), 'r') as fh: + logging_config = yaml.safe_load(fh) + + if filename is None: + filename = logging_config['handlers']['file_handler']['filename'] + logging_config['handlers']['file_handler']['filename'] = os.path.join( + output_dir, filename + ) + + if distributedlog_filename is None: + distributedlog_filename = logging_config['handlers']['distributed_logfile']['filename'] + logging_config['handlers']['distributed_logfile']['filename'] = os.path.join( + output_dir, distributedlog_filename + ) + logging.config.dictConfig(logging_config) + + +def _create_logger(name: str) -> 
logging.Logger: + return logging.getLogger(name) + + +def get_named_client_logger( + name: str, + host: str = 'localhost', + port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, +) -> 'PicklableClientLogger': + logger = PicklableClientLogger( + name=name, + host=host, + port=port + ) + return logger + + +def _get_named_client_logger( + name: str, + host: str = 'localhost', + port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, +) -> logging.Logger: + """ + When working with a logging server, clients are expected to create a logger using + this method. For example, the automl object will create a master that waits + for records sent over TCP to localhost. + The ensemble builder will then instantiate a logger object that submits records + via a socket handler to the server. + We do not need to use any format as the server will render the msg as it + needs to. + Parameters + ---------- + name: (str) + the name of the logger, used to tag the messages in the main log + host: (str) + Address where the server will look for messages + port: (int) + Port used to communicate with the server + Returns + ------- + local_logger: a logger object that has a socket handler + """ + # Setup the logger configuration + # We add 'Client-' not only to identify that this is the client + # communication part of the logger, but to make sure we have + # a new singleton with the desired socket handlers + local_logger = _create_logger('Client-' + str(name)) + local_logger.propagate = False + local_logger.setLevel(logging.DEBUG) + + try: + # Ignore mypy: logging.handlers.SocketHandler has no attribute port. + # This is clearly not the case, yet mypy assumes so, + # even when using direct casting or getattr + ports = [getattr(handler, 'port', None + ) for handler in local_logger.handlers] # type: ignore[attr-defined] + except AttributeError: + # We do not want to log twice by adding the same handler multiple + # times, so we check which ports we already communicate with. + # This try block prevents errors from handlers that have no port + # attribute -- a scenario that is unlikely to happen + ports = [] + + if port not in ports: + socketHandler = logging.handlers.SocketHandler(host, port) + local_logger.addHandler(socketHandler) + + return local_logger + + +class PicklableClientLogger(object): + + def __init__(self, name: str, host: str, port: int): + self.name = name + self.host = host + self.port = port + self.logger = _get_named_client_logger( + name=name, + host=host, + port=port + ) + + def __getstate__(self) -> Dict[str, Any]: + """ + Method is called when pickle dumps an object. + + Returns + ------- + Dictionary, representing the object state to be pickled. Ignores + the self.logger field and only returns the logger name, host and port. + """ + return { + 'name': self.name, + 'host': self.host, + 'port': self.port, + } + + def __setstate__(self, state: Dict[str, Any]) -> None: + """ + Method is called when pickle loads an object. Retrieves the name, host + and port and creates a logger. + + Parameters + ---------- + state - dictionary, containing the logger name, host and port. 
+ + """ + self.name = state['name'] + self.host = state['host'] + self.port = state['port'] + self.logger = _get_named_client_logger( + name=self.name, + host=self.host, + port=self.port, + ) + + def debug(self, msg: str, *args: Any, **kwargs: Any) -> None: + self.logger.debug(msg, *args, **kwargs) + + def info(self, msg: str, *args: Any, **kwargs: Any) -> None: + self.logger.info(msg, *args, **kwargs) + + def warning(self, msg: str, *args: Any, **kwargs: Any) -> None: + self.logger.warning(msg, *args, **kwargs) + + def error(self, msg: str, *args: Any, **kwargs: Any) -> None: + self.logger.error(msg, *args, **kwargs) + + def exception(self, msg: str, *args: Any, **kwargs: Any) -> None: + self.logger.exception(msg, *args, **kwargs) + + def critical(self, msg: str, *args: Any, **kwargs: Any) -> None: + self.logger.critical(msg, *args, **kwargs) + + def log(self, level: int, msg: str, *args: Any, **kwargs: Any) -> None: + self.logger.log(level, msg, *args, **kwargs) + + def isEnabledFor(self, level: int) -> bool: + return self.logger.isEnabledFor(level) + + +class LogRecordStreamHandler(socketserver.StreamRequestHandler): + """Handler for a streaming logging request. + + This basically logs the record using whatever logging policy is + configured locally. + """ + + def handle(self) -> None: + """ + Handle multiple requests - each expected to be a 4-byte length, + followed by the LogRecord in pickle format. Logs the record + according to whatever policy is configured locally. + """ + while True: + chunk = self.connection.recv(4) # type: ignore[attr-defined] + if len(chunk) < 4: + break + slen = struct.unpack('>L', chunk)[0] + chunk = self.connection.recv(slen) # type: ignore[attr-defined] + while len(chunk) < slen: + chunk = chunk + self.connection.recv(slen - len(chunk)) # type: ignore[attr-defined] # noqa: E501 + obj = self.unPickle(chunk) + record = logging.makeLogRecord(obj) + self.handleLogRecord(record) + + def unPickle(self, data: Any) -> Any: + return pickle.loads(data) + + def handleLogRecord(self, record: logging.LogRecord) -> None: + # logname is defined in LogRecordSocketReceiver, + # yet mypy cannot see this. This is needed so that we can + # re-use the logging setup for the received + # records + if self.server.logname is not None: # type: ignore # noqa + name = self.server.logname # type: ignore # noqa + else: + name = record.name + logger = logging.getLogger(name) + # N.B. EVERY record gets logged. This is because Logger.handle + # is normally called AFTER logger-level filtering. If you want + # to do filtering, do it at the client end to save wasting + # cycles and network bandwidth! 
+ logger.handle(record) + + +def start_log_server( + host: str, + logname: str, + event: threading.Event, + port: multiprocessing.Value, + filename: str, + logging_config: Dict, + output_dir: str, +) -> None: + setup_logger(filename=filename, + logging_config=logging_config, + output_dir=output_dir) + + while True: + # Loop until we find a valid port + _port = random.randint(10000, 65535) + try: + receiver = LogRecordSocketReceiver( + host=host, + port=_port, + logname=logname, + event=event, + ) + with port.get_lock(): + port.value = _port + receiver.serve_until_stopped() + break + except OSError: + continue + + +class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): + """ + This class implement a entity that receives tcp messages on a given address + For further information, please check + https://docs.python.org/3/howto/logging-cookbook.html#configuration-server-example + """ + + allow_reuse_address = True + + def __init__( + self, + host: str = 'localhost', + port: int = logging.handlers.DEFAULT_TCP_LOGGING_PORT, + handler: Type[LogRecordStreamHandler] = LogRecordStreamHandler, + logname: Optional[str] = None, + event: threading.Event = None, + ): + socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) + self.timeout = 1 + self.logname = logname + self.event = event + + def serve_until_stopped(self) -> None: + while True: + rd, wr, ex = select.select([self.socket.fileno()], + [], [], + self.timeout) + if rd: + self.handle_request() + if self.event is not None and self.event.is_set(): + break diff --git a/autoPyTorch/utils/mem_test_thread.py b/autoPyTorch/utils/mem_test_thread.py deleted file mode 100644 index 14601d5c5..000000000 --- a/autoPyTorch/utils/mem_test_thread.py +++ /dev/null @@ -1,71 +0,0 @@ - - -import linecache -import os -import tracemalloc -from datetime import datetime -from queue import Queue, Empty -from threading import Thread -from time import sleep -from resource import getrusage, RUSAGE_SELF - - -class MemoryLogger(): - def __init__(self): - self.queue = None - self.monitor_thread = None - - def start(self, poll_interval=0.5): - self.queue = Queue() - self.monitor_thread = Thread(target=memory_monitor, args=(self.queue, poll_interval)) - self.monitor_thread.start() - - def stop(self): - self.queue.put('stop') - self.monitor_thread.join() - - -def memory_monitor(command_queue: Queue, poll_interval=1): - tracemalloc.start() - old_max = 0 - snapshot = None - while True: - try: - command_queue.get(timeout=poll_interval) - if snapshot is not None: - print(datetime.now()) - display_top(snapshot) - - return - except Empty: - max_rss = getrusage(RUSAGE_SELF).ru_maxrss - if max_rss > old_max: - old_max = max_rss - snapshot = tracemalloc.take_snapshot() - display_top(snapshot, limit=1) - print(datetime.now(), 'max RSS', old_max) - -def display_top(snapshot, key_type='lineno', limit=3): - snapshot = snapshot.filter_traces(( - tracemalloc.Filter(False, ""), - tracemalloc.Filter(False, ""), - )) - top_stats = snapshot.statistics(key_type) - - print("Top %s lines" % limit) - for index, stat in enumerate(top_stats[:limit], 1): - frame = stat.traceback[0] - # replace "/path/to/module/file.py" with "module/file.py" - filename = os.sep.join(frame.filename.split(os.sep)[-4:]) - print("#%s: %s:%s: %.1f KiB" - % (index, filename, frame.lineno, stat.size / 1024)) - line = linecache.getline(frame.filename, frame.lineno).strip() - if line: - print(' %s' % line) - - other = top_stats[limit:] - if other: - size = sum(stat.size for stat in other) - print("%s other: 
%.1f KiB" % (len(other), size / 1024)) - total = sum(stat.size for stat in top_stats) - print("Total allocated size: %.1f KiB" % (total / 1024)) \ No newline at end of file diff --git a/autoPyTorch/utils/modify_config_space.py b/autoPyTorch/utils/modify_config_space.py deleted file mode 100644 index a12335bc5..000000000 --- a/autoPyTorch/utils/modify_config_space.py +++ /dev/null @@ -1,242 +0,0 @@ -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -import copy - -def remove_constant_hyperparameter(cs): - constants = dict() - - hyperparameter_to_add = [] - for hyper in cs.get_hyperparameters(): - const, value = is_constant(hyper) - if const: - constants[hyper.name] = value - else: - hyperparameter_to_add.append(copy.copy(hyper)) - - for name in constants: - truncate_hyperparameter(cs, cs.get_hyperparameter(name)) - - cs._hyperparameter_idx = dict() - cs._idx_to_hyperparameter = dict() - cs._sort_hyperparameters() - cs._update_cache() - - return cs, constants - - -def is_constant(hyper): - if isinstance(hyper, CSH.Constant): - return True, hyper.value - - elif isinstance(hyper, CSH.UniformFloatHyperparameter) or isinstance(hyper, CSH.UniformIntegerHyperparameter): - if abs(hyper.upper - hyper.lower) < 1e-10: - return True, hyper.lower - - elif isinstance(hyper, CSH.CategoricalHyperparameter): - if len(hyper.choices) == 1: - return True, hyper.choices[0] - - return False, None - - -def override_hyperparameter(config_space, hyper): - import ConfigSpace.conditions as CSC - - for condition in config_space._children[hyper.name].values(): - subconditions = condition.components if isinstance(condition, CSC.AbstractConjunction) else [condition] - for subcondition in subconditions: - if subcondition.parent.name == hyper.name: - subcondition.parent = hyper - - for condition in config_space._parents[hyper.name].values(): - if condition is None: - continue # root - subconditions = condition.components if isinstance(condition, CSC.AbstractConjunction) else [condition] - for subcondition in subconditions: - if subcondition.child.name == hyper.name: - subcondition.child = hyper - - config_space._hyperparameters[hyper.name] = hyper - - -def update_conditions(config_space, parent): - import ConfigSpace.conditions as CSC - - if parent.name not in config_space._hyperparameters: - # already removed -> all condition already updated - return - - possible_values, is_value_range = get_hyperparameter_values(parent) - children = [config_space.get_hyperparameter(name) for name in config_space._children[parent.name]] - - for child in children: - if child.name not in config_space._children[parent.name]: - # already cut - continue - condition = config_space._children[parent.name][child.name] - - if isinstance(condition, CSC.AbstractConjunction): - is_and = isinstance(condition, CSC.AndConjunction) - state = 2 - - new_subconditions = [] - for subcondition in condition.components: - if subcondition.parent.name != parent.name: - new_subconditions.append(subcondition) - continue - substate = get_condition_state(subcondition, possible_values, is_value_range) - if substate == 0 and is_and and state == 2: - state = 0 - - if substate == 1 and not is_and and state == 2: - state = 1 - - if substate == 2: - new_subconditions.append(subcondition) - - else: - # condition is not relevant anymore - del config_space._children[parent.name][child.name] - del config_space._parents[child.name][parent.name] - for grand_parent, cond in config_space._parents[parent.name].items(): - if cond is None: - continue - cond_type = 
type(cond) - values, _ = get_hyperparameter_values(cond.parent) - # fake parent value first as it might be invalid atm and gets truncated later - new_condition = cond_type(child, cond.parent, values[0]) - new_condition.value = cond.value - config_space._children[grand_parent][child.name] = new_condition - config_space._parents[child.name][grand_parent] = new_condition - - if len(new_subconditions) == 0: - state = 1 if is_and else 0 # either everything was false or true - - if state == 2: - - if len(new_subconditions) == 1: - condition = new_subconditions[0] - config_space._children[condition.parent.name][child.name] = new_subconditions[0] - config_space._parents[child.name][condition.parent.name] = new_subconditions[0] - else: - condition.__init__(*new_subconditions) - - for subcondition in new_subconditions: - config_space._children[subcondition.parent.name][child.name] = condition - config_space._parents[child.name][subcondition.parent.name] = condition - - else: - state = get_condition_state(condition, possible_values, is_value_range) - - if state == 1: - del config_space._children[parent.name][child.name] - del config_space._parents[child.name][parent.name] - - for grand_parent, cond in config_space._parents[parent.name].items(): - if cond is None: - continue - cond_type = type(cond) - values, _ = get_hyperparameter_values(cond.parent) - # fake parent value first as it might be invalid atm and gets truncated later - new_condition = cond_type(child, cond.parent, values[0]) - new_condition.value = cond.value - config_space._children[grand_parent][child.name] = new_condition - config_space._parents[child.name][grand_parent] = new_condition - - if len(config_space._parents[child.name]) == 0: - config_space._conditionals.remove(child.name) - if state == 0: - truncate_hyperparameter(config_space, child) - - - - -def truncate_hyperparameter(config_space, hyper): - if hyper.name not in config_space._hyperparameters: - return - - parent_names = list(config_space._parents[hyper.name].keys()) - for parent_name in parent_names: - del config_space._children[parent_name][hyper.name] - - del config_space._parents[hyper.name] - del config_space._hyperparameters[hyper.name] - - if hyper.name in config_space._conditionals: - config_space._conditionals.remove(hyper.name) - - child_names = list(config_space._children[hyper.name].keys()) - for child_name in child_names: - truncate_hyperparameter(config_space, config_space.get_hyperparameter(child_name)) - - -def get_condition_state(condition, possible_values, is_range): - """ - 0: always false - 1: always true - 2: true or false - """ - import ConfigSpace.conditions as CSC - - c_val = condition.value - if isinstance(condition, CSC.EqualsCondition): - if is_range: - if approx(possible_values[0], possible_values[1]): - return 1 if approx(possible_values[0], c_val) else 0 - return 2 if c_val >= possible_values[0] and c_val <= possible_values[1] else 0 - else: - if len(possible_values) == 1: - return 1 if c_val == possible_values[0] else 0 - return 2 if c_val in possible_values else 0 - - if isinstance(condition, CSC.NotEqualsCondition): - if is_range: - if approx(possible_values[0], possible_values[1]): - return 0 if approx(possible_values[0], c_val) else 1 - return 2 if c_val >= possible_values[0] and c_val <= possible_values[1] else 1 - else: - if len(possible_values) == 1: - return 0 if c_val == possible_values[0] else 1 - return 2 if c_val in possible_values else 1 - - if isinstance(condition, CSC.GreaterThanCondition): # is_range has to be true - if 
c_val < possible_values[0]: - return 1 - if c_val >= possible_values[1]: - return 0 - return 2 - - if isinstance(condition, CSC.LessThanCondition): # is_range has to be true - if c_val <= possible_values[0]: - return 0 - if c_val > possible_values[1]: - return 1 - return 2 - - if isinstance(condition, CSC.InCondition): - inter = set(possible_values).intersection(set(c_val)) - if len(inter) == len(possible_values): - return 1 - if len(inter) == 0: - return 0 - return 2 - - -def approx(x, y): - return abs(x - y) < 1e-10 - -def get_hyperparameter_values(hyper): - """Returns list[choices/range] and bool[is value range] - """ - import ConfigSpace.hyperparameters as CSH - - if isinstance(hyper, CSH.CategoricalHyperparameter): - return hyper.choices, False - - if isinstance(hyper, CSH.NumericalHyperparameter): - return [hyper.lower, hyper.upper], True - - if isinstance(hyper, CSH.Constant): - return [hyper.value, hyper.value], True - - raise ValueError(str(type(hyper)) + ' is not supported') diff --git a/autoPyTorch/utils/modules.py b/autoPyTorch/utils/modules.py deleted file mode 100644 index 36504e0f5..000000000 --- a/autoPyTorch/utils/modules.py +++ /dev/null @@ -1,14 +0,0 @@ - -import torch.nn as nn - -class Reshape(nn.Module): - def __init__(self, size): - super(Reshape, self).__init__() - self.size = size - - def forward(self, x): - # import logging - # l = logging.getLogger('autonet') - # l.debug(x.shape) - # l.debug((x.reshape(-1, self.size)).shape) - return x.reshape(-1, self.size) \ No newline at end of file diff --git a/autoPyTorch/utils/pipeline.py b/autoPyTorch/utils/pipeline.py new file mode 100644 index 000000000..6820d2702 --- /dev/null +++ b/autoPyTorch/utils/pipeline.py @@ -0,0 +1,142 @@ +# -*- encoding: utf-8 -*- +from typing import Any, Dict, List, Optional + +from ConfigSpace.configuration_space import ConfigurationSpace + +from autoPyTorch.constants import ( + CLASSIFICATION_TASKS, + IMAGE_TASKS, + REGRESSION_TASKS, + STRING_TO_TASK_TYPES, + TABULAR_TASKS, +) +from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline +from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline +from autoPyTorch.pipeline.tabular_regression import TabularRegressionPipeline +from autoPyTorch.utils.common import FitRequirement + +__all__ = [ + 'get_dataset_requirements', + 'get_configuration_space' +] + + +def get_dataset_requirements(info: Dict[str, Any], + include_estimators: Optional[List[str]] = None, + exclude_estimators: Optional[List[str]] = None, + include_preprocessors: Optional[List[str]] = None, + exclude_preprocessors: Optional[List[str]] = None + ) -> List[FitRequirement]: + exclude = dict() + include = dict() + if include_preprocessors is not None and \ + exclude_preprocessors is not None: + raise ValueError('Cannot specify include_preprocessors and ' + 'exclude_preprocessors.') + elif include_preprocessors is not None: + include['feature_preprocessor'] = include_preprocessors + elif exclude_preprocessors is not None: + exclude['feature_preprocessor'] = exclude_preprocessors + + task_type: int = STRING_TO_TASK_TYPES[info['task_type']] + if include_estimators is not None and \ + exclude_estimators is not None: + raise ValueError('Cannot specify include_estimators and ' + 'exclude_estimators.') + elif include_estimators is not None: + if task_type in CLASSIFICATION_TASKS: + include['classifier'] = include_estimators + elif task_type in REGRESSION_TASKS: + include['regressor'] = include_estimators + else: + raise 
ValueError(info['task_type']) + elif exclude_estimators is not None: + if task_type in CLASSIFICATION_TASKS: + exclude['classifier'] = exclude_estimators + elif task_type in REGRESSION_TASKS: + exclude['regressor'] = exclude_estimators + else: + raise ValueError(info['task_type']) + + if task_type in REGRESSION_TASKS: + return _get_regression_dataset_requirements(info, include, exclude) + else: + return _get_classification_dataset_requirements(info, include, exclude) + + +def _get_regression_dataset_requirements(info: Dict[str, Any], include: Dict[str, List[str]], + exclude: Dict[str, List[str]]) -> List[FitRequirement]: + task_type = STRING_TO_TASK_TYPES[info['task_type']] + if task_type in TABULAR_TASKS: + fit_requirements = TabularRegressionPipeline( + dataset_properties=info, + include=include, + exclude=exclude + ).get_dataset_requirements() + return fit_requirements + else: + raise ValueError("Task_type not supported") + + +def _get_classification_dataset_requirements(info: Dict[str, Any], include: Dict[str, List[str]], + exclude: Dict[str, List[str]]) -> List[FitRequirement]: + task_type = STRING_TO_TASK_TYPES[info['task_type']] + + if task_type in TABULAR_TASKS: + return TabularClassificationPipeline( + dataset_properties=info, + include=include, exclude=exclude).\ + get_dataset_requirements() + elif task_type in IMAGE_TASKS: + return ImageClassificationPipeline( + dataset_properties=info, + include=include, exclude=exclude).\ + get_dataset_requirements() + else: + raise ValueError("Task_type not supported") + + +def get_configuration_space(info: Dict[str, Any], + include: Optional[Dict] = None, + exclude: Optional[Dict] = None, + ) -> ConfigurationSpace: + task_type: int = STRING_TO_TASK_TYPES[info['task_type']] + + if task_type in REGRESSION_TASKS: + return _get_regression_configuration_space(info, + include if include is not None else {}, + exclude if exclude is not None else {}, + ) + else: + return _get_classification_configuration_space(info, + include if include is not None else {}, + exclude if exclude is not None else {}, + ) + + +def _get_regression_configuration_space(info: Dict[str, Any], include: Dict[str, List[str]], + exclude: Dict[str, List[str]]) -> ConfigurationSpace: + if STRING_TO_TASK_TYPES[info['task_type']] in TABULAR_TASKS: + configuration_space = TabularRegressionPipeline( + dataset_properties=info, + include=include, + exclude=exclude + ).get_hyperparameter_search_space() + return configuration_space + else: + raise ValueError("Task_type not supported") + + +def _get_classification_configuration_space(info: Dict[str, Any], include: Dict[str, List[str]], + exclude: Dict[str, List[str]]) -> ConfigurationSpace: + if STRING_TO_TASK_TYPES[info['task_type']] in TABULAR_TASKS: + pipeline = TabularClassificationPipeline(dataset_properties=info, + include=include, exclude=exclude) + return pipeline.get_hyperparameter_search_space() + elif STRING_TO_TASK_TYPES[info['task_type']] in IMAGE_TASKS: + return ImageClassificationPipeline( + dataset_properties=info, + include=include, exclude=exclude).\ + get_hyperparameter_search_space() + else: + raise ValueError("Task_type not supported") diff --git a/autoPyTorch/utils/stopwatch.py b/autoPyTorch/utils/stopwatch.py new file mode 100644 index 000000000..f5369e6b6 --- /dev/null +++ b/autoPyTorch/utils/stopwatch.py @@ -0,0 +1,150 @@ +# -*- encoding: utf-8 -*- +"""Created on Dec 17, 2014. 
+@author: Katharina Eggensperger +@project: AutoML2015 +""" +import sys +import time +from collections import OrderedDict +from typing import Tuple + + +class TimingTask(object): + _cpu_tic = 0.0 + _cpu_tac = 0.0 + _cpu_dur = 0.0 + _wall_tic = 0.0 + _wall_tac = 0.0 + _wall_dur = 0.0 + + def __init__(self, name: str): + self._name = name + self._cpu_tic = time.process_time() + self._wall_tic = time.time() + + def stop(self) -> None: + if not self._cpu_tac: + self._cpu_tac = time.process_time() + self._wall_tac = time.time() + self._cpu_dur = self._cpu_tac - self._cpu_tic + self._wall_dur = self._wall_tac - self._wall_tic + else: + sys.stdout.write('Task has already stopped\n') + + @property + def name(self) -> str: + return self._name + + @property + def cpu_tic(self) -> float: + return self._cpu_tic + + @property + def cpu_tac(self) -> float: + return self._cpu_tac + + @property + def cpu_dur(self) -> float: + return self._cpu_dur + + @property + def wall_tic(self) -> float: + return self._wall_tic + + @property + def wall_tac(self) -> float: + return self._wall_tac + + @property + def wall_dur(self) -> float: + return self._wall_dur + + @property + def dur(self) -> Tuple[float, float]: + return self._cpu_dur, self._wall_dur + + +class StopWatch: + + """Class to collect all timing tasks.""" + + def __init__(self) -> None: + self._tasks = OrderedDict() + self._tasks['stopwatch_time'] = TimingTask('stopwatch_time') + + def insert_task(self, name: str, cpu_dur: float, wall_dur: float) -> None: + if name not in self._tasks: + self._tasks[name] = TimingTask(name) + self._tasks[name].stop() + self._tasks[name]._wall_dur = wall_dur + self._tasks[name]._cpu_dur = cpu_dur + + def start_task(self, name: str) -> None: + if name not in self._tasks: + self._tasks[name] = TimingTask(name) + + def wall_elapsed(self, name: str) -> float: + tmp = time.time() + if name in self._tasks: + if not self._tasks[name].wall_dur: + tsk_start = self._tasks[name].wall_tic + return tmp - tsk_start + else: + return self._tasks[name].wall_dur + return 0.0 + + def cpu_elapsed(self, name: str) -> float: + tmp = time.process_time() + if name in self._tasks: + if not self._tasks[name].cpu_dur: + tsk_start = self._tasks[name].cpu_tic + return tmp - tsk_start + else: + return self._tasks[name].cpu_dur + return 0.0 + + def stop_task(self, name: str) -> None: + try: + self._tasks[name].stop() + except KeyError: + sys.stderr.write('There is no such task: %s\n' % name) + + def get_cpu_dur(self, name: str) -> float: + try: + return self._tasks[name].cpu_dur + except KeyError: + sys.stderr.write('There is no such task: %s\n' % name) + return 0.0 + + def get_wall_dur(self, name: str) -> float: + try: + return self._tasks[name].wall_dur + except KeyError: + sys.stderr.write('There is no such task: %s\n' % name) + return 0.0 + + def cpu_sum(self) -> float: + """Return sum of CPU time for all so far finished tasks.""" + return sum([max(0, self._tasks[tsk].cpu_dur) for tsk in self._tasks]) + + def wall_sum(self) -> float: + """Return sum of CPU time for all so far finished tasks.""" + return sum([max(0, self._tasks[tsk].wall_dur) for tsk in self._tasks]) + + def __repr__(self) -> str: + ret_str = '| %10s | %10s | %10s | %10s | %10s | %10s | %10s |\n' % \ + ('Name', 'CPUStart', 'CPUEnd', 'CPUDur', 'WallStart', + 'WallEnd', + 'WallDur') + ret_str += '+' + '------------+' * 7 + '\n' + offset = self._tasks['stopwatch_time'].wall_tic + for tsk in self._tasks: + if self._tasks[tsk].wall_tac: + wall_tac = self._tasks[tsk].wall_tac - offset + 
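# One row per stopped task: raw CPU tic, tac and duration, then wall-clock tic, tac and duration, with wall times shown relative to the stopwatch start. +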
ret_str += '| %10s | %10.5f | %10.5f | %10.5f | %10s | %10s | %10s |\n' % \ + (tsk, self._tasks[tsk].cpu_tic, self._tasks[tsk].cpu_tac, + self.cpu_elapsed(tsk), + self._tasks[tsk].wall_tic - offset, + wall_tac if self._tasks[tsk].wall_tac else False, + self.wall_elapsed(tsk)) + return ret_str diff --git a/autoPyTorch/utils/thread_read_write.py b/autoPyTorch/utils/thread_read_write.py deleted file mode 100644 index 75d75f874..000000000 --- a/autoPyTorch/utils/thread_read_write.py +++ /dev/null @@ -1,42 +0,0 @@ - -import fasteners, json, os, threading - -thread_lock = threading.Lock() - -def write(filename, content): - with open(filename, 'w+') as f: - f.write(content) - -def read(filename): - content = '{}' - if os.path.exists(filename): - with open(filename, 'r') as f: - content = f.read() - return content - -def append(filename, content): - with fasteners.InterProcessLock('{0}.lock'.format(filename)): - with open(filename, 'a+') as f: - f.write(content) - -def update_results_thread(filename, info): - thread_lock.acquire() - with fasteners.InterProcessLock('{0}.lock'.format(filename)): - content = json.loads(read(filename)) - name = info['name'] - result = info['result'] - refit_config = info['refit_config'] - text = info['text'] - seed = str(info['seed']) - - infos = content[name] if name in content else dict() - infos[seed] = {'result': result, 'description': text, 'refit': refit_config} - content[name] = infos - - write(filename, json.dumps(content, indent=4, sort_keys=True)) - thread_lock.release() - - -def update_results(filename, info): - thread = threading.Thread(target = update_results_thread, args = (filename, info)) - thread.start() \ No newline at end of file diff --git a/autoPyTorch/utils/transforms.py b/autoPyTorch/utils/transforms.py deleted file mode 100644 index ada717cf8..000000000 --- a/autoPyTorch/utils/transforms.py +++ /dev/null @@ -1,5 +0,0 @@ -import numpy as np - - -def transform_int64(y): - return y.astype(np.int64) diff --git a/codecov.yml b/codecov.yml new file mode 100755 index 000000000..d14333d39 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,2 @@ +codecov: + token: 667dbd23-97e1-4ef7-9b80-a87c5ec8cb79 diff --git a/configs/autonet/automl/bohb_cv_sparse.txt b/configs/autonet/automl/bohb_cv_sparse.txt deleted file mode 100644 index 031776098..000000000 --- a/configs/autonet/automl/bohb_cv_sparse.txt +++ /dev/null @@ -1,15 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=4 -max_runtime=92400 -budget_type=time -optimize_metric=accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -memory_limit_mb=7000 -normalization_strategies=[maxabs] -preprocessors=[none,truncated_svd] -validation_split=0.3 -ensemble_size=20 diff --git a/configs/autonet/automl/cifar_example.txt b/configs/autonet/automl/cifar_example.txt deleted file mode 100644 index 39121280b..000000000 --- a/configs/autonet/automl/cifar_example.txt +++ /dev/null @@ -1,15 +0,0 @@ -max_budget=1500 -min_budget=300 -min_workers=2 -max_runtime=3600 -budget_type=time -default_dataset_download_dir=./datasets/ -images_root_folders=./datasets/ -optimize_metric=accuracy -validation_split=0.1 -use_tensorboard_logger=True -networks=['resnet'] -lr_scheduler=['cosine_annealing'] -batch_loss_computation_techniques=['mixup'] -loss_modules=['cross_entropy'] -optimizer=['adamw'] diff --git a/configs/autonet/automl/hyperband_cv_sparse.txt b/configs/autonet/automl/hyperband_cv_sparse.txt deleted file mode 100644 index 92ef6a5a5..000000000 --- 
a/configs/autonet/automl/hyperband_cv_sparse.txt +++ /dev/null @@ -1,20 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=4 -max_runtime=92400 -budget_type=time -algorithm=hyperband -optimize_metric=accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -min_budget_for_cv=250 -batch_loss_computation_techniques=[mixup] -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -memory_limit_mb=7000 -networks=[shapedmlpnet,shapedresnet] -normalization_strategies=[maxabs] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd] -target_size_strategies=[none,upsample,median] -validation_split=0.3 \ No newline at end of file diff --git a/configs/autonet/openml/autonet1.txt b/configs/autonet/openml/autonet1.txt deleted file mode 100644 index 0f688e5cd..000000000 --- a/configs/autonet/openml/autonet1.txt +++ /dev/null @@ -1,19 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=8000 -ensemble_size=50 -ensemble_only_consider_n_best=50 -embeddings=[none] -lr_scheduler=[exponential,step] -networks=[mlpnet] -over_sampling_methods=[none] -under_sampling_methods=[none] -target_size_strategies=[none] -batch_loss_computation_techniques=[standard] -hyperparameter_search_space_updates=configs/configspace/autonet1.txt \ No newline at end of file diff --git a/configs/autonet/openml/full_cs.txt b/configs/autonet/openml/full_cs.txt deleted file mode 100644 index 3f5bc3fbd..000000000 --- a/configs/autonet/openml/full_cs.txt +++ /dev/null @@ -1,11 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=8000 -ensemble_size=50 -ensemble_only_consider_n_best=50 diff --git a/configs/autonet/openml/gpu.txt b/configs/autonet/openml/gpu.txt deleted file mode 100644 index 0694f2e78..000000000 --- a/configs/autonet/openml/gpu.txt +++ /dev/null @@ -1,18 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=8000 -ensemble_size=50 -ensemble_only_consider_n_best=50 -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -networks=[shapedresnet] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd,power_transformer] -target_size_strategies=[none,upsample,median] -cuda=True \ No newline at end of file diff --git a/configs/autonet/openml/hyperband.txt b/configs/autonet/openml/hyperband.txt deleted file mode 100644 index 5f88c8eb4..000000000 --- a/configs/autonet/openml/hyperband.txt +++ /dev/null @@ -1,18 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=8000 -ensemble_size=50 -ensemble_only_consider_n_best=50 -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -networks=[shapedresnet] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd,power_transformer] -target_size_strategies=[none,upsample,median] -algorithm=hyperband \ No newline at end of file diff --git a/configs/autonet/openml/no_embeddings.txt b/configs/autonet/openml/no_embeddings.txt deleted file mode 100644 index ef365e15c..000000000 --- a/configs/autonet/openml/no_embeddings.txt +++ /dev/null @@ 
-1,12 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=8000 -ensemble_size=50 -ensemble_only_consider_n_best=50 -embeddings=[none] diff --git a/configs/autonet/openml/no_hyperband.txt b/configs/autonet/openml/no_hyperband.txt deleted file mode 100644 index 87b1323fa..000000000 --- a/configs/autonet/openml/no_hyperband.txt +++ /dev/null @@ -1,17 +0,0 @@ -max_budget=6000 -min_budget=6000 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=8000 -ensemble_size=50 -ensemble_only_consider_n_best=50 -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -networks=[shapedresnet] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd,power_transformer] -target_size_strategies=[none,upsample,median] diff --git a/configs/autonet/openml/restricted_cs.txt b/configs/autonet/openml/restricted_cs.txt deleted file mode 100644 index db17fb7b2..000000000 --- a/configs/autonet/openml/restricted_cs.txt +++ /dev/null @@ -1,17 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=8000 -ensemble_size=50 -ensemble_only_consider_n_best=50 -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -networks=[shapedresnet] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd,power_transformer] -target_size_strategies=[none,upsample,median] diff --git a/configs/autonet/optim_alg_comparison/bohb.txt b/configs/autonet/optim_alg_comparison/bohb.txt deleted file mode 100644 index cf39239f0..000000000 --- a/configs/autonet/optim_alg_comparison/bohb.txt +++ /dev/null @@ -1,17 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=4 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -batch_loss_computation_techniques=[mixup] -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -memory_limit_mb=5000 -networks=[shapedresnet] -normalization_strategies=[maxabs, standardize] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd] -target_size_strategies=[none,upsample,median] diff --git a/configs/autonet/optim_alg_comparison/bohb_tiny_cs.txt b/configs/autonet/optim_alg_comparison/bohb_tiny_cs.txt deleted file mode 100644 index 7e68a25cd..000000000 --- a/configs/autonet/optim_alg_comparison/bohb_tiny_cs.txt +++ /dev/null @@ -1,23 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=5000 -embeddings=[none] -lr_scheduler=[cosine_annealing] -networks=[shapedresnet] -preprocessors=[truncated_svd] -target_size_strategies=[none] -over_sampling_methods=[none] -under_sampling_methods=[none] -batch_loss_computation_techniques=[standard] -imputation_strategies=[median] -initialization_methods=[default] -loss_modules=[cross_entropy_weighted] -normalization_strategies=[standardize] -optimizer=[sgd] -hyperparameter_search_space_updates=configs/configspace/tiny_cs.txt \ No newline at end of file diff --git a/configs/autonet/optim_alg_comparison/hyperband.txt b/configs/autonet/optim_alg_comparison/hyperband.txt deleted file mode 100644 index 
a3aa02389..000000000 --- a/configs/autonet/optim_alg_comparison/hyperband.txt +++ /dev/null @@ -1,18 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=4 -max_runtime=92400 -budget_type=time -algorithm=hyperband -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -batch_loss_computation_techniques=[mixup] -embeddings=[none] -lr_scheduler=[cosine_annealing,plateau] -memory_limit_mb=5000 -networks=[shapedresnet] -normalization_strategies=[maxabs, standardize] -over_sampling_methods=[smote] -preprocessors=[none,truncated_svd] -target_size_strategies=[none,upsample,median] diff --git a/configs/autonet/optim_alg_comparison/hyperband_tiny_cs.txt b/configs/autonet/optim_alg_comparison/hyperband_tiny_cs.txt deleted file mode 100644 index 1e6b625a7..000000000 --- a/configs/autonet/optim_alg_comparison/hyperband_tiny_cs.txt +++ /dev/null @@ -1,24 +0,0 @@ -max_budget=6000 -min_budget=74 -min_workers=1 -max_runtime=92400 -budget_type=time -optimize_metric=balanced_accuracy -cross_validator=k_fold -cross_validator_args={"n_splits": 5} -memory_limit_mb=5000 -algorithm=hyperband -embeddings=[none] -lr_scheduler=[cosine_annealing] -networks=[shapedresnet] -preprocessors=[truncated_svd] -target_size_strategies=[none] -over_sampling_methods=[none] -under_sampling_methods=[none] -batch_loss_computation_techniques=[standard] -imputation_strategies=[median] -initialization_methods=[default] -loss_modules=[cross_entropy_weighted] -normalization_strategies=[standardize] -optimizer=[sgd] -hyperparameter_search_space_updates=configs/configspace/tiny_cs.txt \ No newline at end of file diff --git a/configs/autonet/test/ensemble_test.txt b/configs/autonet/test/ensemble_test.txt deleted file mode 100644 index 6b2ceb4c1..000000000 --- a/configs/autonet/test/ensemble_test.txt +++ /dev/null @@ -1,9 +0,0 @@ -min_budget=3 -max_budget=9 -num_iterations=1 -min_workers=1 -log_level=debug -budget_type=epochs -validation_split=0.2 -ensemble_size=20 -optimize_metric=balanced_accuracy \ No newline at end of file diff --git a/configs/autonet/test/test.txt b/configs/autonet/test/test.txt deleted file mode 100644 index 94748aa1a..000000000 --- a/configs/autonet/test/test.txt +++ /dev/null @@ -1,10 +0,0 @@ -min_budget=10 -max_budget=10 -num_iterations=1 -min_workers=1 -log_level=debug -budget_type=epochs -cross_validator=k_fold -cross_validator_args={"n_splits": 2} -networks=[mlpnet] -hyperparameter_search_space_updates=configs/configspace/minimlp.txt \ No newline at end of file diff --git a/configs/benchmark/automl.txt b/configs/benchmark/automl.txt deleted file mode 100644 index 5d096697f..000000000 --- a/configs/benchmark/automl.txt +++ /dev/null @@ -1,9 +0,0 @@ -result_dir=benchmark_results_cluster -instances=configs/datasets/automl.txt -autonet_configs=[configs/autonet/automl/bohb_cv_sparse.txt, configs/autonet/automl/hyperband_cv_sparse.txt] -use_dataset_metric=True -problem_type=feature_classification -log_level=info -num_runs=10 -test_split=0.0 -enable_ensemble=True diff --git a/configs/benchmark/cifar_example.txt b/configs/benchmark/cifar_example.txt deleted file mode 100644 index 2fc4d8f74..000000000 --- a/configs/benchmark/cifar_example.txt +++ /dev/null @@ -1,7 +0,0 @@ -result_dir=benchmark_results -instances=configs/datasets/cifar.txt -autonet_configs=[configs/autonet/automl/cifar_example.txt] -problem_type=image_classification -log_level=info -test_split=0.1 -num_runs=1 diff --git a/configs/benchmark/ensemble_test.txt b/configs/benchmark/ensemble_test.txt deleted file mode 
100644 index f250f95f5..000000000 --- a/configs/benchmark/ensemble_test.txt +++ /dev/null @@ -1,7 +0,0 @@ -result_dir=benchmark_results -instances=datasets/classification -autonet_configs=[configs/autonet/test/ensemble_test.txt] -problem_type=feature_classification -log_level=debug -test_split=0.2 -enable_ensemble=True diff --git a/configs/benchmark/openml.txt b/configs/benchmark/openml.txt deleted file mode 100644 index 1dd6eeee0..000000000 --- a/configs/benchmark/openml.txt +++ /dev/null @@ -1,8 +0,0 @@ -result_dir=benchmark_results_cluster -instances=configs/datasets/openml.txt -autonet_configs=[configs/autonet/openml/restricted_cs.txt, configs/autonet/openml/hyperband.txt, configs/autonet/openml/autonet1.txt, configs/autonet/openml/no_hyperband.txt, configs/autonet/openml/gpu.txt] -problem_type=feature_classification -log_level=info -num_runs=3 -test_split=0.2 -enable_ensemble=True diff --git a/configs/benchmark/optim_alg_comparison.txt b/configs/benchmark/optim_alg_comparison.txt deleted file mode 100644 index 2edf02028..000000000 --- a/configs/benchmark/optim_alg_comparison.txt +++ /dev/null @@ -1,7 +0,0 @@ -result_dir=benchmark_results_cluster -instances=configs/datasets/openml_small.txt -autonet_configs=[configs/autonet/optim_alg_comparison/bohb.txt, configs/autonet/optim_alg_comparison/hyperband.txt] -problem_type=feature_classification -log_level=info -num_runs=10 -test_split=0.0 diff --git a/configs/benchmark/optim_alg_comparison_tiny.txt b/configs/benchmark/optim_alg_comparison_tiny.txt deleted file mode 100644 index 924d83f73..000000000 --- a/configs/benchmark/optim_alg_comparison_tiny.txt +++ /dev/null @@ -1,7 +0,0 @@ -result_dir=benchmark_results_cluster -instances=configs/datasets/openml.txt -autonet_configs=[configs/autonet/optim_alg_comparison/bohb_tiny_cs.txt, configs/autonet/optim_alg_comparison/hyperband_tiny_cs.txt] -problem_type=feature_classification -log_level=info -num_runs=3 -test_split=0.0 diff --git a/configs/benchmark/test.txt b/configs/benchmark/test.txt deleted file mode 100644 index 4ef040376..000000000 --- a/configs/benchmark/test.txt +++ /dev/null @@ -1,7 +0,0 @@ -result_dir=benchmark_results -instances=datasets/classification -autonet_configs=[configs/autonet/test/test.txt] -problem_type=feature_classification -log_level=debug -num_runs=2 -test_split=0.2 diff --git a/configs/configspace/autonet1.txt b/configs/configspace/autonet1.txt deleted file mode 100644 index c4dcc9657..000000000 --- a/configs/configspace/autonet1.txt +++ /dev/null @@ -1,4 +0,0 @@ -CreateDataLoader batch_size [32,4096] log -InitializationSelector initializer:initialize_bias ["Zero"] -NetworkSelector mlpnet:num_layers [1,6] -NetworkSelector mlpnet:num_units [64,4096] log \ No newline at end of file diff --git a/configs/configspace/minimlp.txt b/configs/configspace/minimlp.txt deleted file mode 100644 index 696e302b9..000000000 --- a/configs/configspace/minimlp.txt +++ /dev/null @@ -1,2 +0,0 @@ -NetworkSelector mlpnet:num_layers [2,4] -CreateDataLoader batch_size [5,10] \ No newline at end of file diff --git a/configs/configspace/tiny_cs.txt b/configs/configspace/tiny_cs.txt deleted file mode 100644 index a8da79da3..000000000 --- a/configs/configspace/tiny_cs.txt +++ /dev/null @@ -1,11 +0,0 @@ -CreateDataLoader batch_size [125] -InitializationSelector initializer:initialize_bias ["No"] -LearningrateSchedulerSelector cosine_annealing:T_max [10] -LearningrateSchedulerSelector cosine_annealing:T_mult [2] -NetworkSelector shapedresnet:activation ["relu"] -NetworkSelector 
shapedresnet:max_shake_drop_probability [0.0,0.000001] -NetworkSelector shapedresnet:resnet_shape ["brick"] -NetworkSelector shapedresnet:use_dropout [False] -NetworkSelector shapedresnet:use_shake_drop [False] -NetworkSelector shapedresnet:use_shake_shake [False] -PreprocessorSelector truncated_svd:target_dim [100] \ No newline at end of file diff --git a/configs/datasets/all_openml.txt b/configs/datasets/all_openml.txt deleted file mode 100644 index 85f1c1521..000000000 --- a/configs/datasets/all_openml.txt +++ /dev/null @@ -1,2546 +0,0 @@ -openml:31 -openml:1464 -openml:334 -openml:50 -openml:333 -openml:1504 -openml:1494 -openml:3 -openml:1510 -openml:1489 -openml:37 -openml:1479 -openml:1487 -openml:1063 -openml:1471 -openml:1467 -openml:44 -openml:1067 -openml:1480 -openml:1068 -openml:1492 -openml:1493 -openml:1491 -openml:1050 -openml:1462 -openml:1046 -openml:1049 -openml:335 -openml:151 -openml:1485 -openml:312 -openml:1116 -openml:1457 -openml:1038 -openml:1220 -openml:1486 -openml:1120 -openml:6 -openml:1461 -openml:4534 -openml:300 -openml:4134 -openml:42 -openml:1515 -openml:183 -openml:4135 -openml:18 -openml:12 -openml:28 -openml:22 -openml:14 -openml:16 -openml:32 -openml:40536 -openml:1501 -openml:1468 -openml:20 -openml:469 -openml:188 -openml:182 -openml:54 -openml:307 -openml:1459 -openml:1466 -openml:11 -openml:1475 -openml:36 -openml:29 -openml:375 -openml:15 -openml:458 -openml:23 -openml:4538 -openml:1478 -openml:377 -openml:1497 -openml:46 -openml:60 -openml:6332 -openml:38 -openml:23380 -openml:1476 -openml:1053 -openml:2 -openml:23381 -openml:24 -openml:470 -openml:451 -openml:40499 -openml:1549 -openml:1555 -openml:23512 -openml:554 -openml:1233 -openml:1590 -openml:40496 -openml:1114 -openml:61 -openml:1112 -openml:4 -openml:7 -openml:1554 -openml:1552 -openml:40668 -openml:40701 -openml:1553 -openml:5 -openml:40994 -openml:40670 -openml:40984 -openml:40975 -openml:1548 -openml:40982 -openml:40981 -openml:40983 -openml:40979 -openml:40966 -openml:41027 -openml:1547 -openml:43 -openml:53 -openml:1137 -openml:1128 -openml:1138 -openml:1166 -openml:1158 -openml:1134 -openml:1165 -openml:1130 -openml:1139 -openml:1145 -openml:1161 -openml:30 -openml:179 -openml:9 -openml:59 -openml:40 -openml:56 -openml:26 -openml:181 -openml:55 -openml:40900 -openml:40971 -openml:48 -openml:13 -openml:52 -openml:10 -openml:27 -openml:782 -openml:39 -openml:51 -openml:184 -openml:41 -openml:49 -openml:34 -openml:35 -openml:40910 -openml:172 -openml:137 -openml:336 -openml:885 -openml:171 -openml:867 -openml:313 -openml:163 -openml:875 -openml:736 -openml:186 -openml:187 -openml:916 -openml:895 -openml:974 -openml:754 -openml:1013 -openml:969 -openml:829 -openml:448 -openml:726 -openml:464 -openml:337 -openml:921 -openml:346 -openml:890 -openml:784 -openml:811 -openml:747 -openml:714 -openml:119 -openml:902 -openml:461 -openml:955 -openml:444 -openml:783 -openml:748 -openml:278 -openml:338 -openml:789 -openml:878 -openml:762 -openml:808 -openml:719 -openml:860 -openml:255 -openml:277 -openml:276 -openml:1075 -openml:814 -openml:685 -openml:450 -openml:57 -openml:339 -openml:342 -openml:1069 -openml:275 -openml:340 -openml:803 -openml:251 -openml:343 -openml:976 -openml:733 -openml:730 -openml:776 -openml:911 -openml:925 -openml:1026 -openml:744 -openml:886 -openml:880 -openml:900 -openml:918 -openml:879 -openml:1011 -openml:1056 -openml:943 -openml:931 -openml:896 -openml:794 -openml:949 -openml:820 -openml:932 -openml:994 -openml:889 -openml:937 -openml:788 -openml:792 -openml:933 
-openml:795 -openml:871 -openml:888 -openml:970 -openml:796 -openml:936 -openml:807 -openml:1020 -openml:1061 -openml:774 -openml:868 -openml:995 -openml:996 -openml:779 -openml:793 -openml:935 -openml:909 -openml:756 -openml:185 -openml:775 -openml:1064 -openml:876 -openml:906 -openml:893 -openml:877 -openml:884 -openml:869 -openml:805 -openml:830 -openml:766 -openml:1021 -openml:770 -openml:804 -openml:973 -openml:870 -openml:951 -openml:908 -openml:894 -openml:979 -openml:749 -openml:1037 -openml:997 -openml:1065 -openml:812 -openml:926 -openml:873 -openml:819 -openml:947 -openml:1014 -openml:763 -openml:863 -openml:824 -openml:841 -openml:922 -openml:923 -openml:962 -openml:753 -openml:958 -openml:920 -openml:1005 -openml:1054 -openml:724 -openml:838 -openml:768 -openml:778 -openml:950 -openml:764 -openml:941 -openml:716 -openml:978 -openml:772 -openml:945 -openml:991 -openml:980 -openml:1071 -openml:1066 -openml:746 -openml:850 -openml:790 -openml:907 -openml:834 -openml:752 -openml:915 -openml:816 -openml:761 -openml:725 -openml:847 -openml:934 -openml:946 -openml:832 -openml:481 -openml:750 -openml:971 -openml:874 -openml:1025 -openml:818 -openml:1059 -openml:769 -openml:898 -openml:882 -openml:1018 -openml:735 -openml:717 -openml:857 -openml:848 -openml:1003 -openml:765 -openml:817 -openml:466 -openml:732 -openml:855 -openml:1006 -openml:1073 -openml:720 -openml:721 -openml:1045 -openml:1048 -openml:741 -openml:773 -openml:737 -openml:851 -openml:791 -openml:833 -openml:944 -openml:993 -openml:1015 -openml:446 -openml:742 -openml:767 -openml:728 -openml:827 -openml:780 -openml:798 -openml:1000 -openml:899 -openml:983 -openml:986 -openml:1017 -openml:777 -openml:828 -openml:800 -openml:862 -openml:1002 -openml:1009 -openml:1115 -openml:861 -openml:853 -openml:785 -openml:891 -openml:1022 -openml:975 -openml:826 -openml:815 -openml:887 -openml:1062 -openml:472 -openml:810 -openml:1010 -openml:1023 -openml:961 -openml:836 -openml:864 -openml:999 -openml:683 -openml:852 -openml:825 -openml:745 -openml:967 -openml:1060 -openml:960 -openml:982 -openml:957 -openml:786 -openml:989 -openml:844 -openml:968 -openml:835 -openml:840 -openml:972 -openml:755 -openml:731 -openml:831 -openml:729 -openml:990 -openml:858 -openml:988 -openml:465 -openml:964 -openml:1055 -openml:1167 -openml:459 -openml:757 -openml:963 -openml:985 -openml:839 -openml:842 -openml:802 -openml:1019 -openml:854 -openml:739 -openml:682 -openml:913 -openml:959 -openml:904 -openml:910 -openml:952 -openml:977 -openml:903 -openml:866 -openml:799 -openml:694 -openml:845 -openml:722 -openml:912 -openml:981 -openml:806 -openml:797 -openml:901 -openml:917 -openml:846 -openml:751 -openml:837 -openml:715 -openml:849 -openml:723 -openml:821 -openml:734 -openml:718 -openml:679 -openml:813 -openml:743 -openml:1100 -openml:843 -openml:480 -openml:823 -openml:475 -openml:740 -openml:865 -openml:727 -openml:350 -openml:468 -openml:881 -openml:1042 -openml:452 -openml:924 -openml:801 -openml:1101 -openml:474 -openml:1040 -openml:41026 -openml:771 -openml:1473 -openml:1104 -openml:1044 -openml:382 -openml:392 -openml:389 -openml:1556 -openml:401 -openml:383 -openml:1039 -openml:357 -openml:279 -openml:1041 -openml:1455 -openml:329 -openml:40926 -openml:386 -openml:394 -openml:1242 -openml:1463 -openml:378 -openml:143 -openml:1597 -openml:381 -openml:273 -openml:1107 -openml:1179 -openml:139 -openml:149 -openml:246 -openml:269 -openml:162 -openml:161 -openml:120 -openml:267 -openml:258 -openml:72 -openml:150 -openml:153 -openml:259 
-openml:134 -openml:155 -openml:1211 -openml:1219 -openml:1185 -openml:247 -openml:1235 -openml:152 -openml:266 -openml:249 -openml:160 -openml:263 -openml:1241 -openml:158 -openml:159 -openml:271 -openml:1236 -openml:253 -openml:1237 -openml:157 -openml:1238 -openml:154 -openml:268 -openml:1559 -openml:156 -openml:264 -openml:1102 -openml:261 -openml:1183 -openml:272 -openml:250 -openml:1214 -openml:1169 -openml:23517 -openml:1109 -openml:1465 -openml:393 -openml:396 -openml:1527 -openml:1240 -openml:25 -openml:146 -openml:260 -openml:391 -openml:384 -openml:135 -openml:387 -openml:1111 -openml:180 -openml:399 -openml:310 -openml:388 -openml:130 -openml:397 -openml:244 -openml:400 -openml:311 -openml:1443 -openml:123 -openml:1499 -openml:1444 -openml:1442 -openml:1121 -openml:1488 -openml:965 -openml:463 -openml:956 -openml:1513 -openml:1004 -openml:62 -openml:274 -openml:256 -openml:398 -openml:1460 -openml:1484 -openml:1511 -openml:395 -openml:1512 -openml:1498 -openml:914 -openml:1523 -openml:1117 -openml:1508 -openml:1500 -openml:8 -openml:1495 -openml:1506 -openml:1551 -openml:70 -openml:954 -openml:987 -openml:1490 -openml:1012 -openml:316 -openml:164 -openml:1565 -openml:1526 -openml:1530 -openml:953 -openml:1560 -openml:1529 -openml:1016 -openml:966 -openml:1520 -openml:1519 -openml:1540 -openml:1538 -openml:1524 -openml:40517 -openml:1534 -openml:928 -openml:1568 -openml:1541 -openml:1532 -openml:1496 -openml:992 -openml:1528 -openml:1531 -openml:1533 -openml:738 -openml:939 -openml:892 -openml:1539 -openml:1507 -openml:476 -openml:942 -openml:905 -openml:1186 -openml:1566 -openml:787 -openml:929 -openml:1106 -openml:919 -openml:984 -openml:927 -openml:1482 -openml:1536 -openml:1447 -openml:938 -openml:713 -openml:1525 -openml:385 -openml:759 -openml:1535 -openml:859 -openml:1001 -openml:1545 -openml:1542 -openml:1543 -openml:1546 -openml:1557 -openml:40996 -openml:930 -openml:1008 -openml:998 -openml:1544 -openml:1558 -openml:285 -openml:479 -openml:1007 -openml:1453 -openml:1472 -openml:758 -openml:940 -openml:1451 -openml:467 -openml:897 -openml:1452 -openml:760 -openml:40516 -openml:40520 -openml:40514 -openml:40518 -openml:40519 -openml:40515 -openml:40474 -openml:40475 -openml:133 -openml:1178 -openml:1446 -openml:125 -openml:4153 -openml:1509 -openml:1123 -openml:1162 -openml:1127 -openml:1141 -openml:1143 -openml:40927 -openml:265 -openml:1131 -openml:1132 -openml:1135 -openml:1136 -openml:1122 -openml:1124 -openml:1129 -openml:1144 -openml:1147 -openml:1153 -openml:1156 -openml:1154 -openml:4537 -openml:4154 -openml:124 -openml:131 -openml:41039 -openml:122 -openml:73 -openml:1563 -openml:1516 -openml:1517 -openml:1518 -openml:140 -openml:1142 -openml:121 -openml:142 -openml:126 -openml:77 -openml:141 -openml:257 -openml:1119 -openml:1222 -openml:1477 -openml:127 -openml:116 -openml:245 -openml:390 -openml:129 -openml:71 -openml:117 -openml:136 -openml:138 -openml:23499 -openml:128 -openml:144 -openml:148 -openml:210 -openml:262 -openml:248 -openml:1149 -openml:1155 -openml:1159 -openml:1163 -openml:1514 -openml:40498 -openml:75 -openml:76 -openml:147 -openml:1133 -openml:1140 -openml:1125 -openml:1126 -openml:1148 -openml:1150 -openml:1151 -openml:1152 -openml:1157 -openml:1160 -openml:1164 -openml:74 -openml:1146 -openml:41082 -openml:1537 -openml:41083 -openml:41084 -openml:252 -openml:4340 -openml:41081 -openml:254 -openml:118 -openml:455 -openml:1205 -openml:115 -openml:132 -openml:1177 -openml:443 -openml:78 -openml:1212 -openml:1085 -openml:1180 -openml:1182 
-openml:1209 -openml:491 -openml:1181 -openml:454 -openml:457 -openml:1564 -openml:41103 -openml:1387 -openml:194 -openml:1218 -openml:462 -openml:40705 -openml:1380 -openml:1388 -openml:1394 -openml:477 -openml:488 -openml:566 -openml:1382 -openml:1393 -openml:1413 -openml:40660 -openml:40702 -openml:40710 -openml:40476 -openml:453 -openml:327 -openml:328 -openml:351 -openml:1390 -openml:1372 -openml:1378 -openml:4329 -openml:4552 -openml:40646 -openml:40647 -openml:40648 -openml:40650 -openml:40691 -openml:40693 -openml:40700 -openml:40663 -openml:40664 -openml:40665 -openml:40666 -openml:40669 -openml:40704 -openml:40706 -openml:40707 -openml:40708 -openml:40709 -openml:40671 -openml:40677 -openml:40678 -openml:40680 -openml:40681 -openml:40682 -openml:40686 -openml:40687 -openml:40690 -openml:40711 -openml:40713 -openml:40497 -openml:40477 -openml:40478 -openml:1386 -openml:1391 -openml:1392 -openml:1360 -openml:1403 -openml:1379 -openml:40714 -openml:1384 -openml:1385 -openml:1395 -openml:1396 -openml:1361 -openml:1365 -openml:1366 -openml:1369 -openml:1370 -openml:1402 -openml:1374 -openml:1376 -openml:1377 -openml:1381 -openml:1383 -openml:1389 -openml:1400 -openml:1362 -openml:1363 -openml:1364 -openml:1367 -openml:1368 -openml:1404 -openml:1406 -openml:1407 -openml:1410 -openml:1371 -openml:1373 -openml:1375 -openml:1397 -openml:1398 -openml:1399 -openml:1401 -openml:1405 -openml:1408 -openml:1409 -openml:40683 -openml:1113 -openml:1481 -openml:354 -openml:673 -openml:293 -openml:204 -openml:189 -openml:506 -openml:524 -openml:1458 -openml:1502 -openml:41000 -openml:511 -openml:703 -openml:222 -openml:578 -openml:639 -openml:40997 -openml:40999 -openml:562 -openml:40998 -openml:41001 -openml:41002 -openml:41003 -openml:41004 -openml:373 -openml:200 -openml:213 -openml:231 -openml:232 -openml:1352 -openml:1354 -openml:41005 -openml:41006 -openml:41007 -openml:1081 -openml:1082 -openml:1083 -openml:1084 -openml:1086 -openml:1077 -openml:1078 -openml:1079 -openml:1351 -openml:1353 -openml:40923 -openml:1087 -openml:1080 -openml:707 -openml:1355 -openml:1357 -openml:1359 -openml:195 -openml:1356 -openml:1358 -openml:1483 -openml:191 -openml:190 -openml:199 -openml:203 -openml:228 -openml:1110 -openml:193 -openml:40978 -openml:211 -openml:1562 -openml:1245 -openml:531 -openml:197 -openml:198 -openml:201 -openml:206 -openml:207 -openml:208 -openml:209 -openml:212 -openml:214 -openml:215 -openml:216 -openml:217 -openml:218 -openml:223 -openml:225 -openml:226 -openml:227 -openml:229 -openml:230 -openml:509 -openml:482 -openml:513 -openml:520 -openml:494 -openml:553 -openml:521 -openml:523 -openml:528 -openml:534 -openml:536 -openml:541 -openml:543 -openml:546 -openml:681 -openml:686 -openml:688 -openml:665 -openml:668 -openml:706 -openml:1090 -openml:1076 -openml:1051 -openml:1091 -openml:1096 -openml:3041 -openml:3040 -openml:518 -openml:530 -openml:492 -openml:40916 -openml:561 -openml:551 -openml:526 -openml:557 -openml:192 -openml:535 -openml:556 -openml:500 -openml:555 -openml:527 -openml:497 -openml:533 -openml:294 -openml:595 -openml:3631 -openml:3632 -openml:3633 -openml:3634 -openml:3635 -openml:3636 -openml:3637 -openml:3638 -openml:3639 -openml:3640 -openml:3661 -openml:3662 -openml:3663 -openml:3664 -openml:3665 -openml:3666 -openml:3667 -openml:3668 -openml:3669 -openml:3670 -openml:3591 -openml:3592 -openml:3593 -openml:3594 -openml:3595 -openml:3596 -openml:3597 -openml:3598 -openml:3599 -openml:3600 -openml:3601 -openml:3602 -openml:3603 -openml:3604 -openml:3605 
-openml:3606 -openml:3607 -openml:3608 -openml:3609 -openml:3610 -openml:3621 -openml:3622 -openml:3623 -openml:3624 -openml:3625 -openml:3626 -openml:3627 -openml:3628 -openml:3629 -openml:3630 -openml:3641 -openml:3642 -openml:3643 -openml:3644 -openml:3645 -openml:3646 -openml:3647 -openml:3648 -openml:3649 -openml:3650 -openml:3581 -openml:3582 -openml:3583 -openml:3584 -openml:3585 -openml:3586 -openml:3587 -openml:3588 -openml:3589 -openml:3590 -openml:3611 -openml:3612 -openml:3613 -openml:3614 -openml:3615 -openml:3616 -openml:3617 -openml:3618 -openml:3619 -openml:3620 -openml:3651 -openml:3652 -openml:3653 -openml:3654 -openml:3655 -openml:3656 -openml:3657 -openml:3658 -openml:3659 -openml:3660 -openml:3701 -openml:3702 -openml:3703 -openml:3704 -openml:3705 -openml:3706 -openml:3707 -openml:3708 -openml:3709 -openml:3710 -openml:3741 -openml:3742 -openml:3743 -openml:3744 -openml:3745 -openml:3746 -openml:3747 -openml:3748 -openml:3749 -openml:3750 -openml:3711 -openml:3712 -openml:3713 -openml:3714 -openml:3715 -openml:3716 -openml:3717 -openml:3718 -openml:3719 -openml:3720 -openml:3681 -openml:3682 -openml:3683 -openml:3684 -openml:3685 -openml:3686 -openml:3687 -openml:3688 -openml:3689 -openml:3690 -openml:3721 -openml:3722 -openml:3723 -openml:3724 -openml:3725 -openml:3726 -openml:3727 -openml:3728 -openml:3729 -openml:3730 -openml:3731 -openml:3732 -openml:3733 -openml:3734 -openml:3735 -openml:3736 -openml:3737 -openml:3738 -openml:3739 -openml:3740 -openml:3671 -openml:3672 -openml:3673 -openml:3674 -openml:3675 -openml:3676 -openml:3677 -openml:3678 -openml:3679 -openml:3680 -openml:3694 -openml:3695 -openml:3696 -openml:3697 -openml:3698 -openml:3699 -openml:3700 -openml:3691 -openml:3692 -openml:3693 -openml:3751 -openml:3752 -openml:3753 -openml:3754 -openml:3755 -openml:3756 -openml:3757 -openml:3758 -openml:3759 -openml:3760 -openml:3801 -openml:3802 -openml:3803 -openml:3804 -openml:3805 -openml:3806 -openml:3807 -openml:3808 -openml:3809 -openml:3810 -openml:3761 -openml:3762 -openml:3763 -openml:3764 -openml:3765 -openml:3766 -openml:3767 -openml:3768 -openml:3769 -openml:3770 -openml:3781 -openml:3782 -openml:3783 -openml:3784 -openml:3785 -openml:3786 -openml:3787 -openml:3788 -openml:3789 -openml:3790 -openml:3771 -openml:3772 -openml:3773 -openml:3774 -openml:3775 -openml:3776 -openml:3777 -openml:3778 -openml:3779 -openml:3780 -openml:3811 -openml:3812 -openml:3813 -openml:3814 -openml:3816 -openml:3817 -openml:3818 -openml:3819 -openml:3820 -openml:3795 -openml:3797 -openml:3798 -openml:3799 -openml:3800 -openml:3791 -openml:3792 -openml:3793 -openml:3794 -openml:3821 -openml:3822 -openml:3401 -openml:3402 -openml:3403 -openml:3404 -openml:3405 -openml:3406 -openml:3407 -openml:3408 -openml:3409 -openml:3410 -openml:3361 -openml:3362 -openml:3363 -openml:3364 -openml:3365 -openml:3366 -openml:3367 -openml:3368 -openml:3370 -openml:3351 -openml:3352 -openml:3353 -openml:3354 -openml:3355 -openml:3356 -openml:3357 -openml:3358 -openml:3359 -openml:3360 -openml:3421 -openml:3422 -openml:3423 -openml:3424 -openml:3425 -openml:3426 -openml:3427 -openml:3428 -openml:3429 -openml:3430 -openml:3411 -openml:3412 -openml:3413 -openml:3414 -openml:3415 -openml:3416 -openml:3417 -openml:3418 -openml:3419 -openml:3420 -openml:3381 -openml:3382 -openml:3383 -openml:3385 -openml:3386 -openml:3387 -openml:3388 -openml:3389 -openml:3390 -openml:3372 -openml:3373 -openml:3374 -openml:3375 -openml:3376 -openml:3377 -openml:3378 -openml:3379 -openml:3380 -openml:3391 
-openml:3392 -openml:3393 -openml:3394 -openml:3395 -openml:3396 -openml:3397 -openml:3398 -openml:3311 -openml:3312 -openml:3313 -openml:3399 -openml:3400 -openml:3461 -openml:3462 -openml:3463 -openml:3464 -openml:3465 -openml:3466 -openml:3467 -openml:3468 -openml:3469 -openml:3470 -openml:3431 -openml:3432 -openml:3433 -openml:3434 -openml:3435 -openml:3436 -openml:3437 -openml:3438 -openml:3439 -openml:3440 -openml:3481 -openml:3482 -openml:3483 -openml:3484 -openml:3485 -openml:3486 -openml:3487 -openml:3488 -openml:3489 -openml:3490 -openml:3501 -openml:3502 -openml:3503 -openml:3504 -openml:3505 -openml:3506 -openml:3507 -openml:3508 -openml:3509 -openml:3510 -openml:3451 -openml:3452 -openml:3453 -openml:3454 -openml:3455 -openml:3456 -openml:3457 -openml:3458 -openml:3459 -openml:3460 -openml:3491 -openml:3492 -openml:3493 -openml:3494 -openml:3495 -openml:3496 -openml:3497 -openml:3498 -openml:3499 -openml:3500 -openml:3441 -openml:3442 -openml:3443 -openml:3444 -openml:3445 -openml:3447 -openml:3448 -openml:3449 -openml:3450 -openml:3474 -openml:3475 -openml:3476 -openml:3477 -openml:3478 -openml:3479 -openml:3480 -openml:3471 -openml:3472 -openml:3571 -openml:3572 -openml:3573 -openml:3574 -openml:3575 -openml:3576 -openml:3577 -openml:3578 -openml:3579 -openml:3580 -openml:3551 -openml:3552 -openml:3553 -openml:3554 -openml:3555 -openml:3556 -openml:3558 -openml:3559 -openml:3560 -openml:3531 -openml:3532 -openml:3533 -openml:3534 -openml:3535 -openml:3536 -openml:3537 -openml:3538 -openml:3539 -openml:3540 -openml:3541 -openml:3542 -openml:3543 -openml:3544 -openml:3545 -openml:3546 -openml:3547 -openml:3548 -openml:3549 -openml:3550 -openml:3521 -openml:3522 -openml:3523 -openml:3524 -openml:3525 -openml:3526 -openml:3527 -openml:3528 -openml:3529 -openml:3530 -openml:3515 -openml:3516 -openml:3517 -openml:3518 -openml:3519 -openml:3520 -openml:3561 -openml:3562 -openml:3563 -openml:3564 -openml:3565 -openml:3566 -openml:3567 -openml:3568 -openml:3569 -openml:3570 -openml:3511 -openml:3512 -openml:3513 -openml:3514 -openml:3881 -openml:3882 -openml:3883 -openml:3884 -openml:3885 -openml:3886 -openml:3887 -openml:3888 -openml:3889 -openml:3890 -openml:3871 -openml:3872 -openml:3873 -openml:3874 -openml:3875 -openml:3876 -openml:3877 -openml:3878 -openml:3879 -openml:3880 -openml:3851 -openml:3852 -openml:3853 -openml:3854 -openml:3855 -openml:3856 -openml:3857 -openml:3858 -openml:3859 -openml:3860 -openml:3861 -openml:3862 -openml:3863 -openml:3864 -openml:3865 -openml:3866 -openml:3867 -openml:3868 -openml:3869 -openml:3870 -openml:3832 -openml:3833 -openml:3834 -openml:3835 -openml:3836 -openml:3837 -openml:3838 -openml:3839 -openml:3840 -openml:3841 -openml:3842 -openml:3843 -openml:3844 -openml:3845 -openml:3846 -openml:3847 -openml:3848 -openml:3849 -openml:3823 -openml:3824 -openml:3825 -openml:3826 -openml:3827 -openml:3828 -openml:3829 -openml:3830 -openml:3831 -openml:1567 -openml:1569 -openml:1595 -openml:3042 -openml:3043 -openml:3051 -openml:3052 -openml:3053 -openml:3054 -openml:3055 -openml:3056 -openml:3057 -openml:3058 -openml:3059 -openml:3060 -openml:3091 -openml:3092 -openml:3093 -openml:3094 -openml:3095 -openml:3096 -openml:3097 -openml:3098 -openml:3099 -openml:3100 -openml:3101 -openml:3102 -openml:3103 -openml:3104 -openml:3105 -openml:3106 -openml:3107 -openml:3108 -openml:3109 -openml:3110 -openml:3081 -openml:3082 -openml:3083 -openml:3084 -openml:3085 -openml:3086 -openml:3087 -openml:3088 -openml:3089 -openml:3090 -openml:3111 -openml:3112 
-openml:3113 -openml:3114 -openml:3115 -openml:3116 -openml:3117 -openml:3118 -openml:3119 -openml:3120 -openml:3061 -openml:3062 -openml:3063 -openml:3064 -openml:3065 -openml:3066 -openml:3067 -openml:3068 -openml:3069 -openml:3070 -openml:3072 -openml:3073 -openml:3074 -openml:3075 -openml:3076 -openml:3077 -openml:3078 -openml:3079 -openml:3080 -openml:3044 -openml:3045 -openml:3046 -openml:3047 -openml:3048 -openml:3049 -openml:3050 -openml:3071 -openml:3261 -openml:3262 -openml:3263 -openml:3264 -openml:3265 -openml:3266 -openml:3267 -openml:3268 -openml:3269 -openml:3270 -openml:3241 -openml:3242 -openml:3243 -openml:3244 -openml:3245 -openml:3246 -openml:3247 -openml:3248 -openml:3249 -openml:3250 -openml:3251 -openml:3252 -openml:3253 -openml:3254 -openml:3255 -openml:3256 -openml:3257 -openml:3258 -openml:3259 -openml:3260 -openml:3231 -openml:3232 -openml:3233 -openml:3234 -openml:3235 -openml:3236 -openml:3237 -openml:3238 -openml:3239 -openml:3240 -openml:3221 -openml:3222 -openml:3223 -openml:3224 -openml:3225 -openml:3226 -openml:3227 -openml:3228 -openml:3229 -openml:3230 -openml:3201 -openml:3202 -openml:3203 -openml:3204 -openml:3205 -openml:3206 -openml:3207 -openml:3208 -openml:3209 -openml:3210 -openml:3191 -openml:3192 -openml:3193 -openml:3194 -openml:3195 -openml:3196 -openml:3197 -openml:3198 -openml:3199 -openml:3200 -openml:3211 -openml:3212 -openml:3213 -openml:3214 -openml:3215 -openml:3216 -openml:3217 -openml:3218 -openml:3219 -openml:3220 -openml:3271 -openml:1503 -openml:1441 -openml:3151 -openml:3152 -openml:3153 -openml:3154 -openml:3155 -openml:3156 -openml:3157 -openml:3158 -openml:3159 -openml:3160 -openml:3161 -openml:3162 -openml:3163 -openml:3164 -openml:3165 -openml:3166 -openml:3167 -openml:3168 -openml:3169 -openml:3170 -openml:3121 -openml:3122 -openml:3123 -openml:3124 -openml:3125 -openml:3126 -openml:3127 -openml:3128 -openml:3129 -openml:3130 -openml:3131 -openml:3132 -openml:3133 -openml:3134 -openml:3135 -openml:3136 -openml:3137 -openml:3138 -openml:3139 -openml:3140 -openml:3181 -openml:3182 -openml:3183 -openml:3184 -openml:3185 -openml:3186 -openml:3187 -openml:3188 -openml:3189 -openml:3190 -openml:3141 -openml:3142 -openml:3143 -openml:3144 -openml:3145 -openml:3146 -openml:3147 -openml:3148 -openml:3149 -openml:3171 -openml:3172 -openml:3173 -openml:3174 -openml:3175 -openml:3176 -openml:3177 -openml:3178 -openml:3179 -openml:3180 -openml:3150 -openml:3291 -openml:3292 -openml:3293 -openml:3294 -openml:3295 -openml:3296 -openml:3297 -openml:3298 -openml:3299 -openml:3300 -openml:3281 -openml:3282 -openml:3283 -openml:3284 -openml:3285 -openml:3286 -openml:3287 -openml:3288 -openml:3289 -openml:3290 -openml:3331 -openml:3332 -openml:3333 -openml:3334 -openml:3335 -openml:3336 -openml:3337 -openml:3338 -openml:3339 -openml:3340 -openml:3321 -openml:3322 -openml:3323 -openml:3324 -openml:3325 -openml:3326 -openml:3327 -openml:3328 -openml:3329 -openml:3330 -openml:3341 -openml:3342 -openml:3343 -openml:3344 -openml:3345 -openml:3346 -openml:3347 -openml:3348 -openml:3349 -openml:3350 -openml:3272 -openml:3273 -openml:3274 -openml:3275 -openml:3276 -openml:3277 -openml:3278 -openml:3279 -openml:3280 -openml:3301 -openml:3302 -openml:3303 -openml:3304 -openml:3305 -openml:3306 -openml:3307 -openml:3308 -openml:3309 -openml:3314 -openml:3315 -openml:3316 -openml:3317 -openml:3318 -openml:3319 -openml:3320 -openml:3951 -openml:3952 -openml:3953 -openml:3954 -openml:3955 -openml:3956 -openml:3957 -openml:3958 -openml:3959 -openml:3960 
-openml:3921 -openml:3922 -openml:3923 -openml:3924 -openml:3925 -openml:3926 -openml:3927 -openml:3928 -openml:3929 -openml:3930 -openml:3901 -openml:3902 -openml:3903 -openml:3904 -openml:3905 -openml:3906 -openml:3907 -openml:3908 -openml:3909 -openml:3910 -openml:3931 -openml:3932 -openml:3933 -openml:3934 -openml:3935 -openml:3936 -openml:3937 -openml:3938 -openml:3939 -openml:3940 -openml:3961 -openml:3962 -openml:3963 -openml:3964 -openml:3965 -openml:3966 -openml:3967 -openml:3968 -openml:3969 -openml:3970 -openml:3911 -openml:3912 -openml:3913 -openml:3914 -openml:3915 -openml:3916 -openml:3917 -openml:3918 -openml:3919 -openml:3920 -openml:3941 -openml:3942 -openml:3943 -openml:3944 -openml:3945 -openml:3947 -openml:3948 -openml:3949 -openml:3950 -openml:3891 -openml:3892 -openml:3893 -openml:3894 -openml:3895 -openml:3896 -openml:3897 -openml:3898 -openml:3899 -openml:3900 -openml:3850 -openml:4031 -openml:4032 -openml:4033 -openml:4034 -openml:4035 -openml:4036 -openml:4037 -openml:4038 -openml:4039 -openml:4040 -openml:3981 -openml:3982 -openml:3983 -openml:3984 -openml:3985 -openml:3986 -openml:3987 -openml:3988 -openml:3989 -openml:3990 -openml:4021 -openml:4022 -openml:4023 -openml:4024 -openml:4025 -openml:4026 -openml:4027 -openml:4028 -openml:4029 -openml:4030 -openml:3991 -openml:3992 -openml:3993 -openml:3994 -openml:3995 -openml:3996 -openml:3997 -openml:3998 -openml:3999 -openml:4000 -openml:4001 -openml:4002 -openml:4003 -openml:4004 -openml:4005 -openml:4006 -openml:4007 -openml:4008 -openml:4009 -openml:4010 -openml:4012 -openml:4013 -openml:4014 -openml:4015 -openml:4016 -openml:4017 -openml:4018 -openml:4019 -openml:4020 -openml:3971 -openml:3972 -openml:3973 -openml:3974 -openml:3975 -openml:3976 -openml:3977 -openml:3978 -openml:3979 -openml:3980 -openml:4011 -openml:4061 -openml:4062 -openml:4063 -openml:4064 -openml:4065 -openml:4066 -openml:4067 -openml:4068 -openml:4069 -openml:4070 -openml:4051 -openml:4052 -openml:4053 -openml:4054 -openml:4055 -openml:4056 -openml:4057 -openml:4058 -openml:4059 -openml:4060 -openml:4101 -openml:4102 -openml:4103 -openml:4104 -openml:4105 -openml:4106 -openml:4107 -openml:4108 -openml:4109 -openml:4110 -openml:4091 -openml:4092 -openml:4093 -openml:4094 -openml:4095 -openml:4096 -openml:4097 -openml:4098 -openml:4099 -openml:4100 -openml:4071 -openml:4072 -openml:4073 -openml:4074 -openml:4075 -openml:4076 -openml:4077 -openml:4078 -openml:4079 -openml:4080 -openml:4042 -openml:4043 -openml:4044 -openml:4045 -openml:4046 -openml:4047 -openml:4048 -openml:4049 -openml:4050 -openml:4081 -openml:4082 -openml:4083 -openml:4084 -openml:4085 -openml:4086 -openml:4087 -openml:4088 -openml:4089 -openml:4090 -openml:4041 -openml:4131 -openml:4121 -openml:4122 -openml:4123 -openml:4124 -openml:4125 -openml:4126 -openml:4127 -openml:4128 -openml:4129 -openml:4130 -openml:4111 -openml:4112 -openml:4113 -openml:4114 -openml:4115 -openml:4116 -openml:4117 -openml:4118 -openml:4119 -openml:4120 -openml:40672 -openml:40922 -openml:40590 -openml:584 -openml:196 -openml:202 -openml:205 -openml:315 -openml:296 -openml:298 -openml:299 -openml:224 -openml:301 -openml:308 -openml:287 -openml:404 -openml:405 -openml:406 -openml:407 -openml:408 -openml:409 -openml:410 -openml:411 -openml:412 -openml:413 -openml:415 -openml:416 -openml:417 -openml:418 -openml:420 -openml:372 -openml:374 -openml:376 -openml:380 -openml:421 -openml:422 -openml:423 -openml:424 -openml:425 -openml:426 -openml:428 -openml:430 -openml:433 -openml:434 -openml:435 
-openml:437 -openml:438 -openml:439 -openml:440 -openml:456 -openml:460 -openml:441 -openml:442 -openml:449 -openml:501 -openml:502 -openml:504 -openml:505 -openml:507 -openml:508 -openml:510 -openml:485 -openml:486 -openml:487 -openml:512 -openml:516 -openml:495 -openml:558 -openml:559 -openml:563 -openml:564 -openml:565 -openml:568 -openml:572 -openml:574 -openml:575 -openml:576 -openml:579 -openml:580 -openml:525 -openml:532 -openml:537 -openml:538 -openml:544 -openml:547 -openml:550 -openml:581 -openml:582 -openml:587 -openml:589 -openml:590 -openml:601 -openml:602 -openml:603 -openml:604 -openml:605 -openml:606 -openml:607 -openml:608 -openml:592 -openml:593 -openml:594 -openml:597 -openml:599 -openml:600 -openml:691 -openml:692 -openml:693 -openml:695 -openml:696 -openml:697 -openml:698 -openml:699 -openml:700 -openml:684 -openml:687 -openml:689 -openml:690 -openml:671 -openml:672 -openml:674 -openml:675 -openml:676 -openml:678 -openml:680 -openml:651 -openml:652 -openml:653 -openml:654 -openml:655 -openml:656 -openml:657 -openml:658 -openml:659 -openml:660 -openml:631 -openml:632 -openml:633 -openml:634 -openml:635 -openml:636 -openml:637 -openml:638 -openml:640 -openml:641 -openml:642 -openml:643 -openml:644 -openml:645 -openml:646 -openml:647 -openml:648 -openml:649 -openml:650 -openml:621 -openml:622 -openml:623 -openml:624 -openml:625 -openml:626 -openml:627 -openml:628 -openml:629 -openml:630 -openml:611 -openml:612 -openml:613 -openml:614 -openml:615 -openml:616 -openml:617 -openml:619 -openml:620 -openml:661 -openml:663 -openml:664 -openml:666 -openml:669 -openml:670 -openml:711 -openml:712 -openml:702 -openml:704 -openml:705 -openml:708 -openml:709 -openml:710 -openml:1035 -openml:1027 -openml:1028 -openml:1029 -openml:1030 -openml:1103 -openml:1088 -openml:1089 -openml:1072 -openml:1070 -openml:1058 -openml:1092 -openml:1093 -openml:1094 -openml:1097 -openml:1098 -openml:1099 -openml:1168 -openml:1184 -openml:1187 -openml:1188 -openml:1189 -openml:1190 -openml:1201 -openml:1202 -openml:1203 -openml:1204 -openml:1206 -openml:1207 -openml:1208 -openml:1210 -openml:1213 -openml:1216 -openml:1217 -openml:1191 -openml:1192 -openml:1193 -openml:1194 -openml:1195 -openml:1196 -openml:1197 -openml:1198 -openml:1199 -openml:1200 -openml:1226 -openml:1228 -openml:1571 -openml:1572 -openml:1574 -openml:1575 -openml:1577 -openml:1578 -openml:1579 -openml:1561 -openml:1591 -openml:1592 -openml:1593 -openml:1594 -openml:1596 -openml:1600 -openml:1581 -openml:1582 -openml:1583 -openml:1584 -openml:1585 -openml:1586 -openml:1587 -openml:1588 -openml:1589 -openml:1424 -openml:1425 -openml:1426 -openml:1427 -openml:1428 -openml:1429 -openml:1430 -openml:1412 -openml:1414 -openml:1419 -openml:1420 -openml:1432 -openml:1433 -openml:1434 -openml:1435 -openml:1436 -openml:1448 -openml:1449 -openml:1450 -openml:4136 -openml:4137 -openml:4138 -openml:4139 -openml:4140 -openml:4541 -openml:4544 -openml:4545 -openml:4546 -openml:4548 -openml:4549 -openml:4531 -openml:4532 -openml:4533 -openml:4535 -openml:4540 -openml:4353 -openml:4551 -openml:4553 -openml:4562 -openml:4563 -openml:23383 -openml:23394 -openml:23395 -openml:23396 -openml:23397 -openml:23420 -openml:23513 -openml:23515 -openml:23516 -openml:5648 -openml:5889 -openml:5587 -openml:41065 -openml:40992 -openml:40993 -openml:41022 -openml:41142 -openml:41143 -openml:41144 -openml:41145 -openml:41146 -openml:41147 -openml:41150 -openml:41156 -openml:41157 -openml:41158 -openml:41159 -openml:41160 -openml:41164 -openml:41165 -openml:41166 
-openml:41167 -openml:41168 -openml:41169 -openml:41161 -openml:41162 -openml:41163 -openml:41138 -openml:40864 -openml:40869 -openml:402 -openml:403 -openml:529 -openml:429 -openml:522 -openml:493 -openml:40645 -openml:40649 -openml:40685 -openml:40601 -openml:41021 -openml:552 -openml:40753 -openml:41228 -openml:41187 -openml:40976 -openml:40985 -openml:40591 -openml:40592 -openml:40593 -openml:40594 -openml:40595 -openml:40596 -openml:40597 -openml:40588 -openml:40589 -openml:40505 -openml:515 -openml:560 -openml:431 -openml:542 -openml:419 -openml:570 -openml:596 -openml:519 -openml:379 -openml:588 -openml:618 -openml:549 -openml:609 -openml:490 -openml:567 -openml:471 -openml:586 -openml:498 -openml:585 -openml:414 -openml:573 -openml:545 -openml:436 -openml:427 -openml:344 -openml:483 -openml:503 -openml:583 -openml:432 -openml:40945 -openml:591 -openml:41265 -openml:577 -openml:540 -openml:539 -openml:569 -openml:610 -openml:598 \ No newline at end of file diff --git a/configs/datasets/automl.txt b/configs/datasets/automl.txt deleted file mode 100644 index fb2dae894..000000000 --- a/configs/datasets/automl.txt +++ /dev/null @@ -1,3 +0,0 @@ -datasets/automl/newsgroups/newsgroups_public.info -datasets/automl/dorothea/dorothea_public.info -datasets/automl/tania/tania_public.info \ No newline at end of file diff --git a/configs/datasets/cifar.txt b/configs/datasets/cifar.txt deleted file mode 100644 index d9099be33..000000000 --- a/configs/datasets/cifar.txt +++ /dev/null @@ -1 +0,0 @@ -[datasets/CIFAR10.csv] diff --git a/configs/datasets/metalearning.txt b/configs/datasets/metalearning.txt deleted file mode 100644 index 695adb014..000000000 --- a/configs/datasets/metalearning.txt +++ /dev/null @@ -1,403 +0,0 @@ -openml:218 -openml:389 -openml:1017 -openml:40518 -openml:23381 -openml:40982 -openml:39 -openml:395 -openml:1115 -openml:227 -openml:136 -openml:444 -openml:1119 -openml:147 -openml:1114 -openml:131 -openml:135 -openml:32 -openml:1137 -openml:385 -openml:294 -openml:163 -openml:52 -openml:1487 -openml:1113 -openml:803 -openml:1494 -openml:311 -openml:40646 -openml:1596 -openml:398 -openml:142 -openml:726 -openml:78 -openml:1116 -openml:890 -openml:1492 -openml:40681 -openml:878 -openml:357 -openml:223 -openml:1512 -openml:329 -openml:981 -openml:784 -openml:736 -openml:129 -openml:397 -openml:1464 -openml:116 -openml:72 -openml:1486 -openml:40478 -openml:1145 -openml:53 -openml:40498 -openml:40672 -openml:1049 -openml:6332 -openml:1497 -openml:40678 -openml:40994 -openml:120 -openml:190 -openml:2 -openml:40713 -openml:954 -openml:1488 -openml:198 -openml:119 -openml:128 -openml:40975 -openml:1038 -openml:754 -openml:77 -openml:1158 -openml:1504 -openml:74 -openml:215 -openml:1039 -openml:333 -openml:4134 -openml:40978 -openml:49 -openml:140 -openml:40680 -openml:783 -openml:75 -openml:4135 -openml:1502 -openml:40670 -openml:1061 -openml:1526 -openml:976 -openml:188 -openml:115 -openml:481 -openml:405 -openml:60 -openml:24 -openml:8 -openml:40660 -openml:387 -openml:51 -openml:55 -openml:1111 -openml:895 -openml:15 -openml:1056 -openml:564 -openml:59 -openml:1397 -openml:144 -openml:151 -openml:161 -openml:1491 -openml:26 -openml:40663 -openml:383 -openml:714 -openml:130 -openml:194 -openml:42 -openml:181 -openml:146 -openml:40664 -openml:507 -openml:172 -openml:40706 -openml:38 -openml:40516 -openml:189 -openml:388 -openml:23512 -openml:187 -openml:40711 -openml:22 -openml:401 -openml:312 -openml:470 -openml:6 -openml:118 -openml:1466 -openml:885 -openml:789 -openml:137 
-openml:132 -openml:40923 -openml:1166 -openml:29 -openml:313 -openml:1479 -openml:1460 -openml:334 -openml:1493 -openml:422 -openml:1040 -openml:1549 -openml:40647 -openml:197 -openml:14 -openml:469 -openml:1480 -openml:139 -openml:200 -openml:747 -openml:71 -openml:57 -openml:1178 -openml:25 -openml:40690 -openml:969 -openml:1112 -openml:1462 -openml:184 -openml:40686 -openml:278 -openml:448 -openml:310 -openml:1233 -openml:1101 -openml:1046 -openml:18 -openml:40702 -openml:1134 -openml:327 -openml:390 -openml:46 -openml:143 -openml:40669 -openml:461 -openml:1121 -openml:127 -openml:1063 -openml:40996 -openml:40983 -openml:28 -openml:12 -openml:466 -openml:350 -openml:1161 -openml:377 -openml:40691 -openml:300 -openml:1130 -openml:889 -openml:782 -openml:574 -openml:315 -openml:1590 -openml:40645 -openml:213 -openml:867 -openml:993 -openml:27 -openml:1485 -openml:1471 -openml:41187 -openml:1109 -openml:1004 -openml:20 -openml:35 -openml:76 -openml:875 -openml:1552 -openml:183 -openml:5 -openml:1461 -openml:1165 -openml:4538 -openml:40589 -openml:4136 -openml:1139 -openml:4535 -openml:16 -openml:394 -openml:134 -openml:307 -openml:43 -openml:1547 -openml:44 -openml:40707 -openml:182 -openml:400 -openml:126 -openml:40926 -openml:1002 -openml:464 -openml:150 -openml:62 -openml:3 -openml:73 -openml:34 -openml:1013 -openml:133 -openml:1413 -openml:40499 -openml:40477 -openml:1128 -openml:48 -openml:451 -openml:916 -openml:554 -openml:1457 -openml:186 -openml:344 -openml:199 -openml:1102 -openml:138 -openml:512 -openml:196 -openml:1515 -openml:195 -openml:1478 -openml:1597 -openml:10 -openml:40979 -openml:40666 -openml:1459 -openml:40683 -openml:23 -openml:40665 -openml:1068 -openml:11 -openml:416 -openml:1553 -openml:1476 -openml:1067 -openml:149 -openml:9 -openml:179 -openml:40496 -openml:40984 -openml:902 -openml:117 -openml:41 -openml:23517 -openml:563 -openml:164 -openml:54 -openml:141 -openml:40677 -openml:40693 -openml:4551 -openml:375 -openml:1501 -openml:41026 -openml:1069 -openml:328 -openml:7 -openml:4 -openml:393 -openml:1037 -openml:40714 -openml:40981 -openml:1050 -openml:1120 -openml:396 -openml:380 -openml:40708 -openml:336 -openml:956 -openml:1179 -openml:40685 -openml:1475 -openml:384 -openml:40650 -openml:1220 -openml:392 -openml:40704 -openml:762 -openml:1548 -openml:1146 -openml:921 -openml:1510 -openml:40710 -openml:4541 -openml:61 -openml:379 -openml:191 -openml:40648 -openml:849 -openml:338 -openml:337 -openml:30 -openml:829 -openml:70 -openml:40671 -openml:40701 -openml:40900 -openml:1555 -openml:386 -openml:458 -openml:56 -openml:40682 -openml:1042 -openml:40709 -openml:1016 -openml:1106 -openml:13 -openml:4137 -openml:31 -openml:40705 -openml:40649 -openml:40687 -openml:1467 -openml:372 -openml:40700 -openml:1489 -openml:171 -openml:1142 -openml:40536 -openml:1472 -openml:974 -openml:844 -openml:36 -openml:955 -openml:1053 -openml:4534 -openml:811 -openml:285 -openml:50 -openml:148 -openml:40668 -openml:346 -openml:40910 -openml:399 -openml:23380 -openml:37 -openml:40966 -openml:1018 -openml:748 -openml:1398 -openml:335 -openml:531 -openml:1554 -openml:40 -openml:40971 -openml:1468 -openml:1514 -openml:1138 -openml:41027 -openml:4532 diff --git a/configs/datasets/openml.txt b/configs/datasets/openml.txt deleted file mode 100644 index 097851feb..000000000 --- a/configs/datasets/openml.txt +++ /dev/null @@ -1,73 +0,0 @@ -openml:12 -openml:14 -openml:15 -openml:16 -openml:18 -openml:54 -openml:46 -openml:28 -openml:29 -openml:22 -openml:23 -openml:182 -openml:188 
-openml:300 -openml:307 -openml:458 -openml:469 -openml:1049 -openml:1050 -openml:1067 -openml:1068 -openml:1053 -openml:1590 -openml:1485 -openml:1487 -openml:1475 -openml:1478 -openml:1480 -openml:1461 -openml:1468 -openml:1497 -openml:4534 -openml:4538 -openml:6332 -openml:23381 -openml:40994 -openml:41027 -openml:40668 -openml:40670 -openml:40923 -openml:1501 -openml:1462 -openml:11 -openml:40981 -openml:40975 -openml:40978 -openml:40979 -openml:40982 -openml:40983 -openml:40984 -openml:40966 -openml:1486 -openml:40499 -openml:4134 -openml:1063 -openml:40927 -openml:151 -openml:32 -openml:6 -openml:23517 -openml:37 -openml:1494 -openml:1510 -openml:40996 -openml:1489 -openml:1464 -openml:38 -openml:44 -openml:3 -openml:40701 -openml:554 -openml:31 -openml:50 \ No newline at end of file diff --git a/configs/datasets/openml_image.txt b/configs/datasets/openml_image.txt deleted file mode 100644 index 086605861..000000000 --- a/configs/datasets/openml_image.txt +++ /dev/null @@ -1 +0,0 @@ -openml:40927:3 \ No newline at end of file diff --git a/configs/datasets/openml_small.txt b/configs/datasets/openml_small.txt deleted file mode 100644 index d5dfd54c6..000000000 --- a/configs/datasets/openml_small.txt +++ /dev/null @@ -1,10 +0,0 @@ -openml:41027 -openml:554 -openml:40927 -openml:6332 -openml:40994 -openml:4134 -openml:11 -openml:18 -openml:14 -openml:40923 diff --git a/configs/hosts/meta.txt b/configs/hosts/meta.txt deleted file mode 100644 index b94b96ad0..000000000 --- a/configs/hosts/meta.txt +++ /dev/null @@ -1,3 +0,0 @@ -network_interface_name=eth0 -result_dir=benchmark_results_cluster -working_dir=working_dir \ No newline at end of file diff --git a/configs/hosts/nemo.txt b/configs/hosts/nemo.txt deleted file mode 100644 index a8533eff5..000000000 --- a/configs/hosts/nemo.txt +++ /dev/null @@ -1,4 +0,0 @@ -network_interface_name=ib0 -working_dir=working_dir -result_dir=benchmark_results_cluster -time_limit=345600 \ No newline at end of file diff --git a/configs/hosts/nemo_singularity.txt b/configs/hosts/nemo_singularity.txt deleted file mode 100644 index 829d3a2bb..000000000 --- a/configs/hosts/nemo_singularity.txt +++ /dev/null @@ -1,4 +0,0 @@ -network_interface_name=ib0 -working_dir=/external_autonet_home/working_dir -result_dir=benchmark_results_cluster -time_limit=345600 \ No newline at end of file diff --git a/configs/refit/refit_example.json b/configs/refit/refit_example.json deleted file mode 100644 index 3f9b4f8bc..000000000 --- a/configs/refit/refit_example.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "SimpleTrainNode:batch_loss_computation_technique": "mixup", - "SimpleTrainNode:mixup:alpha": 0.012, - "CreateImageDataLoader:batch_size": 147, - "NetworkSelectorDatasetInfo:network": "efficientnetb0", - "OptimizerSelector:optimizer": "adamw", - "OptimizerSelector:adamw:learning_rate": 0.012, - "OptimizerSelector:adamw:weight_decay": 0.000017, - "SimpleLearningrateSchedulerSelector:lr_scheduler": "cosine_annealing", - "SimpleLearningrateSchedulerSelector:cosine_annealing:T_max": 73, - "SimpleLearningrateSchedulerSelector:cosine_annealing:T_mult": 1.38, - "ImageAugmentation:augment": "True", - "ImageAugmentation:cutout": "True", - "ImageAugmentation:cutout_holes": 3, - "ImageAugmentation:autoaugment": "True", - "ImageAugmentation:fastautoaugment": "False", - "ImageAugmentation:length": 6, - "LossModuleSelectorIndices:loss_module": "cross_entropy" -} diff --git a/datasets/CIFAR10.csv b/datasets/CIFAR10.csv deleted file mode 100644 index 04ab00f08..000000000 --- 
a/datasets/CIFAR10.csv +++ /dev/null @@ -1 +0,0 @@ -CIFAR10, 0 \ No newline at end of file diff --git a/datasets/example.csv b/datasets/example.csv deleted file mode 100644 index 529464e61..000000000 --- a/datasets/example.csv +++ /dev/null @@ -1,99 +0,0 @@ -icebreaker_s_001689.png,8 -peke_s_000545.png,5 -convertible_s_000520.png,1 -domestic_dog_s_000455.png,5 -broodmare_s_000313.png,7 -capreolus_capreolus_s_001380.png,4 -true_cat_s_000886.png,3 -cruiser_s_000163.png,8 -ostrich_s_001561.png,2 -buckskin_s_000031.png,7 -cassowary_s_002024.png,2 -fighter_aircraft_s_001009.png,0 -convertible_s_000295.png,1 -lapdog_s_001489.png,5 -delivery_truck_s_001300.png,9 -rana_pipiens_s_000379.png,6 -ostrich_s_000026.png,2 -fighter_aircraft_s_000720.png,0 -supertanker_s_000275.png,8 -ostrich_s_000147.png,2 -male_horse_s_000742.png,7 -monoplane_s_000877.png,0 -fallow_deer_s_000351.png,4 -automobile_s_001645.png,1 -walking_horse_s_000071.png,7 -stallion_s_000015.png,7 -capreolus_capreolus_s_001283.png,4 -mule_deer_s_000357.png,4 -dumper_s_000805.png,9 -trailer_truck_s_001350.png,9 -green_frog_s_001384.png,6 -rhea_americana_s_000436.png,2 -capreolus_capreolus_s_001605.png,4 -auto_s_000800.png,1 -tailed_frog_s_000246.png,6 -cervus_elaphus_s_000903.png,4 -articulated_lorry_s_000916.png,9 -bullfrog_s_000797.png,6 -bullfrog_s_001028.png,6 -ladder_truck_s_001799.png,9 -toad_frog_s_001786.png,6 -wrecker_s_002395.png,9 -dump_truck_s_001363.png,9 -canis_familiaris_s_000450.png,5 -lipizzan_s_001223.png,7 -station_wagon_s_000464.png,1 -american_toad_s_001003.png,6 -dredger_s_000486.png,8 -wagtail_s_000747.png,2 -dump_truck_s_000163.png,9 -mutt_s_000997.png,5 -dump_truck_s_001097.png,9 -puppy_s_001045.png,5 -tabby_s_001593.png,3 -broodmare_s_000179.png,7 -car_s_000040.png,1 -domestic_cat_s_000913.png,3 -alley_cat_s_000843.png,3 -truck_s_000028.png,9 -estate_car_s_001092.png,1 -arabian_s_000782.png,7 -supertanker_s_000761.png,8 -garbage_truck_s_001211.png,9 -arabian_s_002303.png,7 -red_deer_s_001101.png,4 -tabby_cat_s_000069.png,3 -cervus_elaphus_s_001124.png,4 -trucking_rig_s_001247.png,9 -pekinese_s_000046.png,5 -police_boat_s_001118.png,8 -fallow_deer_s_001785.png,4 -camion_s_000599.png,9 -tabby_s_001774.png,3 -spring_frog_s_000407.png,6 -wagon_s_002463.png,1 -station_wagon_s_002537.png,1 -elk_s_001751.png,4 -house_cat_s_000064.png,3 -lorry_s_000562.png,9 -delivery_truck_s_001587.png,9 -wagon_s_000378.png,1 -trucking_rig_s_001431.png,9 -tractor_trailer_s_000653.png,9 -cassowary_s_000194.png,2 -fawn_s_001418.png,4 -mouser_s_000792.png,3 -bird_of_passage_s_000006.png,2 -sika_s_000337.png,4 -dawn_horse_s_001453.png,7 -police_cruiser_s_001385.png,1 -maltese_s_000562.png,5 -wagon_s_000572.png,1 -liberty_ship_s_001456.png,8 -western_toad_s_000622.png,6 -house_cat_s_002004.png,3 -bufo_bufo_s_002202.png,6 -tabby_cat_s_001983.png,3 -fallow_deer_s_001133.png,4 -red_deer_s_001719.png,4 diff --git a/datasets/example_images/alley_cat_s_000843.png b/datasets/example_images/alley_cat_s_000843.png deleted file mode 100644 index bef5de531..000000000 Binary files a/datasets/example_images/alley_cat_s_000843.png and /dev/null differ diff --git a/datasets/example_images/american_toad_s_001003.png b/datasets/example_images/american_toad_s_001003.png deleted file mode 100644 index c89f0bb36..000000000 Binary files a/datasets/example_images/american_toad_s_001003.png and /dev/null differ diff --git a/datasets/example_images/arabian_s_000782.png b/datasets/example_images/arabian_s_000782.png deleted file mode 100644 index 
79b94674f..000000000 Binary files a/datasets/example_images/arabian_s_000782.png and /dev/null differ diff --git a/datasets/example_images/arabian_s_002303.png b/datasets/example_images/arabian_s_002303.png deleted file mode 100644 index 3125066f4..000000000 Binary files a/datasets/example_images/arabian_s_002303.png and /dev/null differ diff --git a/datasets/example_images/articulated_lorry_s_000916.png b/datasets/example_images/articulated_lorry_s_000916.png deleted file mode 100644 index 9d5d1a140..000000000 Binary files a/datasets/example_images/articulated_lorry_s_000916.png and /dev/null differ diff --git a/datasets/example_images/auto_s_000800.png b/datasets/example_images/auto_s_000800.png deleted file mode 100644 index ba42044dd..000000000 Binary files a/datasets/example_images/auto_s_000800.png and /dev/null differ diff --git a/datasets/example_images/automobile_s_001645.png b/datasets/example_images/automobile_s_001645.png deleted file mode 100644 index b8d317620..000000000 Binary files a/datasets/example_images/automobile_s_001645.png and /dev/null differ diff --git a/datasets/example_images/bird_of_passage_s_000006.png b/datasets/example_images/bird_of_passage_s_000006.png deleted file mode 100644 index d9ef770b0..000000000 Binary files a/datasets/example_images/bird_of_passage_s_000006.png and /dev/null differ diff --git a/datasets/example_images/broodmare_s_000179.png b/datasets/example_images/broodmare_s_000179.png deleted file mode 100644 index 319f8a398..000000000 Binary files a/datasets/example_images/broodmare_s_000179.png and /dev/null differ diff --git a/datasets/example_images/broodmare_s_000313.png b/datasets/example_images/broodmare_s_000313.png deleted file mode 100644 index 87f106363..000000000 Binary files a/datasets/example_images/broodmare_s_000313.png and /dev/null differ diff --git a/datasets/example_images/buckskin_s_000031.png b/datasets/example_images/buckskin_s_000031.png deleted file mode 100644 index 276e335d0..000000000 Binary files a/datasets/example_images/buckskin_s_000031.png and /dev/null differ diff --git a/datasets/example_images/bufo_bufo_s_002202.png b/datasets/example_images/bufo_bufo_s_002202.png deleted file mode 100644 index 1eef4599a..000000000 Binary files a/datasets/example_images/bufo_bufo_s_002202.png and /dev/null differ diff --git a/datasets/example_images/bullfrog_s_000797.png b/datasets/example_images/bullfrog_s_000797.png deleted file mode 100644 index 40c341c0f..000000000 Binary files a/datasets/example_images/bullfrog_s_000797.png and /dev/null differ diff --git a/datasets/example_images/bullfrog_s_001028.png b/datasets/example_images/bullfrog_s_001028.png deleted file mode 100644 index 073e4522e..000000000 Binary files a/datasets/example_images/bullfrog_s_001028.png and /dev/null differ diff --git a/datasets/example_images/camion_s_000599.png b/datasets/example_images/camion_s_000599.png deleted file mode 100644 index 86b77d960..000000000 Binary files a/datasets/example_images/camion_s_000599.png and /dev/null differ diff --git a/datasets/example_images/canis_familiaris_s_000450.png b/datasets/example_images/canis_familiaris_s_000450.png deleted file mode 100644 index c01a7b840..000000000 Binary files a/datasets/example_images/canis_familiaris_s_000450.png and /dev/null differ diff --git a/datasets/example_images/capreolus_capreolus_s_001283.png b/datasets/example_images/capreolus_capreolus_s_001283.png deleted file mode 100644 index b301a6c0c..000000000 Binary files a/datasets/example_images/capreolus_capreolus_s_001283.png 
and /dev/null differ diff --git a/datasets/example_images/capreolus_capreolus_s_001380.png b/datasets/example_images/capreolus_capreolus_s_001380.png deleted file mode 100644 index bf7c7366f..000000000 Binary files a/datasets/example_images/capreolus_capreolus_s_001380.png and /dev/null differ diff --git a/datasets/example_images/capreolus_capreolus_s_001605.png b/datasets/example_images/capreolus_capreolus_s_001605.png deleted file mode 100644 index 5e207d74c..000000000 Binary files a/datasets/example_images/capreolus_capreolus_s_001605.png and /dev/null differ diff --git a/datasets/example_images/car_s_000040.png b/datasets/example_images/car_s_000040.png deleted file mode 100644 index 5c4261e68..000000000 Binary files a/datasets/example_images/car_s_000040.png and /dev/null differ diff --git a/datasets/example_images/cassowary_s_000194.png b/datasets/example_images/cassowary_s_000194.png deleted file mode 100644 index 046b033a7..000000000 Binary files a/datasets/example_images/cassowary_s_000194.png and /dev/null differ diff --git a/datasets/example_images/cassowary_s_002024.png b/datasets/example_images/cassowary_s_002024.png deleted file mode 100644 index 0e0195107..000000000 Binary files a/datasets/example_images/cassowary_s_002024.png and /dev/null differ diff --git a/datasets/example_images/cervus_elaphus_s_000903.png b/datasets/example_images/cervus_elaphus_s_000903.png deleted file mode 100644 index ac4d5a00d..000000000 Binary files a/datasets/example_images/cervus_elaphus_s_000903.png and /dev/null differ diff --git a/datasets/example_images/cervus_elaphus_s_001124.png b/datasets/example_images/cervus_elaphus_s_001124.png deleted file mode 100644 index 09f486ee6..000000000 Binary files a/datasets/example_images/cervus_elaphus_s_001124.png and /dev/null differ diff --git a/datasets/example_images/convertible_s_000295.png b/datasets/example_images/convertible_s_000295.png deleted file mode 100644 index 98fbf9f3c..000000000 Binary files a/datasets/example_images/convertible_s_000295.png and /dev/null differ diff --git a/datasets/example_images/convertible_s_000520.png b/datasets/example_images/convertible_s_000520.png deleted file mode 100644 index ee73ee4b8..000000000 Binary files a/datasets/example_images/convertible_s_000520.png and /dev/null differ diff --git a/datasets/example_images/cruiser_s_000163.png b/datasets/example_images/cruiser_s_000163.png deleted file mode 100644 index a3d79e112..000000000 Binary files a/datasets/example_images/cruiser_s_000163.png and /dev/null differ diff --git a/datasets/example_images/dawn_horse_s_001453.png b/datasets/example_images/dawn_horse_s_001453.png deleted file mode 100644 index db4fa7089..000000000 Binary files a/datasets/example_images/dawn_horse_s_001453.png and /dev/null differ diff --git a/datasets/example_images/delivery_truck_s_001300.png b/datasets/example_images/delivery_truck_s_001300.png deleted file mode 100644 index 94031550a..000000000 Binary files a/datasets/example_images/delivery_truck_s_001300.png and /dev/null differ diff --git a/datasets/example_images/delivery_truck_s_001587.png b/datasets/example_images/delivery_truck_s_001587.png deleted file mode 100644 index a86fb263d..000000000 Binary files a/datasets/example_images/delivery_truck_s_001587.png and /dev/null differ diff --git a/datasets/example_images/domestic_cat_s_000913.png b/datasets/example_images/domestic_cat_s_000913.png deleted file mode 100644 index 305c57841..000000000 Binary files a/datasets/example_images/domestic_cat_s_000913.png and /dev/null 
differ diff --git a/datasets/example_images/domestic_dog_s_000455.png b/datasets/example_images/domestic_dog_s_000455.png deleted file mode 100644 index 03cbdc0d3..000000000 Binary files a/datasets/example_images/domestic_dog_s_000455.png and /dev/null differ diff --git a/datasets/example_images/dredger_s_000486.png b/datasets/example_images/dredger_s_000486.png deleted file mode 100644 index 6876e9574..000000000 Binary files a/datasets/example_images/dredger_s_000486.png and /dev/null differ diff --git a/datasets/example_images/dump_truck_s_000163.png b/datasets/example_images/dump_truck_s_000163.png deleted file mode 100644 index bb343b384..000000000 Binary files a/datasets/example_images/dump_truck_s_000163.png and /dev/null differ diff --git a/datasets/example_images/dump_truck_s_001097.png b/datasets/example_images/dump_truck_s_001097.png deleted file mode 100644 index 5fffe2a4e..000000000 Binary files a/datasets/example_images/dump_truck_s_001097.png and /dev/null differ diff --git a/datasets/example_images/dump_truck_s_001363.png b/datasets/example_images/dump_truck_s_001363.png deleted file mode 100644 index a852d9a24..000000000 Binary files a/datasets/example_images/dump_truck_s_001363.png and /dev/null differ diff --git a/datasets/example_images/dumper_s_000805.png b/datasets/example_images/dumper_s_000805.png deleted file mode 100644 index 3c4d3410f..000000000 Binary files a/datasets/example_images/dumper_s_000805.png and /dev/null differ diff --git a/datasets/example_images/elk_s_001751.png b/datasets/example_images/elk_s_001751.png deleted file mode 100644 index 5aa450e9c..000000000 Binary files a/datasets/example_images/elk_s_001751.png and /dev/null differ diff --git a/datasets/example_images/estate_car_s_001092.png b/datasets/example_images/estate_car_s_001092.png deleted file mode 100644 index b5b7fd952..000000000 Binary files a/datasets/example_images/estate_car_s_001092.png and /dev/null differ diff --git a/datasets/example_images/fallow_deer_s_000351.png b/datasets/example_images/fallow_deer_s_000351.png deleted file mode 100644 index 8583e73a7..000000000 Binary files a/datasets/example_images/fallow_deer_s_000351.png and /dev/null differ diff --git a/datasets/example_images/fallow_deer_s_001133.png b/datasets/example_images/fallow_deer_s_001133.png deleted file mode 100644 index 3e8fd5969..000000000 Binary files a/datasets/example_images/fallow_deer_s_001133.png and /dev/null differ diff --git a/datasets/example_images/fallow_deer_s_001785.png b/datasets/example_images/fallow_deer_s_001785.png deleted file mode 100644 index 9abf685b9..000000000 Binary files a/datasets/example_images/fallow_deer_s_001785.png and /dev/null differ diff --git a/datasets/example_images/fawn_s_001418.png b/datasets/example_images/fawn_s_001418.png deleted file mode 100644 index e004bc8f5..000000000 Binary files a/datasets/example_images/fawn_s_001418.png and /dev/null differ diff --git a/datasets/example_images/fighter_aircraft_s_000720.png b/datasets/example_images/fighter_aircraft_s_000720.png deleted file mode 100644 index 46cb393e4..000000000 Binary files a/datasets/example_images/fighter_aircraft_s_000720.png and /dev/null differ diff --git a/datasets/example_images/fighter_aircraft_s_001009.png b/datasets/example_images/fighter_aircraft_s_001009.png deleted file mode 100644 index 7c7e5dcf4..000000000 Binary files a/datasets/example_images/fighter_aircraft_s_001009.png and /dev/null differ diff --git a/datasets/example_images/garbage_truck_s_001211.png 
b/datasets/example_images/garbage_truck_s_001211.png deleted file mode 100644 index a0cedebee..000000000 Binary files a/datasets/example_images/garbage_truck_s_001211.png and /dev/null differ diff --git a/datasets/example_images/green_frog_s_001384.png b/datasets/example_images/green_frog_s_001384.png deleted file mode 100644 index 63b604143..000000000 Binary files a/datasets/example_images/green_frog_s_001384.png and /dev/null differ diff --git a/datasets/example_images/house_cat_s_000064.png b/datasets/example_images/house_cat_s_000064.png deleted file mode 100644 index cae1fef87..000000000 Binary files a/datasets/example_images/house_cat_s_000064.png and /dev/null differ diff --git a/datasets/example_images/house_cat_s_002004.png b/datasets/example_images/house_cat_s_002004.png deleted file mode 100644 index 79e064548..000000000 Binary files a/datasets/example_images/house_cat_s_002004.png and /dev/null differ diff --git a/datasets/example_images/icebreaker_s_001689.png b/datasets/example_images/icebreaker_s_001689.png deleted file mode 100644 index b5b9e8e3c..000000000 Binary files a/datasets/example_images/icebreaker_s_001689.png and /dev/null differ diff --git a/datasets/example_images/ladder_truck_s_001799.png b/datasets/example_images/ladder_truck_s_001799.png deleted file mode 100644 index 58a0c8401..000000000 Binary files a/datasets/example_images/ladder_truck_s_001799.png and /dev/null differ diff --git a/datasets/example_images/lapdog_s_001489.png b/datasets/example_images/lapdog_s_001489.png deleted file mode 100644 index 259d9b2b9..000000000 Binary files a/datasets/example_images/lapdog_s_001489.png and /dev/null differ diff --git a/datasets/example_images/liberty_ship_s_001456.png b/datasets/example_images/liberty_ship_s_001456.png deleted file mode 100644 index de42766d5..000000000 Binary files a/datasets/example_images/liberty_ship_s_001456.png and /dev/null differ diff --git a/datasets/example_images/lipizzan_s_001223.png b/datasets/example_images/lipizzan_s_001223.png deleted file mode 100644 index 09a79f7b1..000000000 Binary files a/datasets/example_images/lipizzan_s_001223.png and /dev/null differ diff --git a/datasets/example_images/lorry_s_000562.png b/datasets/example_images/lorry_s_000562.png deleted file mode 100644 index b53d1befd..000000000 Binary files a/datasets/example_images/lorry_s_000562.png and /dev/null differ diff --git a/datasets/example_images/male_horse_s_000742.png b/datasets/example_images/male_horse_s_000742.png deleted file mode 100644 index 991b3ab87..000000000 Binary files a/datasets/example_images/male_horse_s_000742.png and /dev/null differ diff --git a/datasets/example_images/maltese_s_000562.png b/datasets/example_images/maltese_s_000562.png deleted file mode 100644 index 0699958ae..000000000 Binary files a/datasets/example_images/maltese_s_000562.png and /dev/null differ diff --git a/datasets/example_images/monoplane_s_000877.png b/datasets/example_images/monoplane_s_000877.png deleted file mode 100644 index fbfaa7682..000000000 Binary files a/datasets/example_images/monoplane_s_000877.png and /dev/null differ diff --git a/datasets/example_images/mouser_s_000792.png b/datasets/example_images/mouser_s_000792.png deleted file mode 100644 index 03927b4dd..000000000 Binary files a/datasets/example_images/mouser_s_000792.png and /dev/null differ diff --git a/datasets/example_images/mule_deer_s_000357.png b/datasets/example_images/mule_deer_s_000357.png deleted file mode 100644 index 5fe5f2d39..000000000 Binary files 
a/datasets/example_images/mule_deer_s_000357.png and /dev/null differ diff --git a/datasets/example_images/mutt_s_000997.png b/datasets/example_images/mutt_s_000997.png deleted file mode 100644 index 93f2c74c6..000000000 Binary files a/datasets/example_images/mutt_s_000997.png and /dev/null differ diff --git a/datasets/example_images/ostrich_s_000026.png b/datasets/example_images/ostrich_s_000026.png deleted file mode 100644 index 320f5e502..000000000 Binary files a/datasets/example_images/ostrich_s_000026.png and /dev/null differ diff --git a/datasets/example_images/ostrich_s_000147.png b/datasets/example_images/ostrich_s_000147.png deleted file mode 100644 index 01e375468..000000000 Binary files a/datasets/example_images/ostrich_s_000147.png and /dev/null differ diff --git a/datasets/example_images/ostrich_s_001561.png b/datasets/example_images/ostrich_s_001561.png deleted file mode 100644 index 2d1ebb0d6..000000000 Binary files a/datasets/example_images/ostrich_s_001561.png and /dev/null differ diff --git a/datasets/example_images/peke_s_000545.png b/datasets/example_images/peke_s_000545.png deleted file mode 100644 index fd75a564d..000000000 Binary files a/datasets/example_images/peke_s_000545.png and /dev/null differ diff --git a/datasets/example_images/pekinese_s_000046.png b/datasets/example_images/pekinese_s_000046.png deleted file mode 100644 index ecaf53565..000000000 Binary files a/datasets/example_images/pekinese_s_000046.png and /dev/null differ diff --git a/datasets/example_images/police_boat_s_001118.png b/datasets/example_images/police_boat_s_001118.png deleted file mode 100644 index 7adad01ee..000000000 Binary files a/datasets/example_images/police_boat_s_001118.png and /dev/null differ diff --git a/datasets/example_images/police_cruiser_s_001385.png b/datasets/example_images/police_cruiser_s_001385.png deleted file mode 100644 index 6b2e00a0b..000000000 Binary files a/datasets/example_images/police_cruiser_s_001385.png and /dev/null differ diff --git a/datasets/example_images/puppy_s_001045.png b/datasets/example_images/puppy_s_001045.png deleted file mode 100644 index 3d5c52f73..000000000 Binary files a/datasets/example_images/puppy_s_001045.png and /dev/null differ diff --git a/datasets/example_images/rana_pipiens_s_000379.png b/datasets/example_images/rana_pipiens_s_000379.png deleted file mode 100644 index 7bcb45394..000000000 Binary files a/datasets/example_images/rana_pipiens_s_000379.png and /dev/null differ diff --git a/datasets/example_images/red_deer_s_001101.png b/datasets/example_images/red_deer_s_001101.png deleted file mode 100644 index 3ec29f469..000000000 Binary files a/datasets/example_images/red_deer_s_001101.png and /dev/null differ diff --git a/datasets/example_images/red_deer_s_001719.png b/datasets/example_images/red_deer_s_001719.png deleted file mode 100644 index 6379d6af7..000000000 Binary files a/datasets/example_images/red_deer_s_001719.png and /dev/null differ diff --git a/datasets/example_images/rhea_americana_s_000436.png b/datasets/example_images/rhea_americana_s_000436.png deleted file mode 100644 index a5de92cf6..000000000 Binary files a/datasets/example_images/rhea_americana_s_000436.png and /dev/null differ diff --git a/datasets/example_images/sika_s_000337.png b/datasets/example_images/sika_s_000337.png deleted file mode 100644 index 22be9cc44..000000000 Binary files a/datasets/example_images/sika_s_000337.png and /dev/null differ diff --git a/datasets/example_images/spring_frog_s_000407.png 
b/datasets/example_images/spring_frog_s_000407.png deleted file mode 100644 index 86f323a5f..000000000 Binary files a/datasets/example_images/spring_frog_s_000407.png and /dev/null differ diff --git a/datasets/example_images/stallion_s_000015.png b/datasets/example_images/stallion_s_000015.png deleted file mode 100644 index 4db4e59cc..000000000 Binary files a/datasets/example_images/stallion_s_000015.png and /dev/null differ diff --git a/datasets/example_images/station_wagon_s_000464.png b/datasets/example_images/station_wagon_s_000464.png deleted file mode 100644 index a74f1d98b..000000000 Binary files a/datasets/example_images/station_wagon_s_000464.png and /dev/null differ diff --git a/datasets/example_images/station_wagon_s_002537.png b/datasets/example_images/station_wagon_s_002537.png deleted file mode 100644 index b24969db7..000000000 Binary files a/datasets/example_images/station_wagon_s_002537.png and /dev/null differ diff --git a/datasets/example_images/supertanker_s_000275.png b/datasets/example_images/supertanker_s_000275.png deleted file mode 100644 index c01c4370a..000000000 Binary files a/datasets/example_images/supertanker_s_000275.png and /dev/null differ diff --git a/datasets/example_images/supertanker_s_000761.png b/datasets/example_images/supertanker_s_000761.png deleted file mode 100644 index 8b4a3ac00..000000000 Binary files a/datasets/example_images/supertanker_s_000761.png and /dev/null differ diff --git a/datasets/example_images/tabby_cat_s_000069.png b/datasets/example_images/tabby_cat_s_000069.png deleted file mode 100644 index 78a62654d..000000000 Binary files a/datasets/example_images/tabby_cat_s_000069.png and /dev/null differ diff --git a/datasets/example_images/tabby_cat_s_001983.png b/datasets/example_images/tabby_cat_s_001983.png deleted file mode 100644 index cb423e861..000000000 Binary files a/datasets/example_images/tabby_cat_s_001983.png and /dev/null differ diff --git a/datasets/example_images/tabby_s_001593.png b/datasets/example_images/tabby_s_001593.png deleted file mode 100644 index 608e56f4a..000000000 Binary files a/datasets/example_images/tabby_s_001593.png and /dev/null differ diff --git a/datasets/example_images/tabby_s_001774.png b/datasets/example_images/tabby_s_001774.png deleted file mode 100644 index a7a54b3d9..000000000 Binary files a/datasets/example_images/tabby_s_001774.png and /dev/null differ diff --git a/datasets/example_images/tailed_frog_s_000246.png b/datasets/example_images/tailed_frog_s_000246.png deleted file mode 100644 index 2ba29dcab..000000000 Binary files a/datasets/example_images/tailed_frog_s_000246.png and /dev/null differ diff --git a/datasets/example_images/toad_frog_s_001786.png b/datasets/example_images/toad_frog_s_001786.png deleted file mode 100644 index f31dda3d4..000000000 Binary files a/datasets/example_images/toad_frog_s_001786.png and /dev/null differ diff --git a/datasets/example_images/tractor_trailer_s_000653.png b/datasets/example_images/tractor_trailer_s_000653.png deleted file mode 100644 index 3f1b7bf97..000000000 Binary files a/datasets/example_images/tractor_trailer_s_000653.png and /dev/null differ diff --git a/datasets/example_images/trailer_truck_s_001350.png b/datasets/example_images/trailer_truck_s_001350.png deleted file mode 100644 index e20f68e34..000000000 Binary files a/datasets/example_images/trailer_truck_s_001350.png and /dev/null differ diff --git a/datasets/example_images/truck_s_000028.png b/datasets/example_images/truck_s_000028.png deleted file mode 100644 index 
975c4da6c..000000000 Binary files a/datasets/example_images/truck_s_000028.png and /dev/null differ diff --git a/datasets/example_images/trucking_rig_s_001247.png b/datasets/example_images/trucking_rig_s_001247.png deleted file mode 100644 index eae266b15..000000000 Binary files a/datasets/example_images/trucking_rig_s_001247.png and /dev/null differ diff --git a/datasets/example_images/trucking_rig_s_001431.png b/datasets/example_images/trucking_rig_s_001431.png deleted file mode 100644 index 7674c0595..000000000 Binary files a/datasets/example_images/trucking_rig_s_001431.png and /dev/null differ diff --git a/datasets/example_images/true_cat_s_000886.png b/datasets/example_images/true_cat_s_000886.png deleted file mode 100644 index badf1aeab..000000000 Binary files a/datasets/example_images/true_cat_s_000886.png and /dev/null differ diff --git a/datasets/example_images/wagon_s_000378.png b/datasets/example_images/wagon_s_000378.png deleted file mode 100644 index e53f30df6..000000000 Binary files a/datasets/example_images/wagon_s_000378.png and /dev/null differ diff --git a/datasets/example_images/wagon_s_000572.png b/datasets/example_images/wagon_s_000572.png deleted file mode 100644 index a8df0244b..000000000 Binary files a/datasets/example_images/wagon_s_000572.png and /dev/null differ diff --git a/datasets/example_images/wagon_s_002463.png b/datasets/example_images/wagon_s_002463.png deleted file mode 100644 index ae436205b..000000000 Binary files a/datasets/example_images/wagon_s_002463.png and /dev/null differ diff --git a/datasets/example_images/wagtail_s_000747.png b/datasets/example_images/wagtail_s_000747.png deleted file mode 100644 index e0fee91b9..000000000 Binary files a/datasets/example_images/wagtail_s_000747.png and /dev/null differ diff --git a/datasets/example_images/walking_horse_s_000071.png b/datasets/example_images/walking_horse_s_000071.png deleted file mode 100644 index a25e7c505..000000000 Binary files a/datasets/example_images/walking_horse_s_000071.png and /dev/null differ diff --git a/datasets/example_images/western_toad_s_000622.png b/datasets/example_images/western_toad_s_000622.png deleted file mode 100644 index 44b973d7b..000000000 Binary files a/datasets/example_images/western_toad_s_000622.png and /dev/null differ diff --git a/datasets/example_images/wrecker_s_002395.png b/datasets/example_images/wrecker_s_002395.png deleted file mode 100644 index e67b841ea..000000000 Binary files a/datasets/example_images/wrecker_s_002395.png and /dev/null differ diff --git a/examples/basics/Auto-PyTorch Tutorial.ipynb b/examples/basics/Auto-PyTorch Tutorial.ipynb deleted file mode 100644 index 6757e325f..000000000 --- a/examples/basics/Auto-PyTorch Tutorial.ipynb +++ /dev/null @@ -1,453 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Introduction\n", - "\n", - "This tutorial introduces the basic Auto-PyTorch API together with the classes for featurized and image data.\n", - "So far, Auto-PyTorch covers classification and regression on featurized data as well as classification on image data.\n", - "For installing Auto-PyTorch, please refer to the github page.\n", - "\n", - "**Disclaimer**: In this notebook, data will be downloaded from the openml project for featurized tasks and CIFAR10 will be downloaded for image classification. Hence, an internet connection is required." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# API\n", - "\n", - "There are classes for featurized tasks (classification, multi-label classification, regression) and image tasks (classification). You can import them via:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from autoPyTorch import (AutoNetClassification,\n", - " AutoNetMultilabel,\n", - " AutoNetRegression,\n", - " AutoNetImageClassification,\n", - " AutoNetImageClassificationMultipleDatasets)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Other imports for later usage\n", - "import pandas as pd\n", - "import numpy as np\n", - "import os as os\n", - "import openml\n", - "import json" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Upon initialization of a class, you can specify its configuration. Later, you can override its configuration in each fit call. The *config_preset* allows to constrain the search space to one of *tiny_cs, medium_cs* or *full_cs*. These presets can be seen in *core/presets/*." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/sklearn/utils/deprecation.py:143: FutureWarning: The sklearn.metrics.classification module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.metrics. Anything that cannot be imported from sklearn.metrics is now part of the private API.\n", - " warnings.warn(message, FutureWarning)\n" - ] - } - ], - "source": [ - "autonet = AutoNetClassification(config_preset=\"tiny_cs\", result_logger_dir=\"logs/\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here are some useful methods provided by the API:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "# Get the current configuration as dict\n", - "current_configuration = autonet.get_current_autonet_config()\n", - "\n", - "# Get the ConfigSpace object with all hyperparameters, conditions, default values and default ranges\n", - "hyperparameter_search_space = autonet.get_hyperparameter_search_space()\n", - "\n", - "# Print all possible configuration options \n", - "#autonet.print_help()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "scrolled": true - }, - "source": [ - "The most important methods for using Auto-PyTorch are ***fit***, ***refit***, ***score*** and ***predict***.\n", - "\n", - "First, we get some data:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Get data from the openml task \"Supervised Classification on credit-g (https://www.openml.org/t/31)\"\n", - "task = openml.tasks.get_task(task_id=31)\n", - "X, y = task.get_X_and_y()\n", - "ind_train, ind_test = task.get_train_test_split_indices()\n", - "X_train, Y_train = X[ind_train], y[ind_train]\n", - "X_test, Y_test = X[ind_test], y[ind_test]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "***fit*** is used to search for a good configuration by fitting configurations chosen by the algorithm (by default BOHB). 
The incumbent configuration is then returned and stored in the class.\n", - "\n", - "We recommend to have a look at the possible configuration options first. Some of the most important options allow you to set the budget type (epochs or time), run id and task id for cluster usage, tensorboard logging, seed and more.\n", - "\n", - "Here we search for a configuration for 300 seconds with 60-100 s time for fitting each individual configuration.\n", - "Use the *validation_split* parameter to specify a split size. You can also pass your own validation set\n", - "via *X_val* and *Y_val*. Use *log_level=\"info\"* or *log_level=\"debug\"* for more detailed output." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:484: UserWarning: To get the last learning rate computed by the scheduler, please use `get_last_lr()`.\n", - " \"please use `get_last_lr()`.\", UserWarning)\n", - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: The epoch parameter in `scheduler.step()` was not necessary and is being deprecated where possible. Please use `scheduler.step()` to step the scheduler. During the deprecation, if epoch is different from None, the closed form is used instead of the new chainable form, where available. Please open an issue if you are unable to replicate your use case: https://github.com/pytorch/pytorch/issues/new/choose.\n", - " warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)\n", - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:484: UserWarning: To get the last learning rate computed by the scheduler, please use `get_last_lr()`.\n", - " \"please use `get_last_lr()`.\", UserWarning)\n", - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: The epoch parameter in `scheduler.step()` was not necessary and is being deprecated where possible. Please use `scheduler.step()` to step the scheduler. During the deprecation, if epoch is different from None, the closed form is used instead of the new chainable form, where available. Please open an issue if you are unable to replicate your use case: https://github.com/pytorch/pytorch/issues/new/choose.\n", - " warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)\n", - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:484: UserWarning: To get the last learning rate computed by the scheduler, please use `get_last_lr()`.\n", - " \"please use `get_last_lr()`.\", UserWarning)\n", - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: The epoch parameter in `scheduler.step()` was not necessary and is being deprecated where possible. Please use `scheduler.step()` to step the scheduler. During the deprecation, if epoch is different from None, the closed form is used instead of the new chainable form, where available. 
Please open an issue if you are unable to replicate your use case: https://github.com/pytorch/pytorch/issues/new/choose.\n", - " warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)\n" - ] - } - ], - "source": [ - "autonet = AutoNetClassification(config_preset=\"tiny_cs\", result_logger_dir=\"logs/\")\n", - "# Fit (note that the settings are for demonstration, you might need larger budgets)\n", - "results_fit = autonet.fit(X_train=X_train,\n", - " Y_train=Y_train,\n", - " validation_split=0.3,\n", - " max_runtime=300,\n", - " min_budget=60,\n", - " max_budget=100,\n", - " refit=True)\n", - "\n", - "# Save fit results as json\n", - "with open(\"logs/results_fit.json\", \"w\") as file:\n", - " json.dump(results_fit, file)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "***refit*** allows you to fit a configuration of your choice for a defined time. By default, the incumbent configuration is refitted during a *fit* call using the *max_budget*. However, *refit* might be useful if you want to fit on the full dataset or even another dataset or if you just want to fit a model without searching.\n", - "\n", - "You can specify a hyperparameter configuration to fit (if you do not specify a configuration the incumbent configuration from the last fit call will be used):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create an autonet\n", - "autonet_config = {\n", - " \"result_logger_dir\" : \"logs/\",\n", - " \"budget_type\" : \"epochs\",\n", - " \"log_level\" : \"info\", \n", - " \"use_tensorboard_logger\" : True,\n", - " \"validation_split\" : 0.0\n", - " }\n", - "autonet = AutoNetClassification(**autonet_config)\n", - "\n", - "# Sample a random hyperparameter configuration as an example\n", - "hyperparameter_config = autonet.get_hyperparameter_search_space().sample_configuration().get_dictionary()\n", - "\n", - "# Refit with sampled hyperparameter config for 120 s. This time on the full dataset.\n", - "results_refit = autonet.refit(X_train=X_train,\n", - " Y_train=Y_train,\n", - " X_valid=None,\n", - " Y_valid=None,\n", - " hyperparameter_config=hyperparameter_config,\n", - " autonet_config=autonet.get_current_autonet_config(),\n", - " budget=50)\n", - "\n", - "# Save json\n", - "with open(\"logs/results_refit.json\", \"w\") as file:\n", - " json.dump(results_refit, file)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "***pred*** returns the predictions of the incumbent model. ***score*** can be used to evaluate the model on a test set. 
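(Context for the fit/refit cells removed above: the deleted notebook dumps the returned results to logs/results_fit.json and logs/results_refit.json. The following is a minimal sketch, not part of the deleted notebook, assuming only that those json.dump calls succeeded; it shows how the saved refit results could be reloaded for later inspection.)

    import json

    # Reload the refit results that the deleted tutorial cell wrote to disk;
    # the dict mirrors what autonet.refit(...) returned before serialization.
    with open("logs/results_refit.json") as f:
        results_refit = json.load(f)

    print(results_refit)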
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# See how the random configuration performs (often it just predicts 0)\n", - "score = autonet.score(X_test=X_test, Y_test=Y_test)\n", - "pred = autonet.predict(X=X_test)\n", - "\n", - "print(\"Model prediction:\", pred[0:10])\n", - "print(\"Accuracy score\", score)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, you can also get the incumbent model as PyTorch Sequential model via" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "pytorch_model = autonet.get_pytorch_model()\n", - "print(pytorch_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Featurized Data\n", - "\n", - "All classes for featurized data (*AutoNetClassification*, *AutoNetMultilabel*, *AutoNetRegression*) can be used as in the example above. The only difference is the type of labels they accept." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Image Data\n", - "\n", - "Auto-PyTorch provides two classes for image data. *autonet_image_classification* can be used for classification for images. The *autonet_multi_image_classification* class allows to search for configurations for image classification across multiple datasets. This means Auto-PyTorch will try to choose a configuration that works well on all given datasets." - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/sklearn/utils/deprecation.py:143: FutureWarning: The sklearn.metrics.classification module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.metrics. 
Anything that cannot be imported from sklearn.metrics is now part of the private API.\n", - " warnings.warn(message, FutureWarning)\n" - ] - } - ], - "source": [ - "# Load classes\n", - "autonet_image_classification = AutoNetImageClassification(config_preset=\"full_cs\", result_logger_dir=\"logs/\")\n", - "autonet_multi_image_classification = AutoNetImageClassificationMultipleDatasets(config_preset=\"tiny_cs\", result_logger_dir=\"logs/\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For passing your image data, you have two options (note that arrays are expected):\n", - "\n", - "I) Via a path to a comma-separated value file, which in turn contains the paths to the images and the image labels (note header is assumed to be None):" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "csv_dir = os.path.abspath(\"../../datasets/example.csv\")\n", - "\n", - "X_train = np.array([csv_dir])\n", - "Y_train = np.array([0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "II) directly passing the paths to the images and the labels" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(csv_dir, header=None)\n", - "X_train = df.values[:,0]\n", - "Y_train = df.values[:,1]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "scrolled": false - }, - "source": [ - "Make sure you specify *image_root_folders* if the paths to the images are not specified from your current working directory. You can also specify *images_shape* to up- or downscale images.\n", - "\n", - "Using the flag *save_checkpoints=True* will save checkpoints to the result directory:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:123: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", - " \"https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\", UserWarning)\n", - "/home/zimmerl/Auto-PyTorch_releases/Auto-PyTorch/env/lib/python3.6/site-packages/torch/optim/lr_scheduler.py:143: UserWarning: The epoch parameter in `scheduler.step()` was not necessary and is being deprecated where possible. Please use `scheduler.step()` to step the scheduler. During the deprecation, if epoch is different from None, the closed form is used instead of the new chainable form, where available. 
Please open an issue if you are unable to replicate your use case: https://github.com/pytorch/pytorch/issues/new/choose.\n", - " warnings.warn(EPOCH_DEPRECATION_WARNING, UserWarning)\n" - ] - } - ], - "source": [ - "autonet_image_classification.fit(X_train=X_train,\n", - " Y_train=Y_train,\n", - " images_shape=[3,32,32],\n", - " min_budget=200,\n", - " max_budget=400,\n", - " max_runtime=600,\n", - " save_checkpoints=True,\n", - " images_root_folders=[os.path.abspath(\"../../datasets/example_images\")])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Auto-PyTorch also supports some common datasets. By passing a comma-separated value file with just one line, e.g. \"CIFAR10, 0\" and specifying *default_dataset_download_dir* it will automatically download the data and use it for searching. Supported datasets are CIFAR10, CIFAR100, SVHN and MNIST." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "path_to_cifar_csv = os.path.abspath(\"../../datasets/CIFAR10.csv\")\n", - "\n", - "autonet_image_classification.fit(X_train=np.array([path_to_cifar_csv]),\n", - " Y_train=np.array([0]),\n", - " min_budget=600,\n", - " max_budget=900,\n", - " max_runtime=1800,\n", - " default_dataset_download_dir=\"./datasets\",\n", - " images_root_folders=[\"./datasets\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For searching across multiple datasets, pass multiple csv files to the corresponding Auto-PyTorch class. Make sure your specify *images_root_folders* for each of them." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "autonet_multi_image_classification.fit(X_train=np.array([path_to_cifar_csv, csv_dir]),\n", - " Y_train=np.array([0]),\n", - " min_budget=1500,\n", - " max_budget=2000,\n", - " max_runtime=4000,\n", - " default_dataset_download_dir=\"./datasets\",\n", - " images_root_folders=[\"./datasets\", \"./datasets/example_images\"])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/basics/classification.py b/examples/basics/classification.py deleted file mode 100644 index 05d4d75c5..000000000 --- a/examples/basics/classification.py +++ /dev/null @@ -1,20 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) -from autoPyTorch import AutoNetClassification -from autoPyTorch.data_management.data_manager import DataManager - -# Note: You can write your own datamanager! Call fit with respective train, valid data (numpy matrices) -dm = DataManager() -dm.generate_classification(num_classes=3, num_features=21, num_samples=1500) - -# Note: every parameter has a default value, you do not have to specify anything. The given parameter allow a fast test. 
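(Context for the CIFAR10 cell removed above: it relies on the one-line datasets/CIFAR10.csv file that this diff also deletes. The snippet below is a hypothetical sketch, not part of the deleted notebook, assuming only the literal content shown for the deleted datasets/CIFAR10.csv; it recreates that file before fit() is called with np.array([path_to_cifar_csv]).)

    import os

    # Recreate the one-line CSV whose path the deleted notebook cell passes to fit().
    os.makedirs("datasets", exist_ok=True)
    with open("datasets/CIFAR10.csv", "w") as f:
        f.write("CIFAR10, 0\n")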
-autonet = AutoNetClassification("tiny_cs", budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='debug', use_pynisher=False) - -res = autonet.fit(X_train=dm.X, Y_train=dm.Y, cross_validator="k_fold", cross_validator_args={"n_splits": 3}) - -print(res) -print("Score:", autonet.score(X_test=dm.X_train, Y_test=dm.Y_train)) diff --git a/examples/basics/ensemble.py b/examples/basics/ensemble.py deleted file mode 100644 index d52ed054f..000000000 --- a/examples/basics/ensemble.py +++ /dev/null @@ -1,22 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) -from autoPyTorch import AutoNetClassification, AutoNetEnsemble -from autoPyTorch.data_management.data_manager import DataManager - -# Note: You can write your own datamanager! Call fit with respective train, valid data (numpy matrices) -dm = DataManager() -dm.generate_classification(num_classes=3, num_features=21, num_samples=1500) - -# Note: every parameter has a default value, you do not have to specify anything. The given parameters allow for a fast test. -autonet = AutoNetEnsemble(AutoNetClassification, budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='debug') - - -res = autonet.fit(X_train=dm.X, Y_train=dm.Y, cross_validator="k_fold", cross_validator_args={"n_splits": 3}, validation_split=0.2, - ensemble_only_consider_n_best=3) - -print(res) -print("Score:", autonet.score(X_test=dm.X_train, Y_test=dm.Y_train)) diff --git a/examples/basics/image_classification.py b/examples/basics/image_classification.py deleted file mode 100644 index cb6454b63..000000000 --- a/examples/basics/image_classification.py +++ /dev/null @@ -1,33 +0,0 @@ -__license__ = "BSD" - -import os, sys -import numpy as np - -sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) -from autoPyTorch import AutoNetImageClassification - -# Note: You can write your own datamanager! Call fit with respective train, valid data (numpy matrices) -csv_dir = os.path.abspath("../../datasets/example.csv") - -def main(): - X_train = np.array([csv_dir]) - Y_train = np.array([0]) - - # Note: every parameter has a default value, you do not have to specify anything. The given parameter allow a fast test. 
- autonet = AutoNetImageClassification(config_preset="full_cs", result_logger_dir="logs/") - - res = autonet.fit(X_train=X_train, - Y_train=Y_train, - images_shape=[3, 32, 32], - min_budget=600, - max_budget=900, - max_runtime=1800, - save_checkpoints=True, - images_root_folders=[os.path.abspath("../../datasets/example_images")]) - - print(res) - print("Score:", autonet.score(X_test=X_train, Y_test=Y_train)) - - -if __name__ == '__main__': - main() \ No newline at end of file diff --git a/examples/basics/modify_pipeline.py b/examples/basics/modify_pipeline.py deleted file mode 100644 index bf7a81a86..000000000 --- a/examples/basics/modify_pipeline.py +++ /dev/null @@ -1,27 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", '..'))) - -from autoPyTorch import AutoNetClassification -from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector - -from autoPyTorch.components.metrics.additional_logs import test_result - -from autoPyTorch.data_management.data_manager import DataManager - -# Note: You can write your own datamanager! Call fit with respective train, valid data (numpy matrices) -dm = DataManager() -dm.generate_classification(num_classes=3, num_features=21, num_samples=1500) - -# Note: every parameter has a default value, you do not have to specify anything. The given parameter allow a fast test. -autonet = AutoNetClassification(budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='info') - -logs = autonet.pipeline[LogFunctionsSelector.get_name()] -logs.add_log_function('test_result', test_result(autonet, dm.X_test, dm.Y_test), True) - -res = autonet.fit(X_train=dm.X, Y_train=dm.Y, X_valid=dm.X_train, Y_valid=dm.Y_train) - -print(res) diff --git a/examples/basics/regression.py b/examples/basics/regression.py deleted file mode 100644 index c18928342..000000000 --- a/examples/basics/regression.py +++ /dev/null @@ -1,19 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) -from autoPyTorch import AutoNetRegression -from autoPyTorch.data_management.data_manager import DataManager - -# Note: You can write your own datamanager! Call fit train, valid data (numpy matrices) -dm = DataManager() -dm.generate_regression(num_features=21, num_samples=1500) - -# Note: every parameter has a default value, you do not have to specify anything. The given parameter allow a fast test. 
-autonet = AutoNetRegression(budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='info') - -res = autonet.fit(X_train=dm.X, Y_train=dm.Y, X_valid=dm.X_train, Y_valid=dm.Y_train) - -print(res) diff --git a/examples/ensemble/create_trajectory.py b/examples/ensemble/create_trajectory.py deleted file mode 100644 index b7914cbe3..000000000 --- a/examples/ensemble/create_trajectory.py +++ /dev/null @@ -1,257 +0,0 @@ -import os -import json -import time -import argparse -import numpy as np - -from autoPyTorch.components.metrics import accuracy -from autoPyTorch.pipeline.nodes.metric_selector import AutoNetMetric, undo_ohe, default_minimize_transform -from autoPyTorch.components.ensembles.ensemble_selection import EnsembleSelection -from hpbandster.core.result import logged_results_to_HBS_result - - -class EnsembleTrajectorySimulator(): - - def __init__(self, ensemble_pred_dir, ensemble_config, seed): - - self.ensemble_pred_dir = os.path.join(ensemble_pred_dir, "predictions_for_ensemble.npy") - self.ensemble_pred_dir_test = os.path.join(ensemble_pred_dir, "test_predictions_for_ensemble.npy") - self.ensemble_config = ensemble_config - self.seed = seed - - self.read_runfiles() - self.timesteps = self.get_timesteps() - - self.ensemble_selection = EnsembleSelection(**ensemble_config) - - def read_runfiles(self, val_split=0.5): - - self.ensemble_identifiers = [] - self.ensemble_predictions = [] - self.ensemble_predictions_ensemble_val = [] - self.ensemble_timestamps = [] - - with open(self.ensemble_pred_dir, "rb") as f: - self.labels = np.load(f, allow_pickle=True) - - if val_split is not None and val_split>0: - # shuffle val data - indices = np.arange(len(self.labels)) - rng = np.random.default_rng(seed=self.seed) - rng.shuffle(indices) - - # Create a train val split for the ensemble from the validation data - split = int(len(indices) * (1-val_split)) - self.train_indices = indices[:split] - self.val_indices = indices[split:] - - self.labels_ensemble_val = self.labels[self.val_indices] - self.labels = self.labels[self.train_indices] - else: - self.labels_ensemble_val = [] - - while True: - try: - job_id, budget, timestamps = np.load(f, allow_pickle=True) - predictions = np.array(np.load(f, allow_pickle=True)) - - if val_split is not None and val_split>0: - self.ensemble_identifiers.append(job_id + (budget, )) - self.ensemble_predictions.append(predictions[self.train_indices]) - self.ensemble_predictions_ensemble_val.append(predictions[self.val_indices]) - self.ensemble_timestamps.append(timestamps) - else: - self.ensemble_identifiers.append(job_id + (budget, )) - self.ensemble_predictions.append(predictions) - self.ensemble_timestamps.append(timestamps) - except (EOFError, OSError): - break - - self.ensemble_predictions_test = [] - self.test_labels = None - - if os.path.exists(self.ensemble_pred_dir_test): - with open(self.ensemble_pred_dir_test, "rb") as f: - try: - self.test_labels = np.load(f, allow_pickle=True) - except (EOFError, OSError): - pass - - while True: - try: - job_id, budget, timestamps = np.load(f, allow_pickle=True) - predictions = np.array(np.load(f, allow_pickle=True)) - - self.ensemble_predictions_test.append(predictions) - #print("==> Adding test labels with shape", predictions.shape) - except (EOFError, OSError): - break - - # Transform timestamps to start at t=0 - self.transform_timestamps(add_time=-self.ensemble_timestamps[0]["submitted"]) - - print("==> Found %i val preds" %len(self.ensemble_predictions)) - print("==> Found %i test preds" 
%len(self.ensemble_predictions_test)) - print("==> Found %i timestamps" %len(self.ensemble_timestamps)) - - def transform_timestamps(self, add_time): - transformed_timestamps = [t["finished"]+add_time for t in self.ensemble_timestamps] - self.ensemble_timestamps = transformed_timestamps - - def get_timesteps(self): - # we want at least 2 models - first_timestep = self.ensemble_timestamps[1] - final_timestep = self.ensemble_timestamps[-1] - return self.ensemble_timestamps[1:] - - def get_ensemble_performance(self, timestep): - cutoff_ind = np.argmin([abs(t - timestep) for t in self.ensemble_timestamps])+1 - print("==> Considering %i models and timestep %f" %(cutoff_ind, timestep)) - - # create ensemble - self.ensemble_selection.fit(np.array(self.ensemble_predictions[0:cutoff_ind]), self.labels, self.ensemble_identifiers[0:cutoff_ind]) - - # get test performance - if self.test_labels is not None: - test_preds = self.ensemble_selection.predict(self.ensemble_predictions_test[0:cutoff_ind]) - if len(test_preds.shape)==3: - test_preds = test_preds[0] - if len(test_preds.shape)==2: - test_preds = np.argmax(test_preds, axis=1) - test_performance = accuracy(self.test_labels, test_preds) - else: - test_performance = 0 - - # get ensemble performance on ensemble validation set - if len(self.labels_ensemble_val)>0 and len(self.ensemble_predictions_ensemble_val)>0: - ensemble_val_preds = self.ensemble_selection.predict(self.ensemble_predictions_ensemble_val[0:cutoff_ind]) - if len(ensemble_val_preds.shape)==3: - ensemble_val_preds = ensemble_val_preds[0] - if len(ensemble_val_preds.shape)==2: - ensemble_val_preds = np.argmax(ensemble_val_preds, axis=1) - ensemble_val_performance = accuracy(self.labels_ensemble_val, ensemble_val_preds) - else: - ensemble_val_performance = 0 - - model_identifiers = self.ensemble_selection.identifiers_ - model_weights = self.ensemble_selection.weights_ - - return self.ensemble_selection.get_validation_performance(), test_performance, ensemble_val_performance, model_identifiers, model_weights - - def restart_trajectory_with_reg(self, timelimit=np.inf): - # For datasets with heavy overfitting reduce considered models - self.ensemble_config["only_consider_n_best"] = 2 - self.ensemble_selection = EnsembleSelection(**ensemble_config) - - self.simulate_trajectory(timelimit=timelimit, allow_restart=False) - - def simulate_trajectory(self, timelimit=np.inf, allow_restart=True): - self.trajectory = [] - self.test_trajectory = [] - self.enstest_trajectory = [] - self.model_identifiers = [] - self.model_weights = [] - self.ensemble_loss = [] - - for ind, t in enumerate(self.timesteps): - - if t>timelimit: - break - - print("==> Building ensemble at %i -th timestep %f" %(ind, t)) - ensemble_performance, test_performance, ensemble_val_performance, model_identifiers, model_weights = self.get_ensemble_performance(t) - print("==> Performance:", ensemble_performance, "/", test_performance, "/", ensemble_val_performance) - - if abs(ensemble_performance) == 100 and ind<20 and allow_restart: - self.restart_trajectory_with_reg(timelimit=np.inf) - break - - self.ensemble_loss.append(ensemble_performance) - self.trajectory.append((t, ensemble_performance)) - self.test_trajectory.append((t, test_performance)) - self.enstest_trajectory.append((t, ensemble_val_performance)) - self.model_identifiers.append(model_identifiers) - self.model_weights.append(model_weights) - - def get_incumbent_at_timestep(self, timestep, use_val=True): - best_val_score = 0 - best_ind = 0 - if use_val: - for ind, 
performance_tuple in enumerate(self.enstest_trajectory): - if performance_tuple[0]<=timestep and performance_tuple[1]>=best_val_score: - best_val_score = performance_tuple[1] - best_ind = ind - else: - for ind, performance_tuple in enumerate(self.test_trajectory): - if performance_tuple[0]<=timestep and performance_tuple[1]>=best_val_score: - best_val_score = performance_tuple[1] - best_ind = ind - return self.test_trajectory[best_ind], best_ind - - def save_trajectory(self, save_file, test=False): - - print("==> Saving ensemble trajectory to", save_file) - - with open(save_file, "w") as f: - if test: - json.dump(self.test_trajectory, f) - else: - json.dump(self.trajectory, f) - -def get_bohb_rundirs(rundir): - - rundirs = [] - - dataset_dirs = [os.path.join(rundir, p) for p in os.listdir(rundir) if not p.endswith("cluster")] - - for ds_path in dataset_dirs: - rundirs = rundirs + [os.path.join(ds_path, rundir) for rundir in os.listdir(ds_path)] - - return rundirs - -def minimize_trf(value): - return -1*value - -def get_ensemble_config(): - autonet_accuracy = AutoNetMetric(name="accuracy", metric=accuracy, loss_transform=minimize_trf, ohe_transform=undo_ohe) - - ensemble_config = {"ensemble_size" : 35, - "only_consider_n_best" : 10, - "sorted_initialization_n_best" : 1, - "metric" : autonet_accuracy} - return ensemble_config - - -if __name__=="__main__": - parser = argparse.ArgumentParser(description='') - parser.add_argument("--rundir", type=str, default="./logs") - parser.add_argument("--run_id", type=int) - args = parser.parse_args() - - ensemble_config = get_ensemble_config() - - bohb_rundirs = get_bohb_rundirs(args.rundir) - - print(bohb_rundirs) - - bohb_rundir = bohb_rundirs[args.run_id-1] - - simulator = EnsembleTrajectorySimulator(ensemble_pred_dir=bohb_rundir, ensemble_config=ensemble_config, seed=1) - simulator.simulate_trajectory() - simulator.save_trajectory(save_file=os.path.join(bohb_rundir, "ensemble_trajectory.json")) - simulator.save_trajectory(save_file=os.path.join(bohb_rundir, "ensemble_trajectory_test.json"), test=True) - - #incumbent_score_all_time, incumbent_ind_all_time = simulator.get_incumbent_at_timestep(timestep=np.inf, use_val=False) - #incumbent_score_all_time_val, incumbent_ind_val_all_time = simulator.get_incumbent_at_timestep(timestep=np.inf, use_val=True) - incumbent_score_val, incumbent_ind_val = simulator.get_incumbent_at_timestep(timestep=3600, use_val=True) - - print("Incumbent ind / score:", incumbent_ind_val, "/", incumbent_score_val) - - results = {#"all_time_incumbent":incumbent_score_all_time, - #"all_time_incumbent_val":incumbent_score_all_time_val, - #"3600_without_val": score_at_3600, - "3600_incumbent_val":incumbent_score_val} - #"3600_incumbent_val":combined_score} - - with open(os.path.join(bohb_rundir, "incumbent_ensemble.json"), "w") as f: - json.dump(results, f) diff --git a/examples/ensemble/job_main_exp.sh b/examples/ensemble/job_main_exp.sh deleted file mode 100644 index d19226f7f..000000000 --- a/examples/ensemble/job_main_exp.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH -p bosch_cpu-cascadelake # partition (queue) (test_cpu-ivy, all_cpu-cascadelake, bosch_cpu-cascadelake) -#SBATCH -t 0-04:00 # time (D-HH:MM) -#SBATCH -N 1 # number of nodes -#SBATCH -c 2 # number of cores -#SBATCH -a 1-3 # array size -#SBATCH -o logs/cluster/%x.%N.%j.out # STDOUT (the folder log has to be created prior to running or this won't work) -#SBATCH -e logs/cluster/%x.%N.%j.err # STDERR (the folder log has to be created prior to running or this 
won't work) -#SBATCH -J apt_test # sets the job name. If not specified, the file name will be used as job name -# Print some information about the job to STDOUT -echo "Workingdir: $PWD"; -echo "Started at $(date)"; -echo "Running job $SLURM_JOB_NAME using $SLURM_JOB_CPUS_PER_NODE cpus per node with given JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; - -# Activate venv -source env/bin/activate -export PYTHONPATH=$PWD - -if [ $SLURM_ARRAY_TASK_ID -gt 1 ] -then - sleep 10 -fi - -# Array jobs -python3 -W ignore examples/ensemble/test_ensemble.py --run_id $1 --task_id $SLURM_ARRAY_TASK_ID --num_workers 3 --dataset_id $2 --seed $3 --ensemble_setting ensemble --portfolio_type greedy --num_threads 2 --test false - -# Done -echo "DONE"; -echo "Finished at $(date)"; - - -# DEBUGGING: -#python3 -W ignore examples/ensemble/test_ensemble.py --run_id 999 --task_id 1 --num_workers 1 --dataset_id 0 --seed 1 --ensemble_setting ensemble --portfolio_type greedy --num_threads 2 --test true diff --git a/examples/ensemble/start_main_exp.sh b/examples/ensemble/start_main_exp.sh deleted file mode 100644 index 7a5e01665..000000000 --- a/examples/ensemble/start_main_exp.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -RUN_ID=$1 # Starting run id -DATASET_ID=0 -limit=7 # 8 Datasets -SEED=$2 - -until [ $DATASET_ID -gt $limit ] -do - yes | sbatch examples/ensemble/job_main_exp.sh $RUN_ID $DATASET_ID $SEED - ((RUN_ID++)) - - ((DATASET_ID++)) -done diff --git a/examples/ensemble/test_ensemble.py b/examples/ensemble/test_ensemble.py deleted file mode 100644 index 93f7e8bc6..000000000 --- a/examples/ensemble/test_ensemble.py +++ /dev/null @@ -1,244 +0,0 @@ -import argparse -import os as os -import numpy as np -import logging -import json -import random -import torch -import openml -from sklearn.model_selection import train_test_split -from IPython import embed - -import ConfigSpace as cs -from autoPyTorch import HyperparameterSearchSpaceUpdates -from autoPyTorch.pipeline.nodes import LogFunctionsSelector, BaselineTrainer -from autoPyTorch import AutoNetClassification, AutoNetEnsemble -from autoPyTorch.pipeline.nodes import LogFunctionsSelector -from autoPyTorch.components.metrics.additional_logs import * -from autoPyTorch.utils.ensemble import test_predictions_for_ensemble - -def seed_everything(seed): - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - -def resplit(X, y, test_split=0.33): - uniques, counts = np.unique(y, return_counts=True) - indices = np.array(range(len(y))) - if min(counts)>1: - ind_train, ind_test = train_test_split(indices, stratify=y, test_size=test_split, shuffle=True, random_state=42) - else: - ind_train, ind_test = train_test_split(indices, test_size=test_split, shuffle=True, random_state=42) - return ind_train, ind_test - -def load_openml_data(openml_task_id): - task = openml.tasks.get_task(task_id=openml_task_id) - X, y = task.get_X_and_y() - - ten_splits = [3945, 146212, 34539, 168337, 168338, 7593, 189354, 168332, 168331, 168330, 168335] - - if openml_task_id in ten_splits: - ind_train, ind_test = resplit(X, y) - else: - ind_train, ind_test = task.get_train_test_split_indices() - - return X[ind_train], X[ind_test], y[ind_train], y[ind_test] - -def get_hyperparameter_search_space_updates_lcbench(): - search_space_updates = HyperparameterSearchSpaceUpdates() - search_space_updates.append(node_name="InitializationSelector", - hyperparameter="initializer:initialize_bias", - value_range=["Yes"]) - search_space_updates.append(node_name="CreateDataLoader", - 
hyperparameter="batch_size", - value_range=[16, 512], - log=True) - search_space_updates.append(node_name="LearningrateSchedulerSelector", - hyperparameter="cosine_annealing:T_max", - value_range=[50, 50]) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedmlpnet:activation", - value_range=["relu"]) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedmlpnet:max_units", - value_range=[64, 1024], - log=True) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedresnet:max_units", - value_range=[32,512], - log=True) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedresnet:num_groups", - value_range=[1,5]) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedresnet:blocks_per_group", - value_range=[1,3]) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedresnet:resnet_shape", - value_range=["funnel"]) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedresnet:activation", - value_range=["relu"]) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedmlpnet:mlp_shape", - value_range=["funnel"]) - search_space_updates.append(node_name="NetworkSelector", - hyperparameter="shapedmlpnet:num_layers", - value_range=[1, 6]) - return search_space_updates - -def get_autonet_config_lcbench(min_budget, max_budget, max_runtime, run_id, task_id, num_workers, logdir, seed): - autonet_config = { - 'additional_logs': [], - 'additional_metrics': ["balanced_accuracy"], - 'algorithm': 'bohb', - 'batch_loss_computation_techniques': ['standard', 'mixup'], - 'best_over_epochs': False, - 'budget_type': 'epochs', - 'categorical_features': None, - #'cross_validator': 'stratified_k_fold', - #'cross_validator_args': dict({"n_splits":5}), - 'cross_validator': 'none', - 'cuda': False, - 'dataset_name': None, - 'early_stopping_patience': 10, - 'early_stopping_reset_parameters': False, - 'embeddings': ['none', 'learned'], - 'eta': 2, - 'final_activation': 'softmax', - 'full_eval_each_epoch': True, - 'hyperparameter_search_space_updates': get_hyperparameter_search_space_updates_lcbench(), - 'imputation_strategies': ['mean'], - 'initialization_methods': ['default'], - 'initializer': 'simple_initializer', - 'log_level': 'info', - 'loss_modules': ['cross_entropy_weighted'], - 'lr_scheduler': ['cosine_annealing'], - 'max_budget': max_budget, - 'max_runtime': max_runtime, - 'memory_limit_mb': 12000, - 'min_budget': min_budget, - 'min_budget_for_cv': 0, - 'min_workers': num_workers, - 'network_interface_name': 'eth0', - 'networks': ['shapedmlpnet', 'shapedresnet'], - 'normalization_strategies': ['standardize'], - 'num_iterations': 300, - 'optimize_metric': 'accuracy', - 'optimizer': ['sgd', 'adam'], - 'over_sampling_methods': ['none'], - 'preprocessors': ['none', 'truncated_svd'], - 'random_seed': seed, - 'refit_validation_split': 0.2, - 'result_logger_dir': logdir, - 'run_id': run_id, - 'run_worker_on_master_node': True, - 'shuffle': True, - 'target_size_strategies': ['none'], - 'task_id': task_id, - 'torch_num_threads': 2, - 'under_sampling_methods': ['none'], - 'use_pynisher': True, - 'use_tensorboard_logger': False, - 'validation_split': 0.2, - 'working_dir': '.' 
- } - return autonet_config - -def get_ensemble_config(): - ensemble_config = { - "ensemble_size":50, - "ensemble_only_consider_n_best":20, - "ensemble_sorted_initialization_n_best":0 - } - return ensemble_config - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='') - parser.add_argument("--run_id", type=int, help="An id for the run.") - parser.add_argument("--task_id", type=int) - parser.add_argument("--num_workers", type=int) - parser.add_argument("--dataset_id", type=int) - parser.add_argument("--seed", type=int) - parser.add_argument("--ensemble_setting", type=str, choices=["normal", "ensemble"]) - parser.add_argument("--portfolio_type", type=str, choices=["none", "simple", "greedy"]) - parser.add_argument("--num_threads", type=str, default="1") - parser.add_argument("--test", type=str, choices=["true", "false"], default="false") - args = parser.parse_args() - - os.environ["OMP_NUM_THREADS"] = args.num_threads - - logdir = os.path.join("logs/", str(args.dataset_id), "run_"+str(args.run_id)) - - # Get data - openml_ids = [7593, 168331, 167200, 189905, 167152, 189860, 167190, 189871] - openml_id = openml_ids[int(args.dataset_id)] - seed_everything(42) - X_train, X_test, y_train, y_test = load_openml_data(openml_id) - - # Seed - seed = args.seed - seed_everything(seed) - - # Get autonet config - min_budget=10 if args.test=="false" else 1 - max_budget=50 if args.test=="false" else 2 - max_runtime = 2*60*60 if args.test=="false" else 5*60 - autonet_config = get_autonet_config_lcbench(min_budget=min_budget, - max_budget=max_budget, - max_runtime=max_runtime, - run_id=args.run_id, - task_id=args.task_id, - num_workers=args.num_workers, - logdir=logdir, - seed=args.seed) - - if args.portfolio_type=="none": - autonet_config["algorithm"] = "bohb" - else: - autonet_config["algorithm"] = "portfolio_bohb" - autonet_config["portfolio_type"] = args.portfolio_type - - # Categoricals - cat_feats = [type(f)==str for f in X_train[0]] - if any(cat_feats): - autonet_config["categorical_features"] = cat_feats - autonet_config["embeddings"] = ['none', 'learned'] - - # Test logging - autonet_config["additional_logs"] = [test_predictions_for_ensemble.__name__, test_result_ens.__name__] - - # Initialize (ensemble) - if args.ensemble_setting == "ensemble": - print("Using ensembles!") - ensemble_config = get_ensemble_config() - autonet_config = {**autonet_config, **ensemble_config} - autonet = AutoNetEnsemble(AutoNetClassification, config_preset="full_cs", **autonet_config) - elif args.ensemble_setting == "normal": - autonet = AutoNetClassification(config_preset="full_cs", **autonet_config) - - # Test logging cont. 
- autonet.pipeline[LogFunctionsSelector.get_name()].add_log_function(name=test_predictions_for_ensemble.__name__, - log_function=test_predictions_for_ensemble(autonet, X_test, y_test), - loss_transform=False) - autonet.pipeline[LogFunctionsSelector.get_name()].add_log_function(name=test_result_ens.__name__, - log_function=test_result_ens(autonet, X_test, y_test)) - - autonet.pipeline[BaselineTrainer.get_name()].add_test_data(X_test) - - print(autonet.get_current_autonet_config()) - - fit_results = autonet.fit(X_train, y_train, **autonet.get_current_autonet_config()) - - score = autonet.score(X_test, y_test) if y_test is not None else None - - print("Test score:", score) - - # Write to json - results = dict() - results["run_id"] = int(args.run_id) - results["test_score"] = score - results["seed"] = int(seed) - - with open(logdir + "/results_dump.json", "w") as f: - json.dump(results, f) diff --git a/examples/ensemble/trajectories_job.sh b/examples/ensemble/trajectories_job.sh deleted file mode 100644 index e5ff3940a..000000000 --- a/examples/ensemble/trajectories_job.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -#SBATCH -p bosch_cpu-cascadelake # partition (queue) (test_cpu-ivy, all_cpu-cascadelake, bosch_cpu-cascadelake) -#SBATCH -t 1-00:00 # time (D-HH:MM) -#SBATCH -N 1 # number of nodes -#SBATCH -c 1 # number of cores -#SBATCH -a 1-9 # array size -#SBATCH -o logs/cluster/%x.%N.%j.out # STDOUT (the folder log has to be created prior to running or this won't work) -#SBATCH -e logs/cluster/%x.%N.%j.err # STDERR (the folder log has to be created prior to running or this won't work) -#SBATCH -J create_ensemble # sets the job name. If not specified, the file name will be used as job name -# Print some information about the job to STDOUT -echo "Workingdir: $PWD"; -echo "Started at $(date)"; -echo "Running job $SLURM_JOB_NAME using $SLURM_JOB_CPUS_PER_NODE cpus per node with given JID $SLURM_JOB_ID on queue $SLURM_JOB_PARTITION"; - -# Activate venv -source env/bin/activate -export PYTHONPATH=$PWD - -# Array jobs -python3 create_trajectory.py --test false --run_id $SLURM_ARRAY_TASK_ID - -# Done -echo "DONE"; -echo "Finished at $(date)"; diff --git a/examples/example_ensemble_classification.py b/examples/example_ensemble_classification.py new file mode 100644 index 000000000..b3b5cd9a0 --- /dev/null +++ b/examples/example_ensemble_classification.py @@ -0,0 +1,213 @@ + +""" +====================== +Ensemble from random search +--------------------------- + +This is a temporal example to make sure that ensemble works. +It also sets how SMAC should create the output information, +so that the ensemble builder works. 
+ +We will remove this file, once SMAC + ensemble builder work +====================== +""" +import copy +import tempfile +import time +import typing + +import dask +import dask.distributed + +import numpy as np + +import sklearn.datasets +import sklearn.model_selection +from sklearn.metrics import accuracy_score + +from autoPyTorch.constants import MULTICLASS, TABULAR_CLASSIFICATION +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.ensemble.ensemble_builder import EnsembleBuilderManager +from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy +from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline +from autoPyTorch.utils.backend import Backend, create +from autoPyTorch.utils.pipeline import get_dataset_requirements + + +def get_data_to_train(backend: Backend) -> typing.Tuple[typing.Dict[str, typing.Any]]: + """ + This function returns a fit dictionary that within itself, contains all + the information to fit a pipeline + """ + + # Get the training data for tabular classification + # Move to Australian to showcase numerical vs categorical + X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, + test_size=0.2, + ) + + train_indices, val_indices = sklearn.model_selection.train_test_split( + list(range(X_train.shape[0])), + random_state=1, + test_size=0.25, + ) + + # Create a datamanager for this toy problem + datamanager = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test, + ) + backend.save_datamanager(datamanager) + + info = {'task_type': datamanager.task_type, + 'output_type': datamanager.output_type, + 'issparse': datamanager.issparse, + 'numerical_columns': datamanager.numerical_columns, + 'categorical_columns': datamanager.categorical_columns} + dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info)) + + # Fit the pipeline + fit_dictionary = { + 'X_train': X_train, + 'y_train': y_train, + 'train_indices': train_indices, + 'val_indices': val_indices, + 'X_test': X_test, + 'y_test': y_test, + 'dataset_properties': dataset_properties, + # Training configuration + 'job_id': 'example_ensemble_1', + 'working_dir': './tmp/example_ensemble_1', # Hopefully generated by backend + 'device': 'cpu', + 'runtime': 100, + 'torch_num_threads': 1, + 'early_stopping': 20, + 'use_tensorboard_logger': True, + 'use_pynisher': False, + 'memory_limit': 4096, + 'metrics_during_training': True, + 'seed': 0, + 'budget_type': 'epochs', + 'epochs': 10.0, + 'split_id': 0, + 'backend': backend, + } + + return fit_dictionary + + +def random_search_and_save(fit_dictionary: typing.Dict[str, typing.Any], backend: Backend, + num_models: int) -> None: + """ + A function to generate randomly fitted pipelines. + It inefficiently pass the data in the fit dictionary, as there is no datamanager yet. + + It uses the backend to save the models and predictions for the ensemble selection + """ + + # Ensemble selection will evaluate performance on the OOF predictions. 
Store the OOF + # Ground truth + datamanager = backend.load_datamanager() + X_train, y_train = datamanager.train_tensors + X_test, y_test = (None, None) + if datamanager.test_tensors is not None: + X_test, y_test = datamanager.test_tensors + targets = np.take(y_train, fit_dictionary['val_indices'], axis=0) + backend.save_targets_ensemble(targets) + + for idx in range(num_models): + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + + # Sample a random configuration + pipeline_cs = pipeline.get_hyperparameter_search_space() + config = pipeline_cs.sample_configuration() + pipeline.set_hyperparameters(config) + + # Fit the sample configuration + pipeline.fit(fit_dictionary) + + # Predict using the fitted model + ensemble_predictions = pipeline.predict( + X_train.iloc[fit_dictionary['val_indices']] + ) + test_predictions = pipeline.predict(X_test) + + backend.save_numrun_to_dir( + seed=fit_dictionary['seed'], + idx=idx, + budget=fit_dictionary['epochs'], + model=pipeline, + cv_model=None, + ensemble_predictions=ensemble_predictions, + valid_predictions=None, + test_predictions=test_predictions, + ) + + score = accuracy_score(y_test, np.argmax(test_predictions, axis=1)) + print(f"Fitted a pipeline {idx} with score = {score}") + + return + + +if __name__ == "__main__": + + # Build a repository with random fitted models + backend = create(temporary_directory='./tmp/autoPyTorch_ensemble_test_tmp', + output_directory='./tmp/autoPyTorch_ensemble_test_out', + delete_tmp_folder_after_terminate=False) + + # Create the directory structure + backend._make_internals_directory() + + # Get data to train + fit_dictionary = get_data_to_train(backend) + + # Create some random models for the ensemble + random_search_and_save(fit_dictionary, backend, num_models=1) + + # Build an ensemble from the above components + # Use a dask client here to make sure this works properly, + # as with SMAC we will have to use a client + dask.config.set({'distributed.worker.daemon': False}) + dask_client = dask.distributed.Client( + dask.distributed.LocalCluster( + n_workers=2, + processes=True, + threads_per_worker=1, + # We use the temporary directory to save the + # dask workers, because deleting workers takes + # more time than deleting backend directories. + # This prevents an error saying that the worker + # file was deleted, so the client could not close + # the worker properly + local_directory=tempfile.gettempdir(), + ) + ) + manager = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=100, + backend=copy.deepcopy(backend), + dataset_name=fit_dictionary['job_id'], + output_type=MULTICLASS, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + ensemble_size=50, + ensemble_nbest=50, + max_models_on_disc=50, + seed=fit_dictionary['seed'], + max_iterations=1, + read_at_most=np.inf, + ensemble_memory_limit=fit_dictionary['memory_limit'], + random_state=fit_dictionary['seed'], + precision=32, + ) + manager.build_ensemble(dask_client) + future = manager.futures.pop() + dask.distributed.wait([future]) # wait for the ensemble process to finish + print(f"Ensemble built: {future.result()}") diff --git a/examples/example_image_classification.py b/examples/example_image_classification.py new file mode 100644 index 000000000..881d18f06 --- /dev/null +++ b/examples/example_image_classification.py @@ -0,0 +1,54 @@ +""" +====================== +Image Classification +====================== +""" +import numpy as np + +import 
sklearn.model_selection + +import torchvision.datasets + +from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline + +# Get the training data for image classification +trainset = torchvision.datasets.FashionMNIST(root='../datasets/', train=True, download=True) +data = trainset.data.numpy() +data = np.expand_dims(data, axis=3) +# Create a proof of concept pipeline! +dataset_properties = dict() +pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties) + +# Train and test split +train_indices, val_indices = sklearn.model_selection.train_test_split( + list(range(data.shape[0])), + random_state=1, + test_size=0.25, +) + +# Configuration space +pipeline_cs = pipeline.get_hyperparameter_search_space() +print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}") +config = pipeline_cs.sample_configuration() +print("Pipeline Random Config:\n", '_' * 40, f"\n{config}") +pipeline.set_hyperparameters(config) + +# Fit the pipeline +print("Fitting the pipeline...") + +pipeline.fit(X=dict(X_train=data, + is_small_preprocess=True, + dataset_properties=dict(mean=np.array([np.mean(data[:, :, :, i]) for i in range(1)]), + std=np.array([np.std(data[:, :, :, i]) for i in range(1)]), + num_classes=10, + num_features=data.shape[1] * data.shape[2], + image_height=data.shape[1], + image_width=data.shape[2], + is_small_preprocess=True), + train_indices=train_indices, + val_indices=val_indices, + ) + ) + +# Showcase some components of the pipeline +print(pipeline) diff --git a/examples/example_smac_intensify.py b/examples/example_smac_intensify.py new file mode 100644 index 000000000..b92c90968 --- /dev/null +++ b/examples/example_smac_intensify.py @@ -0,0 +1,151 @@ +import multiprocessing +import tempfile +import time +import typing + +import dask +import dask.distributed + +import sklearn.datasets +import sklearn.model_selection + +from autoPyTorch.datasets.resampling_strategy import CrossValTypes +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.optimizer.smbo import AutoMLSMBO +from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics +from autoPyTorch.utils.backend import create +from autoPyTorch.utils.logging_ import setup_logger, start_log_server +from autoPyTorch.utils.pipeline import get_configuration_space +from autoPyTorch.utils.stopwatch import StopWatch + + +def _start_logger(name, logging_config, backend): + logger_name = 'AutoML :%s' % (name) + setup_logger( + filename='%s.log' % str(logger_name), + logging_config=logging_config, + output_dir=backend.temporary_directory, + ) + + # As Auto-PyTorch works with distributed processes, + # we implement a logger server that can receive tcp + # pickled messages. 
They are unpickled and processed locally + # under the above logging configuration setting. + # We need to specify the logger_name so that received records + # are treated under the logger_name ROOT logger setting + context = multiprocessing.get_context('spawn') + stop_logging_server = context.Event() + port = context.Value('l') # be safe by using a long + port.value = -1 + + logging_server = context.Process( + target=start_log_server, + kwargs=dict( + host='localhost', + logname=logger_name, + event=stop_logging_server, + port=port, + filename='%s.log' % str(logger_name), + logging_config=logging_config, + output_dir=backend.temporary_directory, + ), + ) + + logging_server.start() + + while True: + with port.get_lock(): + if port.value == -1: + time.sleep(0.01) + else: + break + + return int(port.value), stop_logging_server + + +def get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]: + """ + This function returns the train and test splits of the data + used to fit and evaluate a pipeline + """ + + # Get the training data for tabular classification + # Move to Australian to showcase numerical vs categorical + X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, + test_size=0.2, + ) + + return X_train, X_test, y_train, y_test + + +if __name__ == "__main__": + # Get data to train + X_train, X_test, y_train, y_test = get_data_to_train() + + # Build a repository with random fitted models + backend = create(temporary_directory='./tmp/autoPyTorch_smac_test_tmp', + output_directory='./tmp/autoPyTorch_smac_test_out', + delete_tmp_folder_after_terminate=False) + # Create the directory structure + backend._make_internals_directory() + + # Create a datamanager for this toy problem + datamanager = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test, + resampling_strategy=CrossValTypes.k_fold_cross_validation) + backend.save_datamanager(datamanager) + + # Set up a dask client for the optimizer + # Use a dask client here to make sure this works properly, + # as with SMAC we will have to use a client + dask.config.set({'distributed.worker.daemon': False}) + dask_client = dask.distributed.Client( + dask.distributed.LocalCluster( + n_workers=2, + processes=True, + threads_per_worker=1, + # We use the temporary directory to save the + # dask workers, because deleting workers takes + # more time than deleting backend directories. + # This prevents an error saying that the worker + # file was deleted, so the client could not close + # the worker properly + local_directory=tempfile.gettempdir(), + ) + ) + port, stop_logging_server = _start_logger("trial_australian", logging_config=None, backend=backend) + + info = {'task_type': datamanager.task_type, + 'output_type': datamanager.output_type, + 'categorical_columns': datamanager.categorical_columns, + 'numerical_columns': datamanager.numerical_columns} + config_space = get_configuration_space(info) + # Make the optimizer + smbo = AutoMLSMBO( + config_space=config_space, + dataset_name='Australian', + backend=backend, + total_walltime_limit=120, + dask_client=dask_client, + func_eval_time_limit=60, + memory_limit=4096, + metric=get_metrics(dataset_properties=dict({'task_type': datamanager.task_type, + 'output_type': datamanager.output_type}))[0], + watcher=StopWatch(), + n_jobs=2, + ensemble_callback=None, + logger_port=port + ) + + # Then run the optimization + 
run_history, trajectory, budget = smbo.run_smbo() + + for k, v in run_history.data.items(): + print(f"{k}->{v}") + if not stop_logging_server.is_set(): + stop_logging_server.set() diff --git a/examples/example_tabular_classification.py b/examples/example_tabular_classification.py new file mode 100644 index 000000000..4693b39bb --- /dev/null +++ b/examples/example_tabular_classification.py @@ -0,0 +1,52 @@ +""" +====================== +Tabular Classification +====================== +""" +import typing +import warnings + +warnings.simplefilter(action='ignore', category=UserWarning) +warnings.simplefilter(action='ignore', category=FutureWarning) + +import sklearn.datasets +import sklearn.model_selection + +from autoPyTorch.api.tabular_classification import TabularClassificationTask +from autoPyTorch.datasets.tabular_dataset import TabularDataset + + +# Get the training data for tabular classification +def get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]: + """ + This function returns a fit dictionary that within itself, contains all + the information to fit a pipeline + """ + + # Get the training data for tabular classification + # Move to Australian to showcase numerical vs categorical + X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, + ) + + return X_train, X_test, y_train, y_test + + +if __name__ == '__main__': + # Get data to train + X_train, X_test, y_train, y_test = get_data_to_train() + + # Create a datamanager for this toy problem + datamanager = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test) + + api = TabularClassificationTask(delete_tmp_folder_after_terminate=False,) + api.search(dataset=datamanager, optimize_metric='accuracy', total_walltime_limit=500, func_eval_time_limit=150) + print(api.run_history, api.trajectory) + y_pred = api.predict(X_test) + score = api.score(y_pred, y_test) + print(score) diff --git a/examples/real_data/advanced_classification.py b/examples/real_data/advanced_classification.py deleted file mode 100644 index cc7b58897..000000000 --- a/examples/real_data/advanced_classification.py +++ /dev/null @@ -1,78 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) -import logging - -from autoPyTorch import AutoNetClassification, AutoNetMultilabel -import autoPyTorch.pipeline.nodes as autonet_nodes -from autoPyTorch.components.metrics.additional_logs import test_result -import autoPyTorch.components.metrics as autonet_metrics - -from autoPyTorch.data_management.data_manager import DataManager - -dm = DataManager(verbose=1) -dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'datasets')) - -# choose between the 5 defined testcases -TEST_CASE = 1 - -""" TEST CASE 1: Sparse data """ -if TEST_CASE == 1: - dm.read_data(os.path.join(dataset_dir, "automl/newsgroups/newsgroups_public.info"), is_classification=True) - metric = "pac_metric" - additional_metrices = ["accuracy"] - -""" TEST CASE 2: Sparse binary data """ -if TEST_CASE == 2: - dm.read_data(os.path.join(dataset_dir, "automl/dorothea/dorothea_public.info"), is_classification=True) - metric = "auc_metric" - additional_metrices = ["accuracy"] - -""" TEST CASE 3: Multilabel, sparse, binary, cv """ -if TEST_CASE == 3: - 
dm.read_data(os.path.join(dataset_dir, "automl/tania/tania_public.info"), is_classification=True) - metric = "pac_metric" - additional_metrices = [] - -""" TEST CASE 4: Openml, missing values """ -if TEST_CASE == 4: - dm.read_data("openml:188", is_classification=True) - metric = "accuracy" - additional_metrices = [] - -""" TEST CASE 5: MNIST """ -if TEST_CASE == 5: - dm.read_data("openml:40996", is_classification=True) - metric = "accuracy" - additional_metrices = [] - -# Generate autonet -autonet = AutoNetClassification() if TEST_CASE != 3 else AutoNetMultilabel() - -# add metrics and test_result to pipeline -autonet.pipeline[autonet_nodes.LogFunctionsSelector.get_name()].add_log_function('test_result', test_result(autonet, dm.X_test, dm.Y_test), True) - -# Fit autonet using train data -res = autonet.fit(min_budget=300, - max_budget=900, max_runtime=1800, budget_type='time', - normalization_strategies=['maxabs'], - optimize_metric=metric, - additional_metrics=additional_metrices, - cross_validator='stratified_k_fold', - cross_validator_args={'n_splits': 3}, - preprocessors=["truncated_svd"], - log_level="debug", - X_train=dm.X_train, - Y_train=dm.Y_train, - X_valid=None, - Y_valid=None, - categorical_features=dm.categorical_features, - additional_logs=["test_result"]) - -# Calculate quality metrics using validation data. -autonet.score(dm.X_test, dm.Y_test) -print(res) diff --git a/examples/real_data/classification_test.py b/examples/real_data/classification_test.py deleted file mode 100644 index cf964c943..000000000 --- a/examples/real_data/classification_test.py +++ /dev/null @@ -1,65 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", ".."))) -import logging - -from autoPyTorch import AutoNetClassification - -from autoPyTorch.data_management.data_manager import DataManager - -dm = DataManager(verbose=1) -dataset_dir = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', '..', 'datasets')) - -# choose between the 10 classification testcases on real data. 
-TEST_CASE = 4 - -if TEST_CASE == 1: - dm.read_data("openml:22", is_classification=True) - # 2000 samples, 10 classes, 48 features - -if TEST_CASE == 2: - dm.read_data("openml:1476", is_classification=True) - # 13910 samples, 6 classes, 128 features - -if TEST_CASE == 3: - dm.read_data("openml:1464", is_classification=True) - # 748 samples, 2 classes, 4 features - -if TEST_CASE == 4: - dm.read_data("openml:31", is_classification=True) - -if TEST_CASE == 5: - dm.read_data("openml:28", is_classification=True) - # 5620 samples, 10 classes, 65 features - -if TEST_CASE == 6: - dm.read_data("openml:42", is_classification=True) - # 683 samples, 19 classes, 36 categorical features - -if TEST_CASE == 7: - dm.read_data("openml:44", is_classification=True) - # 4601 samples, 2 classes, 58 features - -if TEST_CASE == 8: - dm.read_data("openml:32", is_classification=True) - -if TEST_CASE == 9: - dm.read_data("openml:334", is_classification=True) - -if TEST_CASE == 10: - dm.read_data("openml:40996", is_classification=True) - - -autonet = AutoNetClassification(budget_type='epochs', min_budget=1, max_budget=9, num_iterations=1, log_level='info') - -res = autonet.fit(X_train=dm.X_train, - Y_train=dm.Y_train, - early_stopping_patience=3, - # validation_split=0.3, - categorical_features=dm.categorical_features) - -print(res) diff --git a/examples/real_data/openml_task.py b/examples/real_data/openml_task.py deleted file mode 100644 index 799d9267b..000000000 --- a/examples/real_data/openml_task.py +++ /dev/null @@ -1,30 +0,0 @@ -import openml -from pprint import pprint -from autoPyTorch import AutoNetClassification -from sklearn.metrics import accuracy_score - - -# get OpenML task by its ID -task = openml.tasks.get_task(task_id=32) -X, y = task.get_X_and_y() -ind_train, ind_test = task.get_train_test_split_indices() - - -# run Auto-PyTorch -autoPyTorch = AutoNetClassification("tiny_cs", # config preset - log_level='info', - max_runtime=300, - min_budget=30, - max_budget=90) - -autoPyTorch.fit(X[ind_train], y[ind_train], validation_split=0.3) - - -# predict -y_pred = autoPyTorch.predict(X[ind_test]) - -print("Accuracy score", accuracy_score(y[ind_test], y_pred)) - - -# print network configuration -pprint(autoPyTorch.fit_result["optimized_hyperparameter_config"]) diff --git a/examples/traditional_pipeline.py b/examples/traditional_pipeline.py new file mode 100644 index 000000000..39b5db17e --- /dev/null +++ b/examples/traditional_pipeline.py @@ -0,0 +1,86 @@ +""" +====================== +Tabular Classification +====================== +""" +import typing + +import sklearn.datasets +import sklearn.model_selection + +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.pipeline.traditional_tabular_classification import TraditionalTabularClassificationPipeline +from autoPyTorch.utils.backend import create +from autoPyTorch.utils.pipeline import get_dataset_requirements + + +# Get the training data for tabular classification +def get_data_to_train() -> typing.Tuple[typing.Any, typing.Any, typing.Any, typing.Any]: + """ + This function returns a fit dictionary that within itself, contains all + the information to fit a pipeline + """ + + # Get the training data for tabular classification + # Move to Australian to showcase numerical vs categorical + X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, + ) + + return X_train, X_test, y_train, y_test + + 
+if __name__ == '__main__': + # Get data to train + X_train, X_test, y_train, y_test = get_data_to_train() + + # Create a datamanager for this toy problem + datamanager = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test) + + backend = create(temporary_directory='./tmp/example_trad_clf_1_tmp', + output_directory='./tmp/example_trad_clf_1_out', + delete_tmp_folder_after_terminate=False) + backend.save_datamanager(datamanager) + info = {'task_type': datamanager.task_type, + 'output_type': datamanager.output_type, + 'issparse': datamanager.issparse, + 'numerical_columns': datamanager.numerical_columns, + 'categorical_columns': datamanager.categorical_columns} + dataset_requirements = get_dataset_requirements(info=info) + dataset_properties = datamanager.get_dataset_properties(dataset_requirements) + pipeline = TraditionalTabularClassificationPipeline(dataset_properties=dataset_properties) + + split_id = 0 + X = dict({'dataset_properties': dataset_properties, + 'backend': backend, + 'X_train': datamanager.train_tensors[0], + 'y_train': datamanager.train_tensors[1], + 'X_test': datamanager.test_tensors[0] if datamanager.test_tensors is not None else None, + 'y_test': datamanager.test_tensors[1] if datamanager.test_tensors is not None else None, + 'train_indices': datamanager.splits[split_id][0], + 'val_indices': datamanager.splits[split_id][1], + 'split_id': split_id, + 'job_id': 0 + }) + + # Configuration space + pipeline_cs = pipeline.get_hyperparameter_search_space() + print("Pipeline CS:\n", '_' * 40, f"\n{pipeline_cs}") + config = pipeline_cs.sample_configuration() + print("Pipeline Random Config:\n", '_' * 40, f"\n{config}") + pipeline.set_hyperparameters(config) + + # Fit the pipeline + print("Fitting the pipeline...") + pipeline.fit(X) + + # Showcase some components of the pipeline + print(pipeline) + + predictions = pipeline.predict(X_test.to_numpy()) + print(predictions) diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..8c8b6589a --- /dev/null +++ b/mypy.ini @@ -0,0 +1,8 @@ +[mypy] +# Reports any config lines that are not recognized +warn_unused_configs=True +ignore_missing_imports=True +follow_imports=skip +disallow_untyped_defs=True +disallow_incomplete_defs=True +disallow_untyped_decorators=True diff --git a/optional-requirements.txt b/optional-requirements.txt deleted file mode 100644 index 400f72608..000000000 --- a/optional-requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -SimpleITK -matplotlib diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index cf29fe78a..366837cd6 --- a/requirements.txt +++ b/requirements.txt @@ -1,19 +1,19 @@ -setuptools -Cython -netifaces -numpy pandas -scipy -statsmodels -scikit-learn>=0.20.0 -imblearn -ConfigSpace -pynisher -hpbandster -fasteners torch torchvision -tensorboard_logger -openml -lightgbm +scikit-learn>=0.22.0,<0.23 +torchvision +pytorch-lightning +numpy +scipy +lockfile +imgaug>=0.4.0 +ConfigSpace>=0.4.14,<0.5 +pynisher>=0.6.3 +pyrfr>=0.7,<0.9 +smac>=0.13.1,<0.14 +dask +distributed>=2.2.0 catboost +lightgbm + diff --git a/scripts/Singularity b/scripts/Singularity deleted file mode 100644 index 47c423256..000000000 --- a/scripts/Singularity +++ /dev/null @@ -1,40 +0,0 @@ -Bootstrap: docker -From: ubuntu - -%help -Singularity container for Auto-PyTorch. 
- -%labels - Version v0.1 - -%environment - export PATH=/data/miniconda/bin:$PATH - -%setup - mkdir ${SINGULARITY_ROOTFS}/data - -%files - move_into_container.tar.gz - -%post - apt-get update - apt-get -y install wget git gcc tar - cd /data - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - bash Miniconda3-latest-Linux-x86_64.sh -b -p /data/miniconda - export PATH=/data/miniconda/bin:$PATH - conda install -y pytorch-cpu torchvision-cpu -c pytorch - git clone https://github.com/urbanmatthias/Auto-PyTorch.git - cd Auto-PyTorch - git submodule update --init --recursive - if [ -f /move_into_container.tar.gz ] - then - tar -xzvf /move_into_container.tar.gz --overwrite - rm -f /move_into_container.tar.gz - fi - pip install ConfigSpace==0.4.7 - cat requirements.txt | xargs -n 1 -L 1 pip install - cat optional-requirements.txt | xargs -n 1 -L 1 pip install - cd /data/Auto-PyTorch/submodules/HpBandSter - python setup.py install - cd /data diff --git a/scripts/build_singularity_container.py b/scripts/build_singularity_container.py deleted file mode 100644 index 92f81b2c2..000000000 --- a/scripts/build_singularity_container.py +++ /dev/null @@ -1,17 +0,0 @@ -import os, subprocess - -if __name__ == "__main__": - move_into_container = list() - if input("Do you want to move some of your local files into to container? This will overwrite files from origin/master. (y/n) ").startswith("y"): - for f in sorted(os.listdir()): - if input("Move %s into container (y/n)? " % f).startswith("y"): - move_into_container.append(f) - if move_into_container: - subprocess.call(["tar", "-czvf", "move_into_container.tar.gz"] + move_into_container) - image_name = input("Name of Image? (Default: Auto-PyTorch.simg) ") or "Auto-PyTorch.simg" - if os.path.exists(image_name) and input("%s exists. Remove (y/n)? " % image_name).startswith("y"): - os.remove(image_name) - print("Building Singularity container. You need to be root for that.") - subprocess.call(["sudo", "singularity", "build", image_name, "scripts/Singularity"]) - if move_into_container: - os.remove("move_into_container.tar.gz") diff --git a/scripts/recompute_ensemble_performance.moab b/scripts/recompute_ensemble_performance.moab deleted file mode 100644 index 05a219581..000000000 --- a/scripts/recompute_ensemble_performance.moab +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -#MOAB -N LOL -#MOAB -l nodes=1:ppn=1,walltime=90:00:00,pmem=8gb -#MOAB -V -#MOAB -E -#MOAB -t 1-73 - -cd $HOME/Auto-PyTorch - -python scripts/recompute_ensemble_performance.py --partial_benchmark $MOAB_JOBARRAYINDEX 0 --host_config configs/hosts/nemo.txt configs/benchmark/openml.txt diff --git a/scripts/recompute_ensemble_performance.py b/scripts/recompute_ensemble_performance.py deleted file mode 100644 index ec3eac338..000000000 --- a/scripts/recompute_ensemble_performance.py +++ /dev/null @@ -1,64 +0,0 @@ - - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", ".."))) - -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.utils.benchmarking.benchmark import Benchmark - -import argparse - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Run benchmarks for autonet.') - parser.add_argument("--run_id_range", default=None, help="An id for the run. 
A range of run ids can be given: start-stop.") - parser.add_argument("--partial_benchmark", default=None, nargs="+", help="Only run a part of the benchmark. Run other parts later or in parallel. 3-tuple: instance_slice, autonet_config_slice, run_number_range.") - parser.add_argument("--result_dir", default=None, help="Override result dir in benchmark config.") - parser.add_argument("--host_config", default=None, help="Override some configs according to host specifics.") - parser.add_argument("--only_finished_runs", action="store_true", help="Skip run folders, that do not contain a summary.json") - parser.add_argument("--ensemble_size", default=0, type=int, help="Ensemble config") - parser.add_argument("--ensemble_only_consider_n_best", default=0, type=int, help="Ensemble config") - parser.add_argument("--ensemble_sorted_initialization_n_best", default=0, type=int, help="Ensemble config") - parser.add_argument('benchmark', help='The benchmark to visualize') - - args = parser.parse_args() - - run_id_range = args.run_id_range - if args.run_id_range is not None: - if "-" in args.run_id_range: - run_id_range = range(int(args.run_id_range.split("-")[0]), int(args.run_id_range.split("-")[1]) + 1) - else: - run_id_range = range(int(args.run_id_range), int(args.run_id_range) + 1) - - benchmark_config_file = args.benchmark - host_config_file = args.host_config - - benchmark = Benchmark() - config_parser = benchmark.get_benchmark_config_file_parser() - - benchmark_config = config_parser.read(benchmark_config_file) - benchmark_config.update(config_parser.read(host_config_file)) - - if (args.result_dir is not None): - benchmark_config['result_dir'] = os.path.abspath(args.result_dir) - - if (args.partial_benchmark is not None): - if (len(args.partial_benchmark) > 0): - benchmark_config['instance_slice'] = args.partial_benchmark[0] - if (len(args.partial_benchmark) > 1): - benchmark_config['autonet_config_slice'] = args.partial_benchmark[1] - if (len(args.partial_benchmark) > 2): - benchmark_config['run_number_range'] = args.partial_benchmark[2] - - benchmark_config["run_id_range"] = run_id_range - benchmark_config["only_finished_runs"] = args.only_finished_runs - benchmark_config["ensemble_size"] = args.ensemble_size - benchmark_config["ensemble_only_consider_n_best"] = args.ensemble_only_consider_n_best - benchmark_config["ensemble_sorted_initialization_n_best"] = args.ensemble_sorted_initialization_n_best - benchmark_config['benchmark_name'] = os.path.basename(args.benchmark).split(".")[0] - - benchmark.compute_ensemble_performance(**benchmark_config) diff --git a/scripts/run_benchmark.py b/scripts/run_benchmark.py deleted file mode 100644 index 6c3aa2a8c..000000000 --- a/scripts/run_benchmark.py +++ /dev/null @@ -1,52 +0,0 @@ - - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", ".."))) - -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.utils.benchmarking.benchmark import Benchmark - -import argparse - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Run benchmarks for autonet.') - parser.add_argument("--run_id", default="0", help="An id for the run.") - parser.add_argument("--task_id", default=-1, type=int, help="An id for the task. Necessary on cluster.") - parser.add_argument("--partial_benchmark", default=None, nargs="+", help="Only run a part of the benchmark. 
Run other parts later or in parallel. 3-tuple: instance_slice, autonet_config_slice, run_number_range.") - parser.add_argument("--result_dir", default=None, help="Override result dir in benchmark config.") - parser.add_argument("--host_config", default=None, help="Override some configs according to host specifics.") - parser.add_argument('benchmark', help='The benchmark to run') - args = parser.parse_args() - - assert "-" not in args.run_id, "The run id must not contain a minus" - - benchmark_config_file = args.benchmark - host_config_file = args.host_config - - benchmark = Benchmark() - config_parser = benchmark.get_benchmark_config_file_parser() - - benchmark_config = config_parser.read(benchmark_config_file) - benchmark_config.update(config_parser.read(host_config_file)) - - if (args.result_dir is not None): - benchmark_config['result_dir'] = os.path.abspath(args.result_dir) - - if args.partial_benchmark is not None: - if len(args.partial_benchmark) > 0: - benchmark_config['instance_slice'] = args.partial_benchmark[0] - if len(args.partial_benchmark) > 1: - benchmark_config['autonet_config_slice'] = args.partial_benchmark[1] - if len(args.partial_benchmark) > 2: - benchmark_config['run_number_range'] = args.partial_benchmark[2] - - benchmark_config['run_id'] = args.run_id - benchmark_config['task_id'] = args.task_id - benchmark_config['benchmark_name'] = os.path.basename(args.benchmark).split(".")[0] - - benchmark.run_benchmark(**benchmark_config) diff --git a/scripts/run_benchmark_cluster.py b/scripts/run_benchmark_cluster.py deleted file mode 100644 index d5173436f..000000000 --- a/scripts/run_benchmark_cluster.py +++ /dev/null @@ -1,177 +0,0 @@ -import os, sys, re, shutil -import subprocess -import json -from math import ceil -sys.path.append(os.path.abspath(os.path.join(__file__, "..", ".."))) - -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.utils.benchmarking.benchmark import Benchmark -from autoPyTorch.utils.benchmarking.benchmark_pipeline import ForInstance, ForAutoNetConfig, ForRun, CreateAutoNet, SetAutoNetConfig -from autoPyTorch.data_management.data_manager import DataManager, ProblemType - -import argparse - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Run benchmarks for autonet.') - parser.add_argument("--partial_benchmark", default=None, nargs="+", help="Only run a part of the benchmark. Run other parts later or in parallel. 3-tuple: instance_slice, autonet_config_slice, run_number_range.") - parser.add_argument("--time_bonus", default=[7200, 8200, 10800], type=int, nargs="+", help="Give the job some more time.") - parser.add_argument("--memory_bonus", default=1000, type=int, help="Give the job some more memory. 
Unit: MB.") - parser.add_argument("--result_dir", default=None, help="The dir to save the results") - parser.add_argument("--output_dir", default=None, help="The dir to save the outputs") - parser.add_argument("--template_args", default=[], nargs="+", type=str, help="Additional args specified in template") - parser.add_argument("runscript", help="The script template used to submit job on cluster.") - parser.add_argument('benchmark', help='The benchmark to run') - args = parser.parse_args() - - # parse the runscript template - with open(args.runscript, "r") as f: - runscript_template = list(f) - runscript_name = os.path.basename(args.runscript if not args.runscript.endswith(".template") else args.runscript[:-9]) - autonet_home = ConfigFileParser.get_autonet_home() - host_config_orig = [l[13:] for l in runscript_template if l.startswith("#HOST_CONFIG ")][0].strip() - host_config_file = os.path.join(autonet_home, host_config_orig) if not os.path.isabs(host_config_orig) else host_config_orig - - # parse define statements - for i in range(len(runscript_template)): - if runscript_template[i].startswith("#DEFINE"): - runscript_template[i] = "%s=%s\n" % (runscript_template[i].split()[1], " ".join(runscript_template[i].split()[2:])) - - # parse template args - runscript_template_args = [l[19:].strip().split() for l in runscript_template if l.startswith("#TEMPLATE_ARGUMENT ")] - parsed_template_args = dict() - for variable_name, default in runscript_template_args: - try: - value = [a.split("=")[1] for a in args.template_args if a.split("=")[0] == variable_name][0] - except IndexError: - value = default - parsed_template_args[variable_name] = value - - # get benchmark config - benchmark_config_file = args.benchmark - - benchmark = Benchmark() - config_parser = benchmark.get_benchmark_config_file_parser() - - benchmark_config = config_parser.read(benchmark_config_file) - benchmark_config.update(config_parser.read(host_config_file)) - config_parser.set_defaults(benchmark_config) - - # get ranges of runs, autonet_configs and instances - all_configs = ForAutoNetConfig.get_config_files(benchmark_config, parse_slice=False) - all_instances = ForInstance.get_instances(benchmark_config, instances_must_exist=True) - - runs_range = list(range(benchmark_config["num_runs"])) - configs_range = list(range(len(all_configs))) - instances_range = list(range(len(all_instances))) - - if args.partial_benchmark: - if len(args.partial_benchmark) > 0: - instances_range = instances_range[ForInstance.parse_slice(args.partial_benchmark[0])] - if len(args.partial_benchmark) > 1: - configs_range = configs_range[ForAutoNetConfig.parse_slice(args.partial_benchmark[1])] - if len(args.partial_benchmark) > 2: - runs_range = list(ForRun.parse_range(args.partial_benchmark[2], benchmark_config["num_runs"])) - - # set up dict used used to make replacements in runscript - base_dir = os.getcwd() - result_dir = os.path.abspath(args.result_dir) if args.result_dir is not None else benchmark_config["result_dir"] - outputs_folder = os.path.abspath(args.output_dir) if args.output_dir is not None else os.path.join(base_dir, "outputs") - benchmark = args.benchmark if os.path.isabs(args.benchmark) else os.path.join(base_dir, args.benchmark) - output_base_dir = os.path.join(outputs_folder, os.path.basename(benchmark).split(".")[0]) - replacement_dict = { - "BASE_DIR": base_dir, - "OUTPUTS_FOLDER": outputs_folder, - "OUTPUT_BASE_DIR": output_base_dir, - "AUTONET_HOME": autonet_home, - "BENCHMARK": benchmark, - "BENCHMARK_NAME": 
os.path.basename(benchmark).split(".")[0], - "HOST_CONFIG": host_config_file, - "ORIG_HOST_CONFIG": host_config_orig, - "ORIG_BENCHMARK": args.benchmark, - "RESULT_DIR": result_dir - } - replacement_dict.update(parsed_template_args) - - # create directories - if os.path.exists(output_base_dir) and input("%s exists. Delete? (y/n)" %output_base_dir).startswith("y"): - shutil.rmtree(output_base_dir) - if not os.path.exists(outputs_folder): - os.mkdir(outputs_folder) - if not os.path.exists(output_base_dir): - os.mkdir(output_base_dir) - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - # iterate over all runs - for run_number in runs_range: - replacement_dict["RUN_NUMBER"] = run_number - - for config_id in configs_range: - replacement_dict["CONFIG_ID"] = config_id - replacement_dict["CONFIG_FILE"] = all_configs[config_id] - - # get autonet - dm = DataManager() - dm.problem_type = { - "feature_classification": ProblemType.FeatureClassification, - "feature_multilabel": ProblemType.FeatureMultilabel, - "feature_regression": ProblemType.FeatureRegression - }[benchmark_config["problem_type"]] - autonet = CreateAutoNet().fit(benchmark_config, dm)["autonet"] - autonet_config_file = benchmark_config["autonet_configs"][config_id] - - for instance_id in instances_range: - replacement_dict["INSTANCE_ID"] = instance_id - replacement_dict["INSTANCE_FILE"] = all_instances[instance_id] - - # read autonet config - SetAutoNetConfig().fit(benchmark_config, autonet, autonet_config_file, dm, all_instances[instance_id]) - autonet_config = autonet.get_current_autonet_config() - - # add autonet config specific stuff to replacement dict - replacement_dict["NUM_NODES"] = autonet_config["min_workers"] + (0 if autonet_config["run_worker_on_master_node"] else 1) - replacement_dict["MEMORY_LIMIT_MB"] = autonet_config["memory_limit_mb"] + args.memory_bonus - time_limit_base = autonet_config["max_runtime"] if autonet_config["max_runtime"] < float("inf") else (benchmark_config["time_limit"] - max(args.time_bonus)) - replacement_dict.update({("TIME_LIMIT[%s]" % i): int(t + time_limit_base) for i, t in enumerate(args.time_bonus)}) - replacement_dict["NUM_PROCESSES"] = max(autonet_config["torch_num_threads"], int(ceil(replacement_dict["MEMORY_LIMIT_MB"] / benchmark_config["memory_per_core"]))) - - # create output subdirectory used fot this run - output_dir = os.path.join(output_base_dir, "output_%s_%s_%s" % (instance_id, config_id, run_number)) - if os.path.exists(output_dir) and input("%s exists. Delete? (y/n)" %output_dir).startswith("y"): - shutil.rmtree(output_dir) - os.mkdir(output_dir) - replacement_dict["OUTPUT_DIR"] = output_dir - - # make replacements in runscript and get command to submit the job - pattern = re.compile("|".join(map(lambda x: re.escape("$${" + x + "}"), replacement_dict.keys()))) - runscript = [pattern.sub(lambda x: str(replacement_dict[x.group()[3:-1]]), l) for l in runscript_template] - command = [l[9:] for l in runscript if l.startswith("#COMMAND ")][0].strip() - - # save runscript - with open(os.path.join(output_dir, runscript_name), "w") as f: - f.writelines(runscript) - - # submit job - os.chdir(output_dir) - print("Calling %s in %s" % (command, os.getcwd())) - try: - command_output = subprocess.check_output(command, shell=True) - except subprocess.CalledProcessError as e: - print("Warning: %s" % e) - command_output = str(e).encode("utf-8") - if not input("Continue (y/n)? 
").startswith("y"): - raise - os.chdir(base_dir) - - # save output and info data - with open(os.path.join(output_dir, "call.info"), "w") as f: - print(command, file=f) - json.dump(replacement_dict, f) - print("", file=f) - with open(os.path.join(output_dir, "call.info"), "ba") as f: - f.write(command_output) diff --git a/scripts/run_benchmark_cluster_condensed.py b/scripts/run_benchmark_cluster_condensed.py deleted file mode 100644 index b03ceb99b..000000000 --- a/scripts/run_benchmark_cluster_condensed.py +++ /dev/null @@ -1,228 +0,0 @@ -import os, sys, re, shutil -import subprocess -import json -from math import ceil -from copy import copy -sys.path.append(os.path.abspath(os.path.join(__file__, "..", ".."))) - -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.utils.benchmarking.benchmark import Benchmark -from autoPyTorch.utils.benchmarking.benchmark_pipeline import ForInstance, ForAutoNetConfig, ForRun, CreateAutoNet, SetAutoNetConfig -from autoPyTorch.data_management.data_manager import DataManager, ProblemType - -import argparse - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Run benchmarks for autonet.') - parser.add_argument("--partial_benchmark", default=None, nargs="+", help="Only run a part of the benchmark. Run other parts later or in parallel. 3-tuple: instance_slice, autonet_config_slice, run_number_range.") - parser.add_argument("--time_bonus", default=[7200, 8200, 10800], type=int, nargs="+", help="Give the job some more time.") - parser.add_argument("--memory_bonus", default=1000, type=int, help="Give the job some more memory. Unit: MB.") - parser.add_argument("--result_dir", default=None, help="The dir to save the results") - parser.add_argument("--output_dir", default=None, help="The dir to save the outputs") - parser.add_argument("--template_args", default=[], nargs="+", type=str, help="Additional args specified in template") - parser.add_argument("--num_condense", default=100, type=int, help="How many jobs should be condensed to single job array") - parser.add_argument("runscript", help="The script template used to submit job on cluster.") - parser.add_argument('benchmark', help='The benchmark to run') - args = parser.parse_args() - - # parse the runscript template - with open(args.runscript, "r") as f: - runscript_template = list(f) - runscript_name = os.path.basename(args.runscript if not args.runscript.endswith(".template") else args.runscript[:-9]) - autonet_home = ConfigFileParser.get_autonet_home() - host_config_orig = [l[13:] for l in runscript_template if l.startswith("#HOST_CONFIG ")][0].strip() - host_config_file = os.path.join(autonet_home, host_config_orig) if not os.path.isabs(host_config_orig) else host_config_orig - - # parse template args - runscript_template_args = [l[19:].strip().split() for l in runscript_template if l.startswith("#TEMPLATE_ARGUMENT ")] - parsed_template_args = dict() - for variable_name, default in runscript_template_args: - try: - value = [a.split("=")[1] for a in args.template_args if a.split("=")[0] == variable_name][0] - except IndexError: - value = default - parsed_template_args[variable_name] = value - - # get benchmark config - benchmark_config_file = args.benchmark - - benchmark = Benchmark() - config_parser = benchmark.get_benchmark_config_file_parser() - - benchmark_config = config_parser.read(benchmark_config_file) - 
benchmark_config.update(config_parser.read(host_config_file)) - config_parser.set_defaults(benchmark_config) - - # get ranges of runs, autonet_configs and instances - all_configs = ForAutoNetConfig.get_config_files(benchmark_config, parse_slice=False) - all_instances = ForInstance.get_instances(benchmark_config, instances_must_exist=True) - - runs_range = list(range(benchmark_config["num_runs"])) - configs_range = list(range(len(all_configs))) - instances_range = list(range(len(all_instances))) - - if args.partial_benchmark: - if len(args.partial_benchmark) > 0: - instances_range = instances_range[ForInstance.parse_slice(args.partial_benchmark[0])] - if len(args.partial_benchmark) > 1: - configs_range = configs_range[ForAutoNetConfig.parse_slice(args.partial_benchmark[1])] - if len(args.partial_benchmark) > 2: - runs_range = list(ForRun.parse_range(args.partial_benchmark[2], benchmark_config["num_runs"])) - - # set up dict used used to make replacements in runscript - base_dir = os.getcwd() - result_dir = os.path.abspath(args.result_dir) if args.result_dir is not None else benchmark_config["result_dir"] - outputs_folder = os.path.abspath(args.output_dir) if args.output_dir is not None else os.path.join(base_dir, "outputs") - benchmark = args.benchmark if os.path.isabs(args.benchmark) else os.path.join(base_dir, args.benchmark) - output_base_dir = os.path.join(outputs_folder, os.path.basename(benchmark).split(".")[0]) - replacement_dict = { - "BASE_DIR": base_dir, - "OUTPUTS_FOLDER": outputs_folder, - "OUTPUT_BASE_DIR": output_base_dir, - "AUTONET_HOME": autonet_home, - "BENCHMARK": benchmark, - "BENCHMARK_NAME": os.path.basename(benchmark).split(".")[0], - "HOST_CONFIG": host_config_file, - "ORIG_HOST_CONFIG": host_config_orig, - "ORIG_BENCHMARK": args.benchmark, - "RESULT_DIR": result_dir - } - replacement_dict.update(parsed_template_args) - - # create directories - if os.path.exists(output_base_dir) and input("%s exists. Delete? 
(y/n)" %output_base_dir).startswith("y"): - shutil.rmtree(output_base_dir) - if not os.path.exists(outputs_folder): - os.mkdir(outputs_folder) - if not os.path.exists(output_base_dir): - os.mkdir(output_base_dir) - if not os.path.exists(result_dir): - os.mkdir(result_dir) - - # divide script - divided_runscript_template = [[]] - for line in runscript_template: - if line.startswith("#JOBSCRIPT START"): - assert len(divided_runscript_template) == 1 - divided_runscript_template += [[]] - - divided_runscript_template[-1].append(line) - - if line.startswith("#JOBSCRIPT END"): - assert len(divided_runscript_template) == 2 - divided_runscript_template += [[]] - replacement_dicts = list() - replacement_dict_keys = set() - - - # iterate over all runs - for run_number in runs_range: - replacement_dict["RUN_NUMBER"] = run_number - - for config_id in configs_range: - replacement_dict["CONFIG_ID"] = config_id - replacement_dict["CONFIG_FILE"] = all_configs[config_id] - - # get autonet - dm = DataManager() - dm.problem_type = { - "feature_classification": ProblemType.FeatureClassification, - "feature_multilabel": ProblemType.FeatureMultilabel, - "feature_regression": ProblemType.FeatureRegression - }[benchmark_config["problem_type"]] - autonet = CreateAutoNet().fit(benchmark_config, dm)["autonet"] - autonet_config_file = benchmark_config["autonet_configs"][config_id] - - for instance_id in instances_range: - replacement_dict["INSTANCE_ID"] = instance_id - replacement_dict["INSTANCE_FILE"] = all_instances[instance_id] - - # read autonet config - SetAutoNetConfig().fit(benchmark_config, autonet, autonet_config_file, dm, all_instances[instance_id]) - autonet_config = autonet.get_current_autonet_config() - - # add autonet config specific stuff to replacement dict - replacement_dict["NUM_NODES"] = autonet_config["min_workers"] + (0 if autonet_config["run_worker_on_master_node"] else 1) - replacement_dict["MEMORY_LIMIT_MB"] = autonet_config["memory_limit_mb"] + args.memory_bonus - time_limit_base = autonet_config["max_runtime"] if autonet_config["max_runtime"] < float("inf") else (benchmark_config["time_limit"] - max(args.time_bonus)) - replacement_dict.update({("TIME_LIMIT[%s]" % i): int(t + time_limit_base) for i, t in enumerate(args.time_bonus)}) - replacement_dict["NUM_PROCESSES"] = max(autonet_config["torch_num_threads"], int(ceil(replacement_dict["MEMORY_LIMIT_MB"] / benchmark_config["memory_per_core"]))) - - replacement_dicts.append(copy(replacement_dict)) - replacement_dict_keys |= set(replacement_dict.keys()) - - # build final runscript - for k in range(ceil(len(replacement_dicts) / args.num_condense)): - output_dir = os.path.join(output_base_dir, "part_%s" % k) - if os.path.exists(output_dir) and input("%s exists. Delete? 
(y/n)" % output_dir).startswith("y"): - shutil.rmtree(output_dir) - os.mkdir(output_dir) - replacement_dicts_split = replacement_dicts[k * args.num_condense : (k + 1) * args.num_condense] - - # unify replacement dict - unified_replacement_dict = {"OUTPUT_DIR": output_dir} - for key in replacement_dict_keys: - all_values = [replacement_dict[key] for replacement_dict in replacement_dicts_split] - - if key == "NUM_NODES": - unified_replacement_dict[key] = sum(map(int, all_values)) - elif key in ["NUM_PROCESSES", "MEMORY_LIMIT_MB"] or key.startswith("TIME_LIMIT"): - unified_replacement_dict[key] = max(map(int, all_values)) - elif all(all_values[0] == v for v in all_values): - unified_replacement_dict[key] = all_values[0] - - final_runscript = [] - for i, part in enumerate(divided_runscript_template): - if i != 1: - pattern = re.compile("|".join(map(lambda x: re.escape("$${" + x + "}"), unified_replacement_dict.keys()))) - runscript = [pattern.sub(lambda x: str(unified_replacement_dict[x.group()[3:-1]]), l) for l in part] - - # DEFINE STATEMENTS - for j in range(len(runscript)): - if runscript[j].startswith("#DEFINE"): - runscript[j] = "GLOBAL_%s=%s\n" % (runscript[j].split()[1], " ".join(runscript[j].split()[2:])) - final_runscript.extend(runscript) - continue - - final_runscript += ["TASK_ID=$GLOBAL_TASK_ID\n"] - for j, replacement_dict in enumerate(replacement_dicts_split): - replacement_dict["OUTPUT_DIR"] = output_dir - runscript = [ - "if [ $TASK_ID -gt 0 ]; then\n", - "if [ $TASK_ID -le %s ]; then\n" % replacement_dict["NUM_NODES"], - "RUN_ID=\"${GLOBAL_RUN_ID}_[%s]\"\n" % j] - pattern = re.compile("|".join(map(lambda x: re.escape("$${" + x + "}"), replacement_dict.keys()))) - runscript += [pattern.sub(lambda x: str(replacement_dict[x.group()[3:-1]]), l) for l in part] - runscript += ["fi\n", "fi\n", "TASK_ID=`expr $TASK_ID - %s`\n" % replacement_dict["NUM_NODES"], "\n"] - final_runscript.extend(runscript) - - command = [l[9:] for l in final_runscript if l.startswith("#COMMAND ")][0].strip() - - # save runscript - with open(os.path.join(output_dir, runscript_name), "w") as f: - f.writelines(final_runscript) - - # submit job - os.chdir(output_dir) - print("Calling %s in %s" % (command, os.getcwd())) - try: - command_output = subprocess.check_output(command, shell=True) - except subprocess.CalledProcessError as e: - print("Warning: %s" % e) - command_output = str(e).encode("utf-8") - if not input("Continue (y/n)? 
").startswith("y"): - raise - os.chdir(base_dir) - - # save output and info data - with open(os.path.join(output_dir, "call.info"), "w") as f: - print(command, file=f) - json.dump([unified_replacement_dict, replacement_dicts_split], f) - print("", file=f) - with open(os.path.join(output_dir, "call.info"), "ba") as f: - f.write(command_output) diff --git a/scripts/run_meta.slurm.template b/scripts/run_meta.slurm.template deleted file mode 100644 index 57b962ff8..000000000 --- a/scripts/run_meta.slurm.template +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -#SBATCH -p $${PARTITION} -#SBATCH --mem $${MEMORY_LIMIT_MB} -#SBATCH -c $${NUM_PROCESSES} -#SBATCH -a 1-$${NUM_NODES} -#SBATCH -D $${AUTONET_HOME} -#SBATCH -o $${OUTPUT_DIR}/stdout_%a.txt -#SBATCH -e $${OUTPUT_DIR}/stderr_%a.txt -#SBATCH --mail-type=FAIL -#SBATCH -J $${BENCHMARK_NAME} - -source activate AutoPyTorch -python3 scripts/run_benchmark.py $${BENCHMARK} --run_id $SLURM_JOB_ID --task_id $SLURM_ARRAY_TASK_ID --partial_benchmark $${INSTANCE_ID} $${CONFIG_ID} $${RUN_NUMBER} --host_config $${HOST_CONFIG} --result_dir $${RESULT_DIR} - -#COMMAND sbatch run_meta.slurm -#HOST_CONFIG configs/hosts/meta.txt -#TEMPLATE_ARGUMENT PARTITION ml_cpu-ivy \ No newline at end of file diff --git a/scripts/run_meta_gpu.slurm.template b/scripts/run_meta_gpu.slurm.template deleted file mode 100644 index a8a30ea61..000000000 --- a/scripts/run_meta_gpu.slurm.template +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -#SBATCH -p $${PARTITION} -#SBATCH --mem $${MEMORY_LIMIT_MB} -#SBATCH -c $${NUM_PROCESSES} -#SBATCH -a 1-$${NUM_NODES} -#SBATCH -D $${AUTONET_HOME} -#SBATCH -o $${OUTPUT_DIR}/stdout_%a.txt -#SBATCH -e $${OUTPUT_DIR}/stderr_%a.txt -#SBATCH --mail-type=FAIL -#SBATCH -J $${BENCHMARK_NAME} -#SBATCH --gres=gpu:1 - -source activate AutoPyTorch -python3 scripts/run_benchmark.py $${BENCHMARK} --run_id $SLURM_JOB_ID --task_id $SLURM_ARRAY_TASK_ID --partial_benchmark $${INSTANCE_ID} $${CONFIG_ID} $${RUN_NUMBER} --host_config $${HOST_CONFIG} --result_dir $${RESULT_DIR} - -#COMMAND sbatch run_meta_gpu.slurm -#HOST_CONFIG configs/hosts/meta.txt -#TEMPLATE_ARGUMENT PARTITION meta_gpu-ti \ No newline at end of file diff --git a/scripts/run_nemo.moab.template b/scripts/run_nemo.moab.template deleted file mode 100644 index babec8714..000000000 --- a/scripts/run_nemo.moab.template +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -#MOAB -t 1-$${NUM_NODES} -#MOAB -N $${BENCHMARK_NAME} -#MOAB -l nodes=1:ppn=$${NUM_PROCESSES},walltime=$${TIME_LIMIT[2]},pmem=$${MEMORY_LIMIT_MB}MB -#MOAB -E - -cd $${OUTPUT_DIR} - -JOBID=(${MOAB_JOBID//[/ }) -COMMAND="python $${AUTONET_HOME}/scripts/run_benchmark.py $${BENCHMARK} --partial_benchmark $${INSTANCE_ID} $${CONFIG_ID} $${RUN_NUMBER} --host_config $${HOST_CONFIG} --result_dir $TMPDIR/benchmark_results --run_id $JOBID --task_id $MOAB_JOBARRAYINDEX" -echo "Run benchmark: $COMMAND" -timeout -k $${TIME_LIMIT[1]} $${TIME_LIMIT[0]} $COMMAND 1> $TMPDIR/stdout.txt 2> $TMPDIR/stderr.txt - -echo "Job finished. 
Copy output to $${OUTPUT_DIR}" -cp $TMPDIR/stdout.txt $${OUTPUT_DIR}/stdout_${MOAB_JOBARRAYINDEX}.txt -cp $TMPDIR/stderr.txt $${OUTPUT_DIR}/stderr_${MOAB_JOBARRAYINDEX}.txt - -if [ $MOAB_JOBARRAYINDEX -eq 1 ] -then - echo "Copy benchmark results" - cp -r $TMPDIR/benchmark_results/ $${RESULT_DIR} -fi - -#COMMAND msub run_nemo.moab -#HOST_CONFIG configs/hosts/nemo.txt \ No newline at end of file diff --git a/scripts/run_nemo_singularity.moab.template b/scripts/run_nemo_singularity.moab.template deleted file mode 100644 index e8bf9a7d7..000000000 --- a/scripts/run_nemo_singularity.moab.template +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -#MOAB -t 1-$${NUM_NODES} -#MOAB -N $${BENCHMARK_NAME} -#MOAB -l nodes=1:ppn=$${NUM_PROCESSES},walltime=$${TIME_LIMIT[2]},pmem=$${MEMORY_LIMIT_MB}MB -#MOAB -E - -cd $${OUTPUT_DIR} -cp $${BASE_DIR}/$${IMAGE} $TMPDIR/image.simg -cp $${BENCHMARK} $TMPDIR/$${BENCHMARK_NAME}.txt -cp $${HOST_CONFIG} $TMPDIR/host_config.txt -module load tools/singularity/2.6 -cd $TMPDIR - -#DEFINE TASK_ID $MOAB_JOBARRAYINDEX -#DEFINE RUN_ID (${MOAB_JOBID//[/ }) - -#JOBSCRIPT START -COMMAND="python /data/Auto-PyTorch/scripts/run_benchmark.py /tmp/$${BENCHMARK_NAME}.txt --partial_benchmark $${INSTANCE_ID} $${CONFIG_ID} $${RUN_NUMBER} --host_config /tmp/host_config.txt --result_dir /tmp/benchmark_results --run_id $RUN_ID --task_id $TASK_ID" -COMMAND="singularity exec -B $${AUTONET_HOME}:/external_autonet_home/ -B $TMPDIR:/tmp image.simg $COMMAND" -echo "Run benchmark: $COMMAND" -timeout -k $${TIME_LIMIT[1]} $${TIME_LIMIT[0]} $COMMAND 1> $TMPDIR/stdout.txt 2> $TMPDIR/stderr.txt - -if [ $TASK_ID -eq 1 ] -then - echo "Copy benchmark results" - cd $TMPDIR - cp -r benchmark_results/* $${RESULT_DIR} -fi -#JOBSCRIPT END - -echo "Job finished. Copy output to $${OUTPUT_DIR}" -cd $TMPDIR -cp stdout.txt $${OUTPUT_DIR}/stdout_${MOAB_JOBARRAYINDEX}.txt -cp stderr.txt $${OUTPUT_DIR}/stderr_${MOAB_JOBARRAYINDEX}.txt - -#COMMAND msub run_nemo_singularity.moab -#HOST_CONFIG configs/hosts/nemo_singularity.txt -#TEMPLATE_ARGUMENT IMAGE Auto-PyTorch.simg \ No newline at end of file diff --git a/scripts/visualize_benchmark.py b/scripts/visualize_benchmark.py deleted file mode 100644 index bff95001b..000000000 --- a/scripts/visualize_benchmark.py +++ /dev/null @@ -1,100 +0,0 @@ - - -import os, sys -sys.path.append(os.path.abspath(os.path.join(__file__, "..", ".."))) - -from autoPyTorch.utils.config.config_file_parser import ConfigFileParser -from autoPyTorch.utils.benchmarking.benchmark import Benchmark - -import argparse - -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Run benchmarks for autonet.') - parser.add_argument("--run_id_range", default=None, help="An id for the run. A range of run ids can be given: start-stop.") - parser.add_argument("--partial_benchmark", default=None, nargs="+", help="Only run a part of the benchmark. Run other parts later or in parallel. 3-tuple: instance_slice, autonet_config_slice, run_number_range.") - parser.add_argument("--result_dir", default=None, help="Override result dir in benchmark config.") - parser.add_argument("--host_config", default=None, help="Override some configs according to host specifics.") - parser.add_argument("--plot_logs", default=[], nargs="+", help="List of metrics to plot. 
If not given, plot metric given in autonet config.") - parser.add_argument("--only_finished_runs", action="store_true", help="Skip run folders, that do not contain a summary.json") - parser.add_argument("--output_folder", default=None, help="Store the plots as pdf. Specify an output folder.") - parser.add_argument("--scale_uncertainty", default=1, type=float, help="Scale the uncertainty") - parser.add_argument("--agglomeration", default="mean", help="Choose between mean and median.") - parser.add_argument("--font_size", default=12, type=int, help="Set font size.") - parser.add_argument("--prefixes", default=["val"], type=str, nargs="+", help="The prefixes to plot. Choices: none, train, val, test, ensemble, ensemble_test") - parser.add_argument("--additional_trajectories", default=[], type=str, nargs="+", help="Path to json file describing additional trajectories") - parser.add_argument("--do_label_rename", action="store_true", help="Whether the default labels should be renamed") - parser.add_argument("--skip_dataset_plots", action="store_true", help="Whether the plots for each dataset should be skipped") - parser.add_argument("--skip_ranking_plot", action="store_true", help="Whether the ranking plot should be skipped") - parser.add_argument("--skip_average_plot", action="store_true", help="Whether the average plot should be skipped") - parser.add_argument("--plot_markers", action="store_true", help="Whether markers should be plotted") - parser.add_argument("--plot_individual", action="store_true", help="Whether the individual trajectories should be plotted") - parser.add_argument("--plot_type", default="values", help="Whether to plot metric values or losses") - parser.add_argument("--xscale", default="log", type=str, help="Whether x should be in logscale") - parser.add_argument("--yscale", default="linear", help="Whether x should be in logscale") - parser.add_argument("--xmin", default=None, type=float, help="Limit the x axis") - parser.add_argument("--xmax", default=None, type=float, help="Limit the x axis") - parser.add_argument("--ymin", default=None, type=float, help="Limit the y axis") - parser.add_argument("--ymax", default=None, type=float, help="Limit the y axis") - parser.add_argument("--value_multiplier", default=1, type=float, help="Multiply each value") - parser.add_argument('benchmark', help='The benchmark to visualize') - - args = parser.parse_args() - - run_id_range = args.run_id_range - if args.run_id_range is not None: - if "-" in args.run_id_range: - run_id_range = range(int(args.run_id_range.split("-")[0]), int(args.run_id_range.split("-")[1]) + 1) - else: - run_id_range = range(int(args.run_id_range), int(args.run_id_range) + 1) - - benchmark_config_file = args.benchmark - host_config_file = args.host_config - - benchmark = Benchmark() - config_parser = benchmark.get_benchmark_config_file_parser() - - benchmark_config = config_parser.read(benchmark_config_file) - benchmark_config.update(config_parser.read(host_config_file)) - - if (args.result_dir is not None): - benchmark_config['result_dir'] = os.path.abspath(args.result_dir) - - if (args.partial_benchmark is not None): - if (len(args.partial_benchmark) > 0): - benchmark_config['instance_slice'] = args.partial_benchmark[0] - if (len(args.partial_benchmark) > 1): - benchmark_config['autonet_config_slice'] = args.partial_benchmark[1] - if (len(args.partial_benchmark) > 2): - benchmark_config['run_number_range'] = args.partial_benchmark[2] - - benchmark_config['run_id_range'] = run_id_range - 
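For reference, the --run_id_range option of the removed visualization script accepts either a single run id or an inclusive start-stop range. A small sketch of that parsing with illustrative input:

# "3-5" selects runs 3, 4 and 5 (the stop value is inclusive); "7" selects only run 7.
run_id_range = "3-5"
if "-" in run_id_range:
    start, stop = run_id_range.split("-")
    selected = range(int(start), int(stop) + 1)
else:
    selected = range(int(run_id_range), int(run_id_range) + 1)
assert list(selected) == [3, 4, 5]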
benchmark_config['plot_logs'] = args.plot_logs - benchmark_config['only_finished_runs'] = args.only_finished_runs - benchmark_config['output_folder'] = args.output_folder - benchmark_config['scale_uncertainty'] = args.scale_uncertainty - benchmark_config['agglomeration'] = args.agglomeration - benchmark_config['font_size'] = args.font_size - benchmark_config['prefixes'] = [p if p != "none" else "" for p in args.prefixes] - benchmark_config['additional_trajectories'] = args.additional_trajectories - benchmark_config['benchmark_name'] = os.path.basename(args.benchmark).split(".")[0] - benchmark_config['label_rename'] = args.do_label_rename - benchmark_config["skip_dataset_plots"] = args.skip_dataset_plots - benchmark_config["skip_ranking_plot"] = args.skip_ranking_plot - benchmark_config["skip_average_plot"] = args.skip_average_plot - benchmark_config["xscale"] = args.xscale - benchmark_config["yscale"] = args.yscale - benchmark_config["xmin"] = args.xmin - benchmark_config["xmax"] = args.xmax - benchmark_config["ymin"] = args.ymin - benchmark_config["ymax"] = args.ymax - benchmark_config["plot_individual"] = args.plot_individual - benchmark_config["plot_markers"] = args.plot_markers - benchmark_config["plot_type"] = args.plot_type - benchmark_config["value_multiplier"] = args.value_multiplier - - benchmark.visualize_benchmark(**benchmark_config) diff --git a/setup.cfg b/setup.cfg old mode 100644 new mode 100755 index b88034e41..10957bf35 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,15 @@ [metadata] description-file = README.md + +[flake8] +application-import-names = autoPyTorch +max-line-length = 120 +ignore = W605,E402,W503 +show-source = True + +[mypy] +ignore_missing_imports = True +follow_imports=skip +disallow_untyped_decorators = True +disallow_incomplete_defs = True +disallow_untyped_defs = True \ No newline at end of file diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 10c84cdae..07ab30a8c --- a/setup.py +++ b/setup.py @@ -1,24 +1,14 @@ -import os import setuptools with open("README.md", "r") as f: long_description = f.read() requirements = [] -with open('requirements.txt', 'r') as f: +with open("requirements.txt", "r") as f: for line in f: requirements.append(line.strip()) -optional_requirements = [] -with open('optional-requirements.txt', 'r') as f: - for line in f: - optional_requirements.append(line.strip()) - -add_presets = [] -for dirname, subdirs, files in os.walk(os.path.join('autoPyTorch', 'core', 'presets')): - add_presets.extend([os.path.join(dirname, f) for f in files]) - - +# noinspection PyInterpreter setuptools.setup( name="autoPyTorch", version="0.0.2", @@ -29,7 +19,7 @@ url="https://github.com/automl/Auto-PyTorch", long_description_content_type="text/markdown", license="3-clause BSD", - keywords="machine learning algorithm configuration hyperparameter " + keywords="machine learning algorithm configuration hyperparameter" "optimization tuning neural architecture deep learning", packages=setuptools.find_packages(), classifiers=[ @@ -40,10 +30,32 @@ "Programming Language :: Python :: 3", "License :: OSI Approved :: BSD License", ], - python_requires='>=3', + python_requires='>=3', platforms=['Linux'], install_requires=requirements, - data_files=[('', add_presets)], include_package_data=True, -# extras_require=optional_requirements + extras_require={ + "test": [ + "matplotlib", + "pytest", + "pytest-xdist", + "pytest-timeout", + "flaky", + "pyarrow", + "pre-commit", + "pytest-cov", + "codecov", + "pep8", + "mypy", + ], + "examples": [ + 
"matplotlib", + "jupyter", + "notebook", + "seaborn", + ], + "docs": ["sphinx", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc"], + }, + test_suite="pytest", + data_files=[('configs', ['autoPyTorch/configs/default_pipeline_options.json'])] ) diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..1e66ed72a --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,314 @@ +import os +import re +import shutil +import time + +import dask +import dask.distributed + +import numpy as np + +import pytest + +from sklearn.datasets import fetch_openml, make_classification + +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.utils.backend import create +from autoPyTorch.utils.pipeline import get_dataset_requirements + + +def slugify(text): + return re.sub(r'[\[\]]+', '-', text.lower()) + + +@pytest.fixture(scope="function") +def backend(request): + + test_dir = os.path.dirname(__file__) + tmp = slugify(os.path.join( + test_dir, '.tmp__%s__%s' % (request.module.__name__, request.node.name))) + output = slugify(os.path.join( + test_dir, '.output__%s__%s' % (request.module.__name__, request.node.name))) + + for dir in (tmp, output): + for i in range(10): + if os.path.exists(dir): + try: + shutil.rmtree(dir) + break + except OSError: + time.sleep(1) + + # Make sure the folders we wanna create do not already exist. + backend = create( + tmp, + output, + delete_tmp_folder_after_terminate=True, + delete_output_folder_after_terminate=True, + ) + + def get_finalizer(tmp_dir, output_dir): + def session_run_at_end(): + for dir in (tmp_dir, output_dir): + for i in range(10): + if os.path.exists(dir): + try: + shutil.rmtree(dir) + break + except OSError: + time.sleep(1) + return session_run_at_end + request.addfinalizer(get_finalizer(tmp, output)) + + return backend + + +@pytest.fixture(scope="function") +def tmp_dir(request): + return _dir_fixture('tmp', request) + + +@pytest.fixture(scope="function") +def output_dir(request): + return _dir_fixture('output', request) + + +def _dir_fixture(dir_type, request): + + test_dir = os.path.dirname(__file__) + dir = os.path.join( + test_dir, '.%s__%s__%s' % (dir_type, request.module.__name__, request.node.name) + ) + + for i in range(10): + if os.path.exists(dir): + try: + shutil.rmtree(dir) + break + except OSError: + pass + + def get_finalizer(dir): + def session_run_at_end(): + for i in range(10): + if os.path.exists(dir): + try: + shutil.rmtree(dir) + break + except OSError: + time.sleep(1) + + return session_run_at_end + + request.addfinalizer(get_finalizer(dir)) + + return dir + + +@pytest.fixture(scope="function") +def dask_client(request): + """ + This fixture is meant to be called one per pytest session. + The goal of this function is to create a global client at the start + of the testing phase. We can create clients at the start of the + session (this case, as above scope is session), module, class or function + level. + The overhead of creating a dask client per class/module/session is something + that travis cannot handle, so we rely on the following execution flow: + 1- At the start of the pytest session, session_run_at_beginning fixture is called + to create a global client on port 4567. + 2- Any test that needs a client, would query the global scheduler that allows + communication through port 4567. + 3- At the end of the test, we shutdown any remaining work being done by any worker + in the client. This has a maximum 10 seconds timeout. 
The client object will afterwards + be empty and when pytest closes, it can safely delete the object without hanging. + More info on this file can be found on: + https://docs.pytest.org/en/stable/writing_plugins.html#conftest-py-plugins + """ + dask.config.set({'distributed.worker.daemon': False}) + + client = dask.distributed.Client(n_workers=2, threads_per_worker=1, processes=False) + + def get_finalizer(address): + def session_run_at_end(): + client = dask.distributed.get_client(address) + client.shutdown() + client.close() + del client + return session_run_at_end + request.addfinalizer(get_finalizer(client.scheduler_info()['address'])) + + return client + + +# Dataset fixture to test different scenarios on a scalable way +# Please refer to https://docs.pytest.org/en/stable/fixture.html for details +# on what fixtures are +@pytest.fixture +def fit_dictionary(request): + return request.getfixturevalue(request.param) + + +@pytest.fixture +def fit_dictionary_numerical_only(backend): + X, y = make_classification( + n_samples=200, + n_features=4, + n_informative=3, + n_redundant=1, + n_repeated=0, + n_classes=2, + n_clusters_per_class=2, + shuffle=True, + random_state=0 + ) + datamanager = TabularDataset( + X=X, Y=y, + X_test=X, Y_test=y, + ) + + info = {'task_type': datamanager.task_type, + 'output_type': datamanager.output_type, + 'issparse': datamanager.issparse, + 'numerical_columns': datamanager.numerical_columns, + 'categorical_columns': datamanager.categorical_columns} + + dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info)) + fit_dictionary = { + 'X_train': X, + 'y_train': y, + 'dataset_properties': dataset_properties, + 'job_id': 'example_tabular_classification_1', + 'device': 'cpu', + 'budget_type': 'epochs', + 'epochs': 1, + 'torch_num_threads': 1, + 'early_stopping': 20, + 'working_dir': '/tmp', + 'use_tensorboard_logger': True, + 'use_pynisher': False, + 'metrics_during_training': True, + 'split_id': 0, + 'backend': backend, + } + backend.save_datamanager(datamanager) + return fit_dictionary + + +@pytest.fixture +def fit_dictionary_categorical_only(backend): + X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] + X = X[categorical_columns] + X = X.iloc[0:200] + y = y.iloc[0:200] + datamanager = TabularDataset( + X=X, Y=y, + X_test=X, Y_test=y, + ) + info = {'task_type': datamanager.task_type, + 'output_type': datamanager.output_type, + 'issparse': datamanager.issparse, + 'numerical_columns': datamanager.numerical_columns, + 'categorical_columns': datamanager.categorical_columns} + + dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info)) + fit_dictionary = { + 'X_train': X, + 'y_train': y, + 'dataset_properties': dataset_properties, + 'job_id': 'example_tabular_classification_1', + 'device': 'cpu', + 'budget_type': 'epochs', + 'epochs': 1, + 'torch_num_threads': 1, + 'early_stopping': 20, + 'working_dir': '/tmp', + 'use_tensorboard_logger': True, + 'use_pynisher': False, + 'metrics_during_training': True, + 'split_id': 0, + 'backend': backend, + } + datamanager = TabularDataset( + X=X, Y=y, + X_test=X, Y_test=y, + ) + backend.save_datamanager(datamanager) + return fit_dictionary + + +@pytest.fixture +def fit_dictionary_num_and_categorical(backend): + X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X = X.iloc[0:200] + y = y.iloc[0:200] + datamanager = TabularDataset( + X=X, Y=y, + 
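The fit_dictionary fixture above dispatches to one of the concrete scenario fixtures via request.getfixturevalue, so test modules can cover several data scenarios through pytest's indirect parametrization. A minimal usage sketch (the test name is illustrative; the asserted keys appear in the fixtures above):

import pytest


@pytest.mark.parametrize(
    'fit_dictionary',
    ['fit_dictionary_numerical_only', 'fit_dictionary_categorical_only'],
    indirect=True,
)
def test_fit_dictionary_contents(fit_dictionary):
    # The string parameter is resolved to the fixture of the same name by
    # request.getfixturevalue(request.param) in conftest.py.
    assert 'X_train' in fit_dictionary
    assert fit_dictionary['backend'] is not None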
X_test=X, Y_test=y, + ) + info = {'task_type': datamanager.task_type, + 'output_type': datamanager.output_type, + 'issparse': datamanager.issparse, + 'numerical_columns': datamanager.numerical_columns, + 'categorical_columns': datamanager.categorical_columns} + + dataset_properties = datamanager.get_dataset_properties(get_dataset_requirements(info)) + + fit_dictionary = { + 'X_train': X, + 'y_train': y, + 'dataset_properties': dataset_properties, + 'job_id': 'example_tabular_classification_1', + 'device': 'cpu', + 'budget_type': 'epochs', + 'epochs': 1, + 'torch_num_threads': 1, + 'early_stopping': 20, + 'working_dir': '/tmp', + 'use_tensorboard_logger': True, + 'use_pynisher': False, + 'metrics_during_training': True, + 'split_id': 0, + 'backend': backend, + } + backend.save_datamanager(datamanager) + return fit_dictionary + + +@pytest.fixture +def dataset(request): + return request.getfixturevalue(request.param) + + +@pytest.fixture +def dataset_traditional_classifier_num_only(): + X, y = make_classification( + n_samples=200, + n_features=4, + n_informative=3, + n_redundant=1, + n_repeated=0, + n_classes=2, + n_clusters_per_class=2, + shuffle=True, + random_state=0 + ) + return X, y + + +@pytest.fixture +def dataset_traditional_classifier_categorical_only(): + X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + categorical_columns = [column for column in X.columns if X[column].dtype.name == 'category'] + X = X[categorical_columns] + X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + return X, y + + +@pytest.fixture +def dataset_traditional_classifier_num_categorical(): + X, y = fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + y = y.astype(np.int) + X, y = X[:200].to_numpy(), y[:200].to_numpy().astype(np.int) + return X, y diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py new file mode 100644 index 000000000..60973c722 --- /dev/null +++ b/test/test_api/test_api.py @@ -0,0 +1,168 @@ +import os +import pickle + +import numpy as np + +import pytest + + +import sklearn +import sklearn.datasets +from sklearn.ensemble import VotingClassifier + +import torch + +from autoPyTorch.api.tabular_classification import TabularClassificationTask +from autoPyTorch.datasets.resampling_strategy import ( + CrossValTypes, + HoldoutValTypes, +) +from autoPyTorch.datasets.tabular_dataset import TabularDataset + + +# Fixtures +# ======== + + +# Test +# ======== +@pytest.mark.parametrize('openml_id', (40981, )) +@pytest.mark.parametrize('resampling_strategy', (HoldoutValTypes.holdout_validation, + CrossValTypes.k_fold_cross_validation, )) +def test_classification(openml_id, resampling_strategy, backend): + + # Get the data and check that contents of data-manager make sense + X, y = sklearn.datasets.fetch_openml( + data_id=int(openml_id), + return_X_y=True, as_frame=True + ) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1) + datamanager = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test, + resampling_strategy=resampling_strategy, + dataset_name=str(openml_id), + ) + assert datamanager.task_type == 'tabular_classification' + expected_num_splits = 1 if resampling_strategy == HoldoutValTypes.holdout_validation else 3 + assert len(datamanager.splits) == expected_num_splits + + # Search for a good configuration + estimator = TabularClassificationTask(backend=backend) + estimator.search( + dataset=datamanager, + optimize_metric='accuracy', + total_walltime_limit=150, + 
func_eval_time_limit=30, + traditional_per_total_budget=0 + ) + + # TODO: check for budget + + # Check for the created files + tmp_dir = estimator._backend.temporary_directory + loaded_datamanager = estimator._backend.load_datamanager() + assert len(loaded_datamanager.train_tensors) == len(datamanager.train_tensors) + + expected_files = [ + 'smac3-output/run_1/configspace.json', + 'smac3-output/run_1/runhistory.json', + 'smac3-output/run_1/scenario.txt', + 'smac3-output/run_1/stats.json', + 'smac3-output/run_1/train_insts.txt', + 'smac3-output/run_1/trajectory.json', + '.autoPyTorch/datamanager.pkl', + '.autoPyTorch/ensemble_read_preds.pkl', + '.autoPyTorch/start_time_1', + '.autoPyTorch/ensemble_history.json', + '.autoPyTorch/ensemble_read_scores.pkl', + '.autoPyTorch/true_targets_ensemble.npy', + ] + for expected_file in expected_files: + assert os.path.exists(os.path.join(tmp_dir, expected_file)), expected_file + + # Check that smac was able to find proper models + succesful_runs = [run_value.status for run_value in estimator.run_history.data.values( + ) if 'SUCCESS' in str(run_value.status)] + assert len(succesful_runs) > 1, estimator.run_history.data.items() + + # Search for an existing run key in disc. A individual model might have + # a timeout and hence was not written to disc + for i, (run_key, value) in enumerate(estimator.run_history.data.items()): + if i == 0: + # Ignore dummy run + continue + if 'SUCCESS' not in str(value.status): + continue + + run_key_model_run_dir = estimator._backend.get_numrun_directory( + estimator.seed, run_key.config_id, run_key.budget) + if os.path.exists(run_key_model_run_dir): + break + + if resampling_strategy == HoldoutValTypes.holdout_validation: + model_file = os.path.join(run_key_model_run_dir, + f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.model") + assert os.path.exists(model_file), model_file + model = estimator._backend.load_model_by_seed_and_id_and_budget( + estimator.seed, run_key.config_id, run_key.budget) + assert isinstance(model.named_steps['network'].choice.get_network(), torch.nn.Module) + elif resampling_strategy == CrossValTypes.k_fold_cross_validation: + model_file = os.path.join( + run_key_model_run_dir, + f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.cv_model" + ) + assert os.path.exists(model_file), model_file + model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( + estimator.seed, run_key.config_id, run_key.budget) + assert isinstance(model, VotingClassifier) + assert len(model.estimators_) == 3 + assert isinstance(model.estimators_[0].named_steps['network'].choice.get_network(), + torch.nn.Module) + else: + pytest.fail(resampling_strategy) + + # Make sure that predictions on the test data are printed and make sense + test_prediction = os.path.join(run_key_model_run_dir, + estimator._backend.get_prediction_filename( + 'test', estimator.seed, run_key.config_id, + run_key.budget)) + assert os.path.exists(test_prediction), test_prediction + assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] + + # Also, for ensemble builder, the OOF predictions should be there and match + # the Ground truth that is also physically printed to disk + ensemble_prediction = os.path.join(run_key_model_run_dir, + estimator._backend.get_prediction_filename( + 'ensemble', + estimator.seed, run_key.config_id, + run_key.budget)) + assert os.path.exists(ensemble_prediction), ensemble_prediction + assert np.shape(np.load(ensemble_prediction, allow_pickle=True))[0] == np.shape( + 
estimator._backend.load_targets_ensemble() + )[0] + + # Ensemble Builder produced an ensemble + estimator.ensemble_ is not None + + # There should be a weight for each element of the ensemble + assert len(estimator.ensemble_.identifiers_) == len(estimator.ensemble_.weights_) + + y_pred = estimator.predict(X_test) + + assert np.shape(y_pred)[0] == np.shape(X_test)[0] + + score = estimator.score(y_pred, y_test) + assert 'accuracy' in score + + # Check that we can pickle + # Test pickle + dump_file = os.path.join(estimator._backend.temporary_directory, 'dump.pkl') + + with open(dump_file, 'wb') as f: + pickle.dump(estimator, f) + + with open(dump_file, 'rb') as f: + restored_estimator = pickle.load(f) + restored_estimator.predict(X_test) diff --git a/test/test_datasets/test_image_dataset.py b/test/test_datasets/test_image_dataset.py new file mode 100644 index 000000000..151685703 --- /dev/null +++ b/test/test_datasets/test_image_dataset.py @@ -0,0 +1,38 @@ +import unittest + +import numpy as np + +import torch + +import torchvision + +from autoPyTorch.datasets.image_dataset import ImageDataset + + +@unittest.skip(reason="Image Dataset issue") +class DatasetTest(unittest.TestCase): + def runTest(self): + dataset = torchvision.datasets.FashionMNIST(root='../../datasets/', + transform=torchvision.transforms.ToTensor(), + download=True) + ds = ImageDataset(dataset) + self.assertIsInstance(ds.mean, torch.Tensor) + self.assertIsInstance(ds.std, torch.Tensor) + for img, _ in ds.train_tensors: + self.assertIsInstance(img, torch.Tensor) + + +@unittest.skip(reason="Image Dataset issue") +class NumpyArrayTest(unittest.TestCase): + def runTest(self): + matrix = np.random.randint(0, 255, (15, 3, 10, 10)).astype(np.float) + target_df = np.random.randint(0, 5, (15, )).astype(np.float) + ds = ImageDataset((matrix, target_df)) + self.assertIsInstance(ds.mean, torch.Tensor) + self.assertIsInstance(ds.std, torch.Tensor) + for img, _ in ds.train_tensors: + self.assertIsInstance(img, torch.Tensor) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_datasets/test_tabular_dataset.py b/test/test_datasets/test_tabular_dataset.py new file mode 100644 index 000000000..dfc72be77 --- /dev/null +++ b/test/test_datasets/test_tabular_dataset.py @@ -0,0 +1,117 @@ +import typing +import unittest + +import numpy as np + +import pandas as pd + +import sklearn.datasets +import sklearn.model_selection + +from autoPyTorch.datasets.tabular_dataset import DataTypes, TabularDataset +from autoPyTorch.utils.backend import create +from autoPyTorch.utils.pipeline import get_dataset_requirements + + +class DataFrameTest(unittest.TestCase): + def runTest(self): + df = pd.DataFrame([['a', 0.1, 1], ['b', 0.2, np.nan]]) + target_df = pd.Series([1, 2]) + ds = TabularDataset(df, target_df) + self.assertEqual(ds.data_types, [DataTypes.String, DataTypes.Float, DataTypes.Canonical]) + self.assertEqual(set(ds.itovs[2]), {np.nan, 1}) + self.assertEqual(set(ds.itovs[0]), {np.nan, 'a', 'b'}) + + self.assertEqual(ds.vtois[0]['a'], 1) + self.assertEqual(ds.vtois[0][np.nan], 0) + self.assertEqual(ds.vtois[0][pd._libs.NaT], 0) + self.assertEqual(ds.vtois[0][pd._libs.missing.NAType()], 0) + self.assertTrue((ds.nan_mask == np.array([[0, 0, 0], [0, 0, 1]], dtype=np.bool)).all()) + + +class NumpyArrayTest(unittest.TestCase): + def runTest(self): + matrix = np.array([(0, 0.1, 1), (1, np.nan, 3)], dtype='f4, f4, i4') + target_df = pd.Series([1, 2]) + ds = TabularDataset(matrix, target_df) + self.assertEqual(ds.data_types, 
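Condensed, the end-to-end flow exercised by test_classification above is roughly the following; the arguments and time limits are the ones used in the test, while the temporary backend fixture the test injects is omitted here, so treat this as a sketch rather than a canonical example:

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask
from autoPyTorch.datasets.tabular_dataset import TabularDataset

X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, random_state=1)

datamanager = TabularDataset(X=X_train, Y=y_train, X_test=X_test, Y_test=y_test)

estimator = TabularClassificationTask()
estimator.search(
    dataset=datamanager,
    optimize_metric='accuracy',
    total_walltime_limit=150,
    func_eval_time_limit=30,
    traditional_per_total_budget=0,
)

y_pred = estimator.predict(X_test)
print(estimator.score(y_pred, y_test))  # the test expects a dict containing 'accuracy'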
[DataTypes.Canonical, DataTypes.Float, DataTypes.Canonical]) + self.assertEqual(set(ds.itovs[2]), {np.nan, 1, 3}) + + self.assertEqual(ds.vtois[0][1], 2) + self.assertEqual(ds.vtois[0][np.nan], 0) + self.assertEqual(ds.vtois[0][pd._libs.NaT], 0) + self.assertEqual(ds.vtois[0][pd._libs.missing.NAType()], 0) + self.assertTrue((ds.nan_mask == np.array([[0, 0, 0], [0, 1, 0]], dtype=np.bool)).all()) + + +def get_data_to_train() -> typing.Dict[str, typing.Any]: + """ + This function returns a fit dictionary that within itself, contains all + the information needed + """ + + # Get the training data for tabular classification + # Move to Australian to showcase numerical vs categorical + X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + random_state=1, + test_size=0.2, + ) + # Fit the pipeline + fit_dictionary = { + 'X_train': X_train, + 'y_train': y_train, + 'X_test': X_test, + 'y_test': y_test, + } + + return fit_dictionary + + +class TabularDatasetTest(unittest.TestCase): + + def test_get_dataset_properties(self): + # Get data to train + fit_dictionary = get_data_to_train() + + # Build a repository with random fitted models + try: + backend = create(temporary_directory='/tmp/autoPyTorch_ensemble_test_tmp', + output_directory='/tmp/autoPyTorch_ensemble_test_out', + delete_tmp_folder_after_terminate=False) + except Exception: + self.assertRaises(FileExistsError) + return unittest.skip("File already exists") + + fit_dictionary['backend'] = backend + + # Create the directory structure + backend._make_internals_directory() + + # Create a datamanager for this toy problem + datamanager = TabularDataset( + X=fit_dictionary['X_train'], Y=fit_dictionary['y_train'], + X_test=fit_dictionary['X_test'], Y_test=fit_dictionary['y_test'], + ) + backend.save_datamanager(datamanager) + + datamanager = backend.load_datamanager() + info = {'task_type': datamanager.task_type, + 'output_type': datamanager.output_type, + 'issparse': datamanager.issparse, + 'numerical_columns': datamanager.numerical_columns, + 'categorical_columns': datamanager.categorical_columns} + dataset_requirements = get_dataset_requirements(info) + + dataset_properties = datamanager.get_dataset_properties(dataset_requirements) + + self.assertIsInstance(dataset_properties, dict) + for dataset_requirement in dataset_requirements: + self.assertIn(dataset_requirement.name, dataset_properties.keys()) + self.assertIsInstance(dataset_properties[dataset_requirement.name], dataset_requirement.supported_types) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_ensemble/.autoPyTorch/predictions_ensemble_true.npy b/test/test_ensemble/.autoPyTorch/predictions_ensemble_true.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/predictions_ensemble_true.npy differ diff --git a/autoPyTorch/core/presets/feature_classification/full_cs.txt b/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model similarity index 100% rename from autoPyTorch/core/presets/feature_classification/full_cs.txt rename to test/test_ensemble/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model diff --git a/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and 
b/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy differ diff --git a/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy differ diff --git a/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy differ diff --git a/autoPyTorch/core/presets/feature_multilabel/full_cs.txt b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model similarity index 100% rename from autoPyTorch/core/presets/feature_multilabel/full_cs.txt rename to test/test_ensemble/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model diff --git a/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy differ diff --git a/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np differ diff --git a/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy differ diff --git a/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy differ diff --git a/autoPyTorch/core/presets/feature_regression/full_cs.txt b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model similarity index 100% rename from autoPyTorch/core/presets/feature_regression/full_cs.txt rename to test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model diff --git a/autoPyTorch/core/presets/image_classification/full_cs.txt b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model similarity index 100% rename from autoPyTorch/core/presets/image_classification/full_cs.txt rename to test/test_ensemble/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model diff --git a/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy differ diff --git a/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and 
b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy differ diff --git a/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/predictions_ensemble_true.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/predictions_ensemble_true.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/predictions_ensemble_true.npy differ diff --git a/autoPyTorch/core/presets/image_classification_multiple_datasets/full_cs.txt b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model similarity index 100% rename from autoPyTorch/core/presets/image_classification_multiple_datasets/full_cs.txt rename to test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/0.1.0.0.model diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_test_0_1_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_1_0.0/predictions_valid_0_1_0.0.npy differ diff --git a/configs/hosts/local.txt b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model similarity index 100% rename from configs/hosts/local.txt rename to test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/0.2.0.0.model diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.np differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy new file mode 100644 index 
000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_test_0_2_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_2_0.0/predictions_valid_0_2_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.0.0.model new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/0.3.100.0.model new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_test_0_3_100.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/.auto-sklearn/runs/0_3_100.0/predictions_valid_0_3_100.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/predictions_ensemble_true.npy b/test/test_ensemble/data/.autoPyTorch/predictions_ensemble_true.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/predictions_ensemble_true.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model b/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/0.1.0.0.model new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy new file mode 100644 index 000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_test_0_1_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy new file mode 100644 index 
000000000..1b2320113 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_1_0.0/predictions_valid_0_1_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/0.2.0.0.model new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.np differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_test_0_2_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_2_0.0/predictions_valid_0_2_0.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.0.0.model new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/0.3.100.0.model new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_test_0_3_100.0.npy differ diff --git a/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy new file mode 100644 index 000000000..fee3160c8 Binary files /dev/null and b/test/test_ensemble/data/.autoPyTorch/runs/0_3_100.0/predictions_valid_0_3_100.0.npy differ diff --git a/test/test_ensemble/ensemble_utils.py b/test/test_ensemble/ensemble_utils.py new file mode 100644 index 000000000..7b0ab7fb8 --- /dev/null +++ b/test/test_ensemble/ensemble_utils.py @@ -0,0 +1,106 @@ +import os +import shutil +import unittest + +import numpy as np + +from autoPyTorch.ensemble.ensemble_builder import ( + AbstractEnsemble, + EnsembleBuilder, +) +from 
autoPyTorch.pipeline.components.training.metrics.base import make_metric + + +def score_func(prediction: np.ndarray, + solution: np.ndarray, + sample_weight=None, + ) -> float: + return 0.9 + + +mockmetric = make_metric(name='mockmetric', score_func=score_func) + + +class BackendMock(object): + + def __init__(self, target_directory): + this_directory = os.path.abspath( + os.path.dirname(__file__) + ) + shutil.copytree(os.path.join(this_directory, 'data'), target_directory) + self.temporary_directory = target_directory + self.internals_directory = os.path.join(self.temporary_directory, '.autoPyTorch') + + def load_datamanager(self): + manager = unittest.mock.Mock() + manager.__reduce__ = lambda self: (unittest.mock.MagicMock, ()) + array = np.load(os.path.join( + self.temporary_directory, + '.autoPyTorch', + 'runs', '0_3_100.0', + 'predictions_test_0_3_100.0.npy' + )) + manager.test_tensors = (None, array) + return manager + + def load_targets_ensemble(self): + with open(os.path.join( + self.temporary_directory, + ".autoPyTorch", + "predictions_ensemble_true.npy" + ), "rb") as fp: + y = np.load(fp, allow_pickle=True) + return y + + def save_ensemble(self, ensemble, index_run, seed): + return + + def save_predictions_as_txt(self, predictions, subset, idx, prefix, precision): + return + + def get_runs_directory(self) -> str: + return os.path.join(self.temporary_directory, '.autoPyTorch', 'runs') + + def get_numrun_directory(self, seed: int, num_run: int, budget: float) -> str: + return os.path.join(self.get_runs_directory(), '%d_%d_%s' % (seed, num_run, budget)) + + def get_model_filename(self, seed: int, idx: int, budget: float) -> str: + return '%s.%s.%s.model' % (seed, idx, budget) + + +def compare_read_preds(read_preds1, read_preds2): + """ + compares read_preds attribute. 
An alternative to + assertDictEqual, as the dicts contain np arrays, so we have + to use np testing utilities accordingly + """ + + # Both arrays should have the same splits + assert set(read_preds1.keys()) == set(read_preds2.keys()) + + for k, v in read_preds1.items(): + + # Each split should have the same elements + assert set(read_preds1[k].keys()) == set(read_preds2[k].keys()) + + # This level contains the scores/ensembles/etc + for actual_k, actual_v in read_preds1[k].items(): + + # If it is a numpy array, make sure it is the same + if type(actual_v) is np.ndarray: + np.testing.assert_array_equal(actual_v, read_preds2[k][actual_k]) + else: + assert actual_v == read_preds2[k][actual_k] + + +class EnsembleBuilderMemMock(EnsembleBuilder): + + def fit_ensemble(self, selected_keys): + return True + + def predict(self, set_: str, + ensemble: AbstractEnsemble, + selected_keys: list, + n_preds: int, + index_run: int): + np.ones([10000000, 1000000]) diff --git a/test/test_ensemble/test_ensemble.py b/test/test_ensemble/test_ensemble.py new file mode 100644 index 000000000..0c4a2c609 --- /dev/null +++ b/test/test_ensemble/test_ensemble.py @@ -0,0 +1,821 @@ +import os +import pickle +import shutil +import sys +import time +import unittest.mock + +import dask.distributed + +import numpy as np + +import pandas as pd + +import pytest + +from smac.runhistory.runhistory import RunHistory, RunKey, RunValue + +from autoPyTorch.constants import BINARY, MULTICLASS, TABULAR_CLASSIFICATION +from autoPyTorch.ensemble.ensemble_builder import ( + EnsembleBuilder, + EnsembleBuilderManager, + Y_ENSEMBLE, + Y_TEST, +) +from autoPyTorch.ensemble.ensemble_selection import EnsembleSelection +from autoPyTorch.ensemble.singlebest_ensemble import SingleBest +from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy + +this_directory = os.path.dirname(__file__) +sys.path.append(this_directory) +from ensemble_utils import BackendMock, compare_read_preds, EnsembleBuilderMemMock, mockmetric # noqa (E402: module level import not at top of file) + + +# ----------------------------------------------------------------------------------------------- +# Ensemble Builder Testing +# ----------------------------------------------------------------------------------------------- +@pytest.fixture(scope="function") +def ensemble_backend(request): + """ + This fixture reads pre-compiled ensemble predictions that physically + reside in the test directory. They were created beforehand to make sure + ensemble building is correct + """ + test_id = '%s_%s' % (request.module.__name__, request.node.name) + test_dir = os.path.join(this_directory, test_id) + + try: + shutil.rmtree(test_dir) + except: # noqa E722 + pass + + # Make sure the folders we want to create do not already exist.
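+ # BackendMock (defined in ensemble_utils.py) copies the pre-generated prediction files from + # the local 'data' directory into this per-test directory, so every test works on its own copy.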
+ backend = BackendMock(test_dir) + + def get_finalizer(ensemble_backend): + def session_run_at_end(): + try: + shutil.rmtree(test_dir) + except: # noqa E722 + pass + return session_run_at_end + request.addfinalizer(get_finalizer(backend)) + + return backend + + +@pytest.fixture(scope="function") +def ensemble_run_history(request): + + run_history = RunHistory() + run_history._add( + RunKey( + config_id=3, + instance_id='{"task_id": "breast_cancer"}', + seed=1, + budget=3.0 + ), + RunValue( + cost=0.11347517730496459, + time=0.21858787536621094, + status=None, + starttime=time.time(), + endtime=time.time(), + additional_info={ + 'duration': 0.20323538780212402, + 'num_run': 3, + 'configuration_origin': 'Random Search'} + ), + status=None, + origin=None, + ) + run_history._add( + RunKey( + config_id=6, + instance_id='{"task_id": "breast_cancer"}', + seed=1, + budget=6.0 + ), + RunValue( + cost=2 * 0.11347517730496459, + time=2 * 0.21858787536621094, + status=None, + starttime=time.time(), + endtime=time.time(), + additional_info={ + 'duration': 0.20323538780212402, + 'num_run': 6, + 'configuration_origin': 'Random Search'} + ), + status=None, + origin=None, + ) + return run_history + + +def testRead(ensemble_backend): + + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ) + + success = ensbuilder.score_ensemble_preds() + assert success, str(ensbuilder.read_preds) + assert len(ensbuilder.read_preds) == 3, ensbuilder.read_preds.keys() + assert len(ensbuilder.read_scores) == 3, ensbuilder.read_scores.keys() + + filename = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ) + np.testing.assert_almost_equal( + ensbuilder.read_scores[filename]["ens_score"], + np.array(0.8) + ) + + filename = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ) + np.testing.assert_almost_equal( + ensbuilder.read_scores[filename]["ens_score"], + np.array(1.0) + ) + + +@pytest.mark.parametrize( + "ensemble_nbest,max_models_on_disc,exp", + ( + (1, None, 1), + (1.0, None, 2), + (0.1, None, 1), + (0.9, None, 1), + (1, 2, 1), + (2, 1, 1), + ) +) +def testNBest(ensemble_backend, ensemble_nbest, max_models_on_disc, exp): + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=ensemble_nbest, + max_models_on_disc=max_models_on_disc, + ) + + ensbuilder.score_ensemble_preds() + sel_keys = ensbuilder.get_n_best_preds() + + assert len(sel_keys) == exp + + fixture = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ) + assert sel_keys[0] == fixture + + +@pytest.mark.parametrize("test_case,exp", [ + # If None, no reduction + (None, 2), + # If Int, limit only on exceed + (4, 2), + (1, 1), + # If Float, translate float to # models. + # below, mock of each file is 100 Mb and 4 files .model and .npy (test/val/pred) exist + # per run (except for run3, there they are 5). 
Now, it takes 500MB for run 3 and + # another 500 MB of slack because we keep as much space as the largest model + # available as slack + (1499.0, 1), + (1500.0, 2), + (9999.0, 2), +]) +def testMaxModelsOnDisc(ensemble_backend, test_case, exp): + ensemble_nbest = 4 + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=ensemble_nbest, + max_models_on_disc=test_case, + ) + + with unittest.mock.patch('os.path.getsize') as mock: + mock.return_value = 100 * 1024 * 1024 + ensbuilder.score_ensemble_preds() + sel_keys = ensbuilder.get_n_best_preds() + assert len(sel_keys) == exp, test_case + + +def testMaxModelsOnDisc2(ensemble_backend): + # Test for Extreme scenarios + # Make sure that the best predictions are kept + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=50, + max_models_on_disc=10000.0, + ) + ensbuilder.read_preds = {} + for i in range(50): + ensbuilder.read_scores['pred' + str(i)] = { + 'ens_score': i * 10, + 'num_run': i, + 'loaded': 1, + "seed": 1, + "disc_space_cost_mb": 50 * i, + } + ensbuilder.read_preds['pred' + str(i)] = {Y_ENSEMBLE: True} + sel_keys = ensbuilder.get_n_best_preds() + assert ['pred49', 'pred48', 'pred47'] == sel_keys + + # Make sure at least one model is kept alive + ensbuilder.max_models_on_disc = 0.0 + sel_keys = ensbuilder.get_n_best_preds() + assert ['pred49'] == sel_keys + + +@pytest.mark.parametrize( + "performance_range_threshold,exp", + ((0.0, 4), (0.1, 4), (0.3, 3), (0.5, 2), (0.6, 2), (0.8, 1), (1.0, 1), (1, 1)) +) +def testPerformanceRangeThreshold(ensemble_backend, performance_range_threshold, exp): + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=100, + performance_range_threshold=performance_range_threshold + ) + ensbuilder.read_scores = { + 'A': {'ens_score': 1, 'num_run': 1, 'loaded': -1, "seed": 1}, + 'B': {'ens_score': 2, 'num_run': 2, 'loaded': -1, "seed": 1}, + 'C': {'ens_score': 3, 'num_run': 3, 'loaded': -1, "seed": 1}, + 'D': {'ens_score': 4, 'num_run': 4, 'loaded': -1, "seed": 1}, + 'E': {'ens_score': 5, 'num_run': 5, 'loaded': -1, "seed": 1}, + } + ensbuilder.read_preds = { + key: {key_2: True for key_2 in (Y_ENSEMBLE, Y_TEST)} + for key in ensbuilder.read_scores + } + sel_keys = ensbuilder.get_n_best_preds() + + assert len(sel_keys) == exp + + +@pytest.mark.parametrize( + "performance_range_threshold,ensemble_nbest,exp", + ( + (0.0, 1, 1), (0.0, 1.0, 4), (0.1, 2, 2), (0.3, 4, 3), + (0.5, 1, 1), (0.6, 10, 2), (0.8, 0.5, 1), (1, 1.0, 1) + ) +) +def testPerformanceRangeThresholdMaxBest(ensemble_backend, performance_range_threshold, + ensemble_nbest, exp): + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=ensemble_nbest, + performance_range_threshold=performance_range_threshold, + max_models_on_disc=None, + ) + ensbuilder.read_scores = { + 'A': {'ens_score': 
1, 'num_run': 1, 'loaded': -1, "seed": 1}, + 'B': {'ens_score': 2, 'num_run': 2, 'loaded': -1, "seed": 1}, + 'C': {'ens_score': 3, 'num_run': 3, 'loaded': -1, "seed": 1}, + 'D': {'ens_score': 4, 'num_run': 4, 'loaded': -1, "seed": 1}, + 'E': {'ens_score': 5, 'num_run': 5, 'loaded': -1, "seed": 1}, + } + ensbuilder.read_preds = { + key: {key_2: True for key_2 in (Y_ENSEMBLE, Y_TEST)} + for key in ensbuilder.read_scores + } + sel_keys = ensbuilder.get_n_best_preds() + + assert len(sel_keys) == exp + + +def testFallBackNBest(ensemble_backend): + + ensbuilder = EnsembleBuilder(backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=1 + ) + + ensbuilder.score_ensemble_preds() + + filename = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ) + ensbuilder.read_scores[filename]["ens_score"] = -1 + + filename = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy" + ) + ensbuilder.read_scores[filename]["ens_score"] = -1 + + filename = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ) + ensbuilder.read_scores[filename]["ens_score"] = -1 + + sel_keys = ensbuilder.get_n_best_preds() + + fixture = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ) + assert len(sel_keys) == 1 + assert sel_keys[0] == fixture + + +def testGetTestPreds(ensemble_backend): + + ensbuilder = EnsembleBuilder(backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=1 + ) + + ensbuilder.score_ensemble_preds() + + d1 = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_1_0.0/predictions_ensemble_0_1_0.0.npy" + ) + d2 = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ) + d3 = os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_3_100.0/predictions_ensemble_0_3_100.0.npy" + ) + + sel_keys = ensbuilder.get_n_best_preds() + assert len(sel_keys) == 1 + ensbuilder.get_test_preds(selected_keys=sel_keys) + + # Number of read files should be three and + # predictions_ensemble_0_4_0.0.npy must not be in there + assert len(ensbuilder.read_preds) == 3 + assert os.path.join( + ensemble_backend.temporary_directory, + ".autoPyTorch/runs/0_4_0.0/predictions_ensemble_0_4_0.0.npy" + ) not in ensbuilder.read_preds + + # not selected --> should still be None + assert ensbuilder.read_preds[d1][Y_TEST] is None + assert ensbuilder.read_preds[d3][Y_TEST] is None + + # selected --> read valid and test predictions + assert ensbuilder.read_preds[d2][Y_TEST] is not None + + +def testEntireEnsembleBuilder(ensemble_backend): + + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=2, + ) + ensbuilder.SAVE2DISC = False + + ensbuilder.score_ensemble_preds() + + d2 = os.path.join( + ensemble_backend.temporary_directory, + 
".autoPyTorch/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy" + ) + + sel_keys = ensbuilder.get_n_best_preds() + assert len(sel_keys) > 0 + + ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys) + + n_sel_test = ensbuilder.get_test_preds(selected_keys=sel_keys) + + # both valid and test prediction files are available + assert len(n_sel_test) > 0 + + y_test = ensbuilder.predict( + set_="test", + ensemble=ensemble, + selected_keys=n_sel_test, + n_preds=len(sel_keys), + index_run=1, + ) + + # since d2 provides perfect predictions + # it should get a higher weight + # so that y_valid should be exactly y_valid_d2 + y_test_d2 = ensbuilder.read_preds[d2][Y_TEST][:, 1] + np.testing.assert_array_almost_equal(y_test, y_test_d2) + + +def test_main(ensemble_backend): + + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=MULTICLASS, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=2, + max_models_on_disc=None, + ) + ensbuilder.SAVE2DISC = False + + run_history, ensemble_nbest, _, _ = ensbuilder.main( + time_left=np.inf, iteration=1, return_predictions=False, + ) + + assert len(ensbuilder.read_preds) == 3 + assert ensbuilder.last_hash is not None + assert ensbuilder.y_true_ensemble is not None + + # Make sure the run history is ok + + # We expect at least 1 element to be in the ensemble + assert len(run_history) > 0 + + # As the data loader loads the same val/train/test + # we expect 1.0 as score and all keys available + expected_performance = { + 'train_accuracy': 1.0, + 'test_accuracy': 1.0, + } + + # Make sure that expected performance is a subset of the run history + assert all(item in run_history[0].items() for item in expected_performance.items()) + assert 'Timestamp' in run_history[0] + assert isinstance(run_history[0]['Timestamp'], pd.Timestamp) + + assert os.path.exists( + os.path.join(ensemble_backend.internals_directory, 'ensemble_read_preds.pkl') + ), os.listdir(ensemble_backend.internals_directory) + assert os.path.exists( + os.path.join(ensemble_backend.internals_directory, 'ensemble_read_scores.pkl') + ), os.listdir(ensemble_backend.internals_directory) + + +def test_run_end_at(ensemble_backend): + with unittest.mock.patch('pynisher.enforce_limits') as pynisher_mock: + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=MULTICLASS, # Multilabel Classification + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=2, + max_models_on_disc=None, + ) + ensbuilder.SAVE2DISC = False + + current_time = time.time() + + ensbuilder.run(end_at=current_time + 10, iteration=1) + # 4 seconds left because: 10 seconds - 5 seconds overhead - very little overhead, + # but then rounded to an integer + assert pynisher_mock.call_args_list[0][1]["wall_time_in_s"], 4 + + +def testLimit(ensemble_backend): + ensbuilder = EnsembleBuilderMemMock(backend=ensemble_backend, + dataset_name="TEST", + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=10, + # small to trigger MemoryException + memory_limit=100, + ) + ensbuilder.SAVE2DISC = False + + read_scores_file = os.path.join( + ensemble_backend.internals_directory, + 'ensemble_read_scores.pkl' + ) + read_preds_file = os.path.join( + ensemble_backend.internals_directory, + 
'ensemble_read_preds.pkl' + ) + + with unittest.mock.patch('logging.getLogger') as get_logger_mock, \ + unittest.mock.patch('logging.config.dictConfig') as _: + logger_mock = unittest.mock.Mock() + logger_mock.handlers = [] + get_logger_mock.return_value = logger_mock + + ensbuilder.run(time_left=1000, iteration=0) + assert os.path.exists(read_scores_file) + assert not os.path.exists(read_preds_file) + assert logger_mock.warning.call_count == 1 + ensbuilder.run(time_left=1000, iteration=0) + assert os.path.exists(read_scores_file) + assert not os.path.exists(read_preds_file) + assert logger_mock.warning.call_count == 2 + ensbuilder.run(time_left=1000, iteration=0) + assert os.path.exists(read_scores_file) + assert not os.path.exists(read_preds_file) + assert logger_mock.warning.call_count == 3 + + # it should try to reduce ensemble_nbest until it also failed at 2 + assert ensbuilder.ensemble_nbest == 1 + + ensbuilder.run(time_left=1000, iteration=0) + assert os.path.exists(read_scores_file) + assert not os.path.exists(read_preds_file) + assert logger_mock.warning.call_count == 4 + + # it should next reduce the number of models to read at most + assert ensbuilder.read_at_most == 1 + + # And then it still runs, but basically won't do anything any more except for raising error + # messages via the logger + ensbuilder.run(time_left=1000, iteration=0) + assert os.path.exists(read_scores_file) + assert not os.path.exists(read_preds_file) + assert logger_mock.warning.call_count == 4 + + +def test_read_pickle_read_preds(ensemble_backend): + """ + This procedure test that we save the read predictions before + destroying the ensemble builder and that we are able to read + them safely after + """ + ensbuilder = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=MULTICLASS, # Multilabel Classification + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=2, + max_models_on_disc=None, + ) + ensbuilder.SAVE2DISC = False + + ensbuilder.main(time_left=np.inf, iteration=1, return_predictions=False) + + # Check that the memory was created + ensemble_memory_file = os.path.join( + ensemble_backend.internals_directory, + 'ensemble_read_preds.pkl' + ) + assert os.path.exists(ensemble_memory_file) + + # Make sure we pickle the correct read preads and hash + with (open(ensemble_memory_file, "rb")) as memory: + read_preds, last_hash = pickle.load(memory) + + compare_read_preds(read_preds, ensbuilder.read_preds) + assert last_hash == ensbuilder.last_hash + + ensemble_memory_file = os.path.join( + ensemble_backend.internals_directory, + 'ensemble_read_scores.pkl' + ) + assert os.path.exists(ensemble_memory_file) + + # Make sure we pickle the correct read scores + with (open(ensemble_memory_file, "rb")) as memory: + read_scores = pickle.load(memory) + + compare_read_preds(read_scores, ensbuilder.read_scores) + + # Then create a new instance, which should automatically read this file + ensbuilder2 = EnsembleBuilder( + backend=ensemble_backend, + dataset_name="TEST", + output_type=MULTICLASS, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + seed=0, # important to find the test files + ensemble_nbest=2, + max_models_on_disc=None, + ) + compare_read_preds(ensbuilder2.read_preds, ensbuilder.read_preds) + compare_read_preds(ensbuilder2.read_scores, ensbuilder.read_scores) + assert ensbuilder2.last_hash == ensbuilder.last_hash + + +def 
test_ensemble_builder_process_realrun(dask_client, ensemble_backend): + manager = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=1000, + backend=ensemble_backend, + dataset_name='Test', + output_type=BINARY, + task_type=TABULAR_CLASSIFICATION, + metrics=[mockmetric], + opt_metric='mockmetric', + ensemble_size=50, + ensemble_nbest=10, + max_models_on_disc=None, + seed=0, + precision=32, + max_iterations=1, + read_at_most=np.inf, + ensemble_memory_limit=None, + random_state=0, + ) + manager.build_ensemble(dask_client) + future = manager.futures.pop() + dask.distributed.wait([future]) # wait for the ensemble process to finish + result = future.result() + history, _, _, _ = result + + assert 'train_mockmetric' in history[0] + assert history[0]['train_mockmetric'] == 0.9 + assert 'test_mockmetric' in history[0] + assert history[0]['test_mockmetric'] == 0.9 + + +@unittest.mock.patch('autoPyTorch.ensemble.ensemble_builder.EnsembleBuilder.fit_ensemble') +def test_ensemble_builder_nbest_remembered(fit_ensemble, ensemble_backend, dask_client): + """ + Makes sure ensemble builder returns the size of the ensemble that pynisher allowed + This way, we can remember it and not waste more time trying big ensemble sizes + """ + + fit_ensemble.side_effect = MemoryError + + manager = EnsembleBuilderManager( + start_time=time.time(), + time_left_for_ensembles=1000, + backend=ensemble_backend, + dataset_name='Test', + output_type=MULTICLASS, + task_type=TABULAR_CLASSIFICATION, + metrics=[accuracy], + opt_metric='accuracy', + ensemble_size=50, + ensemble_nbest=10, + max_models_on_disc=None, + seed=0, + precision=32, + read_at_most=np.inf, + ensemble_memory_limit=1000, + random_state=0, + max_iterations=None, + ) + + manager.build_ensemble(dask_client) + future = manager.futures[0] + dask.distributed.wait([future]) # wait for the ensemble process to finish + assert future.result() == ([], 5, None, None) + file_path = os.path.join(ensemble_backend.internals_directory, 'ensemble_read_preds.pkl') + assert not os.path.exists(file_path) + + manager.build_ensemble(dask_client) + + future = manager.futures[0] + dask.distributed.wait([future]) # wait for the ensemble process to finish + assert not os.path.exists(file_path) + assert future.result() == ([], 2, None, None) + + +# ----------------------------------------------------------------------------------------------- +# SingleBest Testing +# ----------------------------------------------------------------------------------------------- +def testPredict(): + # Test that ensemble prediction applies weights correctly to given + # predictions. There are two possible cases: + # 1) predictions.shape[0] == len(self.weights_). In this case, + # predictions include those made by zero-weighted models. Therefore, + # we simply apply each weights to the corresponding model preds. + # 2) predictions.shape[0] < len(self.weights_). In this case, + # predictions exclude those made by zero-weighted models. Therefore, + # we first exclude all occurrences of zero in self.weights_, and then + # apply the weights. + # If none of the above is the case, predict() raises Error. + ensemble = EnsembleSelection(ensemble_size=3, + random_state=np.random.RandomState(0), + metric=accuracy, + task_type=TABULAR_CLASSIFICATION, + ) + # Test for case 1. Create (3, 2, 2) predictions. 
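+ # With the weights [0.7, 0.2, 0.1] set below, the expected output is the per-row weighted + # average of the three model predictions, e.g. first row: 0.7*0.9 + 0.2*0.8 + 0.1*1.0 = 0.89 and + # 0.7*0.1 + 0.2*0.2 + 0.1*0.0 = 0.11, which is the `truth` asserted further down.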
+ per_model_pred = np.array([ + [[0.9, 0.1], + [0.4, 0.6]], + [[0.8, 0.2], + [0.3, 0.7]], + [[1.0, 0.0], + [0.1, 0.9]] + ]) + # Weights of 3 hypothetical models + ensemble.weights_ = [0.7, 0.2, 0.1] + pred = ensemble.predict(per_model_pred) + truth = np.array([[0.89, 0.11], # This should be the true prediction. + [0.35, 0.65]]) + assert np.allclose(pred, truth) + + # Test for case 2. + per_model_pred = np.array([ + [[0.9, 0.1], + [0.4, 0.6]], + [[0.8, 0.2], + [0.3, 0.7]], + [[1.0, 0.0], + [0.1, 0.9]] + ]) + # The third model now has weight of zero. + ensemble.weights_ = [0.7, 0.2, 0.0, 0.1] + pred = ensemble.predict(per_model_pred) + truth = np.array([[0.89, 0.11], + [0.35, 0.65]]) + assert np.allclose(pred, truth) + + # Test for error case. + per_model_pred = np.array([ + [[0.9, 0.1], + [0.4, 0.6]], + [[0.8, 0.2], + [0.3, 0.7]], + [[1.0, 0.0], + [0.1, 0.9]] + ]) + # Now the weights have 2 zero weights and 2 non-zero weights, + # which is incompatible. + ensemble.weights_ = [0.6, 0.0, 0.0, 0.4] + + with pytest.raises(ValueError): + ensemble.predict(per_model_pred) + + +# ----------------------------------------------------------------------------------------------- +# SingleBest Testing +# ----------------------------------------------------------------------------------------------- +@unittest.mock.patch('os.path.exists') +def test_get_identifiers_from_run_history(exists, ensemble_run_history, ensemble_backend): + exists.return_value = True + ensemble = SingleBest( + metric=accuracy, + seed=1, + run_history=ensemble_run_history, + backend=ensemble_backend, + ) + + # Just one model + assert len(ensemble.identifiers_) == 1 + + # That model must be the best + seed, num_run, budget = ensemble.identifiers_[0] + assert num_run == 3 + assert seed == 1 + assert budget == 3.0 diff --git a/test/test_evaluation/__init__.py b/test/test_evaluation/__init__.py new file mode 100644 index 000000000..cc3cd7bec --- /dev/null +++ b/test/test_evaluation/__init__.py @@ -0,0 +1,2 @@ +# -*- encoding: utf-8 -*- +__author__ = 'feurerm' diff --git a/test/test_evaluation/evaluation_util.py b/test/test_evaluation/evaluation_util.py new file mode 100644 index 000000000..b61df8643 --- /dev/null +++ b/test/test_evaluation/evaluation_util.py @@ -0,0 +1,238 @@ +import functools +import traceback +import unittest + +import numpy as np +from numpy.linalg import LinAlgError + +import scipy.sparse + +import sklearn.datasets +import sklearn.model_selection +from sklearn import preprocessing + +from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes +from autoPyTorch.datasets.tabular_dataset import TabularDataset +from autoPyTorch.pipeline.components.training.metrics.metrics import ( + accuracy, + balanced_accuracy, + log_loss +) + +SCORER_LIST = [accuracy, balanced_accuracy, log_loss] + +N_TEST_RUNS = 5 + + +def get_dataset(dataset='iris', make_sparse=False, add_NaNs=False, + train_size_maximum=150, make_multilabel=False, + make_binary=False): + iris = getattr(sklearn.datasets, "load_%s" % dataset)() + X = iris.data.astype(np.float32) + Y = iris.target + rs = np.random.RandomState(42) + indices = np.arange(X.shape[0]) + train_size = min(int(len(indices) / 3. 
* 2.), train_size_maximum) + rs.shuffle(indices) + X = X[indices] + Y = Y[indices] + X_train = X[:train_size] + Y_train = Y[:train_size] + X_test = X[train_size:] + Y_test = Y[train_size:] + if add_NaNs: + mask = rs.choice([True, False], size=(X_train.shape)) + X_train[mask] = np.NaN + if make_sparse: + X_train[:, 0] = 0 + X_train[rs.random_sample(X_train.shape) > 0.5] = 0 + X_train = scipy.sparse.csc_matrix(X_train) + X_train.eliminate_zeros() + X_test[:, 0] = 0 + X_test[rs.random_sample(X_test.shape) > 0.5] = 0 + X_test = scipy.sparse.csc_matrix(X_test) + X_test.eliminate_zeros() + if make_binary and make_multilabel: + raise ValueError('Can convert dataset only to one of the two ' + 'options binary or multilabel!') + if make_binary: + Y_train[Y_train > 1] = 1 + Y_test[Y_test > 1] = 1 + if make_multilabel: + num_classes = len(np.unique(Y)) + Y_train_ = np.zeros((Y_train.shape[0], num_classes)) + for i in range(Y_train.shape[0]): + Y_train_[i, Y_train[i]] = 1 + Y_train = Y_train_ + Y_test_ = np.zeros((Y_test.shape[0], num_classes)) + for i in range(Y_test.shape[0]): + Y_test_[i, Y_test[i]] = 1 + Y_test = Y_test_ + return X_train, Y_train, X_test, Y_test + + +class Dummy(object): + def __init__(self): + self.name = 'Dummy' + + +class BaseEvaluatorTest(unittest.TestCase): + def __init__(self, methodName): + super(BaseEvaluatorTest, self).__init__(methodName) + self.output_directories = [] + + def _fit(self, evaluator): + return self.__fit(evaluator.search) + + def _partial_fit(self, evaluator, fold): + partial_fit = functools.partial(evaluator.partial_fit, fold=fold) + return self.__fit(partial_fit) + + def __fit(self, function_handle): + """Allow us to catch known and valid exceptions for all evaluate + scripts.""" + try: + function_handle() + return True + except KeyError as e: + if 'Floating-point under-/overflow occurred at epoch' in \ + e.args[0] or \ + 'removed all features' in e.args[0] or \ + 'failed to create intent' in e.args[0]: + pass + else: + traceback.print_exc() + raise e + except ValueError as e: + if 'Floating-point under-/overflow occurred at epoch' in e.args[0]: + pass + elif 'removed all features' in e.args[0]: + pass + elif 'failed to create intent' in e.args[0]: + pass + else: + raise e + except LinAlgError as e: + if 'not positive definite, even with jitter' in e.args[0]: + pass + else: + raise e + except RuntimeWarning as e: + if 'invalid value encountered in sqrt' in e.args[0]: + pass + elif 'divide by zero encountered in divide' in e.args[0]: + pass + else: + raise e + except UserWarning as e: + if 'FastICA did not converge' in e.args[0]: + pass + else: + raise e + + +def get_multiclass_classification_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation): + X_train, Y_train, X_test, Y_test = get_dataset('iris') + indices = list(range(X_train.shape[0])) + np.random.seed(1) + np.random.shuffle(indices) + X_train = X_train[indices] + Y_train = Y_train[indices] + + dataset = TabularDataset( + X=X_train, Y=Y_train, + X_test=X_test, Y_test=Y_test, + resampling_strategy=resampling_strategy + ) + return dataset + + +def get_abalone_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation): + # https://www.openml.org/d/183 + X, y = sklearn.datasets.fetch_openml(data_id=183, return_X_y=True, as_frame=False) + y = preprocessing.LabelEncoder().fit_transform(y) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, y, random_state=1 + ) + + dataset = TabularDataset( + X=X_train, Y=y_train, + X_test=X_test, Y_test=y_test, + 
resampling_strategy=resampling_strategy + ) + return dataset + + +def get_binary_classification_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation): + X_train, Y_train, X_test, Y_test = get_dataset('iris') + indices = list(range(X_train.shape[0])) + np.random.seed(1) + np.random.shuffle(indices) + X_train = X_train[indices] + Y_train = Y_train[indices] + + eliminate_class_two = Y_train != 2 + X_train = X_train[eliminate_class_two] + Y_train = Y_train[eliminate_class_two] + + eliminate_class_two = Y_test != 2 + X_test = X_test[eliminate_class_two] + Y_test = Y_test[eliminate_class_two] + + dataset = TabularDataset( + X=X_train, Y=Y_train, + X_test=X_test, Y_test=Y_test, + resampling_strategy=resampling_strategy + ) + return dataset + + +def get_regression_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation): + X_train, Y_train, X_test, Y_test = get_dataset('boston') + indices = list(range(X_train.shape[0])) + np.random.seed(1) + np.random.shuffle(indices) + X_train = X_train[indices] + Y_train = Y_train[indices] + + dataset = TabularDataset( + X=X_train, Y=Y_train, + X_test=X_test, Y_test=Y_test, + resampling_strategy=resampling_strategy + ) + return dataset + + +def get_500_classes_datamanager(resampling_strategy=HoldoutValTypes.holdout_validation): + weights = ([0.002] * 475) + ([0.001] * 25) + X, Y = sklearn.datasets.make_classification(n_samples=1000, + n_features=20, + n_classes=500, + n_clusters_per_class=1, + n_informative=15, + n_redundant=5, + n_repeated=0, + weights=weights, + flip_y=0, + class_sep=1.0, + hypercube=True, + shift=None, + scale=1.0, + shuffle=True, + random_state=1) + + dataset = TabularDataset( + X=X[:700], Y=Y[:700], + X_test=X[700:], Y_test=Y[700:], + resampling_strategy=resampling_strategy + ) + + return dataset + + +def get_dataset_getters(): + return [get_binary_classification_datamanager, + get_multiclass_classification_datamanager, + get_500_classes_datamanager, + get_abalone_datamanager, + get_regression_datamanager] diff --git a/test/test_evaluation/test_abstract_evaluator.py b/test/test_evaluation/test_abstract_evaluator.py new file mode 100644 index 000000000..3fa2667a8 --- /dev/null +++ b/test/test_evaluation/test_abstract_evaluator.py @@ -0,0 +1,283 @@ +# -*- encoding: utf-8 -*- +import os +import shutil +import sys +import unittest +import unittest.mock + +import numpy as np + +import sklearn.dummy + +from smac.tae import StatusType + +from autoPyTorch.evaluation.abstract_evaluator import AbstractEvaluator +from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy +from autoPyTorch.utils.backend import Backend, BackendContext + +this_directory = os.path.dirname(__file__) +sys.path.append(this_directory) +from evaluation_util import get_multiclass_classification_datamanager # noqa E402 + + +class AbstractEvaluatorTest(unittest.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + """ + Creates a backend mock + """ + self.ev_path = os.path.join(this_directory, '.tmp_evaluation') + if not os.path.exists(self.ev_path): + os.mkdir(self.ev_path) + dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] + dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)] + + backend_mock = unittest.mock.Mock() + backend_mock.get_model_dir.return_value = self.ev_path + backend_mock.get_model_path.side_effect = dummy_model_files + backend_mock.get_prediction_output_path.side_effect = dummy_pred_files + backend_mock.temporary_directory = self.ev_path + + D =
get_multiclass_classification_datamanager() + backend_mock.load_datamanager.return_value = D + self.backend_mock = backend_mock + + self.working_directory = os.path.join(this_directory, '.tmp_%s' % self.id()) + + def tearDown(self): + if os.path.exists(self.ev_path): + try: + os.rmdir(self.ev_path) + except: # noqa E722 + pass + + def test_finish_up_model_predicts_NaN(self): + '''Tests by handing in predictions which contain NaNs''' + rs = np.random.RandomState(1) + + queue_mock = unittest.mock.Mock() + ae = AbstractEvaluator(backend=self.backend_mock, + output_y_hat_optimization=False, + queue=queue_mock, metric=accuracy, budget=0, + configuration=1) + ae.Y_optimization = rs.rand(33, 3) + predictions_ensemble = rs.rand(33, 3) + predictions_test = rs.rand(25, 3) + predictions_valid = rs.rand(25, 3) + + # NaNs in prediction ensemble + predictions_ensemble[5, 2] = np.NaN + _, loss, _, additional_run_info = ae.finish_up( + loss={'accuracy': 0.1}, + train_loss={'accuracy': 0.1}, + opt_pred=predictions_ensemble, + valid_pred=predictions_valid, + test_pred=predictions_test, + additional_run_info=None, + file_output=True, + status=StatusType.SUCCESS, + ) + self.assertEqual(loss, 1.0) + self.assertEqual(additional_run_info, + {'error': 'Model predictions for optimization set ' + 'contains NaNs.'}) + + # NaNs in prediction validation + predictions_ensemble[5, 2] = 0.5 + predictions_valid[5, 2] = np.NaN + _, loss, _, additional_run_info = ae.finish_up( + loss={'accuracy': 0.1}, + train_loss={'accuracy': 0.1}, + opt_pred=predictions_ensemble, + valid_pred=predictions_valid, + test_pred=predictions_test, + additional_run_info=None, + file_output=True, + status=StatusType.SUCCESS, + ) + self.assertEqual(loss, 1.0) + self.assertEqual(additional_run_info, + {'error': 'Model predictions for validation set ' + 'contains NaNs.'}) + + # NaNs in prediction test + predictions_valid[5, 2] = 0.5 + predictions_test[5, 2] = np.NaN + _, loss, _, additional_run_info = ae.finish_up( + loss={'accuracy': 0.1}, + train_loss={'accuracy': 0.1}, + opt_pred=predictions_ensemble, + valid_pred=predictions_valid, + test_pred=predictions_test, + additional_run_info=None, + file_output=True, + status=StatusType.SUCCESS, + ) + self.assertEqual(loss, 1.0) + self.assertEqual(additional_run_info, + {'error': 'Model predictions for test set contains ' + 'NaNs.'}) + + self.assertEqual(self.backend_mock.save_predictions_as_npy.call_count, 0) + + def test_disable_file_output(self): + queue_mock = unittest.mock.Mock() + + rs = np.random.RandomState(1) + + ae = AbstractEvaluator( + backend=self.backend_mock, + queue=queue_mock, + disable_file_output=True, + metric=accuracy, + logger_port=unittest.mock.Mock(), + budget=0, + configuration=1 + ) + ae.pipeline = unittest.mock.Mock() + predictions_ensemble = rs.rand(33, 3) + predictions_test = rs.rand(25, 3) + predictions_valid = rs.rand(25, 3) + + loss_, additional_run_info_ = ( + ae.file_output( + predictions_ensemble, + predictions_valid, + predictions_test, + ) + ) + + self.assertIsNone(loss_) + self.assertEqual(additional_run_info_, {}) + # This function is never called as there is a return before + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 0) + + for call_count, disable in enumerate(['pipeline', 'pipelines'], start=1): + ae = AbstractEvaluator( + backend=self.backend_mock, + output_y_hat_optimization=False, + queue=queue_mock, + disable_file_output=[disable], + metric=accuracy, + budget=0, + configuration=1 + ) + ae.Y_optimization = predictions_ensemble + 
ae.pipeline = unittest.mock.Mock() + ae.pipelines = [unittest.mock.Mock()] + + loss_, additional_run_info_ = ( + ae.file_output( + predictions_ensemble, + predictions_valid, + predictions_test, + ) + ) + + self.assertIsNone(loss_) + self.assertEqual(additional_run_info_, {}) + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, call_count) + if disable == 'pipeline': + self.assertIsNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + else: + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) + self.assertIsNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + 'ensemble_predictions'] + ) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + 'valid_predictions'] + ) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + 'test_predictions'] + ) + + ae = AbstractEvaluator( + backend=self.backend_mock, + output_y_hat_optimization=False, + queue=queue_mock, + metric=accuracy, + disable_file_output=['y_optimization'], + budget=0, + configuration=1 + ) + ae.Y_optimization = predictions_ensemble + ae.pipeline = 'pipeline' + ae.pipelines = [unittest.mock.Mock()] + + loss_, additional_run_info_ = ( + ae.file_output( + predictions_ensemble, + predictions_valid, + predictions_test, + ) + ) + + self.assertIsNone(loss_) + self.assertEqual(additional_run_info_, {}) + + self.assertIsNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + 'ensemble_predictions'] + ) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + 'valid_predictions'] + ) + self.assertIsNotNone( + self.backend_mock.save_numrun_to_dir.call_args_list[-1][1][ + 'test_predictions'] + ) + + def test_file_output(self): + shutil.rmtree(self.working_directory, ignore_errors=True) + os.mkdir(self.working_directory) + + queue_mock = unittest.mock.Mock() + + context = BackendContext( + temporary_directory=os.path.join(self.working_directory, 'tmp'), + output_directory=os.path.join(self.working_directory, 'out'), + delete_tmp_folder_after_terminate=True, + delete_output_folder_after_terminate=True, + ) + with unittest.mock.patch.object(Backend, 'load_datamanager') as load_datamanager_mock: + load_datamanager_mock.return_value = get_multiclass_classification_datamanager() + + backend = Backend(context) + + ae = AbstractEvaluator( + backend=backend, + output_y_hat_optimization=False, + queue=queue_mock, + metric=accuracy, + budget=0, + configuration=1 + ) + ae.model = sklearn.dummy.DummyClassifier() + + rs = np.random.RandomState() + ae.Y_optimization = rs.rand(33, 3) + predictions_ensemble = rs.rand(33, 3) + predictions_test = rs.rand(25, 3) + predictions_valid = rs.rand(25, 3) + + ae.file_output( + Y_optimization_pred=predictions_ensemble, + Y_valid_pred=predictions_valid, + Y_test_pred=predictions_test, + ) + + self.assertTrue(os.path.exists(os.path.join(self.working_directory, 'tmp', + '.autoPyTorch', 'runs', '1_0_1.0'))) + + shutil.rmtree(self.working_directory, ignore_errors=True) diff --git a/test/test_evaluation/test_evaluation.py b/test/test_evaluation/test_evaluation.py new file mode 100644 index 000000000..82a65ba96 --- /dev/null +++ b/test/test_evaluation/test_evaluation.py @@ -0,0 +1,376 @@ +import logging +import os +import shutil 
+import sys +import time +import unittest +import unittest.mock + +import numpy as np + +import pynisher + +from smac.runhistory.runhistory import RunInfo +from smac.stats.stats import Stats +from smac.tae import StatusType + +from autoPyTorch.evaluation.tae import ExecuteTaFuncWithQueue, get_cost_of_crash +from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy + +this_directory = os.path.dirname(__file__) +sys.path.append(this_directory) +from evaluation_util import get_multiclass_classification_datamanager # noqa E402 + + +def safe_eval_success_mock(*args, **kwargs): + queue = kwargs['queue'] + queue.put({'status': StatusType.SUCCESS, + 'loss': 0.5, + 'additional_run_info': ''}) + + +class BackendMock(object): + def __init__(self): + self.temporary_directory = './.tmp_evaluation' + try: + os.mkdir(self.temporary_directory) + except: # noqa 3722 + pass + + def load_datamanager(self): + return get_multiclass_classification_datamanager() + + +class EvaluationTest(unittest.TestCase): + def setUp(self): + self.datamanager = get_multiclass_classification_datamanager() + self.tmp = os.path.join(os.getcwd(), '.test_evaluation') + os.mkdir(self.tmp) + self.logger = logging.getLogger() + scenario_mock = unittest.mock.Mock() + scenario_mock.wallclock_limit = 10 + scenario_mock.algo_runs_timelimit = 1000 + scenario_mock.ta_run_limit = 100 + self.scenario = scenario_mock + stats = Stats(scenario_mock) + stats.start_timing() + self.stats = stats + + try: + shutil.rmtree(self.tmp) + except Exception: + pass + + def tearDown(self): + try: + shutil.rmtree(self.tmp) + except Exception: + pass + + ############################################################################ + # pynisher tests + def test_pynisher_memory_error(self): + def fill_memory(): + a = np.random.random_sample((10000000, 10000000)).astype(np.float64) + return np.sum(a) + + safe_eval = pynisher.enforce_limits(mem_in_mb=1)(fill_memory) + safe_eval() + self.assertEqual(safe_eval.exit_status, pynisher.MemorylimitException) + + def test_pynisher_timeout(self): + def run_over_time(): + time.sleep(2) + + safe_eval = pynisher.enforce_limits(wall_time_in_s=1, + grace_period_in_s=0)(run_over_time) + safe_eval() + self.assertEqual(safe_eval.exit_status, pynisher.TimeoutException) + + ############################################################################ + # Test ExecuteTaFuncWithQueue.run_wrapper() + @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function') + def test_eval_with_limits_holdout(self, pynisher_mock): + pynisher_mock.side_effect = safe_eval_success_mock + config = unittest.mock.Mock() + config.config_id = 198 + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[0].config.config_id, 198) + self.assertEqual(info[1].status, StatusType.SUCCESS) + self.assertEqual(info[1].cost, 0.5) + self.assertIsInstance(info[1].time, float) + + @unittest.mock.patch('pynisher.enforce_limits') + def test_cutoff_lower_than_remaining_time(self, pynisher_mock): + config = unittest.mock.Mock() + config.config_id = 198 + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + 
abort_on_first_run_crash=False, + logger=self.logger + ) + self.stats.ta_runs = 1 + ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, instance_specific=None, + seed=1, capped=False)) + self.assertEqual(pynisher_mock.call_args[1]['wall_time_in_s'], 4) + self.assertIsInstance(pynisher_mock.call_args[1]['wall_time_in_s'], int) + + @unittest.mock.patch('pynisher.enforce_limits') + def test_eval_with_limits_holdout_fail_timeout(self, pynisher_mock): + config = unittest.mock.Mock() + config.config_id = 198 + + m1 = unittest.mock.Mock() + m2 = unittest.mock.Mock() + m1.return_value = m2 + pynisher_mock.return_value = m1 + m2.exit_status = pynisher.TimeoutException + m2.wall_clock_time = 30 + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.TIMEOUT) + self.assertEqual(info[1].cost, 1.0) + self.assertIsInstance(info[1].time, float) + self.assertNotIn('exitcode', info[1].additional_info) + + @unittest.mock.patch('pynisher.enforce_limits') + def test_zero_or_negative_cutoff(self, pynisher_mock): + config = unittest.mock.Mock() + config.config_id = 198 + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + self.scenario.wallclock_limit = 5 + self.stats.submitted_ta_runs += 1 + run_info, run_value = ta.run_wrapper(RunInfo(config=config, cutoff=9, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(run_value.status, StatusType.STOP) + + @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function') + def test_eval_with_limits_holdout_fail_silent(self, pynisher_mock): + pynisher_mock.return_value = None + config = unittest.mock.Mock() + config.origin = 'MOCK' + config.config_id = 198 + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + + # The following should not fail because abort on first config crashed is false + info = ta.run_wrapper(RunInfo(config=config, cutoff=60, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.CRASHED) + self.assertEqual(info[1].cost, 1.0) + self.assertIsInstance(info[1].time, float) + self.assertEqual(info[1].additional_info, {'configuration_origin': 'MOCK', + 'error': "Result queue is empty", + 'exit_status': '0', + 'exitcode': 0, + 'subprocess_stdout': '', + 'subprocess_stderr': ''}) + + self.stats.submitted_ta_runs += 1 + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.CRASHED) + self.assertEqual(info[1].cost, 1.0) + self.assertIsInstance(info[1].time, float) + self.assertEqual(info[1].additional_info, {'configuration_origin': 'MOCK', + 'error': "Result queue is empty", + 'exit_status': '0', + 'exitcode': 0, + 'subprocess_stdout': '', + 'subprocess_stderr': ''}) + + @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function') + def 
test_eval_with_limits_holdout_fail_memory_error(self, pynisher_mock): + pynisher_mock.side_effect = MemoryError + config = unittest.mock.Mock() + config.config_id = 198 + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.MEMOUT) + + # For accuracy, worst possible result is MAXINT + worst_possible_result = 1 + self.assertEqual(info[1].cost, worst_possible_result) + self.assertIsInstance(info[1].time, float) + self.assertNotIn('exitcode', info[1].additional_info) + + @unittest.mock.patch('pynisher.enforce_limits') + def test_eval_with_limits_holdout_timeout_with_results_in_queue(self, pynisher_mock): + config = unittest.mock.Mock() + config.config_id = 198 + + def side_effect(**kwargs): + queue = kwargs['queue'] + queue.put({'status': StatusType.SUCCESS, + 'loss': 0.5, + 'additional_run_info': {}}) + m1 = unittest.mock.Mock() + m2 = unittest.mock.Mock() + m1.return_value = m2 + pynisher_mock.return_value = m1 + m2.side_effect = side_effect + m2.exit_status = pynisher.TimeoutException + m2.wall_clock_time = 30 + + # Test for a succesful run + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.SUCCESS) + self.assertEqual(info[1].cost, 0.5) + self.assertIsInstance(info[1].time, float) + self.assertNotIn('exitcode', info[1].additional_info) + + # And a crashed run which is in the queue + def side_effect(**kwargs): + queue = kwargs['queue'] + queue.put({'status': StatusType.CRASHED, + 'loss': 2.0, + 'additional_run_info': {}}) + m2.side_effect = side_effect + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.CRASHED) + self.assertEqual(info[1].cost, 1.0) + self.assertIsInstance(info[1].time, float) + self.assertNotIn('exitcode', info[1].additional_info) + + @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function') + def test_eval_with_limits_holdout_2(self, eval_houldout_mock): + config = unittest.mock.Mock() + config.config_id = 198 + + def side_effect(*args, **kwargs): + queue = kwargs['queue'] + queue.put({'status': StatusType.SUCCESS, + 'loss': 0.5, + 'additional_run_info': kwargs['instance']}) + eval_houldout_mock.side_effect = side_effect + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + self.scenario.wallclock_limit = 180 + instance = "{'subsample': 30}" + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=instance, + instance_specific=None, seed=1, capped=False)) + 
self.assertEqual(info[1].status, StatusType.SUCCESS) + self.assertEqual(len(info[1].additional_info), 2) + self.assertIn('configuration_origin', info[1].additional_info) + self.assertEqual(info[1].additional_info['message'], "{'subsample': 30}") + + @unittest.mock.patch('autoPyTorch.evaluation.train_evaluator.eval_function') + def test_exception_in_target_function(self, eval_holdout_mock): + config = unittest.mock.Mock() + config.config_id = 198 + + eval_holdout_mock.side_effect = ValueError + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + self.stats.submitted_ta_runs += 1 + info = ta.run_wrapper(RunInfo(config=config, cutoff=30, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.CRASHED) + self.assertEqual(info[1].cost, 1.0) + self.assertIsInstance(info[1].time, float) + self.assertEqual(info[1].additional_info['error'], 'ValueError()') + self.assertIn('traceback', info[1].additional_info) + self.assertNotIn('exitcode', info[1].additional_info) + + def test_silent_exception_in_target_function(self): + config = unittest.mock.Mock(spec=int) + config.config_id = 198 + + ta = ExecuteTaFuncWithQueue(backend=BackendMock(), seed=1, + stats=self.stats, + memory_limit=3072, + metric=accuracy, + cost_for_crash=get_cost_of_crash(accuracy), + abort_on_first_run_crash=False, + logger=self.logger + ) + ta.pynisher_logger = unittest.mock.Mock() + self.stats.submitted_ta_runs += 1 + info = ta.run_wrapper(RunInfo(config=config, cutoff=3000, instance=None, + instance_specific=None, seed=1, capped=False)) + self.assertEqual(info[1].status, StatusType.CRASHED, msg=str(info[1].additional_info)) + self.assertEqual(info[1].cost, 1.0) + self.assertIsInstance(info[1].time, float) + self.assertIn( + info[1].additional_info['error'], + ( + """AttributeError("'BackendMock' object has no attribute """ + """'save_targets_ensemble'",)""", + """AttributeError("'BackendMock' object has no attribute """ + """'save_targets_ensemble'")""", + ) + ) + self.assertNotIn('exitcode', info[1].additional_info) + self.assertNotIn('exit_status', info[1].additional_info) + self.assertNotIn('traceback', info[1]) diff --git a/test/test_evaluation/test_train_evaluator.py b/test/test_evaluation/test_train_evaluator.py new file mode 100644 index 000000000..67132285e --- /dev/null +++ b/test/test_evaluation/test_train_evaluator.py @@ -0,0 +1,258 @@ +import multiprocessing +import os +import queue +import shutil +import sys +import unittest +import unittest.mock + +from ConfigSpace import Configuration + +import numpy as np + +from sklearn.base import BaseEstimator + +from smac.tae import StatusType + +from autoPyTorch.datasets.resampling_strategy import CrossValTypes +from autoPyTorch.evaluation.train_evaluator import TrainEvaluator +from autoPyTorch.evaluation.utils import read_queue +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy +from autoPyTorch.utils import backend + +this_directory = os.path.dirname(__file__) +sys.path.append(this_directory) +from evaluation_util import ( # noqa (E402: module level import not at top of file) + BaseEvaluatorTest, + get_binary_classification_datamanager, + get_multiclass_classification_datamanager, + get_regression_datamanager, +) # noqa (E402: module level import not at top of 
file) + + +class BackendMock(object): + def load_datamanager(self): + return get_multiclass_classification_datamanager() + + +class Dummy(object): + def __init__(self): + self.name = 'dummy' + + +class DummyPipeline(BasePipeline): + def __init__(self): + mocked_estimator = unittest.mock.Mock(spec=BaseEstimator) + self.steps = [('MockStep', mocked_estimator)] + pass + + def predict_proba(self, X, batch_size=None): + return np.tile([0.6, 0.4], (len(X), 1)) + + def get_additional_run_info(self) -> None: + return None + + +class TestTrainEvaluator(BaseEvaluatorTest, unittest.TestCase): + _multiprocess_can_split_ = True + + def setUp(self): + """ + Creates a backend mock + """ + tmp_dir_name = self.id() + self.ev_path = os.path.join(this_directory, '.tmp_evaluations', tmp_dir_name) + if os.path.exists(self.ev_path): + shutil.rmtree(self.ev_path) + os.makedirs(self.ev_path, exist_ok=False) + dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)] + dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)] + dummy_cv_model_files = [os.path.join(self.ev_path, str(n)) for n in range(200, 300)] + backend_mock = unittest.mock.Mock() + backend_mock.get_model_dir.return_value = self.ev_path + backend_mock.get_cv_model_dir.return_value = self.ev_path + backend_mock.get_model_path.side_effect = dummy_model_files + backend_mock.get_cv_model_path.side_effect = dummy_cv_model_files + backend_mock.get_prediction_output_path.side_effect = dummy_pred_files + backend_mock.temporary_directory = self.ev_path + self.backend_mock = backend_mock + + self.tmp_dir = os.path.join(self.ev_path, 'tmp_dir') + self.output_dir = os.path.join(self.ev_path, 'out_dir') + + def tearDown(self): + if os.path.exists(self.ev_path): + shutil.rmtree(self.ev_path) + + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') + def test_holdout(self, pipeline_mock): + # Binary iris, contains 69 train samples, 31 test samples + D = get_binary_classification_datamanager() + pipeline_mock.predict_proba.side_effect = \ + lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.side_effect = lambda **kwargs: pipeline_mock + pipeline_mock.get_additional_run_info.return_value = None + + configuration = unittest.mock.Mock(spec=Configuration) + backend_api = backend.create(self.tmp_dir, self.output_dir) + backend_api.load_datamanager = lambda: D + queue_ = multiprocessing.Queue() + + evaluator = TrainEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0) + evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) + evaluator.file_output.return_value = (None, {}) + + evaluator.fit_predict_and_loss() + + rval = read_queue(evaluator.queue) + self.assertEqual(len(rval), 1) + result = rval[0]['loss'] + self.assertEqual(len(rval[0]), 3) + self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) + + self.assertEqual(evaluator.file_output.call_count, 1) + self.assertEqual(result, 0.4782608695652174) + self.assertEqual(pipeline_mock.fit.call_count, 1) + # 3 calls because of train, holdout and test set + self.assertEqual(pipeline_mock.predict_proba.call_count, 3) + self.assertEqual(evaluator.file_output.call_count, 1) + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], len(D.splits[0][1])) + self.assertIsNone(evaluator.file_output.call_args[0][1]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + D.test_tensors[1].shape[0]) + 
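# the holdout evaluator keeps the single fitted pipeline, which must have been fit exactly once +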
self.assertEqual(evaluator.pipeline.fit.call_count, 1) + + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') + def test_cv(self, pipeline_mock): + D = get_binary_classification_datamanager(resampling_strategy=CrossValTypes.k_fold_cross_validation) + + pipeline_mock.predict_proba.side_effect = \ + lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1)) + pipeline_mock.side_effect = lambda **kwargs: pipeline_mock + pipeline_mock.get_additional_run_info.return_value = None + + configuration = unittest.mock.Mock(spec=Configuration) + backend_api = backend.create(self.tmp_dir, self.output_dir) + backend_api.load_datamanager = lambda: D + queue_ = multiprocessing.Queue() + + evaluator = TrainEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0) + evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output) + evaluator.file_output.return_value = (None, {}) + + evaluator.fit_predict_and_loss() + + rval = read_queue(evaluator.queue) + self.assertEqual(len(rval), 1) + result = rval[0]['loss'] + self.assertEqual(len(rval[0]), 3) + self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1) + + self.assertEqual(evaluator.file_output.call_count, 1) + self.assertEqual(result, 0.463768115942029) + self.assertEqual(pipeline_mock.fit.call_count, 3) + # 9 calls because of the training, holdout and + # test set (3 sets x 3 folds = 9) + self.assertEqual(pipeline_mock.predict_proba.call_count, 9) + # as the optimisation preds in cv is concatenation of the three folds, + # so it is 3*splits + self.assertEqual(evaluator.file_output.call_args[0][0].shape[0], + 3 * len(D.splits[0][1])) + self.assertIsNone(evaluator.file_output.call_args[0][1]) + self.assertEqual(evaluator.file_output.call_args[0][2].shape[0], + D.test_tensors[1].shape[0]) + + @unittest.mock.patch.object(TrainEvaluator, '_loss') + def test_file_output(self, loss_mock): + + D = get_regression_datamanager() + D.name = 'test' + self.backend_mock.load_datamanager.return_value = D + configuration = unittest.mock.Mock(spec=Configuration) + queue_ = multiprocessing.Queue() + loss_mock.return_value = None + + evaluator = TrainEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0) + + self.backend_mock.get_model_dir.return_value = True + evaluator.pipeline = 'model' + evaluator.Y_optimization = D.train_tensors[1] + rval = evaluator.file_output( + D.train_tensors[1], + None, + D.test_tensors[1], + ) + + self.assertEqual(rval, (None, {})) + self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 1) + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1) + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), + {'seed', 'idx', 'budget', 'model', 'cv_model', + 'ensemble_predictions', 'valid_predictions', 'test_predictions'}) + self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) + self.assertIsNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + + evaluator.pipelines = ['model2', 'model2'] + rval = evaluator.file_output( + D.train_tensors[1], + None, + D.test_tensors[1], + ) + self.assertEqual(rval, (None, {})) + self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 2) + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 2) + self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(), + {'seed', 'idx', 'budget', 'model', 'cv_model', + 
'ensemble_predictions', 'valid_predictions', 'test_predictions'}) + self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model']) + self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model']) + + # Check for not containing NaNs - that the models don't predict nonsense + # for unseen data + D.train_tensors[1][0] = np.NaN + rval = evaluator.file_output( + D.train_tensors[1], + None, + D.test_tensors[1], + ) + self.assertEqual( + rval, + ( + 1.0, + { + 'error': + 'Model predictions for optimization set contains NaNs.' + }, + ) + ) + + @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline') + def test_predict_proba_binary_classification(self, mock): + D = get_binary_classification_datamanager() + self.backend_mock.load_datamanager.return_value = D + mock.predict_proba.side_effect = lambda y, batch_size=None: np.array( + [[0.1, 0.9]] * y.shape[0] + ) + mock.side_effect = lambda **kwargs: mock + + configuration = unittest.mock.Mock(spec=Configuration) + queue_ = multiprocessing.Queue() + + evaluator = TrainEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0) + + evaluator.fit_predict_and_loss() + Y_optimization_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][1][ + 'ensemble_predictions'] + + for i in range(7): + self.assertEqual(0.9, Y_optimization_pred[i][1]) + + def test_get_results(self): + queue_ = multiprocessing.Queue() + for i in range(5): + queue_.put((i * 1, 1 - (i * 0.2), 0, "", StatusType.SUCCESS)) + result = read_queue(queue_) + self.assertEqual(len(result), 5) + self.assertEqual(result[0][0], 0) + self.assertAlmostEqual(result[0][1], 1.0) diff --git a/test/test_pipeline/components/__init__.py b/test/test_pipeline/components/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_pipeline/components/base.py b/test/test_pipeline/components/base.py new file mode 100644 index 000000000..120fa9fcd --- /dev/null +++ b/test/test_pipeline/components/base.py @@ -0,0 +1,69 @@ +import logging +import unittest + +from sklearn.datasets import make_classification + +import torch + +from autoPyTorch.constants import STRING_TO_TASK_TYPES +from autoPyTorch.pipeline.components.training.metrics.utils import get_metrics +from autoPyTorch.pipeline.components.training.trainer.base_trainer import BudgetTracker + + +class BaseTraining(unittest.TestCase): + + def setUp(self): + # Data + self.X, self.y = make_classification( + n_samples=5000, + n_features=4, + n_informative=3, + n_redundant=1, + n_repeated=0, + n_classes=2, + n_clusters_per_class=2, + shuffle=True, + random_state=0 + ) + self.X = torch.FloatTensor(self.X) + self.y = torch.LongTensor(self.y) + self.dataset = torch.utils.data.TensorDataset(self.X, self.y) + self.loader = torch.utils.data.DataLoader(self.dataset, batch_size=20) + self.dataset_properties = { + 'task_type': 'tabular_classification', + 'output_type': 'binary' + } + + # training requirements + layers = [] + layers.append(torch.nn.Linear(4, 4)) + layers.append(torch.nn.Sigmoid()) + layers.append(torch.nn.Linear(4, 2)) + self.model = torch.nn.Sequential(*layers) + self.criterion = torch.nn.CrossEntropyLoss() + self.optimizer = torch.optim.SGD(self.model.parameters(), lr=0.01) + self.device = torch.device('cpu') + self.logger = logging.getLogger('test') + self.metrics = get_metrics(self.dataset_properties) + self.epochs = 20 + self.budget_tracker = BudgetTracker( + budget_type='epochs', + 
max_epochs=self.epochs, + ) + self.task_type = STRING_TO_TASK_TYPES[self.dataset_properties['task_type']] + + def _overfit_model(self): + self.model.train() + for epoch in range(self.epochs): + total_loss = 0 + for x, y in self.loader: + self.optimizer.zero_grad() + # Forward pass + y_pred = self.model(self.X) + # Compute Loss + loss = self.criterion(y_pred.squeeze(), self.y) + total_loss += loss + + # Backward pass + loss.backward() + self.optimizer.step() diff --git a/test/test_pipeline/components/test_encoder_choice.py b/test/test_pipeline/components/test_encoder_choice.py new file mode 100644 index 000000000..f4dbcc119 --- /dev/null +++ b/test/test_pipeline/components/test_encoder_choice.py @@ -0,0 +1,53 @@ +import copy +import unittest + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import ( + EncoderChoice +) + + +class TestEncoderChoice(unittest.TestCase): + def test_get_set_config_space(self): + """Make sure that we can setup a valid choice in the encoder + choice""" + dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': [5]} + encoder_choice = EncoderChoice(dataset_properties) + cs = encoder_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the search space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(encoder_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + encoder_choice.set_hyperparameters(config) + + self.assertEqual(encoder_choice.choice.__class__, + encoder_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + self.assertIn(key, vars(encoder_choice.choice)) + self.assertEqual(value, encoder_choice.choice.__dict__[key]) + + def test_only_numerical(self): + dataset_properties = {'numerical_columns': list(range(4)), 'categorical_columns': []} + + chooser = EncoderChoice(dataset_properties) + configspace = chooser.get_hyperparameter_search_space().sample_configuration().get_dictionary() + self.assertEqual(configspace['__choice__'], 'NoEncoder') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_encoders.py b/test/test_pipeline/components/test_encoders.py new file mode 100644 index 000000000..1f210936f --- /dev/null +++ b/test/test_pipeline/components/test_encoders.py @@ -0,0 +1,126 @@ +import unittest + +import numpy as np +from numpy.testing import assert_array_equal + +from sklearn.base import BaseEstimator +from sklearn.compose import make_column_transformer + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.NoEncoder import NoEncoder +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.OneHotEncoder import OneHotEncoder +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.OrdinalEncoder import OrdinalEncoder + + +class TestEncoders(unittest.TestCase): + + 
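# Shared pattern below: a small mixed-type array (numerical column 0, categorical column 1) is split into train and test rows, the encoder component is fit on the train rows, and the returned sklearn encoder is checked through a column transformer +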
def test_one_hot_encoder_no_unknown(self): + data = np.array([[1, 'male'], + [1, 'female'], + [3, 'female'], + [2, 'male'], + [2, 'female']]) + + categorical_columns = [1] + numerical_columns = [0] + train_indices = np.array([0, 2, 3]) + test_indices = np.array([1, 4]) + + dataset_properties = { + 'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + 'categories': [['female', 'male']] + } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + encoder_component = OneHotEncoder() + encoder_component.fit(X) + X = encoder_component.transform(X) + encoder = X['encoder']['categorical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['encoder'], dict) + self.assertIsInstance(encoder, BaseEstimator) + self.assertIsNone(X['encoder']['numerical']) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((encoder, X['dataset_properties']['categorical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + # check if the transform is correct + assert_array_equal(transformed, [['1.0', '0.0', 1], ['1.0', '0.0', 2]]) + + def test_ordinal_encoder(self): + + data = np.array([[1, 'male'], + [1, 'female'], + [3, 'male'], + [2, 'female'], + [2, 'male']]) + + categorical_columns = [1] + numerical_columns = [0] + train_indices = np.array([0, 2, 3]) + test_indices = np.array([1, 4]) + + dataset_properties = { + 'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + 'categories': [['female', 'male', 'unknown']] + } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + encoder_component = OrdinalEncoder() + encoder_component.fit(X) + X = encoder_component.transform(X) + + encoder = X['encoder']['categorical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['encoder'], dict) + self.assertIsInstance(encoder, BaseEstimator) + self.assertIsNone(X['encoder']['numerical']) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((encoder, X['dataset_properties']['categorical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + # check if we got the expected transformed array + assert_array_equal(transformed, [['0.0', 1], ['1.0', 2]]) + + def test_none_encoder(self): + + data = np.array([[1, 'male'], + [1, 'female'], + [3, 'unknown'], + [2, 'female'], + [2, 'male']]) + + categorical_columns = [1] + numerical_columns = [0] + train_indices = np.array([0, 2, 3]) + + dataset_properties = { + 'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + 'categories': [['female', 'male', 'unknown']] + } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + encoder_component = NoEncoder() + encoder_component.fit(X) + X = encoder_component.transform(X) + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['encoder'], dict) + self.assertIsNone(X['encoder']['categorical']) + self.assertIsNone(X['encoder']['numerical']) diff --git a/test/test_pipeline/components/test_feature_data_loader.py b/test/test_pipeline/components/test_feature_data_loader.py new file mode 100644 index 
000000000..958c9ad7b --- /dev/null +++ b/test/test_pipeline/components/test_feature_data_loader.py @@ -0,0 +1,43 @@ +import unittest +import unittest.mock + +import torchvision + +from autoPyTorch.pipeline.components.training.data_loader.feature_data_loader import ( + FeatureDataLoader +) + + +class TestFeatureDataLoader(unittest.TestCase): + def test_build_transform_small_preprocess_true(self): + """ + Makes sure a proper composition is created + """ + loader = FeatureDataLoader() + + fit_dictionary = {'dataset_properties': {'is_small_preprocess': True}} + for thing in ['imputer', 'scaler', 'encoder']: + fit_dictionary[thing] = [unittest.mock.Mock()] + + compose = loader.build_transform(fit_dictionary, mode='train') + + self.assertIsInstance(compose, torchvision.transforms.Compose) + + # No preprocessing needed here as it was done before + self.assertEqual(len(compose.transforms), 1) + + def test_build_transform_small_preprocess_false(self): + """ + Makes sure a proper composition is created + """ + loader = FeatureDataLoader() + + fit_dictionary = {'dataset_properties': {'is_small_preprocess': False}, + 'preprocess_transforms': [unittest.mock.Mock()]} + + compose = loader.build_transform(fit_dictionary, mode='train') + + self.assertIsInstance(compose, torchvision.transforms.Compose) + + # We expect the to tensor, the preproces transforms and the check_array + self.assertEqual(len(compose.transforms), 3) diff --git a/test/test_pipeline/components/test_image_data_loader.py b/test/test_pipeline/components/test_image_data_loader.py new file mode 100644 index 000000000..76023fec3 --- /dev/null +++ b/test/test_pipeline/components/test_image_data_loader.py @@ -0,0 +1,28 @@ +import unittest +import unittest.mock + +import torchvision + +from autoPyTorch.pipeline.components.training.data_loader.image_data_loader import ( + ImageDataLoader +) + + +class TestFeatureDataLoader(unittest.TestCase): + def test_build_transform(self): + """ + Makes sure a proper composition is created + """ + loader = ImageDataLoader() + + fit_dictionary = dict() + fit_dictionary['dataset_properties'] = dict() + fit_dictionary['dataset_properties']['is_small_preprocess'] = unittest.mock.Mock(()) + fit_dictionary['image_augmenter'] = unittest.mock.Mock() + + compose = loader.build_transform(fit_dictionary, mode='train') + + self.assertIsInstance(compose, torchvision.transforms.Compose) + + # We expect to tensor and image augmenter + self.assertEqual(len(compose.transforms), 2) diff --git a/test/test_pipeline/components/test_imputers.py b/test/test_pipeline/components/test_imputers.py new file mode 100644 index 000000000..ac8f0e143 --- /dev/null +++ b/test/test_pipeline/components/test_imputers.py @@ -0,0 +1,218 @@ +import unittest + +import numpy as np +from numpy.testing import assert_array_equal + +from sklearn.base import BaseEstimator, clone +from sklearn.compose import make_column_transformer + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer + + +class TestSimpleImputer(unittest.TestCase): + + def test_get_config_space(self): + dataset_properties = dict(categorical_columns=[0, 1], + numerical_columns=[1, 2]) + config = SimpleImputer.get_hyperparameter_search_space(dataset_properties).sample_configuration() + estimator = SimpleImputer(**config) + estimator_clone = clone(estimator) + estimator_clone_params = estimator_clone.get_params() + + # Make sure all keys are copied properly + for k, v in estimator.get_params().items(): + self.assertIn(k, 
estimator_clone_params) + + # Make sure the params getter of estimator are honored + klass = estimator.__class__ + new_object_params = estimator.get_params(deep=False) + for name, param in new_object_params.items(): + new_object_params[name] = clone(param, safe=False) + new_object = klass(**new_object_params) + params_set = new_object.get_params(deep=False) + + for name in new_object_params: + param1 = new_object_params[name] + param2 = params_set[name] + self.assertEqual(param1, param2) + + def test_mean_imputation(self): + data = np.array([['1.0', np.nan, 3], + [np.nan, 8, 9], + ['4.0', 5, np.nan], + [np.nan, 2, 3], + ['7.0', np.nan, 9], + ['4.0', np.nan, np.nan]], dtype=object) + numerical_columns = [1, 2] + categorical_columns = [0] + train_indices = np.array([0, 2, 3]) + test_indices = np.array([1, 4, 5]) + dataset_properties = { + 'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + imputer_component = SimpleImputer(numerical_strategy='mean') + + imputer_component = imputer_component.fit(X) + X = imputer_component.transform(X) + categorical_imputer = X['imputer']['categorical'] + numerical_imputer = X['imputer']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['imputer'], dict) + self.assertIsInstance(categorical_imputer, BaseEstimator) + self.assertIsInstance(numerical_imputer, BaseEstimator) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((categorical_imputer, + X['dataset_properties']['categorical_columns']), + (numerical_imputer, + X['dataset_properties']['numerical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_array_equal(transformed.astype(str), np.array([[1.0, 8.0, 9.0], + [7.0, 3.5, 9.0], + [4.0, 3.5, 3.0]], dtype=str)) + + def test_median_imputation(self): + data = np.array([['1.0', np.nan, 3], + [np.nan, 8, 9], + ['4.0', 5, np.nan], + [np.nan, 2, 3], + ['7.0', np.nan, 9], + ['4.0', np.nan, np.nan]], dtype=object) + numerical_columns = [1, 2] + categorical_columns = [0] + train_indices = np.array([0, 2, 3]) + test_indices = np.array([1, 4, 5]) + dataset_properties = { + 'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + imputer_component = SimpleImputer(numerical_strategy='median') + + imputer_component = imputer_component.fit(X) + X = imputer_component.transform(X) + categorical_imputer = X['imputer']['categorical'] + numerical_imputer = X['imputer']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['imputer'], dict) + self.assertIsInstance(categorical_imputer, BaseEstimator) + self.assertIsInstance(numerical_imputer, BaseEstimator) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer( + (categorical_imputer, X['dataset_properties']['categorical_columns']), + (numerical_imputer, X['dataset_properties']['numerical_columns']), + remainder='passthrough' + ) + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_array_equal(transformed.astype(str), np.array([[1.0, 8.0, 9.0], + [7.0, 3.5, 9.0], + 
[4.0, 3.5, 3.0]], dtype=str)) + + def test_frequent_imputation(self): + data = np.array([['1.0', np.nan, 3], + [np.nan, 8, 9], + ['4.0', 5, np.nan], + [np.nan, 2, 3], + ['7.0', np.nan, 9], + ['4.0', np.nan, np.nan]], dtype=object) + numerical_columns = [1, 2] + categorical_columns = [0] + train_indices = np.array([0, 2, 3]) + test_indices = np.array([1, 4, 5]) + dataset_properties = { + 'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + imputer_component = SimpleImputer(numerical_strategy='most_frequent', + categorical_strategy='most_frequent') + + imputer_component = imputer_component.fit(X) + X = imputer_component.transform(X) + categorical_imputer = X['imputer']['categorical'] + numerical_imputer = X['imputer']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['imputer'], dict) + self.assertIsInstance(categorical_imputer, BaseEstimator) + self.assertIsInstance(numerical_imputer, BaseEstimator) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer( + (categorical_imputer, X['dataset_properties']['categorical_columns']), + (numerical_imputer, X['dataset_properties']['numerical_columns']), + remainder='passthrough' + ) + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_array_equal(transformed.astype(str), np.array([[1.0, 8, 9], + [7.0, 2, 9], + [4.0, 2, 3]], dtype=str)) + + def test_constant_imputation(self): + data = np.array([['1.0', np.nan, 3], + [np.nan, 8, 9], + ['4.0', 5, np.nan], + [np.nan, 2, 3], + ['7.0', np.nan, 9], + ['4.0', np.nan, np.nan]], dtype=object) + numerical_columns = [1, 2] + categorical_columns = [0] + train_indices = np.array([0, 2, 3]) + test_indices = np.array([1, 4, 5]) + dataset_properties = { + 'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + imputer_component = SimpleImputer(numerical_strategy='constant_zero', + categorical_strategy='constant_!missing!') + + imputer_component = imputer_component.fit(X) + X = imputer_component.transform(X) + categorical_imputer = X['imputer']['categorical'] + numerical_imputer = X['imputer']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['imputer'], dict) + self.assertIsInstance(categorical_imputer, BaseEstimator) + self.assertIsInstance(numerical_imputer, BaseEstimator) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer( + (categorical_imputer, X['dataset_properties']['categorical_columns']), + (numerical_imputer, X['dataset_properties']['numerical_columns']), + remainder='passthrough' + ) + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + assert_array_equal(transformed.astype(str), np.array([['!missing!', 8, 9], + [7.0, '0', 9], + [4.0, '0', '0']], dtype=str)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_normalizer_choice.py b/test/test_pipeline/components/test_normalizer_choice.py new file mode 100644 index 000000000..dbb711ab0 --- /dev/null +++ b/test/test_pipeline/components/test_normalizer_choice.py @@ -0,0 +1,47 @@ +import copy 
+import unittest + +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.base_normalizer_choice import ( + NormalizerChoice +) + + +class TestNormalizerChoice(unittest.TestCase): + + def test_get_set_config_space(self): + """Make sure that we can set up a valid choice in the normalizer + choice""" + dataset_properties = {} + normalizer_choice = NormalizerChoice(dataset_properties) + cs = normalizer_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the search space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(normalizer_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increases the confidence. We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + normalizer_choice.set_hyperparameters(config) + + self.assertEqual(normalizer_choice.choice.__class__, + normalizer_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + self.assertIn(key, vars(normalizer_choice.choice)) + self.assertEqual(value, normalizer_choice.choice.__dict__[key]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_normalizers.py b/test/test_pipeline/components/test_normalizers.py new file mode 100644 index 000000000..d8914773f --- /dev/null +++ b/test/test_pipeline/components/test_normalizers.py @@ -0,0 +1,46 @@ +import unittest + +import numpy as np +from numpy.testing import assert_allclose, assert_array_equal + +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.ImageNormalizer import ImageNormalizer +from autoPyTorch.pipeline.components.preprocessing.image_preprocessing.normalise.NoNormalizer import NoNormalizer + + +class TestNormalizers(unittest.TestCase): + def initialise(self): + self.train = np.random.randint(0, 255, (3, 2, 2, 3)) + self.mean = np.array([np.mean(self.train[:, :, :, i]) for i in range(3)]) + self.std = np.array([np.std(self.train[:, :, :, i]) for i in range(3)]) + + def test_image_normalizer(self): + self.initialise() + dataset_properties = {'mean': self.mean, 'std': self.std, } + X = {'dataset_properties': dataset_properties, 'X_train': self.train} + + normalizer = ImageNormalizer() + normalizer = normalizer.fit(X) + X = normalizer.transform(X) + + # check that the normalizer added to X is this component instance + self.assertEqual(X['normalise'], normalizer) + epsilon = 1e-8 + train = self.train - self.mean + train *= 1.0 / (epsilon + self.std) + + assert_allclose(train, normalizer(self.train), rtol=1e-5) + + def test_no_normalizer(self): + self.initialise() + + dataset_properties = {'mean': self.mean, 'std': self.std, } + X = {'dataset_properties': dataset_properties, 'X_train': self.train} + + normalizer = NoNormalizer() + normalizer = normalizer.fit(X) + X = normalizer.transform(X) + + # check that the normalizer added to X is this component instance + self.assertEqual(X['normalise'], normalizer) + + assert_array_equal(self.train, normalizer(self.train)) diff --git
a/test/test_pipeline/components/test_scaler_choice.py b/test/test_pipeline/components/test_scaler_choice.py new file mode 100644 index 000000000..9d10af59f --- /dev/null +++ b/test/test_pipeline/components/test_scaler_choice.py @@ -0,0 +1,54 @@ +import copy +import unittest + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice + + +class TestRescalerChoice(unittest.TestCase): + + def test_get_set_config_space(self): + """Make sure that we can setup a valid choice in the encoder + choice""" + dataset_properties = {'categorical_columns': list(range(4)), + 'numerical_columns': [5], + 'issparse': False} + rescaler_choice = ScalerChoice(dataset_properties) + cs = rescaler_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the search space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(rescaler_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + rescaler_choice.set_hyperparameters(config) + + self.assertEqual(rescaler_choice.choice.__class__, + rescaler_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + self.assertIn(key, vars(rescaler_choice.choice)) + self.assertEqual(value, rescaler_choice.choice.__dict__[key]) + + def test_only_categorical(self): + dataset_properties = {'categorical_columns': list(range(4)), 'numerical_columns': []} + chooser = ScalerChoice(dataset_properties) + configspace = chooser.get_hyperparameter_search_space(dataset_properties).sample_configuration().\ + get_dictionary() + self.assertEqual(configspace['__choice__'], 'NoScaler') + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_scalers.py b/test/test_pipeline/components/test_scalers.py new file mode 100644 index 000000000..94ba0f2dc --- /dev/null +++ b/test/test_pipeline/components/test_scalers.py @@ -0,0 +1,241 @@ +import unittest + +import numpy as np +from numpy.testing import assert_allclose + +from sklearn.base import BaseEstimator +from sklearn.compose import make_column_transformer + +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.MinMaxScaler import MinMaxScaler +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.NoScaler import NoScaler +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.Normalizer import Normalizer +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.StandardScaler import StandardScaler + + +class TestNormalizer(unittest.TestCase): + + def test_l2_norm(self): + data = np.array([[1, 2, 3], + [7, 8, 9], + [4, 5, 6], + [11, 12, 13], + [17, 18, 19], + [14, 15, 16]]) + train_indices = np.array([0, 2, 5]) + test_indices = np.array([1, 4, 3]) + categorical_columns = list() + numerical_columns = [0, 1, 2] + dataset_properties = {'categorical_columns': categorical_columns, 
+ 'numerical_columns': numerical_columns, } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + scaler_component = Normalizer(norm='mean_squared') + + scaler_component = scaler_component.fit(X) + X = scaler_component.transform(X) + scaler = X['scaler']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['scaler'], dict) + self.assertIsInstance(scaler, BaseEstimator) + self.assertIsNone(X['scaler']['categorical']) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((scaler, X['dataset_properties']['numerical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_allclose(transformed, np.array([[0.50257071, 0.57436653, 0.64616234], + [0.54471514, 0.5767572, 0.60879927], + [0.5280169, 0.57601843, 0.62401997]])) + + def test_l1_norm(self): + data = np.array([[1, 2, 3], + [7, 8, 9], + [4, 5, 6], + [11, 12, 13], + [17, 18, 19], + [14, 15, 16]]) + train_indices = np.array([0, 2, 5]) + test_indices = np.array([1, 4, 3]) + categorical_columns = list() + numerical_columns = [0, 1, 2] + dataset_properties = {'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + scaler_component = Normalizer(norm='mean_abs') + + scaler_component = scaler_component.fit(X) + X = scaler_component.transform(X) + scaler = X['scaler']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['scaler'], dict) + self.assertIsInstance(scaler, BaseEstimator) + self.assertIsNone(X['scaler']['categorical']) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((scaler, X['dataset_properties']['numerical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_allclose(transformed, np.array([[0.29166667, 0.33333333, 0.375], + [0.31481481, 0.33333333, 0.35185185], + [0.30555556, 0.33333333, 0.36111111]])) + + def test_max_norm(self): + data = np.array([[1, 2, 3], + [7, 8, 9], + [4, 5, 6], + [11, 12, 13], + [17, 18, 19], + [14, 15, 16]]) + train_indices = np.array([0, 2, 5]) + test_indices = np.array([1, 4, 3]) + categorical_columns = list() + numerical_columns = [0, 1, 2] + dataset_properties = {'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + scaler_component = Normalizer(norm='max') + + scaler_component = scaler_component.fit(X) + X = scaler_component.transform(X) + scaler = X['scaler']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['scaler'], dict) + self.assertIsInstance(scaler, BaseEstimator) + self.assertIsNone(X['scaler']['categorical']) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((scaler, X['dataset_properties']['numerical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_allclose(transformed, np.array([[0.77777778, 0.88888889, 1], + [0.89473684, 
0.94736842, 1], + [0.84615385, 0.92307692, 1]])) + + +class TestMinMaxScaler(unittest.TestCase): + + def test_minmax_scaler(self): + data = np.array([[1, 2, 3], + [7, 8, 9], + [4, 5, 6], + [11, 12, 13], + [17, 18, 19], + [14, 15, 16]]) + train_indices = np.array([0, 2, 5]) + test_indices = np.array([1, 4, 3]) + categorical_columns = list() + numerical_columns = [0, 1, 2] + dataset_properties = {'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + scaler_component = MinMaxScaler() + + scaler_component = scaler_component.fit(X) + X = scaler_component.transform(X) + scaler = X['scaler']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['scaler'], dict) + self.assertIsInstance(scaler, BaseEstimator) + self.assertIsNone(X['scaler']['categorical']) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((scaler, X['dataset_properties']['numerical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_allclose(transformed, np.array([[0.46153846, 0.46153846, 0.46153846], + [1.23076923, 1.23076923, 1.23076923], + [0.76923077, 0.76923077, 0.76923077]])) + + +class TestStandardScaler(unittest.TestCase): + + def test_standard_scaler(self): + data = np.array([[1, 2, 3], + [7, 8, 9], + [4, 5, 6], + [11, 12, 13], + [17, 18, 19], + [14, 15, 16]]) + train_indices = np.array([0, 2, 5]) + test_indices = np.array([1, 4, 3]) + categorical_columns = list() + numerical_columns = [0, 1, 2] + dataset_properties = {'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, + 'issparse': False} + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + scaler_component = StandardScaler() + + scaler_component = scaler_component.fit(X) + X = scaler_component.transform(X) + scaler = X['scaler']['numerical'] + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['scaler'], dict) + self.assertIsInstance(scaler, BaseEstimator) + self.assertIsNone(X['scaler']['categorical']) + + # make column transformer with returned encoder to fit on data + column_transformer = make_column_transformer((scaler, X['dataset_properties']['numerical_columns']), + remainder='passthrough') + column_transformer = column_transformer.fit(X['X_train']) + transformed = column_transformer.transform(data[test_indices]) + + assert_allclose(transformed, np.array([[0.11995203, 0.11995203, 0.11995203], + [1.91923246, 1.91923246, 1.91923246], + [0.8396642, 0.8396642, 0.8396642]])) + + +class TestNoneScaler(unittest.TestCase): + + def test_none_scaler(self): + data = np.array([[1, 2, 3], + [7, 8, 9], + [4, 5, 6], + [11, 12, 13], + [17, 18, 19], + [14, 15, 16]]) + train_indices = np.array([0, 2, 5]) + categorical_columns = list() + numerical_columns = [0, 1, 2] + dataset_properties = {'categorical_columns': categorical_columns, + 'numerical_columns': numerical_columns, } + X = { + 'X_train': data[train_indices], + 'dataset_properties': dataset_properties + } + scaler_component = NoScaler() + + scaler_component = scaler_component.fit(X) + X = scaler_component.transform(X) + + # check if the fit dictionary X is modified as expected + self.assertIsInstance(X['scaler'], dict) + self.assertIsNone(X['scaler']['categorical']) + 
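# NoScaler is a pass-through, so neither column group receives a fitted scaler +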
self.assertIsNone(X['scaler']['numerical']) diff --git a/test/test_pipeline/components/test_setup.py b/test/test_pipeline/components/test_setup.py new file mode 100644 index 000000000..706d0718c --- /dev/null +++ b/test/test_pipeline/components/test_setup.py @@ -0,0 +1,478 @@ +import copy +import unittest.mock + +from ConfigSpace.configuration_space import ConfigurationSpace + +import numpy as np + +from sklearn.base import clone + +import torch + +import autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice as lr_components +import autoPyTorch.pipeline.components.setup.network.base_network_choice as network_components +import \ + autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice as network_initializer_components # noqa: E501 +import autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice as optimizer_components +from autoPyTorch.pipeline.components.setup.lr_scheduler.base_scheduler_choice import ( + BaseLRComponent, + SchedulerChoice +) +from autoPyTorch.pipeline.components.setup.network.base_network_choice import ( + BaseNetworkComponent, + NetworkChoice +) +from autoPyTorch.pipeline.components.setup.network_initializer.base_network_init_choice import ( + BaseNetworkInitializerComponent, + NetworkInitializerChoice +) +from autoPyTorch.pipeline.components.setup.optimizer.base_optimizer_choice import ( + BaseOptimizerComponent, + OptimizerChoice +) + + +class DummyLR(BaseLRComponent): + def __init__(self, random_state=None): + pass + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + return cs + + def get_properties(dataset_properties=None): + return { + 'shortname': 'Dummy', + 'name': 'Dummy', + } + + +class DummyOptimizer(BaseOptimizerComponent): + def __init__(self, random_state=None): + pass + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + return cs + + def get_properties(dataset_properties=None): + return { + 'shortname': 'Dummy', + 'name': 'Dummy', + } + + +class DummyNet(BaseNetworkComponent): + def __init__(self, random_state=None): + pass + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + return cs + + def get_properties(dataset_properties=None): + return { + 'shortname': 'Dummy', + 'name': 'Dummy', + } + + +class DummyNetworkInitializer(BaseNetworkInitializerComponent): + def __init__(self, random_state=None): + pass + + @staticmethod + def get_hyperparameter_search_space(dataset_properties=None): + cs = ConfigurationSpace() + return cs + + def get_properties(dataset_properties=None): + return { + 'shortname': 'Dummy', + 'name': 'Dummy', + } + + +class SchedulerTest(unittest.TestCase): + def test_every_scheduler_is_valid(self): + """ + Makes sure that every scheduler is a valid estimator. + That is, we can fully create an object via get/set params. + + This also test that we can properly initialize each one + of them + """ + scheduler_choice = SchedulerChoice(dataset_properties={}) + + # Make sure all components are returned + self.assertEqual(len(scheduler_choice.get_components().keys()), 7) + + # For every scheduler in the components, make sure + # that it complies with the scikit learn estimator. 
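+ # (sklearn's clone() rebuilds an estimator purely from get_params(), so any parameter not exposed there would be lost)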
+ # This is important because usually components are forked to workers, + # so the set/get params methods should recreate the same object + for name, scheduler in scheduler_choice.get_components().items(): + config = scheduler.get_hyperparameter_search_space().sample_configuration() + estimator = scheduler(**config) + estimator_clone = clone(estimator) + estimator_clone_params = estimator_clone.get_params() + + # Make sure all keys are copied properly + for k, v in estimator.get_params().items(): + self.assertIn(k, estimator_clone_params) + + # Make sure the params getter of estimator are honored + klass = estimator.__class__ + new_object_params = estimator.get_params(deep=False) + for name, param in new_object_params.items(): + new_object_params[name] = clone(param, safe=False) + new_object = klass(**new_object_params) + params_set = new_object.get_params(deep=False) + + for name in new_object_params: + param1 = new_object_params[name] + param2 = params_set[name] + self.assertEqual(param1, param2) + + def test_get_set_config_space(self): + """Make sure that we can setup a valid choice in the scheduler + choice""" + scheduler_choice = SchedulerChoice(dataset_properties={}) + cs = scheduler_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the serach space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(scheduler_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + scheduler_choice.set_hyperparameters(config) + + self.assertEqual(scheduler_choice.choice.__class__, + scheduler_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + self.assertIn(key, vars(scheduler_choice.choice)) + self.assertEqual(value, scheduler_choice.choice.__dict__[key]) + + def test_scheduler_add(self): + """Makes sure that a component can be added to the CS""" + # No third party components to start with + self.assertEqual(len(lr_components._addons.components), 0) + + # Then make sure the scheduler can be added and query'ed + lr_components.add_scheduler(DummyLR) + self.assertEqual(len(lr_components._addons.components), 1) + cs = SchedulerChoice(dataset_properties={}).get_hyperparameter_search_space() + self.assertIn('DummyLR', str(cs)) + + +class OptimizerTest(unittest.TestCase): + def test_every_optimizer_is_valid(self): + """ + Makes sure that every optimizer is a valid estimator. + That is, we can fully create an object via get/set params. + + This also test that we can properly initialize each one + of them + """ + optimizer_choice = OptimizerChoice(dataset_properties={}) + + # Make sure all components are returned + self.assertEqual(len(optimizer_choice.get_components().keys()), 4) + + # For every optimizer in the components, make sure + # that it complies with the scikit learn estimator. 
+ # This is important because usually components are forked to workers, + # so the set/get params methods should recreate the same object + for name, optimizer in optimizer_choice.get_components().items(): + config = optimizer.get_hyperparameter_search_space().sample_configuration() + estimator = optimizer(**config) + estimator_clone = clone(estimator) + estimator_clone_params = estimator_clone.get_params() + + # Make sure all keys are copied properly + for k, v in estimator.get_params().items(): + self.assertIn(k, estimator_clone_params) + + # Make sure the params getter of estimator are honored + klass = estimator.__class__ + new_object_params = estimator.get_params(deep=False) + for name, param in new_object_params.items(): + new_object_params[name] = clone(param, safe=False) + new_object = klass(**new_object_params) + params_set = new_object.get_params(deep=False) + + for name in new_object_params: + param1 = new_object_params[name] + param2 = params_set[name] + self.assertEqual(param1, param2) + + def test_get_set_config_space(self): + """Make sure that we can setup a valid choice in the optimizer + choice""" + optimizer_choice = OptimizerChoice(dataset_properties={}) + cs = optimizer_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the serach space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(optimizer_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + optimizer_choice.set_hyperparameters(config) + + self.assertEqual(optimizer_choice.choice.__class__, + optimizer_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + self.assertIn(key, vars(optimizer_choice.choice)) + self.assertEqual(value, optimizer_choice.choice.__dict__[key]) + + def test_optimizer_add(self): + """Makes sure that a component can be added to the CS""" + # No third party components to start with + self.assertEqual(len(optimizer_components._addons.components), 0) + + # Then make sure the optimizer can be added and query'ed + optimizer_components.add_optimizer(DummyOptimizer) + self.assertEqual(len(optimizer_components._addons.components), 1) + cs = OptimizerChoice(dataset_properties={}).get_hyperparameter_search_space() + self.assertIn('DummyOptimizer', str(cs)) + + +class NetworkTest(unittest.TestCase): + def test_every_network_is_valid(self): + """ + Makes sure that every network is a valid estimator. + That is, we can fully create an object via get/set params. + + This also test that we can properly initialize each one + of them + """ + network_choice = NetworkChoice(dataset_properties={}) + + # Make sure all components are returned + self.assertEqual(len(network_choice.get_components().keys()), 1) + + # For every network in the components, make sure + # that it complies with the scikit learn estimator. 
+ # This is important because usually components are forked to workers, + # so the set/get params methods should recreate the same object + for name, network in network_choice.get_components().items(): + config = network.get_hyperparameter_search_space().sample_configuration() + estimator = network(**config) + estimator_clone = clone(estimator) + estimator_clone_params = estimator_clone.get_params() + + # Make sure all keys are copied properly + for k, v in estimator.get_params().items(): + self.assertIn(k, estimator_clone_params) + + # Make sure the params getter of estimator are honored + klass = estimator.__class__ + new_object_params = estimator.get_params(deep=False) + for name, param in new_object_params.items(): + new_object_params[name] = clone(param, safe=False) + new_object = klass(**new_object_params) + params_set = new_object.get_params(deep=False) + + for name in new_object_params: + param1 = new_object_params[name] + param2 = params_set[name] + self.assertEqual(param1, param2) + + def test_backbone_head_net(self): + network_choice = NetworkChoice(dataset_properties={}) + task_types = {"image_classification": ((1, 3, 64, 64), (5,)), + "image_regression": ((1, 3, 64, 64), (1,)), + "time_series_classification": ((1, 32, 6), (5,)), + "time_series_regression": ((1, 32, 6), (1,)), + "tabular_classification": ((1, 100,), (5,)), + "tabular_regression": ((1, 100), (1,))} + + device = torch.device("cpu") + for task_type, (input_shape, output_shape) in task_types.items(): + cs = network_choice.get_hyperparameter_search_space(dataset_properties={"task_type": task_type}, + include=["BackboneHeadNet"]) + # test 10 random configurations + for i in range(10): + config = cs.sample_configuration() + network_choice.set_hyperparameters(config) + network_choice.fit(X={"X_train": np.zeros(input_shape), + "y_train": np.zeros(output_shape), + 'dataset_properties': {"task_type": task_type, + 'input_shape': input_shape[1:], + "output_shape": output_shape, + "num_classes": output_shape[0]}}, y=None) + self.assertNotEqual(network_choice.choice.network, None) + network_choice.choice.to(device) + dummy_input = torch.randn((2, *input_shape[1:]), dtype=torch.float) + output = network_choice.choice.network(dummy_input) + self.assertEqual(output.shape[1:], output_shape) + + def test_get_set_config_space(self): + """Make sure that we can setup a valid choice in the network + choice""" + network_choice = NetworkChoice(dataset_properties={}) + cs = network_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the search space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(network_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. 
We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + network_choice.set_hyperparameters(config) + + self.assertEqual(network_choice.choice.__class__, + network_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + self.assertNotEqual(selected_choice, None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + + key = key.replace(selected_choice + ':', '') + # In the case of MLP, parameters are dynamic, so they exist in config + parameters = vars(network_choice.choice) + parameters.update(vars(network_choice.choice)['config']) + self.assertIn(key, parameters) + self.assertEqual(value, parameters[key]) + + def test_network_add(self): + """Makes sure that a component can be added to the CS""" + # No third party components to start with + self.assertEqual(len(network_components._addons.components), 0) + + # Then make sure the scheduler can be added and query'ed + network_components.add_network(DummyNet) + self.assertEqual(len(network_components._addons.components), 1) + cs = NetworkChoice(dataset_properties={}).get_hyperparameter_search_space() + self.assertIn('DummyNet', str(cs)) + + +class NetworkInitializerTest(unittest.TestCase): + def test_every_network_initializer_is_valid(self): + """ + Makes sure that every network_initializer is a valid estimator. + That is, we can fully create an object via get/set params. + + This also test that we can properly initialize each one + of them + """ + network_initializer_choice = NetworkInitializerChoice(dataset_properties={}) + + # Make sure all components are returned + self.assertEqual(len(network_initializer_choice.get_components().keys()), 5) + + # For every optimizer in the components, make sure + # that it complies with the scikit learn estimator. 
+ # This is important because usually components are forked to workers, + # so the set/get params methods should recreate the same object + for name, network_initializer in network_initializer_choice.get_components().items(): + config = network_initializer.get_hyperparameter_search_space().sample_configuration() + estimator = network_initializer(**config) + estimator_clone = clone(estimator) + estimator_clone_params = estimator_clone.get_params() + + # Make sure all keys are copied properly + for k, v in estimator.get_params().items(): + self.assertIn(k, estimator_clone_params) + + # Make sure the params getter of estimator are honored + klass = estimator.__class__ + new_object_params = estimator.get_params(deep=False) + for name, param in new_object_params.items(): + new_object_params[name] = clone(param, safe=False) + new_object = klass(**new_object_params) + params_set = new_object.get_params(deep=False) + + for name in new_object_params: + param1 = new_object_params[name] + param2 = params_set[name] + self.assertEqual(param1, param2) + + def test_get_set_config_space(self): + """Make sure that we can setup a valid choice in the network_initializer + choice""" + network_initializer_choice = NetworkInitializerChoice(dataset_properties={}) + cs = network_initializer_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the serach space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(network_initializer_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + network_initializer_choice.set_hyperparameters(config) + + self.assertEqual(network_initializer_choice.choice.__class__, + network_initializer_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + self.assertIn(key, vars(network_initializer_choice.choice)) + self.assertEqual(value, network_initializer_choice.choice.__dict__[key]) + + def test_network_initializer_add(self): + """Makes sure that a component can be added to the CS""" + # No third party components to start with + self.assertEqual(len(network_initializer_components._addons.components), 0) + + # Then make sure the network_initializer can be added and query'ed + network_initializer_components.add_network_initializer(DummyNetworkInitializer) + self.assertEqual(len(network_initializer_components._addons.components), 1) + cs = NetworkInitializerChoice(dataset_properties={}).get_hyperparameter_search_space() + self.assertIn('DummyNetworkInitializer', str(cs)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_setup_image_augmenter.py b/test/test_pipeline/components/test_setup_image_augmenter.py new file mode 100644 index 000000000..353d6a3ee --- /dev/null +++ b/test/test_pipeline/components/test_setup_image_augmenter.py @@ -0,0 +1,51 @@ +import unittest + +from imgaug.augmenters.meta import Augmenter, Sequential + +import numpy as np + +from 
autoPyTorch.pipeline.components.setup.augmentation.image.ImageAugmenter import ImageAugmenter + + +class TestImageAugmenter(unittest.TestCase): + def test_every_augmenter(self): + image_augmenter = ImageAugmenter() + # To test every augmenter, we set the configuration as default where each augmenter + # has use_augmenter set to True + configuration = image_augmenter.get_hyperparameter_search_space().get_default_configuration() + image_augmenter = image_augmenter.set_hyperparameters(configuration=configuration) + X = dict(X_train=np.random.randint(0, 255, (8, 3, 16, 16), dtype=np.uint8), + dataset_properties=dict(image_height=16, image_width=16)) + for name, augmenter in image_augmenter.available_augmenters.items(): + augmenter = augmenter.fit(X) + # check if augmenter in the component has correct name + self.assertEqual(augmenter.get_image_augmenter().name, name) + # test if augmenter has an Augmenter attribute + self.assertIsInstance(augmenter.get_image_augmenter(), Augmenter) + + # test if augmenter works on a random image + train_aug = augmenter(X['X_train']) + self.assertIsInstance(train_aug, np.ndarray) + # check if data was changed + self.assertIsNot(train_aug, X['X_train']) + + def test_get_set_config_space(self): + X = dict(X_train=np.random.randint(0, 255, (8, 3, 16, 16), dtype=np.uint8), + dataset_properties=dict(image_height=16, image_width=16)) + image_augmenter = ImageAugmenter() + configuration = image_augmenter.get_hyperparameter_search_space().sample_configuration() + image_augmenter = image_augmenter.set_hyperparameters(configuration=configuration) + image_augmenter = image_augmenter.fit(X) + X = image_augmenter.transform(X) + + image_augmenter = X['image_augmenter'] + # test if a sequential augmenter was formed + self.assertIsInstance(image_augmenter.augmenter, Sequential) + + # test if augmenter works on a random image + train_aug = image_augmenter(X['X_train']) + self.assertIsInstance(train_aug, np.ndarray) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_setup_preprocessing_node.py b/test/test_pipeline/components/test_setup_preprocessing_node.py new file mode 100644 index 000000000..11e3b8901 --- /dev/null +++ b/test/test_pipeline/components/test_setup_preprocessing_node.py @@ -0,0 +1,165 @@ +import unittest +from unittest import mock + +import numpy as np + +from sklearn.base import BaseEstimator + +from autoPyTorch.constants import ( + MULTICLASS, + OUTPUT_TYPES_TO_STRING, + TABULAR_CLASSIFICATION, + TASK_TYPES_TO_STRING, +) +from autoPyTorch.pipeline.image_classification import ImageClassificationPipeline +from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline +from autoPyTorch.utils.backend import Backend + + +class TabularPreprocessingTest(unittest.TestCase): + def setUp(self): + # Setup the backed for this test + self.backend = mock.Mock(spec=Backend) + dataset = mock.MagicMock() + dataset.__len__.return_value = 1 + datamanager = mock.MagicMock() + datamanager.get_dataset_for_training.return_value = (dataset, dataset) + datamanager.train_tensors = (np.random.random((10, 15)), np.random.random(10)) + datamanager.test_tensors = None + self.backend.load_datamanager.return_value = datamanager + + def test_tabular_preprocess(self): + dataset_properties = { + 'numerical_columns': list(range(15)), + 'categorical_columns': [], + 'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION], + 'output_type': OUTPUT_TYPES_TO_STRING[MULTICLASS], + 'is_small_preprocess': True, + 'input_shape': (15,), 
+ 'num_classes': 2, + 'categories': [], + 'issparse': False + } + X = dict(X_train=np.random.random((10, 15)), + y_train=np.random.random(10), + train_indices=[0, 1, 2, 3, 4, 5], + val_indices=[6, 7, 8, 9], + dataset_properties=dataset_properties, + # Training configuration + job_id='test', + device='cpu', + budget_type='epochs', + epochs=10, + torch_num_threads=1, + early_stopping=20, + split_id=0, + backend=self.backend, + ) + pipeline = TabularClassificationPipeline(dataset_properties=dataset_properties) + # Remove the trainer + pipeline.steps.pop() + pipeline = pipeline.fit(X) + X = pipeline.transform(X) + + # We expect the transformation always for inference + self.assertIn('preprocess_transforms', X.keys()) + + def test_tabular_no_preprocess(self): + dataset_properties = { + 'numerical_columns': list(range(15)), + 'categorical_columns': [], + 'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION], + 'output_type': OUTPUT_TYPES_TO_STRING[MULTICLASS], + 'is_small_preprocess': False, + 'input_shape': (15,), + 'num_classes': 2, + 'categories': [], + 'issparse': False + } + X = dict(X_train=np.random.random((10, 15)), + y_train=np.random.random(10), + train_indices=[0, 1, 2, 3, 4, 5], + val_indices=[6, 7, 8, 9], + dataset_properties=dataset_properties, + # Training configuration + job_id='test', + device='cpu', + budget_type='epochs', + epochs=10, + torch_num_threads=1, + early_stopping=20, + split_id=0, + backend=self.backend, + ) + + pipeline = TabularClassificationPipeline(dataset_properties=dataset_properties) + # Remove the trainer + pipeline.steps.pop() + pipeline = pipeline.fit(X) + X = pipeline.transform(X) + self.assertIn('preprocess_transforms', X.keys()) + self.assertIsInstance(X['preprocess_transforms'], list) + self.assertIsInstance(X['preprocess_transforms'][-1].preprocessor, BaseEstimator) + + +class ImagePreprocessingTest(unittest.TestCase): + def setUp(self): + # Setup the backed for this test + self.backend = mock.Mock(spec=Backend) + dataset = mock.MagicMock() + dataset.__len__.return_value = 1 + datamanager = mock.MagicMock() + datamanager.get_dataset_for_training.return_value = (dataset, dataset) + datamanager.train_tensors = (np.random.random((10, 2, 2, 3)), np.random.random(10)) + datamanager.test_tensors = None + self.backend.load_datamanager.return_value = datamanager + + def test_image_preprocess(self): + data = np.random.random((10, 2, 2, 3)) + dataset_properties = dict(image_height=2, + image_width=2, + is_small_preprocess=True, + mean=np.array([np.mean(data[:, :, :, i]) for i in range(3)]), + std=np.array([np.std(data[:, :, :, i]) for i in range(3)]), + ) + X = dict(X_train=data, + y_train=np.random.random(10), + train_indices=[0, 1, 2, 3, 4, 5], + val_indices=[6, 7, 8, 9], + dataset_properties=dataset_properties, + backend=self.backend, + ) + + pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties) + pipeline = pipeline.fit(X) + X = pipeline.transform(X) + + # We always expect the transforms for inference + self.assertIn('preprocess_transforms', X.keys()) + + def test_image_no_preprocess(self): + data = np.random.random((10, 2, 2, 3)) + dataset_properties = dict(image_height=2, + image_width=2, + is_small_preprocess=False, + mean=np.array([np.mean(data[:, :, :, i]) for i in range(3)]), + std=np.array([np.std(data[:, :, :, i]) for i in range(3)]), + ) + X = dict(X_train=data, + y_train=np.random.random(10), + train_indices=[0, 1, 2, 3, 4, 5], + val_indices=[6, 7, 8, 9], + dataset_properties=dataset_properties, + backend=self.backend, 
+ ) + dataset_properties = dict() + pipeline = ImageClassificationPipeline(dataset_properties=dataset_properties) + pipeline = pipeline.fit(X) + X = pipeline.transform(X) + self.assertIn('preprocess_transforms', X.keys()) + self.assertIsInstance(X['preprocess_transforms'], list) + self.assertIsInstance(X['preprocess_transforms'][-1], BaseEstimator) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_setup_traditional_classification.py b/test/test_pipeline/components/test_setup_traditional_classification.py new file mode 100644 index 000000000..ea3100724 --- /dev/null +++ b/test/test_pipeline/components/test_setup_traditional_classification.py @@ -0,0 +1,125 @@ +import copy +import os +import sys + +import numpy as np + +import pytest + +from autoPyTorch.pipeline.components.setup.traditional_ml.base_model_choice import ModelChoice +from autoPyTorch.pipeline.components.setup.traditional_ml.classifier_models.classifiers import ( + CatboostModel, + ExtraTreesModel, + KNNModel, + LGBModel, + RFModel, + SVMModel +) + + +# Disable +def blockPrint(): + sys.stdout = open(os.devnull, 'w') + + +# Restore +def enablePrint(): + sys.stdout = sys.__stdout__ + + +@pytest.fixture(params=[LGBModel(), CatboostModel(), SVMModel(), + RFModel(), ExtraTreesModel(), KNNModel()]) +def classifier(request): + return request.param + + +@pytest.fixture +def dataset_properties(request): + return request.getfixturevalue(request.param) + + +@pytest.fixture +def dataset_properties_num_only(): + return {'numerical_columns': list(range(5))} + + +@pytest.fixture +def dataset_properties_categorical_only(): + return {'numerical_columns': list(range(0))} + + +@pytest.mark.parametrize("dataset_properties", ['dataset_properties_num_only', + 'dataset_properties_categorical_only'], indirect=True) +class TestModelChoice: + def test_get_set_config_space(self, dataset_properties): + """Make sure that we can setup a valid choice in the encoder + choice""" + model_choice = ModelChoice(dataset_properties) + cs = model_choice.get_hyperparameter_search_space(dataset_properties=dataset_properties) + + # Make sure that all hyperparameters are part of the search space + assert sorted(cs.get_hyperparameter('__choice__').choices) == sorted(list(model_choice.get_components().keys())) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. 
We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + model_choice.set_hyperparameters(config) + + assert model_choice.choice.__class__ == model_choice.get_components()[config_dict['__choice__']] + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + assert key in vars(model_choice.choice)['config'] + assert value == model_choice.choice.__dict__['config'][key] + + +@pytest.mark.parametrize("dataset", ['dataset_traditional_classifier_num_only', + 'dataset_traditional_classifier_categorical_only', + 'dataset_traditional_classifier_num_categorical'], indirect=True) +class TestTraditionalClassifiers: + def test_classifier_fit_predict(self, classifier, dataset): + X, y = dataset + + blockPrint() + try: + results = classifier.fit(X_train=X, X_val=X, y_train=y, y_val=y) + except ValueError as e: + assert isinstance(classifier, KNNModel) + assert 'Found array with 0 feature' in e.args[0] + # KNN classifier works only on numerical data + pytest.skip() + + enablePrint() + + assert isinstance(results, dict) + assert 'val_preds' in results.keys() + assert isinstance(results['val_preds'], list) + assert len(results['val_preds']) == y.shape[0] + assert len(results['val_preds'][0]) == len(np.unique(y)) + assert len(np.argwhere(0 > np.array(results['val_preds']).all() > 1)) == 0 + assert 'labels' in results.keys() + assert len(results['labels']) == y.shape[0] + assert 'train_score' in results.keys() + assert isinstance(results['train_score'], float) + assert 'val_score' in results.keys() + assert isinstance(results['val_score'], float) + + # Test if classifier can predict on val set and + # if the result is same as the one in results + y_pred = classifier.predict(X, predict_proba=True) + assert np.allclose(y_pred, results['val_preds'], atol=1e-04) + assert y_pred.shape[0] == y.shape[0] + # Test if classifier can score and + # the result is same as in results + score = classifier.score(X, y) + assert score == results['val_score'] + # Test if score is greater than 0.8 + assert score >= 0.8 diff --git a/test/test_pipeline/components/test_tabular_column_transformer.py b/test/test_pipeline/components/test_tabular_column_transformer.py new file mode 100644 index 000000000..08d891a14 --- /dev/null +++ b/test/test_pipeline/components/test_tabular_column_transformer.py @@ -0,0 +1,132 @@ +import unittest +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np + +from scipy.sparse import csr_matrix + +from sklearn.compose import ColumnTransformer + +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.TabularColumnTransformer import ( + TabularColumnTransformer +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.base_encoder_choice import ( + EncoderChoice +) +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler_choice import ScalerChoice +from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline + + +class 
TabularPipeline(TabularClassificationPipeline): + def _get_pipeline_steps(self, dataset_properties: Optional[Dict[str, Any]], + ) -> List[Tuple[str, autoPyTorchChoice]]: + """ + Defines what steps a pipeline should follow. + The step itself has choices given via autoPyTorchChoice. + + Returns: + List[Tuple[str, autoPyTorchChoice]]: list of steps sequentially exercised + by the pipeline. + """ + steps = [] # type: List[Tuple[str, autoPyTorchChoice]] + + default_dataset_properties = {'target_type': 'tabular_classification'} + if dataset_properties is not None: + default_dataset_properties.update(dataset_properties) + + steps.extend([ + ("imputer", SimpleImputer()), + ("encoder", EncoderChoice(default_dataset_properties)), + ("scaler", ScalerChoice(default_dataset_properties)), + ("tabular_transformer", TabularColumnTransformer()), + ]) + return steps + + +class TabularTransformerTest(unittest.TestCase): + + def test_tabular_preprocess_only_numerical(self): + dataset_properties = dict(numerical_columns=list(range(15)), + categorical_columns=[], + categories=[], + num_features=15, + num_classes=2, + issparse=False) + X = dict(X_train=np.random.random((10, 15)), + is_small_preprocess=True, + dataset_properties=dataset_properties + ) + + pipeline = TabularPipeline(dataset_properties=dataset_properties) + pipeline = pipeline.fit(X) + X = pipeline.transform(X) + column_transformer = X['tabular_transformer'] + + # check if transformer was added to fit dictionary + self.assertIn('tabular_transformer', X.keys()) + # check if transformer is of expected type + # In this case we expect the tabular transformer not the actual column transformer + # as the later is not callable and runs into error in the compose transform + self.assertIsInstance(column_transformer, TabularColumnTransformer) + + data = column_transformer.preprocessor.fit_transform(X['X_train']) + self.assertIsInstance(data, np.ndarray) + + def test_tabular_preprocess_only_categorical(self): + dataset_properties = dict(numerical_columns=[], + categorical_columns=list(range(2)), + categories=[['male', 'female'], ['germany']], + num_features=15, + num_classes=2, + issparse=False) + X = dict(X_train=np.array([['male', 'germany'], + ['female', 'germany'], + ['male', 'germany']], dtype=object), + dataset_properties=dataset_properties + ) + pipeline = TabularPipeline(dataset_properties=dataset_properties) + pipeline = pipeline.fit(X) + X = pipeline.transform(X) + column_transformer = X['tabular_transformer'] + + # check if transformer was added to fit dictionary + self.assertIn('tabular_transformer', X.keys()) + # check if transformer is of expected type + self.assertIsInstance(column_transformer, TabularColumnTransformer) + + data = column_transformer.preprocessor.fit_transform(X['X_train']) + self.assertIsInstance(data, np.ndarray) + + def test_sparse_data(self): + X = np.random.binomial(1, 0.1, (100, 2000)) + sparse_X = csr_matrix(X) + numerical_columns = list(range(2000)) + categorical_columns = [] + train_indices = np.array(range(50)) + dataset_properties = dict(numerical_columns=numerical_columns, categorical_columns=categorical_columns, + categories=[], + issparse=True) + X = { + 'X_train': sparse_X[train_indices], + 'dataset_properties': dataset_properties + } + + pipeline = TabularPipeline(dataset_properties=dataset_properties) + + pipeline = pipeline.fit(X) + X = pipeline.transform(X) + column_transformer = X['tabular_transformer'] + + # check if transformer was added to fit dictionary + self.assertIn('tabular_transformer', X.keys()) + # 
check if transformer is of expected type + self.assertIsInstance(column_transformer.preprocessor, ColumnTransformer) + + data = column_transformer.preprocessor.fit_transform(X['X_train']) + self.assertIsInstance(data, csr_matrix) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/components/test_training.py b/test/test_pipeline/components/test_training.py new file mode 100644 index 000000000..ec745d613 --- /dev/null +++ b/test/test_pipeline/components/test_training.py @@ -0,0 +1,299 @@ +import copy +import os +import sys +import unittest +import unittest.mock + +import numpy as np + +from sklearn.base import clone + +import torch + +from autoPyTorch.pipeline.components.training.data_loader.base_data_loader import ( + BaseDataLoaderComponent, +) +from autoPyTorch.pipeline.components.training.trainer.MixUpTrainer import ( + MixUpTrainer +) +from autoPyTorch.pipeline.components.training.trainer.StandardTrainer import ( + StandardTrainer +) +from autoPyTorch.pipeline.components.training.trainer.base_trainer import ( + BaseTrainerComponent, +) +from autoPyTorch.pipeline.components.training.trainer.base_trainer_choice import ( + TrainerChoice, +) + + +sys.path.append(os.path.dirname(__file__)) +from base import BaseTraining # noqa (E402: module level import not at top of file) + + +class BaseDataLoaderTest(unittest.TestCase): + def test_get_set_config_space(self): + """ + Makes sure that the configuration space of the base data loader + is properly working""" + loader = BaseDataLoaderComponent() + + cs = loader.get_hyperparameter_search_space() + + # Make sure that the batch size is a valid hyperparameter + self.assertEqual(cs.get_hyperparameter('batch_size').default_value, 64) + + # Make sure we can properly set some random configs + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + loader.set_hyperparameters(config) + + self.assertEqual(loader.batch_size, + config_dict['batch_size']) + + def test_check_requirements(self): + """ Makes sure that we catch the proper requirements for the + data loader""" + + fit_dictionary = {'dataset_properties': {}} + + loader = BaseDataLoaderComponent() + + # Make sure we catch all possible errors in check requirements + + # No input in fit dictionary + with self.assertRaisesRegex( + ValueError, + 'To fit a data loader, expected fit dictionary to have split_id.' 
+ ): + loader.fit(fit_dictionary) + + # Backend Missing + fit_dictionary.update({'split_id': 0}) + with self.assertRaisesRegex(ValueError, + 'backend is needed to load the data from'): + loader.fit(fit_dictionary) + + # Then the is small fit + fit_dictionary.update({'backend': unittest.mock.Mock()}) + with self.assertRaisesRegex(ValueError, + 'is_small_pre-process is required to know if th'): + loader.fit(fit_dictionary) + + def test_fit_transform(self): + """ Makes sure that fit and transform work as intended """ + backend = unittest.mock.Mock() + fit_dictionary = { + 'X_train': np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + 'y_train': np.array([0, 1, 0]), + 'train_indices': [0, 1], + 'val_indices': [2], + 'dataset_properties': {'is_small_preprocess': True}, + 'working_dir': '/tmp', + 'split_id': 0, + 'backend': backend, + } + dataset = unittest.mock.MagicMock() + dataset.__len__.return_value = 1 + datamanager = unittest.mock.MagicMock() + datamanager.get_dataset_for_training.return_value = (dataset, dataset) + fit_dictionary['backend'].load_datamanager.return_value = datamanager + + # Mock child classes requirements + loader = BaseDataLoaderComponent() + loader.build_transform = unittest.mock.Mock() + loader._check_transform_requirements = unittest.mock.Mock() + + loader.fit(fit_dictionary) + + # Fit means that we created the data loaders + self.assertIsInstance(loader.train_data_loader, torch.utils.data.DataLoader) + self.assertIsInstance(loader.val_data_loader, torch.utils.data.DataLoader) + + # Transforms adds this fit dictionaries + transformed_fit_dictionary = loader.transform(fit_dictionary) + self.assertIn('train_data_loader', transformed_fit_dictionary) + self.assertIn('val_data_loader', transformed_fit_dictionary) + + self.assertEqual(transformed_fit_dictionary['train_data_loader'], + loader.train_data_loader) + self.assertEqual(transformed_fit_dictionary['val_data_loader'], + loader.val_data_loader) + + +class BaseTrainerComponentTest(BaseTraining, unittest.TestCase): + + def test_evaluate(self): + """ + Makes sure we properly evaluate data, returning a proper loss + and metric + """ + trainer = BaseTrainerComponent() + trainer.prepare( + model=self.model, + metrics=self.metrics, + criterion=self.criterion, + budget_tracker=self.budget_tracker, + optimizer=self.optimizer, + device=self.device, + metrics_during_training=True, + scheduler=None, + task_type=self.task_type + ) + + prev_loss, prev_metrics = trainer.evaluate(self.loader, epoch=1, writer=None) + self.assertIn('accuracy', prev_metrics) + + # Fit the model + self._overfit_model() + + # Loss and metrics should have improved after fit + # And the prediction should be better than random + loss, metrics = trainer.evaluate(self.loader, epoch=1, writer=None) + self.assertGreater(prev_loss, loss) + self.assertGreater(metrics['accuracy'], prev_metrics['accuracy']) + self.assertGreater(metrics['accuracy'], 0.5) + + +class StandartTrainerTest(BaseTraining, unittest.TestCase): + + def test_epoch_training(self): + """ + Makes sure we are able to train a model and produce good + training performance + """ + trainer = StandardTrainer() + trainer.prepare( + scheduler=None, + model=self.model, + metrics=self.metrics, + criterion=self.criterion, + budget_tracker=self.budget_tracker, + optimizer=self.optimizer, + device=self.device, + metrics_during_training=True, + task_type=self.task_type + ) + + # Train the model + counter = 0 + accuracy = 0 + while accuracy < 0.7: + loss, metrics = trainer.train_epoch(self.loader, epoch=1, 
logger=self.logger, writer=None) + counter += 1 + accuracy = metrics['accuracy'] + + if counter > 1000: + self.fail("Could not overfit a dummy binary classification under 1000 epochs") + + +class MixUpTrainerTest(BaseTraining, unittest.TestCase): + + def test_epoch_training(self): + """ + Makes sure we are able to train a model and produce good + training performance + """ + trainer = MixUpTrainer(alpha=0.5) + trainer.prepare( + scheduler=None, + model=self.model, + metrics=self.metrics, + criterion=self.criterion, + budget_tracker=self.budget_tracker, + optimizer=self.optimizer, + device=self.device, + metrics_during_training=True, + task_type=self.task_type + ) + + # Train the model + counter = 0 + accuracy = 0 + while accuracy < 0.7: + loss, metrics = trainer.train_epoch(self.loader, epoch=1, logger=self.logger, writer=None) + counter += 1 + accuracy = metrics['accuracy'] + + if counter > 1000: + self.fail("Could not overfit a dummy binary classification under 1000 epochs") + + +class TrainerTest(unittest.TestCase): + def test_every_trainer_is_valid(self): + """ + Makes sure that every trainer is a valid estimator. + That is, we can fully create an object via get/set params. + + This also test that we can properly initialize each one + of them + """ + trainer_choice = TrainerChoice(dataset_properties={}) + + # Make sure all components are returned + self.assertEqual(len(trainer_choice.get_components().keys()), 2) + + # For every optimizer in the components, make sure + # that it complies with the scikit learn estimator. + # This is important because usually components are forked to workers, + # so the set/get params methods should recreate the same object + for name, trainer in trainer_choice.get_components().items(): + config = trainer.get_hyperparameter_search_space().sample_configuration() + estimator = trainer(**config) + estimator_clone = clone(estimator) + estimator_clone_params = estimator_clone.get_params() + + # Make sure all keys are copied properly + for k, v in estimator.get_params().items(): + self.assertIn(k, estimator_clone_params) + + # Make sure the params getter of estimator are honored + klass = estimator.__class__ + new_object_params = estimator.get_params(deep=False) + for name, param in new_object_params.items(): + new_object_params[name] = clone(param, safe=False) + new_object = klass(**new_object_params) + params_set = new_object.get_params(deep=False) + + for name in new_object_params: + param1 = new_object_params[name] + param2 = params_set[name] + self.assertEqual(param1, param2) + + def test_get_set_config_space(self): + """Make sure that we can setup a valid choice in the trainer + choice""" + trainer_choice = TrainerChoice(dataset_properties={}) + cs = trainer_choice.get_hyperparameter_search_space() + + # Make sure that all hyperparameters are part of the serach space + self.assertListEqual( + sorted(cs.get_hyperparameter('__choice__').choices), + sorted(list(trainer_choice.get_components().keys())) + ) + + # Make sure we can properly set some random configs + # Whereas just one iteration will make sure the algorithm works, + # doing five iterations increase the confidence. 
We will be able to + # catch component specific crashes + for i in range(5): + config = cs.sample_configuration() + config_dict = copy.deepcopy(config.get_dictionary()) + trainer_choice.set_hyperparameters(config) + + self.assertEqual(trainer_choice.choice.__class__, + trainer_choice.get_components()[config_dict['__choice__']]) + + # Then check the choice configuration + selected_choice = config_dict.pop('__choice__', None) + for key, value in config_dict.items(): + # Remove the selected_choice string from the parameter + # so we can query in the object for it + key = key.replace(selected_choice + ':', '') + self.assertIn(key, vars(trainer_choice.choice)) + self.assertEqual(value, trainer_choice.choice.__dict__[key]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_pipeline/test_cross_validation.py b/test/test_pipeline/test_cross_validation.py deleted file mode 100644 index 1f77c753b..000000000 --- a/test/test_pipeline/test_cross_validation.py +++ /dev/null @@ -1,114 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import numpy as np -import time - -import torch -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -from sklearn.model_selection import KFold, StratifiedKFold -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper - -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation -from autoPyTorch.components.training.budget_types import BudgetTypeEpochs -from autoPyTorch.pipeline.nodes.create_dataset_info import DataSetInfo - -class TestCrossValidationMethods(unittest.TestCase): - - - def test_cross_validation(self): - - class ResultNode(PipelineNode): - def fit(self, X, Y, train_indices, valid_indices): - return { 'loss': np.sum(X[valid_indices]), 'info': {'a': np.sum(X[train_indices]), 'b': np.sum(X[valid_indices])} } - - pipeline = Pipeline([ - CrossValidation([ - ResultNode() - ]) - ]) - - pipeline["CrossValidation"].add_cross_validator("k_fold", KFold, lambda x: x.reshape((-1 ,))) - pipeline["CrossValidation"].add_cross_validator("stratified_k_fold", StratifiedKFold, lambda x: x.reshape((-1 ,))) - - x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - y_train = np.array([[1], [0], [1]]) - - # test cv_splits - pipeline_config = pipeline.get_pipeline_config(cross_validator="k_fold", cross_validator_args={"n_splits": 3}) - pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config) - dataset_info = DataSetInfo() - dataset_info.categorical_features = [None] * 3 - dataset_info.x_shape = x_train.shape - dataset_info.y_shape = y_train.shape - pipeline_config["random_seed"] = 42 - - cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config, - X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None, - budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None, - optimize_start_time=time.time(), refit=False, dataset_info=dataset_info, rescore=False) - - self.assertEqual(cv_result['loss'], 15) - self.assertDictEqual(cv_result['info'], {'a': 30, 'b': 15}) - - - # test validation split - pipeline_config = pipeline.get_pipeline_config(validation_split=0.3) - pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config) - pipeline_config['random_seed'] = 42 - dataset_info = DataSetInfo() - 
dataset_info.categorical_features = [None] * 3 - dataset_info.x_shape = x_train.shape - dataset_info.y_shape = y_train.shape - - cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config, - X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None, - budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None, - optimize_start_time=time.time(), refit=False, dataset_info=dataset_info, rescore=False) - - self.assertEqual(cv_result['loss'], 24) - self.assertDictEqual(cv_result['info'], {'a': 21, 'b': 24}) - - - # test stratified cv split - x_valid = x_train - y_valid = y_train - x_train = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]]) - y_train = np.array([[1], [1], [0], [0], [1], [0]]) - - pipeline_config = pipeline.get_pipeline_config(cross_validator="stratified_k_fold", cross_validator_args={"n_splits": 3}) - pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config) - pipeline_config['random_seed'] = 42 - dataset_info = DataSetInfo() - dataset_info.categorical_features = [None] * 3 - dataset_info.x_shape = x_train.shape - dataset_info.y_shape = y_train.shape - - cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config, - X_train=x_train, Y_train=y_train, X_valid=None, Y_valid=None, - budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None, - optimize_start_time=time.time(), refit=False, dataset_info=dataset_info, rescore=False) - - self.assertEqual(cv_result['loss'], 57) - self.assertDictEqual(cv_result['info'], {'a': 114, 'b': 57}) - - pipeline_config = pipeline.get_pipeline_config() - pipeline_config_space = pipeline.get_hyperparameter_search_space(**pipeline_config) - pipeline_config['random_seed'] = 42 - dataset_info = DataSetInfo() - dataset_info.categorical_features = [None] * 3 - dataset_info.x_shape = x_train.shape - dataset_info.y_shape = y_train.shape - - cv_result = pipeline.fit_pipeline(hyperparameter_config=pipeline_config_space, pipeline_config=pipeline_config, - X_train=x_train, Y_train=y_train, X_valid=x_valid, Y_valid=y_valid, - budget=5, budget_type=BudgetTypeEpochs, one_hot_encoder=None, - optimize_start_time=time.time(), refit=False, dataset_info=dataset_info, rescore=False) - - self.assertEqual(cv_result['loss'], 45) - self.assertDictEqual(cv_result['info'], {'a': 171, 'b': 45}) \ No newline at end of file diff --git a/test/test_pipeline/test_imputation.py b/test/test_pipeline/test_imputation.py deleted file mode 100644 index 0a0d02d92..000000000 --- a/test/test_pipeline/test_imputation.py +++ /dev/null @@ -1,43 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import numpy as np -import time - -import torch -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.pipeline.nodes.imputation import Imputation -from numpy.testing import assert_array_equal -from autoPyTorch.pipeline.nodes.create_dataset_info import DataSetInfo - - - -class TestImputation(unittest.TestCase): - - - def test_imputation(self): - X = np.array([[1, np.nan, 3], [4, 5, 6], [7, 8, np.nan], - [np.nan, 2, 3], [4, 5, np.nan], [7, np.nan, 9]]) - train_indices = np.array([0, 1, 2]) - valid_indices = np.array([3, 4, 5]) - dataset_info = DataSetInfo() - dataset_info.categorical_features = [False, True, False] - hyperparameter_config = 
{Imputation.get_name() + ConfigWrapper.delimiter + "strategy": "median"} - - imputation_node = Imputation() - - fit_result = imputation_node.fit(hyperparameter_config=hyperparameter_config, X=X, train_indices=train_indices, - dataset_info=dataset_info) - - assert_array_equal(fit_result['X'][train_indices], np.array([[1, 3, 9], [4, 6, 5], [7, 4.5, 8]])) - assert_array_equal(fit_result['X'][valid_indices], np.array([[4, 3, 2], [4, 4.5, 5], [7, 9, 9]])) - assert_array_equal(fit_result['dataset_info'].categorical_features, [False, False, True]) - - X_test = np.array([[np.nan, np.nan, np.nan]]) - - predict_result = imputation_node.predict(X=X_test, imputation_preprocessor=fit_result['imputation_preprocessor'], all_nan_columns=np.array([False] * 3)) - assert_array_equal(predict_result['X'], np.array([[4, 4.5, 9]])) \ No newline at end of file diff --git a/test/test_pipeline/test_initialization.py b/test/test_pipeline/test_initialization.py deleted file mode 100644 index fbb342b31..000000000 --- a/test/test_pipeline/test_initialization.py +++ /dev/null @@ -1,40 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import torch -import torch.nn as nn - -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.initialization_selector import InitializationSelector - -from autoPyTorch.components.networks.feature.mlpnet import MlpNet -from autoPyTorch.components.networks.initialization import SparseInitialization, SimpleInitializer -from torch.nn import Linear - - -class TestInitializationSelectorMethods(unittest.TestCase): - - def test_initialization_selector(self): - pipeline = Pipeline([ - InitializationSelector() - ]) - - selector = pipeline[InitializationSelector.get_name()] - selector.add_initialization_method("sparse", SparseInitialization) - selector.add_initializer('simple_initializer', SimpleInitializer) - network = MlpNet({"activation": "relu", "num_layers": 1, "num_units_1": 10, "use_dropout": False}, in_features=5, out_features=1, embedding=None) - - pipeline_config = pipeline.get_pipeline_config() - pipeline_config["random_seed"] = 42 - hyper_config = pipeline.get_hyperparameter_search_space().sample_configuration() - hyper_config["InitializationSelector:initializer:initialize_bias"] = "No" - hyper_config["InitializationSelector:initialization_method"] = "sparse" - pipeline.fit_pipeline(hyperparameter_config=hyper_config, pipeline_config=pipeline_config, network=network) - - layer = [l for l in network.layers if isinstance(l, Linear)][0] - self.assertEqual((layer.weight.data != 0).sum(), 5) - - - diff --git a/test/test_pipeline/test_log_selector.py b/test/test_pipeline/test_log_selector.py deleted file mode 100644 index 7a0cac701..000000000 --- a/test/test_pipeline/test_log_selector.py +++ /dev/null @@ -1,34 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector - -class TestLogFunctionSelectorMethods(unittest.TestCase): - - def test_selector(self): - - def log_fnc1(network, epoch): - print("a") - - def log_fnc2(network, epoch): - print("b") - - selector = LogFunctionsSelector() - pipeline = Pipeline([ - selector - ]) - selector.add_log_function("log1", log_fnc1) - selector.add_log_function("log2", log_fnc2) - - pipeline_config = 
pipeline.get_pipeline_config(additional_logs=["log2"]) - pipeline.fit_pipeline(pipeline_config=pipeline_config) - - log_functions = selector.fit_output['log_functions'] - - self.assertListEqual([x.log for x in log_functions], [log_fnc2]) - - - diff --git a/test/test_pipeline/test_loss_selector.py b/test/test_pipeline/test_loss_selector.py deleted file mode 100644 index 7ab1ae545..000000000 --- a/test/test_pipeline/test_loss_selector.py +++ /dev/null @@ -1,37 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import torch -import numpy as np -import torch.nn as nn - -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector -from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeighted - -class TestLossSelectorMethods(unittest.TestCase): - - def test_loss_selector(self): - pipeline = Pipeline([ - LossModuleSelector() - ]) - - selector = pipeline[LossModuleSelector.get_name()] - selector.add_loss_module("L1", nn.L1Loss) - selector.add_loss_module("cross_entropy", nn.CrossEntropyLoss, LossWeightStrategyWeighted(), True) - - pipeline_config = pipeline.get_pipeline_config(loss_modules=["L1", "cross_entropy"]) - pipeline_hyperparameter_config = pipeline.get_hyperparameter_search_space(**pipeline_config).sample_configuration() - - pipeline_hyperparameter_config["LossModuleSelector:loss_module"] = "L1" - pipeline.fit_pipeline(hyperparameter_config=pipeline_hyperparameter_config, train_indices=np.array([0, 1, 2]), X=np.random.rand(3,3), Y=np.random.rand(3, 2), pipeline_config=pipeline_config, tmp=None) - selected_loss = pipeline[selector.get_name()].fit_output['loss_function'] - self.assertEqual(type(selected_loss.function), nn.L1Loss) - - pipeline_hyperparameter_config["LossModuleSelector:loss_module"] = "cross_entropy" - pipeline.fit_pipeline(hyperparameter_config=pipeline_hyperparameter_config, train_indices=np.array([0, 1, 2]), X=np.random.rand(3,3), Y=np.array([[1, 0], [0, 1], [1, 0]]), pipeline_config=pipeline_config, tmp=None) - selected_loss = pipeline[selector.get_name()].fit_output['loss_function'] - self.assertEqual(type(selected_loss.function), nn.CrossEntropyLoss) - self.assertEqual(selected_loss(torch.tensor([[0.0, 10000.0]]), torch.tensor([[0, 1]])), 0) \ No newline at end of file diff --git a/test/test_pipeline/test_losses.py b/test/test_pipeline/test_losses.py new file mode 100644 index 000000000..7cb744a29 --- /dev/null +++ b/test/test_pipeline/test_losses.py @@ -0,0 +1,47 @@ +import pytest + +import torch +from torch import nn + +from autoPyTorch.pipeline.components.training.losses import get_loss_instance + + +@pytest.mark.parametrize('output_type', ['multiclass', + 'binary', + 'continuous-multioutput', + 'continuous']) +def test_get_no_name(output_type): + dataset_properties = {'task_type': 'tabular_classification', 'output_type': output_type} + loss = get_loss_instance(dataset_properties) + assert isinstance(loss, nn.Module) + + +@pytest.mark.parametrize('output_type_name', [('multiclass', 'CrossEntropyLoss'), + ('binary', 'BCEWithLogitsLoss')]) +def test_get_name(output_type_name): + output_type, name = output_type_name + dataset_properties = {'task_type': 'tabular_classification', 'output_type': output_type} + loss = get_loss_instance(dataset_properties, name) + assert isinstance(loss, nn.Module) + assert str(loss) == f"{name}()" + + +def test_get_name_error(): + 
dataset_properties = {'task_type': 'tabular_classification', 'output_type': 'multiclass'} + name = 'BCELoss' + with pytest.raises(ValueError, match=r"Invalid name entered for task [a-z]+_[a-z]+, "): + get_loss_instance(dataset_properties, name) + + +def test_losses(): + list_properties = [{'task_type': 'tabular_classification', 'output_type': 'multiclass'}, + {'task_type': 'tabular_classification', 'output_type': 'binary'}, + {'task_type': 'tabular_regression', 'output_type': 'continuous'}] + pred_cross_entropy = torch.randn(4, 4, requires_grad=True) + list_predictions = [pred_cross_entropy, torch.empty(4).random_(2), torch.randn(4)] + list_names = [None, 'BCEWithLogitsLoss', None] + list_targets = [torch.empty(4, dtype=torch.long).random_(4), torch.empty(4).random_(2), torch.randn(4)] + for dataset_properties, pred, target, name in zip(list_properties, list_predictions, list_targets, list_names): + loss = get_loss_instance(dataset_properties=dataset_properties, name=name) + score = loss(pred, target) + assert isinstance(score, torch.Tensor) diff --git a/test/test_pipeline/test_lr_scheduler_selector.py b/test/test_pipeline/test_lr_scheduler_selector.py deleted file mode 100644 index b1bad3cf9..000000000 --- a/test/test_pipeline/test_lr_scheduler_selector.py +++ /dev/null @@ -1,56 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import torch -import torch.optim.lr_scheduler as lr_scheduler -import torch.nn as nn -import numpy as np - -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector -from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector -from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector - -from autoPyTorch.components.networks.feature.mlpnet import MlpNet -from autoPyTorch.components.networks.feature.shapedmlpnet import ShapedMlpNet -from autoPyTorch.components.optimizer.optimizer import AdamOptimizer, SgdOptimizer -from autoPyTorch.components.lr_scheduler.lr_schedulers import SchedulerStepLR, SchedulerExponentialLR - -class TestLearningrateSchedulerSelectorMethods(unittest.TestCase): - - def test_lr_scheduler_selector(self): - pipeline = Pipeline([ - NetworkSelector(), - OptimizerSelector(), - LearningrateSchedulerSelector(), - ]) - - net_selector = pipeline[NetworkSelector.get_name()] - net_selector.add_network("mlpnet", MlpNet) - net_selector.add_network("shapedmlpnet", ShapedMlpNet) - net_selector.add_final_activation('none', nn.Sequential()) - - opt_selector = pipeline[OptimizerSelector.get_name()] - opt_selector.add_optimizer("adam", AdamOptimizer) - opt_selector.add_optimizer("sgd", SgdOptimizer) - - lr_scheduler_selector = pipeline[LearningrateSchedulerSelector.get_name()] - lr_scheduler_selector.add_lr_scheduler("step", SchedulerStepLR) - lr_scheduler_selector.add_lr_scheduler("exp", SchedulerExponentialLR) - - - pipeline_config = pipeline.get_pipeline_config() - pipeline_config["random_seed"] = 42 - hyper_config = pipeline.get_hyperparameter_search_space().sample_configuration() - - pipeline.fit_pipeline(hyperparameter_config=hyper_config, pipeline_config=pipeline_config, - X=torch.rand(3,3), Y=torch.rand(3, 2), embedding=nn.Sequential(), training_techniques=[], train_indices=np.array([0, 1, 2])) - - sampled_lr_scheduler = pipeline[lr_scheduler_selector.get_name()].fit_output['training_techniques'][0].training_components['lr_scheduler'] - - 
self.assertIn(type(sampled_lr_scheduler), [lr_scheduler.ExponentialLR, lr_scheduler.StepLR]) - - - diff --git a/test/test_pipeline/test_metric_selector.py b/test/test_pipeline/test_metric_selector.py deleted file mode 100644 index 188e6fbec..000000000 --- a/test/test_pipeline/test_metric_selector.py +++ /dev/null @@ -1,33 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector - -from autoPyTorch.components.metrics.standard_metrics import accuracy, auc_metric, mean_distance - -class TestMetricSelectorMethods(unittest.TestCase): - - def test_selector(self): - pipeline = Pipeline([ - MetricSelector() - ]) - - selector = pipeline[MetricSelector.get_name()] - selector.add_metric("auc", auc_metric) - selector.add_metric("accuracy", accuracy) - selector.add_metric("mean", mean_distance) - - pipeline_config = pipeline.get_pipeline_config(optimize_metric="accuracy", additional_metrics=['auc', 'mean']) - pipeline.fit_pipeline(pipeline_config=pipeline_config) - - selected_optimize_metric = selector.fit_output['optimize_metric'] - selected_additional_metrics = selector.fit_output['additional_metrics'] - - self.assertEqual(selected_optimize_metric.metric, accuracy) - self.assertSetEqual(set(x.metric for x in selected_additional_metrics), set([auc_metric, mean_distance])) - - - diff --git a/test/test_pipeline/test_metrics.py b/test/test_pipeline/test_metrics.py new file mode 100644 index 000000000..153995cff --- /dev/null +++ b/test/test_pipeline/test_metrics.py @@ -0,0 +1,192 @@ +import numpy as np + +import pytest + +import sklearn.metrics + + +from autoPyTorch.constants import ( + BINARY, + CONTINUOUS, + OUTPUT_TYPES_TO_STRING, + STRING_TO_TASK_TYPES, + TABULAR_CLASSIFICATION, + TABULAR_REGRESSION, + TASK_TYPES_TO_STRING +) +from autoPyTorch.metrics import accuracy +from autoPyTorch.pipeline.components.training.metrics.base import ( + _PredictMetric, + _ThresholdMetric, + autoPyTorchMetric, + make_metric, +) +from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score, get_metrics + + +@pytest.mark.parametrize('output_type', ['multiclass', + 'multiclass-multioutput', + 'binary']) +def test_get_no_name_classification(output_type): + dataset_properties = {'task_type': 'tabular_classification', + 'output_type': output_type} + metrics = get_metrics(dataset_properties) + for metric in metrics: + assert isinstance(metric, autoPyTorchMetric) + + +@pytest.mark.parametrize('output_type', ['continuous', 'continuous-multioutput']) +def test_get_no_name_regression(output_type): + dataset_properties = {'task_type': 'tabular_regression', + 'output_type': output_type} + metrics = get_metrics(dataset_properties) + for metric in metrics: + assert isinstance(metric, autoPyTorchMetric) + + +@pytest.mark.parametrize('metric', ['accuracy', 'average_precision', + 'balanced_accuracy', 'f1']) +def test_get_name(metric): + dataset_properties = {'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION], + 'output_type': OUTPUT_TYPES_TO_STRING[BINARY]} + metrics = get_metrics(dataset_properties, [metric]) + for i in range(len(metrics)): + assert isinstance(metrics[i], autoPyTorchMetric) + assert metrics[i].name.lower() == metric.lower() + + +def test_get_name_error(): + dataset_properties = {'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION], + 'output_type': OUTPUT_TYPES_TO_STRING[BINARY]} + 
names = ['root_mean_sqaured_error', 'average_precision'] + with pytest.raises(ValueError, match=r"Invalid name entered for task [a-z]+_[a-z]+, "): + get_metrics(dataset_properties, names) + + +def test_classification_metrics(): + # test of all classification metrics + dataset_properties = {'task_type': TASK_TYPES_TO_STRING[TABULAR_CLASSIFICATION], + 'output_type': OUTPUT_TYPES_TO_STRING[BINARY]} + y_target = np.array([0, 1, 0, 1]) + y_pred = np.array([0, 0, 0, 1]) + metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True) + score_dict = calculate_score(y_pred, y_target, STRING_TO_TASK_TYPES[dataset_properties['task_type']], metrics) + assert isinstance(score_dict, dict) + for name, score in score_dict.items(): + assert isinstance(name, str) + assert isinstance(score, float) + + +def test_regression_metrics(): + # test of all regression metrics + dataset_properties = {'task_type': TASK_TYPES_TO_STRING[TABULAR_REGRESSION], + 'output_type': OUTPUT_TYPES_TO_STRING[CONTINUOUS]} + y_target = np.array([0.1, 0.6, 0.7, 0.4]) + y_pred = np.array([0.6, 0.7, 0.4, 1]) + metrics = get_metrics(dataset_properties=dataset_properties, all_supported_metrics=True) + score_dict = calculate_score(y_pred, y_target, STRING_TO_TASK_TYPES[dataset_properties['task_type']], metrics) + + assert isinstance(score_dict, dict) + for name, score in score_dict.items(): + assert isinstance(name, str) + assert isinstance(score, float) + + +def test_predictmetric_binary(): + y_true = np.array([0, 0, 1, 1]) + y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) + + scorer = _PredictMetric( + 'accuracy', sklearn.metrics.accuracy_score, 1, 0, 1, {}) + + score = scorer(y_true, y_pred) + assert score == pytest.approx(1.0) + + y_pred = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) + score = scorer(y_true, y_pred) + assert score == pytest.approx(0.5) + + y_pred = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]) + score = scorer(y_true, y_pred) + assert score == pytest.approx(0.5) + + scorer = _PredictMetric( + 'bac', sklearn.metrics.balanced_accuracy_score, + 1, 0, 1, {}) + + score = scorer(y_true, y_pred) + assert score, pytest.approx(0.5) + + scorer = _PredictMetric( + 'accuracy', sklearn.metrics.accuracy_score, 1, 0, -1, {}) + + y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) + score = scorer(y_true, y_pred) + assert score == pytest.approx(-1.0) + + +def test_threshold_scorer_binary(): + y_true = [0, 0, 1, 1] + y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) + + scorer = _ThresholdMetric( + 'roc_auc', sklearn.metrics.roc_auc_score, 1, 0, 1, {}) + + score = scorer(y_true, y_pred) + assert score == pytest.approx(1.0) + + y_pred = np.array([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) + score = scorer(y_true, y_pred) + assert score == pytest.approx(0.5) + + y_pred = np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]) + score = scorer(y_true, y_pred) + assert score == pytest.approx(0.5) + + scorer = _ThresholdMetric( + 'roc_auc', sklearn.metrics.roc_auc_score, 1, 0, -1, {}) + + y_pred = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]]) + score = scorer(y_true, y_pred) + assert score == pytest.approx(-1.0) + + +def test_sign_flip(): + y_true = np.arange(0, 1.01, 0.1) + y_pred = y_true.copy() + + scorer = make_metric( + 'r2', sklearn.metrics.r2_score, greater_is_better=True) + + score = scorer(y_true, y_pred + 1.0) + assert score == pytest.approx(-9.0) + + score = scorer(y_true, y_pred + 0.5) + assert score 
== pytest.approx(-1.5) + + score = scorer(y_true, y_pred) + assert score == pytest.approx(1.0) + + scorer = make_metric( + 'r2', sklearn.metrics.r2_score, greater_is_better=False) + + score = scorer(y_true, y_pred + 1.0) + assert score == pytest.approx(9.0) + + score = scorer(y_true, y_pred + 0.5) + assert score == pytest.approx(1.5) + + score = scorer(y_true, y_pred) + assert score == pytest.approx(-1.0) + + +def test_classification_only_metric(): + y_true = np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0]) + y_pred = \ + np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]) + scorer = accuracy + + score = calculate_score(y_true, y_pred, TABULAR_CLASSIFICATION, [scorer]) + + previous_score = scorer._optimum + assert score['accuracy'] == pytest.approx(previous_score) diff --git a/test/test_pipeline/test_network_selector.py b/test/test_pipeline/test_network_selector.py deleted file mode 100644 index 57de3735a..000000000 --- a/test/test_pipeline/test_network_selector.py +++ /dev/null @@ -1,38 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import torch -import torch.nn as nn - -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector - -from autoPyTorch.components.networks.feature.mlpnet import MlpNet -from autoPyTorch.components.networks.feature.shapedmlpnet import ShapedMlpNet - -class TestNetworkSelectorMethods(unittest.TestCase): - - def test_network_selector(self): - pipeline = Pipeline([ - NetworkSelector() - ]) - - selector = pipeline[NetworkSelector.get_name()] - selector.add_network("mlpnet", MlpNet) - selector.add_network("shapedmlpnet", ShapedMlpNet) - selector.add_final_activation('none', nn.Sequential()) - - pipeline_config = pipeline.get_pipeline_config() - pipeline_config["random_seed"] = 42 - hyper_config = pipeline.get_hyperparameter_search_space().sample_configuration() - pipeline.fit_pipeline(hyperparameter_config=hyper_config, pipeline_config=pipeline_config, - X=torch.rand(3,3), Y=torch.rand(3, 2), embedding=nn.Sequential()) - - sampled_network = pipeline[selector.get_name()].fit_output['network'] - - self.assertIn(type(sampled_network), [MlpNet, ShapedMlpNet]) - - - diff --git a/test/test_pipeline/test_normalization_strategy_selector.py b/test/test_pipeline/test_normalization_strategy_selector.py deleted file mode 100644 index 0393b4dd9..000000000 --- a/test/test_pipeline/test_normalization_strategy_selector.py +++ /dev/null @@ -1,44 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import numpy as np -import time - -import torch -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector -from autoPyTorch.pipeline.nodes.create_dataset_info import DataSetInfo -from numpy.testing import assert_array_almost_equal -from sklearn.preprocessing import MinMaxScaler - - -class TestNormalizationStrategySelector(unittest.TestCase): - - - def test_normalization_strategy_selector(self): - X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], - [3, 2, 1], [6, 5, 4], [9, 8, 7]]) - train_indices = np.array([0, 1, 2]) - valid_indices = np.array([3, 4, 5]) - dataset_info = DataSetInfo() - dataset_info.categorical_features = [False, True, False] - 
hyperparameter_config = {NormalizationStrategySelector.get_name() + ConfigWrapper.delimiter + "normalization_strategy": "minmax"} - - normalizer_node = NormalizationStrategySelector() - normalizer_node.add_normalization_strategy("minmax", MinMaxScaler) - - fit_result = normalizer_node.fit(hyperparameter_config=hyperparameter_config, X=X, train_indices=train_indices, - dataset_info=dataset_info) - - assert_array_almost_equal(fit_result['X'][train_indices], np.array([[0, 0, 2], [0.5, 0.5, 5], [1, 1, 8]])) - assert_array_almost_equal(fit_result['X'][valid_indices], np.array([[2/6, -2/6, 2], [5/6, 1/6, 5], [8/6, 4/6, 8]])) - assert_array_almost_equal(fit_result['dataset_info'].categorical_features, [False, False, True]) - - X_test = np.array([[1, 2, 3]]) - - predict_result = normalizer_node.predict(X=X_test, normalizer=fit_result["normalizer"]) - assert_array_almost_equal(predict_result['X'], np.array([[0, 0, 2]])) \ No newline at end of file diff --git a/test/test_pipeline/test_optimization_algorithm.py b/test/test_pipeline/test_optimization_algorithm.py deleted file mode 100644 index fc1151f81..000000000 --- a/test/test_pipeline/test_optimization_algorithm.py +++ /dev/null @@ -1,59 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import netifaces -import logging -import numpy as np - -import torch - -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper - -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.base.pipeline_node import PipelineNode -from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm -from autoPyTorch.utils.config.config_option import ConfigOption -from hpbandster.core.result import json_result_logger - -class TestOptimizationAlgorithmMethods(unittest.TestCase): - - def test_optimizer(self): - - class ResultNode(PipelineNode): - def fit(self, X_train, Y_train): - return {'loss': X_train.shape[1], 'info': {'train_a': X_train.shape[1], 'train_b': Y_train.shape[1]}} - - def get_hyperparameter_search_space(self, **pipeline_config): - cs = CS.ConfigurationSpace() - cs.add_hyperparameter(CSH.UniformIntegerHyperparameter('hyper', lower=0, upper=30)) - return cs - - def get_pipeline_config_options(self): - return [ - ConfigOption("result_logger_dir", default=".", type="directory"), - ConfigOption("optimize_metric", default="a", type=str), - ] - - logger = logging.getLogger('hpbandster') - logger.setLevel(logging.ERROR) - logger = logging.getLogger('autonet') - logger.setLevel(logging.ERROR) - - pipeline = Pipeline([ - OptimizationAlgorithm([ - ResultNode() - ]) - ]) - - pipeline_config = pipeline.get_pipeline_config(num_iterations=1, budget_type='epochs', result_logger_dir=".") - pipeline.fit_pipeline(pipeline_config=pipeline_config, X_train=np.random.rand(15,10), Y_train=np.random.rand(15, 5), X_valid=None, Y_valid=None, - result_loggers=[json_result_logger(directory=".", overwrite=True)], dataset_info=None, shutdownables=[]) - - result_of_opt_pipeline = pipeline[OptimizationAlgorithm.get_name()].fit_output['optimized_hyperparameter_config'] - print(pipeline[OptimizationAlgorithm.get_name()].fit_output) - - self.assertIn(result_of_opt_pipeline[ResultNode.get_name() + ConfigWrapper.delimiter + 'hyper'], list(range(0, 31))) diff --git a/test/test_pipeline/test_optimizer_selector.py b/test/test_pipeline/test_optimizer_selector.py deleted file mode 100644 index 
45b0991ef..000000000 --- a/test/test_pipeline/test_optimizer_selector.py +++ /dev/null @@ -1,46 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import torch -import torch.nn as nn -import torch.optim as optim - -from autoPyTorch.pipeline.base.pipeline import Pipeline -from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector -from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector - -from autoPyTorch.components.networks.feature.mlpnet import MlpNet -from autoPyTorch.components.networks.feature.shapedmlpnet import ShapedMlpNet -from autoPyTorch.components.optimizer.optimizer import AutoNetOptimizerBase, AdamOptimizer, SgdOptimizer - -class TestOptimizerSelectorMethods(unittest.TestCase): - - def test_optimizer_selector(self): - pipeline = Pipeline([ - NetworkSelector(), - OptimizerSelector() - ]) - - net_selector = pipeline[NetworkSelector.get_name()] - net_selector.add_network("mlpnet", MlpNet) - net_selector.add_network("shapedmlpnet", ShapedMlpNet) - net_selector.add_final_activation('none', nn.Sequential()) - - opt_selector = pipeline[OptimizerSelector.get_name()] - opt_selector.add_optimizer("adam", AdamOptimizer) - opt_selector.add_optimizer("sgd", SgdOptimizer) - - pipeline_config = pipeline.get_pipeline_config() - pipeline_config["random_seed"] = 42 - hyper_config = pipeline.get_hyperparameter_search_space().sample_configuration() - pipeline.fit_pipeline(hyperparameter_config=hyper_config, pipeline_config=pipeline_config, - X=torch.rand(3,3), Y=torch.rand(3, 2), embedding=nn.Sequential()) - - sampled_optimizer = opt_selector.fit_output['optimizer'] - - self.assertIn(type(sampled_optimizer), [optim.Adam, optim.SGD]) - - - diff --git a/test/test_pipeline/test_pipeline.py b/test/test_pipeline/test_pipeline.py new file mode 100644 index 000000000..b54083935 --- /dev/null +++ b/test/test_pipeline/test_pipeline.py @@ -0,0 +1,124 @@ +import ConfigSpace as CS +import ConfigSpace.hyperparameters as CSH + +import pytest + +from autoPyTorch.pipeline.base_pipeline import BasePipeline +from autoPyTorch.pipeline.components.base_choice import autoPyTorchChoice +from autoPyTorch.pipeline.components.base_component import autoPyTorchComponent + + +class DummyComponent(autoPyTorchComponent): + def __init__(self, a=0, b='orange', random_state=None): + self.a = a + self.b = b + self.fitted = False + + def get_hyperparameter_search_space(self, dataset_properties=None): + cs = CS.ConfigurationSpace() + a = CSH.UniformIntegerHyperparameter('a', lower=10, upper=100, log=False) + b = CSH.CategoricalHyperparameter('b', choices=['red', 'green', 'blue']) + cs.add_hyperparameters([a, b]) + return cs + + def fit(self, X, y): + self.fitted = True + return self + + +class DummyChoice(autoPyTorchChoice): + def get_components(self): + return { + 'DummyComponent2': DummyComponent, + 'DummyComponent3': DummyComponent, + } + + def get_hyperparameter_search_space(self, dataset_properties=None, default=None, + include=None, exclude=None): + cs = CS.ConfigurationSpace() + cs.add_hyperparameter( + CSH.CategoricalHyperparameter( + '__choice__', + list(self.get_components().keys()), + ) + ) + return cs + + +class BasePipelineMock(BasePipeline): + def __init__(self): + pass + + def _get_pipeline_steps(self, dataset_properties): + return [ + ('DummyComponent1', DummyComponent(a=10, b='red')), + ('DummyChoice', DummyChoice(self.dataset_properties)) + ] + + +@pytest.fixture +def base_pipeline(): + """Create a 
pipeline and test its different properties""" + base_pipeline = BasePipelineMock() + base_pipeline.dataset_properties = {} + base_pipeline.steps = [ + ('DummyComponent1', DummyComponent(a=10, b='red')), + ('DummyChoice', DummyChoice(base_pipeline.dataset_properties)) + ] + return base_pipeline + + +def test_pipeline_base_config_space(base_pipeline): + """Makes sure that the pipeline can build a proper + configuration space via its base config methods""" + cs = base_pipeline._get_base_search_space( + cs=CS.ConfigurationSpace(), + include={}, exclude={}, dataset_properties={}, + pipeline=base_pipeline.steps + ) + + # The hyperparameters a and b of the dummy component + # must be in the hyperparameter search space + # If the configuration is parsed correctly, the lower bound + # of hyperparameter a should be properly defined + assert 'DummyComponent1:a' in cs + assert 10 == cs.get_hyperparameter('DummyComponent1:a').lower + assert 'DummyComponent1:b' in cs + + # For the choice, we make sure the choice + # is among components 2 and 3 + assert 'DummyChoice:__choice__' in cs + assert ('DummyComponent2', 'DummyComponent3') == cs.get_hyperparameter( + 'DummyChoice:__choice__').choices + + +def test_pipeline_set_config(base_pipeline): + config = base_pipeline._get_base_search_space( + cs=CS.ConfigurationSpace(), + include={}, exclude={}, dataset_properties={}, + pipeline=base_pipeline.steps + ).sample_configuration() + + base_pipeline.set_hyperparameters(config) + + # Check that the proper hyperparameters were set + config_dict = config.get_dictionary() + assert config_dict['DummyComponent1:a'] == base_pipeline.named_steps['DummyComponent1'].a + assert config_dict['DummyComponent1:b'] == base_pipeline.named_steps['DummyComponent1'].b + + # Make sure that the proper component choice was made + # according to the config + # The 'orange' check makes sure that the pipeline sets the + # hyperparameters individually, as 'orange' can only come from the + # choice's default, since it is not a hyperparameter in the cs + assert isinstance(base_pipeline.named_steps['DummyChoice'].choice, DummyComponent) + assert 'orange' == base_pipeline.named_steps['DummyChoice'].choice.b + + +def test_get_default_options(base_pipeline): + default_options = base_pipeline.get_default_pipeline_options() + # test that a dict is returned + assert isinstance(default_options, dict) + for option, default in default_options.items(): + # check that no default is None + assert default is not None diff --git a/test/test_pipeline/test_resampling_strategy_selector.py b/test/test_pipeline/test_resampling_strategy_selector.py deleted file mode 100644 index 1710f0b59..000000000 --- a/test/test_pipeline/test_resampling_strategy_selector.py +++ /dev/null @@ -1,97 +0,0 @@ -__author__ = "Max Dippel, Michael Burkart and Matthias Urban" -__version__ = "0.0.1" -__license__ = "BSD" - -import unittest -import numpy as np -import time - -import torch -import ConfigSpace as CS -import ConfigSpace.hyperparameters as CSH -from autoPyTorch.utils.configspace_wrapper import ConfigWrapper -from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector -from numpy.testing import assert_array_almost_equal -from autoPyTorch.components.preprocessing.resampling import TargetSizeStrategyUpsample, \ - RandomOverSamplingWithReplacement, RandomUnderSamplingWithReplacement - - -class TestResamplingStrategySelector(unittest.TestCase): - - - def test_resampling_strategy_selector_only_train(self): - X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) -
Y = np.array([[0, 1], [1, 0], [1, 0]]) - train_indices = np.array([0, 1, 2]) - hyperparameter_config = { - ResamplingStrategySelector.get_name() + ConfigWrapper.delimiter + "over_sampling_method": "random_over_sampling", - ResamplingStrategySelector.get_name() + ConfigWrapper.delimiter + "under_sampling_method": "random_under_sampling", - ResamplingStrategySelector.get_name() + ConfigWrapper.delimiter + "target_size_strategy": "up", - } - - resampler_node = ResamplingStrategySelector() - resampler_node.add_over_sampling_method("random_over_sampling", RandomOverSamplingWithReplacement) - resampler_node.add_under_sampling_method("random_under_sampling", RandomUnderSamplingWithReplacement) - resampler_node.add_target_size_strategy("up", TargetSizeStrategyUpsample) - - pipeline_config = {"random_seed": 42, "shuffle": True} - fit_result = resampler_node.fit(pipeline_config=pipeline_config, hyperparameter_config=hyperparameter_config, X=X, Y=Y, train_indices=train_indices, - valid_indices=None) - - assert_array_almost_equal(sorted(fit_result['train_indices']), np.array([0, 1, 2, 3])) - num_0 = 0 - num_1 = 0 - for i in range(fit_result['X'].shape[0]): - x = fit_result['X'][i, :] - y = fit_result['Y'][i, :] - - if np.all(y == np.array([0, 1])): - assert_array_almost_equal(x, np.array([1, 2, 3])) - num_0 += 1 - else: - self.assertTrue(np.all(x == np.array([4, 5, 6])) or np.all(x == np.array([7, 8, 9]))) - num_1 += 1 - self.assertEqual(num_0, 2) - self.assertEqual(num_1, 2) - - def test_resampling_strategy_selector_with_valid(self): - X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], - [1, 2, 3], [4, 5, 6], [7, 8, 9]]) - Y = np.array([[0, 1], [1, 0], [1, 0], - [0, 1], [1, 0], [1, 0]]) - train_indices = np.array([0, 1, 2]) - valid_indices = np.array([3, 4, 5]) - hyperparameter_config = { - ResamplingStrategySelector.get_name() + ConfigWrapper.delimiter + "over_sampling_method": "random_over_sampling", - ResamplingStrategySelector.get_name() + ConfigWrapper.delimiter + "under_sampling_method": "random_under_sampling", - ResamplingStrategySelector.get_name() + ConfigWrapper.delimiter + "target_size_strategy": "up", - } - - resampler_node = ResamplingStrategySelector() - resampler_node.add_over_sampling_method("random_over_sampling", RandomOverSamplingWithReplacement) - resampler_node.add_under_sampling_method("random_under_sampling", RandomUnderSamplingWithReplacement) - resampler_node.add_target_size_strategy("up", TargetSizeStrategyUpsample) - - pipeline_config = {"random_seed": 42, "shuffle": True} - fit_result = resampler_node.fit(pipeline_config=pipeline_config, hyperparameter_config=hyperparameter_config, X=X, Y=Y, train_indices=train_indices, - valid_indices=valid_indices) - - assert_array_almost_equal(sorted(fit_result['train_indices']), np.array([0, 1, 2, 3])) - assert_array_almost_equal(sorted(fit_result['valid_indices']), np.array([4, 5, 6])) - assert_array_almost_equal(fit_result['X'][fit_result['valid_indices']], X[valid_indices]) - assert_array_almost_equal(fit_result['Y'][fit_result['valid_indices']], Y[valid_indices]) - self.assertEqual(fit_result['X'].shape, (7, 3)) - num_0 = 0 - num_1 = 0 - for i in range(fit_result['X'][fit_result['train_indices']].shape[0]): - x = fit_result['X'][fit_result['train_indices']][i, :] - y = fit_result['Y'][fit_result['train_indices']][i, :] - - if np.all(y == np.array([0, 1])): - assert_array_almost_equal(x, np.array([1, 2, 3])) - num_0 += 1 - else: - self.assertTrue(np.all(x == np.array([4, 5, 6])) or np.all(x == np.array([7, 8, 9]))) - num_1 += 1 - 
self.assertEqual(num_0, 2) - self.assertEqual(num_1, 2) diff --git a/test/test_pipeline/test_tabular_classification.py b/test/test_pipeline/test_tabular_classification.py new file mode 100644 index 000000000..d01fcaa08 --- /dev/null +++ b/test/test_pipeline/test_tabular_classification.py @@ -0,0 +1,185 @@ +import numpy as np + +import pytest + +import torch + +from autoPyTorch.pipeline.components.setup.early_preprocessor.utils import get_preprocess_transforms +from autoPyTorch.pipeline.tabular_classification import TabularClassificationPipeline +from autoPyTorch.utils.common import FitRequirement + + +@pytest.mark.parametrize("fit_dictionary", ['fit_dictionary_numerical_only', + 'fit_dictionary_categorical_only', + 'fit_dictionary_num_and_categorical'], indirect=True) +class TestTabularClassification: + def test_pipeline_fit(self, fit_dictionary): + """This test makes sure that the pipeline is able to fit + given random combinations of hyperparameters across the pipeline""" + + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + pipeline.fit(fit_dictionary) + + # To make sure we fitted the model, there should be a + # run summary object with accuracy + run_summary = pipeline.named_steps['trainer'].run_summary + assert run_summary is not None + + # Make sure that performance was properly captured + assert run_summary.performance_tracker['train_loss'][1] > 0 + assert run_summary.total_parameter_count > 0 + assert 'accuracy' in run_summary.performance_tracker['train_metrics'][1] + + # Make sure a network was fit + assert isinstance(pipeline.named_steps['network'].choice.get_network(), torch.nn.Module) + + def test_pipeline_predict(self, fit_dictionary): + """This test makes sure that the pipeline is able to predict + given random combinations of hyperparameters across the pipeline""" + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + + pipeline.fit(fit_dictionary) + + prediction = pipeline.predict( + fit_dictionary['backend'].load_datamanager().test_tensors[0]) + assert isinstance(prediction, np.ndarray) + assert prediction.shape == (200, 2) + + def test_pipeline_predict_proba(self, fit_dictionary): + """This test makes sure that the pipeline is able to fit + given random combinations of hyperparameters across the pipeline, + and then predict probabilities using predict_proba + """ + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + + pipeline.fit(fit_dictionary) + + prediction = pipeline.predict_proba( + fit_dictionary['backend'].load_datamanager().test_tensors[0]) + assert isinstance(prediction, np.ndarray) + assert prediction.shape == (200, 2) + + def test_pipeline_transform(self, fit_dictionary): + """ + In the context of autoPyTorch, transform expands a fit dictionary with + components that were previously fit. We can use this as a nice way to make sure + that fit works properly.
+ This code is added in light of components not properly added to the fit dictionary + """ + + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + + pipeline.fit(fit_dictionary) + + # We do not want to apply the same early preprocessing operation to the fit dictionary + if 'X_train' in fit_dictionary: + fit_dictionary.pop('X_train') + + transformed_fit_dictionary = pipeline.transform(fit_dictionary) + + # First, we do not lose any keys (we use a fancy subset containment check) + assert fit_dictionary.items() <= transformed_fit_dictionary.items() + + # Then the pipeline should have added the following keys + expected_keys = {'imputer', 'encoder', 'scaler', 'tabular_transformer', + 'preprocess_transforms', 'network', 'optimizer', 'lr_scheduler', + 'train_data_loader', 'val_data_loader', 'run_summary'} + assert expected_keys.issubset(set(transformed_fit_dictionary.keys())) + + # Then we need to have transformations being created. + assert len(get_preprocess_transforms(transformed_fit_dictionary)) > 0 + + # We expect the transformations to be in the pipeline at any time for inference + assert 'preprocess_transforms' in transformed_fit_dictionary.keys() + + @pytest.mark.parametrize("is_small_preprocess", [True, False]) + def test_default_configuration(self, fit_dictionary, is_small_preprocess): + """Makes sure that when no config is set, we can trust the + default configuration from the space""" + + fit_dictionary['is_small_preprocess'] = is_small_preprocess + + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + + pipeline.fit(fit_dictionary) + + def test_remove_key_check_requirements(self, fit_dictionary): + """Makes sure that when a key is removed from X, the correct error is raised""" + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + for key in ['job_id', 'device', 'split_id', 'use_pynisher', 'torch_num_threads', + 'dataset_properties', ]: + fit_dictionary_copy = fit_dictionary.copy() + fit_dictionary_copy.pop(key) + with pytest.raises(ValueError, match=r"To fit .+?, expected fit dictionary to have"): + pipeline.fit(fit_dictionary_copy) + + def test_network_optimizer_lr_handshake(self, fit_dictionary): + """Fitting a network should put the network in the X""" + # Create the pipeline to check.
A random config should be sufficient + pipeline = TabularClassificationPipeline( + dataset_properties=fit_dictionary['dataset_properties']) + cs = pipeline.get_hyperparameter_search_space() + config = cs.sample_configuration() + pipeline.set_hyperparameters(config) + + # Make sure that fitting a network adds a "network" to X + assert 'network' in pipeline.named_steps.keys() + X = pipeline.named_steps['network'].fit( + fit_dictionary, + None + ).transform(fit_dictionary) + assert 'network' in X + + # Then fitting an optimizer should fail if there is no network: + assert 'optimizer' in pipeline.named_steps.keys() + with pytest.raises( + ValueError, + match=r"To fit .+?, expected fit dictionary to have 'network' but got .*" + ): + pipeline.named_steps['optimizer'].fit({'dataset_properties': {}}, None) + + # No error when network is passed + X = pipeline.named_steps['optimizer'].fit(X, None).transform(X) + assert 'optimizer' in X + + # Then fitting a lr_scheduler should fail if there is no optimizer: + assert 'lr_scheduler' in pipeline.named_steps.keys() + with pytest.raises( + ValueError, + match=r"To fit .+?, expected fit dictionary to have 'optimizer' but got .*" + ): + pipeline.named_steps['lr_scheduler'].fit({'dataset_properties': {}}, None) + + # No error when optimizer is passed + X = pipeline.named_steps['lr_scheduler'].fit(X, None).transform(X) + assert 'lr_scheduler' in X + + def test_get_fit_requirements(self, fit_dictionary): + dataset_properties = {'numerical_columns': [], 'categorical_columns': []} + pipeline = TabularClassificationPipeline(dataset_properties=dataset_properties) + fit_requirements = pipeline.get_fit_requirements() + + # check if fit requirements is a list of FitRequirement named tuples + assert isinstance(fit_requirements, list) + for requirement in fit_requirements: + assert isinstance(requirement, FitRequirement) diff --git a/test/test_utils/__init__.py b/test/test_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/test_utils/test_backend.py b/test/test_utils/test_backend.py new file mode 100644 index 000000000..becea67fb --- /dev/null +++ b/test/test_utils/test_backend.py @@ -0,0 +1,83 @@ +# -*- encoding: utf-8 -*- +import builtins +import unittest +import unittest.mock + +import pytest + +from autoPyTorch.utils.backend import Backend + + +class BackendStub(Backend): + + def __init__(self): + self.__class__ = Backend + + +########################################################################################## +# Fixtures +########################################################################################## +@pytest.fixture +def backend_stub(): + backend = BackendStub() + backend.internals_directory = '/' + return backend + + +def _setup_load_model_mocks(openMock, pickleLoadMock, seed, idx, budget): + model_path = '/runs/%s_%s_%s/%s.%s.%s.model' % (seed, idx, budget, seed, idx, budget) + file_handler = 'file_handler' + expected_model = 'model' + + fileMock = unittest.mock.MagicMock() + fileMock.__enter__.return_value = file_handler + + openMock.side_effect = \ + lambda path, flag: fileMock if path == model_path and flag == 'rb' else None + pickleLoadMock.side_effect = lambda fh: expected_model if fh == file_handler else None + + return expected_model + + +########################################################################################## +# Tests +########################################################################################## +@unittest.mock.patch('pickle.load') +@unittest.mock.patch('os.path.exists') +def
test_load_model_by_seed_and_id(exists_mock, pickleLoadMock, backend_stub): + exists_mock.return_value = False + open_mock = unittest.mock.mock_open(read_data='Data') + with unittest.mock.patch( + 'autoPyTorch.utils.backend.open', + open_mock, + create=True, + ): + seed = 13 + idx = 17 + budget = 50.0 + expected_model = _setup_load_model_mocks(open_mock, + pickleLoadMock, + seed, idx, budget) + + actual_model = backend_stub.load_model_by_seed_and_id_and_budget( + seed, idx, budget) + + assert expected_model == actual_model + + +@unittest.mock.patch('pickle.load') +@unittest.mock.patch.object(builtins, 'open') +@unittest.mock.patch('os.path.exists') +def test_loads_models_by_identifiers(exists_mock, openMock, pickleLoadMock, backend_stub): + exists_mock.return_value = True + seed = 13 + idx = 17 + budget = 50.0 + expected_model = _setup_load_model_mocks( + openMock, pickleLoadMock, seed, idx, budget) + expected_dict = {(seed, idx, budget): expected_model} + + actual_dict = backend_stub.load_models_by_identifiers([(seed, idx, budget)]) + + assert isinstance(actual_dict, dict) + assert expected_dict == actual_dict