diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 1b38f317..451c2249 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -27,7 +27,6 @@ jobs:
     - name: Setup environment
       run: |
         sudo apt-get update
-        sudo apt-get -y install openmpi-bin libopenmpi-dev
 
     - name: Install
       run: |
diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml
index 46962026..db574279 100644
--- a/.github/workflows/coverage.yml
+++ b/.github/workflows/coverage.yml
@@ -40,7 +40,7 @@ jobs:
     - name: Setup environment
       run: |
         sudo apt-get update
-        sudo apt-get -y install openmpi-bin libopenmpi-dev libopenblas-dev
+        sudo apt-get -y install libopenblas-dev
 
     - name: Install
       run: |
@@ -71,23 +71,10 @@ jobs:
         COVERAGE_COVERAGE: yes  # https://github.com/nedbat/coveragepy/blob/65bf33fc03209ffb01bbbc0d900017614645ee7a/coverage/control.py#L255-L261
       run: |
         coverage run --source=optuna -m pytest tests \
-          --ignore tests/allennlp_tests \
           --ignore tests/test_mxnet.py
         coverage combine
         coverage xml
 
-    - name: Multi-node tests
-      env:
-        OMP_NUM_THREADS: 1
-        PYTHONPATH: .
-        COVERAGE_PROCESS_START: .coveragerc
-        COVERAGE_COVERAGE: yes
-      run: |
-        export OMPI_MCA_rmaps_base_oversubscribe=yes
-        mpirun -n 2 -- coverage run -m pytest tests/test_chainermn.py
-        coverage combine --append
-        coverage xml
-
     - name: Upload coverage to Codecov
       uses: codecov/codecov-action@v3
       with:
diff --git a/.github/workflows/mac-tests.yml b/.github/workflows/mac-tests.yml
index 40ab2afd..35954374 100644
--- a/.github/workflows/mac-tests.yml
+++ b/.github/workflows/mac-tests.yml
@@ -54,8 +54,6 @@ jobs:
 
     - name: Setup mac environment
       run: |
-        brew install libomp
-        brew install open-mpi
         brew install openblas
 
     - name: Install
@@ -86,20 +84,11 @@ jobs:
     - name: Tests
       run: |
         pytest tests \
-          --ignore tests/allennlp_tests \
           --ignore tests/test_mxnet.py
       env:
         OMP_NUM_THREADS: 1
 
-    - name: Tests MPI
-      run: |
-        export OMPI_MCA_rmaps_base_oversubscribe=yes
-        mpirun -n 2 -- pytest tests/test_chainermn.py
-      env:
-        OMP_NUM_THREADS: 1
-
     - name: Tests(Deprecated)
       run: |
         pip install "numpy<1.24"
-        pytest tests/allennlp_tests
         pytest tests/test_mxnet.py
diff --git a/.github/workflows/tests-mpi.yml b/.github/workflows/tests-mpi.yml
deleted file mode 100644
index 7dcbbdaf..00000000
--- a/.github/workflows/tests-mpi.yml
+++ /dev/null
@@ -1,68 +0,0 @@
-name: Tests (MPI)
-
-on:
-  push:
-    branches:
-      - master
-  pull_request: {}
-  schedule:
-    - cron: '0 23 * * SUN-THU'
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  tests-mpi:
-    if: (github.event_name == 'schedule' && github.repository == 'optuna/optuna-integration') || (github.event_name != 'schedule')
-    runs-on: ubuntu-latest
-
-    strategy:
-      matrix:
-        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
-
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v3
-
-    - name: Setup Python${{ matrix.python-version }}
-      uses: actions/setup-python@v4
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Setup cache
-      uses: actions/cache@v3
-      env:
-        cache-name: test-mpi
-      with:
-        path: ~/.cache/pip
-        key: ${{ runner.os }}-${{ matrix.python-version }}-${{ env.cache-name }}-${{ hashFiles('**/pyproject.toml') }}-v1
-        restore-keys: |
-          ${{ runner.os }}-${{ matrix.python-version }}-${{ env.cache-name }}-${{ hashFiles('**/pyproject.toml') }}
-
-    - name: Setup environment
-      run: |
-        sudo apt-get update
-        sudo apt-get -y install openmpi-bin libopenmpi-dev
-
-    - name: Install
-      run: |
-        python -m pip install --upgrade pip
-        pip install --progress-bar off .[test]
-        pip install --progress-bar off .[all]
-
-    - name: Output installed packages
-      run: |
-        pip freeze --all
-
-    - name: Output dependency tree
-      run: |
-        pip install pipdeptree
-        pipdeptree
-
-    - name: Tests
-      run: |
-        export OMPI_MCA_rmaps_base_oversubscribe=yes
-        mpirun -n 2 -- pytest tests/test_chainermn.py
-      env:
-        OMP_NUM_THREADS: 1
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d1f40e64..2bbb0ca9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -53,7 +53,7 @@ jobs:
     - name: Setup environment
       run: |
         sudo apt-get update
-        sudo apt-get -y install openmpi-bin libopenmpi-dev libopenblas-dev
+        sudo apt-get -y install libopenblas-dev
 
     - name: Install
       run: |
@@ -87,13 +87,9 @@ jobs:
     - name: Tests
       run: |
         pytest tests \
-          --ignore tests/allennlp_tests \
           --ignore tests/test_mxnet.py
 
     - name: Tests(Deprecated)
       run: |
         pip install "numpy<1.24"
-        if [ ${{ matrix.python-version }} != 3.11 ]; then
-          pytest tests/allennlp_tests
-        fi
         pytest tests/test_mxnet.py
diff --git a/pyproject.toml b/pyproject.toml
index c854e068..2e082a12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,11 +52,8 @@ document = [
     "sphinx_rtd_theme",
 ]
 all = [
-    "allennlp>=2.2.0; python_version<'3.11'",
     "catalyst",
-    "chainer>=5.0.0",
     "fastai",
-    "mpi4py",
     "mxnet",
     "shap",
     "skorch",
diff --git a/tests/allennlp_tests/__init__.py b/tests/allennlp_tests/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/allennlp_tests/example.jsonnet b/tests/allennlp_tests/example.jsonnet
deleted file mode 100644
index c0472b39..00000000
--- a/tests/allennlp_tests/example.jsonnet
+++ /dev/null
@@ -1,61 +0,0 @@
-local DROPOUT = std.parseJson(std.extVar('DROPOUT'));
-
-
-{
-  dataset_reader: {
-    type: 'sequence_tagging',
-    word_tag_delimiter: '/',
-    token_indexers: {
-      tokens: {
-        type: 'single_id',
-        lowercase_tokens: true,
-      },
-      token_characters: {
-        type: 'characters',
-      },
-    },
-  },
-  train_data_path: 'tests/allennlp_tests/sentences.train',
-  validation_data_path: 'tests/allennlp_tests/sentences.valid',
-  model: {
-    type: 'simple_tagger',
-    text_field_embedder: {
-      token_embedders: {
-        tokens: {
-          type: 'embedding',
-          embedding_dim: 5,
-        },
-        token_characters: {
-          type: 'character_encoding',
-          embedding: {
-            embedding_dim: 4,
-          },
-          encoder: {
-            type: 'cnn',
-            embedding_dim: 4,
-            num_filters: 5,
-            ngram_filter_sizes: [3],
-          },
-          dropout: DROPOUT,
-        },
-      },
-    },
-    encoder: {
-      type: 'lstm',
-      input_size: 10,
-      hidden_size: 10,
-      num_layers: 2,
-      dropout: 0,
-      bidirectional: true,
-    },
-  },
-  data_loader: {
-    batch_size: 32,
-  },
-  trainer: {
-    optimizer: 'adam',
-    num_epochs: 1,
-    patience: 10,
-    cuda_device: -1,
-  },
-}
diff --git a/tests/allennlp_tests/example_with_environment_variables.jsonnet b/tests/allennlp_tests/example_with_environment_variables.jsonnet
deleted file mode 100644
index f61f652b..00000000
--- a/tests/allennlp_tests/example_with_environment_variables.jsonnet
+++ /dev/null
@@ -1,65 +0,0 @@
-local DROPOUT = std.parseJson(std.extVar('DROPOUT'));
-local LEARNING_RATE = std.parseJson(std.extVar('LEARNING_RATE'));
-
-
-{
-  dataset_reader: {
-    type: 'sequence_tagging',
-    word_tag_delimiter: '/',
-    token_indexers: {
-      tokens: {
-        type: 'tiny_single_id',
-        lowercase_tokens: true,
-      },
-      token_characters: {
-        type: 'characters',
-      },
-    },
-  },
-  train_data_path: std.extVar('TRAIN_PATH'),
-  validation_data_path: std.extVar('VALID_PATH'),
-  model: {
-    type: 'simple_tagger',
-    text_field_embedder: {
-      token_embedders: {
-        tokens: {
-          type: 'embedding',
-          embedding_dim: 5,
-        },
-        token_characters: {
-          type: 'character_encoding',
-          embedding: {
-            embedding_dim: 4,
-          },
-          encoder: {
-            type: 'cnn',
-            embedding_dim: 4,
-            num_filters: 5,
-            ngram_filter_sizes: [3],
-          },
-          dropout: DROPOUT,
-        },
-      },
-    },
-    encoder: {
-      type: 'lstm',
-      input_size: 10,
-      hidden_size: 10,
-      num_layers: 2,
-      dropout: 0,
-      bidirectional: true,
-    },
-  },
-  data_loader: {
-    batch_size: 32,
-  },
-  trainer: {
-    optimizer: {
-      type: 'adam',
-      lr: LEARNING_RATE,
-    },
-    num_epochs: 1,
-    patience: 10,
-    cuda_device: -1,
-  },
-}
diff --git a/tests/allennlp_tests/example_with_executor_and_pruner.jsonnet b/tests/allennlp_tests/example_with_executor_and_pruner.jsonnet
deleted file mode 100644
index d4cdbe18..00000000
--- a/tests/allennlp_tests/example_with_executor_and_pruner.jsonnet
+++ /dev/null
@@ -1,66 +0,0 @@
-local DROPOUT = std.parseJson(std.extVar('DROPOUT'));
-
-
-{
-  dataset_reader: {
-    type: 'sequence_tagging',
-    word_tag_delimiter: '/',
-    token_indexers: {
-      tokens: {
-        type: 'single_id',
-        lowercase_tokens: true,
-      },
-      token_characters: {
-        type: 'characters',
-      },
-    },
-  },
-  train_data_path: 'tests/allennlp_tests/sentences.train',
-  validation_data_path: 'tests/allennlp_tests/sentences.valid',
-  model: {
-    type: 'simple_tagger',
-    text_field_embedder: {
-      token_embedders: {
-        tokens: {
-          type: 'embedding',
-          embedding_dim: 5,
-        },
-        token_characters: {
-          type: 'character_encoding',
-          embedding: {
-            embedding_dim: 4,
-          },
-          encoder: {
-            type: 'cnn',
-            embedding_dim: 4,
-            num_filters: 5,
-            ngram_filter_sizes: [3],
-          },
-          dropout: DROPOUT,
-        },
-      },
-    },
-    encoder: {
-      type: 'lstm',
-      input_size: 10,
-      hidden_size: 10,
-      num_layers: 2,
-      dropout: 0,
-      bidirectional: true,
-    },
-  },
-  data_loader: {
-    batch_size: 32,
-  },
-  trainer: {
-    optimizer: 'adam',
-    num_epochs: 1,
-    patience: 10,
-    cuda_device: -1,
-    callbacks: [
-      {
-        type: 'optuna_pruner',
-      }
-    ],
-  },
-}
diff --git a/tests/allennlp_tests/example_with_executor_and_pruner_distributed.jsonnet b/tests/allennlp_tests/example_with_executor_and_pruner_distributed.jsonnet
deleted file mode 100644
index af9fb7c8..00000000
--- a/tests/allennlp_tests/example_with_executor_and_pruner_distributed.jsonnet
+++ /dev/null
@@ -1,69 +0,0 @@
-local DROPOUT = std.parseJson(std.extVar('DROPOUT'));
-
-
-{
-  dataset_reader: {
-    type: 'sequence_tagging',
-    word_tag_delimiter: '/',
-    token_indexers: {
-      tokens: {
-        type: 'single_id',
-        lowercase_tokens: true,
-      },
-      token_characters: {
-        type: 'characters',
-      },
-    },
-  },
-  train_data_path: 'tests/allennlp_tests/sentences.train',
-  validation_data_path: 'tests/allennlp_tests/sentences.valid',
-  model: {
-    type: 'simple_tagger',
-    text_field_embedder: {
-      token_embedders: {
-        tokens: {
-          type: 'embedding',
-          embedding_dim: 5,
-        },
-        token_characters: {
-          type: 'character_encoding',
-          embedding: {
-            embedding_dim: 4,
-          },
-          encoder: {
-            type: 'cnn',
-            embedding_dim: 4,
-            num_filters: 5,
-            ngram_filter_sizes: [3],
-          },
-          dropout: DROPOUT,
-        },
-      },
-    },
-    encoder: {
-      type: 'lstm',
-      input_size: 10,
-      hidden_size: 10,
-      num_layers: 2,
-      dropout: 0,
-      bidirectional: true,
-    },
-  },
-  data_loader: {
-    batch_size: 32,
-  },
-  trainer: {
-    optimizer: 'adam',
-    num_epochs: 1,
-    patience: 10,
-    cuda_device: -1,
-    callbacks: [
-      {
-        type: 'optuna_pruner',
-      }
-    ],
-  },
-  distributed: {
-    cuda_devices: [-1, -1],
-  },
-}
diff --git a/tests/allennlp_tests/example_with_include_package.jsonnet b/tests/allennlp_tests/example_with_include_package.jsonnet
deleted file mode 100644
index 105bc5d2..00000000
--- a/tests/allennlp_tests/example_with_include_package.jsonnet
+++ /dev/null
@@ -1,61 +0,0 @@
-local DROPOUT = std.parseJson(std.extVar('DROPOUT'));
-
-
-{
-  dataset_reader: {
-    type: 'sequence_tagging',
-    word_tag_delimiter: '/',
-    token_indexers: {
-      tokens: {
-        type: 'tiny_single_id',
-        lowercase_tokens: true,
-      },
-      token_characters: {
-        type: 'characters',
-      },
-    },
-  },
-  train_data_path: 'tests/allennlp_tests/sentences.train',
-  validation_data_path: 'tests/allennlp_tests/sentences.valid',
-  model: {
-    type: 'simple_tagger',
-    text_field_embedder: {
-      token_embedders: {
-        tokens: {
-          type: 'embedding',
-          embedding_dim: 5,
-        },
-        token_characters: {
-          type: 'character_encoding',
-          embedding: {
-            embedding_dim: 4,
-          },
-          encoder: {
-            type: 'cnn',
-            embedding_dim: 4,
-            num_filters: 5,
-            ngram_filter_sizes: [3],
-          },
-          dropout: DROPOUT,
-        },
-      },
-    },
-    encoder: {
-      type: 'lstm',
-      input_size: 10,
-      hidden_size: 10,
-      num_layers: 2,
-      dropout: 0,
-      bidirectional: true,
-    },
-  },
-  data_loader: {
-    batch_size: 32,
-  },
-  trainer: {
-    optimizer: 'adam',
-    num_epochs: 1,
-    patience: 10,
-    cuda_device: -1,
-  },
-}
diff --git a/tests/allennlp_tests/invalid.jsonnet b/tests/allennlp_tests/invalid.jsonnet
deleted file mode 100644
index 8b137891..00000000
--- a/tests/allennlp_tests/invalid.jsonnet
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/tests/allennlp_tests/pruning_test.jsonl b/tests/allennlp_tests/pruning_test.jsonl
deleted file mode 100644
index b2bd32bf..00000000
--- a/tests/allennlp_tests/pruning_test.jsonl
+++ /dev/null
@@ -1,7 +0,0 @@
-{"id": "train_01", "text": "Chainer is a Python-based deep learning framework aiming at flexibility.", "label": 1}
-{"id": "train_02", "text": "It provides automatic differentiation APIs based on the define-by-run approach (a.k.a. dynamic computational graphs) as well as object-oriented high-level APIs to build and train neural networks.", "label": 1}
-{"id": "train_03", "text": "It also supports CUDA/cuDNN using CuPy for high performance training and inference.", "label": 1}
-{"id": "train_04", "text": "For more details about Chainer, see the documents and resources listed above and join the community in Forum, Slack, and Twitter.", "label": 1}
-{"id": "train_05", "text": "Optuna is an automatic hyperparameter optimization software framework, particularly designed for machine learning.", "label": 0}
-{"id": "train_06", "text": "It features an imperative, define-by-run style user API.", "label": 0}
-{"id": "train_07", "text": "Thanks to our define-by-run API, the code written with Optuna enjoys high modularity, and the user of Optuna can dynamically construct the search spaces for the hyperparameters.", "label": 0}
diff --git a/tests/allennlp_tests/sentences.train b/tests/allennlp_tests/sentences.train
deleted file mode 100644
index df0fc3d9..00000000
--- a/tests/allennlp_tests/sentences.train
+++ /dev/null
@@ -1,3 +0,0 @@
-Optuna/NNP is/VBZ an/DT automatic/JJ hyperparameter/NN optimization/NN software/NN framework/NN ,/, particularly/RB designed/VBN for/IN machine/NN learning/NN ./.
-It/PRP features/VBZ an/DT imperative/JJ ,/, define/VB -/HYPH by/IN -/HYPH run/NN style/NN user/NN API/NN ./.
-Thanks/NNS to/IN our/PRP$ define/NN -/HYPH by/IN -/HYPH run/NN API/NNP ,/, the/DT code/NN written/VBN with/IN Optuna/NNP enjoys/VBZ high/JJ modularity/NN ,/, and/CC the/DT user/NN of/IN Optuna/NNP can/MD dynamically/RB construct/VB the/DT search/NN spaces/NNS for/IN the/DT hyperparameters/NNS ./.
diff --git a/tests/allennlp_tests/sentences.valid b/tests/allennlp_tests/sentences.valid
deleted file mode 100644
index 5bff8fd4..00000000
--- a/tests/allennlp_tests/sentences.valid
+++ /dev/null
@@ -1,3 +0,0 @@
-Optuna/NNP is/VBZ an/DT hyperparameter/JJ optimization/NN software/NN framework/NN ,/, designed/VBN for/IN machine/NN learning/NN ./.
-It/PRP features/VBZ an/DT imperative/JJ ,/, define/VB -/HYPH by/IN -/HYPH run/NN style/NN user/NN API/NN ./.
-Thanks/NNS to/IN our/PRP$ define/NN -/HYPH by/IN -/HYPH run/NN API/NNP ,/, the/DT code/NN written/VBN with/IN Optuna/NNP enjoys/VBZ high/JJ modularity/NN ./.
diff --git a/tests/allennlp_tests/test.jsonnet b/tests/allennlp_tests/test.jsonnet
deleted file mode 100644
index 8756f346..00000000
--- a/tests/allennlp_tests/test.jsonnet
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-  model: {
-    dropout: 0.1,
-    input_size: 100,
-    hidden_size: [100, 200, 300],
-  },
-}
diff --git a/tests/allennlp_tests/test_allennlp.py b/tests/allennlp_tests/test_allennlp.py
deleted file mode 100644
index 2b57cfa2..00000000
--- a/tests/allennlp_tests/test_allennlp.py
+++ /dev/null
@@ -1,421 +0,0 @@
-from __future__ import annotations
-
-import json
-import os
-import tempfile
-from unittest import mock
-
-import optuna
-from optuna.testing.pruners import DeterministicPruner
-import pytest
-
-import optuna_integration
-from optuna_integration._imports import try_import
-from optuna_integration.allennlp import AllenNLPExecutor
-from optuna_integration.allennlp import AllenNLPPruningCallback
-from optuna_integration.allennlp._pruner import _create_pruner
-from optuna_integration.allennlp._variables import _VariableManager
-
-
-with try_import():
-    import _jsonnet
-    import psutil
-    import torch.optim
-
-    import allennlp.data
-    import allennlp.data.dataset_readers
-    import allennlp.data.tokenizers
-    import allennlp.models
-    import allennlp.modules
-    import allennlp.modules.seq2vec_encoders
-    import allennlp.modules.text_field_embedders
-    import allennlp.training
-
-
-def test_build_params() -> None:
-    study = optuna.create_study(direction="maximize")
-    trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id))
-    trial.suggest_float("LEARNING_RATE", 1e-2, 1e-1)
-    trial.suggest_float("DROPOUT", 0.0, 0.5)
-    executor = AllenNLPExecutor(trial, "tests/allennlp_tests/test.jsonnet", "test")
-    params = executor._build_params()
-
-    assert params["model"]["dropout"] == 0.1
-    assert params["model"]["input_size"] == 100
-    assert params["model"]["hidden_size"] == [100, 200, 300]
-
-
-def test_build_params_overwriting_environment_variable() -> None:
-    study = optuna.create_study(direction="maximize")
-    trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id))
-    trial.suggest_float("LEARNING_RATE", 1e-2, 1e-1)
-    trial.suggest_float("DROPOUT", 0.0, 0.5)
-    os.environ["TRAIN_PATH"] = "tests/allennlp_tests/sentences.train"
-    os.environ["VALID_PATH"] = "tests/allennlp_tests/sentences.valid"
-    executor = AllenNLPExecutor(
-        trial,
-        "tests/allennlp_tests/example_with_environment_variables.jsonnet",
-        "test",
-    )
-    params = executor._build_params()
-    os.environ.pop("TRAIN_PATH")
-    os.environ.pop("VALID_PATH")
-    assert params["train_data_path"] == "tests/allennlp_tests/sentences.train"
"tests/allennlp_tests/sentences.train" - assert params["validation_data_path"] == "tests/allennlp_tests/sentences.valid" - - -def test_build_params_when_optuna_and_environment_variable_both_exist() -> None: - study = optuna.create_study(direction="maximize") - trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id)) - trial.suggest_float("LEARNING_RATE", 1e-2, 1e-2) - os.environ["TRAIN_PATH"] = "tests/allennlp_tests/sentences.train" - os.environ["VALID_PATH"] = "tests/allennlp_tests/sentences.valid" - os.environ["LEARNING_RATE"] = "1e-3" - os.environ["DROPOUT"] = "0.0" - executor = AllenNLPExecutor( - trial, - "tests/allennlp_tests/example_with_environment_variables.jsonnet", - "test", - ) - params = executor._build_params() - os.environ.pop("TRAIN_PATH") - os.environ.pop("VALID_PATH") - os.environ.pop("LEARNING_RATE") - os.environ.pop("DROPOUT") - - # Optuna trial overwrites a parameter specified by environment variable. - assert params["trainer"]["optimizer"]["lr"] == 1e-2 - path = params["model"]["text_field_embedder"]["token_embedders"]["token_characters"]["dropout"] - assert path == 0.0 - - -def test_missing_config_file() -> None: - study = optuna.create_study(direction="maximize") - trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id)) - trial.suggest_float("LEARNING_RATE", 1e-2, 1e-1) - trial.suggest_float("DROPOUT", 0.0, 0.5) - trial.suggest_int("MAX_FILTER_SIZE", 3, 6) - trial.suggest_int("NUM_FILTERS", 16, 128) - trial.suggest_int("NUM_OUTPUT_LAYERS", 1, 3) - trial.suggest_int("HIDDEN_SIZE", 16, 128) - - executor = AllenNLPExecutor(trial, "undefined.jsonnet", "test") - with pytest.raises(RuntimeError): - executor.run() - - -def test_invalid_config_file() -> None: - study = optuna.create_study(direction="maximize") - trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id)) - trial.suggest_float("LEARNING_RATE", 1e-2, 1e-1) - trial.suggest_float("DROPOUT", 0.0, 0.5) - trial.suggest_int("MAX_FILTER_SIZE", 3, 6) - trial.suggest_int("NUM_FILTERS", 16, 128) - trial.suggest_int("NUM_OUTPUT_LAYERS", 1, 3) - trial.suggest_int("HIDDEN_SIZE", 16, 128) - - executor = AllenNLPExecutor(trial, "tests/allennlp_tests/invalid.jsonnet", "test") - with pytest.raises(RuntimeError): - executor.run() - - -def test_invalid_param_name() -> None: - study = optuna.create_study(direction="maximize") - trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id)) - trial.suggest_float("_____DROPOUT", 0.0, 0.5) - - executor = AllenNLPExecutor(trial, "tests/allennlp_tests/example.jsonnet", "test") - with pytest.raises(RuntimeError): - executor.run() - - -def test_allennlp_executor() -> None: - study = optuna.create_study(direction="maximize") - trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id)) - trial.suggest_float("DROPOUT", 0.0, 0.5) - - with tempfile.TemporaryDirectory() as tmp_dir: - executor = AllenNLPExecutor(trial, "tests/allennlp_tests/example.jsonnet", tmp_dir) - result = executor.run() - assert isinstance(result, float) - - -def test_allennlp_executor_with_include_package() -> None: - study = optuna.create_study(direction="maximize") - trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id)) - trial.suggest_float("DROPOUT", 0.0, 0.5) - - with tempfile.TemporaryDirectory() as tmp_dir: - executor = AllenNLPExecutor( - trial, - "tests/allennlp_tests/example_with_include_package.jsonnet", - tmp_dir, - 
-        )
-        result = executor.run()
-        assert isinstance(result, float)
-
-
-def test_allennlp_executor_with_include_package_arr() -> None:
-    study = optuna.create_study(direction="maximize")
-    trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id))
-    trial.suggest_float("DROPOUT", 0.0, 0.5)
-
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        executor = AllenNLPExecutor(
-            trial,
-            "tests/allennlp_tests/example_with_include_package.jsonnet",
-            tmp_dir,
-            include_package=["tests.allennlp_tests.tiny_single_id"],
-        )
-        result = executor.run()
-        assert isinstance(result, float)
-
-
-def test_allennlp_executor_with_options() -> None:
-    study = optuna.create_study(direction="maximize")
-    trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id))
-    trial.suggest_float("DROPOUT", 0.0, 0.5)
-    package_name = "tests.allennlp_tests.tiny_single_id"
-
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        executor = AllenNLPExecutor(
-            trial,
-            "tests/allennlp_tests/example_with_include_package.jsonnet",
-            tmp_dir,
-            force=True,
-            file_friendly_logging=True,
-            include_package=package_name,
-        )
-
-        # ``executor.run`` loads ``metrics.json``
-        # after running ``allennlp.commands.train.train_model``.
-        with open(os.path.join(executor._serialization_dir, "metrics.json"), "w") as fout:
-            json.dump({executor._metrics: 1.0}, fout)
-
-        expected_include_packages = [package_name, "optuna_integration.allennlp"]
-        with mock.patch("allennlp.commands.train.train_model", return_value=None) as mock_obj:
-            executor.run()
-            assert mock_obj.call_args[1]["force"]
-            assert mock_obj.call_args[1]["file_friendly_logging"]
-            assert mock_obj.call_args[1]["include_package"] == expected_include_packages
-
-
-def test_dump_best_config() -> None:
-    with tempfile.TemporaryDirectory() as tmp_dir:
-
-        def objective(trial: optuna.Trial) -> float:
-            trial.suggest_float("DROPOUT", dropout, dropout)
-            executor = AllenNLPExecutor(trial, input_config_file, tmp_dir)
-            return executor.run()
-
-        dropout = 0.5
-        input_config_file = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)), "example.jsonnet"
-        )
-        output_config_file = os.path.join(tmp_dir, "result.json")
-
-        study = optuna.create_study(direction="maximize")
-        study.optimize(objective, n_trials=1)
-
-        optuna_integration.allennlp.dump_best_config(input_config_file, output_config_file, study)
-        best_config = json.loads(_jsonnet.evaluate_file(output_config_file))
-        model_config = best_config["model"]
-        target_config = model_config["text_field_embedder"]["token_embedders"]["token_characters"]
-        assert target_config["dropout"] == dropout
-
-
-def test_dump_best_config_with_environment_variables() -> None:
-    with tempfile.TemporaryDirectory() as tmp_dir:
-
-        def objective(trial: optuna.Trial) -> float:
-            trial.suggest_float("DROPOUT", dropout, dropout)
-            trial.suggest_float("LEARNING_RATE", 1e-2, 1e-1)
-            executor = AllenNLPExecutor(
-                trial,
-                input_config_file,
-                tmp_dir,
-                include_package="tests.allennlp_tests.tiny_single_id",
-            )
-            return executor.run()
-
-        dropout = 0.5
-        input_config_file = os.path.join(
-            os.path.dirname(os.path.realpath(__file__)),
-            "example_with_environment_variables.jsonnet",
-        )
-        output_config_file = os.path.join(tmp_dir, "result.json")
-
-        os.environ["TRAIN_PATH"] = "tests/allennlp_tests/sentences.train"
-        os.environ["VALID_PATH"] = "tests/allennlp_tests/sentences.valid"
-
-        study = optuna.create_study(direction="maximize")
-        study.optimize(objective, n_trials=1)
-
-        optuna_integration.allennlp.dump_best_config(input_config_file, output_config_file, study)
-        best_config = json.loads(_jsonnet.evaluate_file(output_config_file))
-        assert os.getenv("TRAIN_PATH") == best_config["train_data_path"]
-        assert os.getenv("VALID_PATH") == best_config["validation_data_path"]
-        os.environ.pop("TRAIN_PATH")
-        os.environ.pop("VALID_PATH")
-
-
-def test_allennlp_pruning_callback() -> None:
-    with tempfile.TemporaryDirectory() as tmp_dir:
-
-        def objective(trial: optuna.Trial) -> float:
-            reader = allennlp.data.dataset_readers.TextClassificationJsonReader(  # type: ignore[attr-defined] # NOQA: E501
-                tokenizer=allennlp.data.tokenizers.WhitespaceTokenizer(),  # type: ignore[attr-defined] # NOQA: E501
-            )
-            data_loader = allennlp.data.data_loaders.MultiProcessDataLoader(  # type: ignore[attr-defined] # NOQA: E501
-                reader=reader,
-                data_path="tests/allennlp_tests/pruning_test.jsonl",
-                batch_size=16,
-            )
-            vocab = allennlp.data.Vocabulary.from_instances(  # type: ignore[attr-defined]
-                data_loader.iter_instances()
-            )
-
-            data_loader.index_with(vocab)
-
-            embedder = allennlp.modules.text_field_embedders.BasicTextFieldEmbedder(  # type: ignore[attr-defined] # NOQA: E501
-                {"tokens": allennlp.modules.Embedding(50, vocab=vocab)}  # type: ignore[attr-defined] # NOQA: E501
-            )
-            encoder = allennlp.modules.seq2vec_encoders.GruSeq2VecEncoder(  # type: ignore[attr-defined] # NOQA: E501
-                input_size=50, hidden_size=50
-            )
-            model = allennlp.models.BasicClassifier(  # type: ignore[attr-defined]
-                text_field_embedder=embedder, seq2vec_encoder=encoder, vocab=vocab
-            )
-            optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
-
-            serialization_dir = os.path.join(tmp_dir, "trial_{}".format(trial.number))
-            trainer = allennlp.training.GradientDescentTrainer(  # type: ignore[attr-defined]
-                model=model,
-                optimizer=optimizer,
-                data_loader=data_loader,
-                patience=None,
-                num_epochs=1,
-                serialization_dir=serialization_dir,
-                callbacks=[AllenNLPPruningCallback(trial, "training_loss")],
-            )
-            trainer.train()
-            return 1.0
-
-        study = optuna.create_study(pruner=DeterministicPruner(True))
-        study.optimize(objective, n_trials=1)
-        assert study.trials[0].state == optuna.trial.TrialState.PRUNED
-
-        study = optuna.create_study(pruner=DeterministicPruner(False))
-        study.optimize(objective, n_trials=1)
-        assert study.trials[0].state == optuna.trial.TrialState.COMPLETE
-        assert study.trials[0].value == 1.0
-
-
-def test_allennlp_pruning_callback_with_invalid_storage() -> None:
-    input_config_file = "tests/allennlp_tests/example_with_executor_and_pruner.jsonnet"
-
-    with tempfile.TemporaryDirectory() as tmp_dir:
-
-        def objective(trial: optuna.Trial) -> float:
-            trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id))
-            trial.suggest_float("DROPOUT", 0.0, 0.5)
-            executor = AllenNLPExecutor(trial, input_config_file, tmp_dir)
-            return executor.run()
-
-        study = optuna.create_study(
-            direction="maximize",
-            pruner=optuna.pruners.HyperbandPruner(),
-            storage=None,
-        )
-
-        with pytest.raises(RuntimeError):
-            study.optimize(objective)
-
-
-@pytest.mark.parametrize(
-    "pruner_class,pruner_kwargs",
-    [
-        (
-            optuna.pruners.HyperbandPruner,
-            {"min_resource": 3, "max_resource": 10, "reduction_factor": 5},
-        ),
-        (
-            optuna.pruners.MedianPruner,
-            {"n_startup_trials": 8, "n_warmup_steps": 1, "interval_steps": 3},
-        ),
-        (optuna.pruners.NopPruner, {}),
-        (
-            optuna.pruners.PercentilePruner,
-            {"percentile": 50.0, "n_startup_trials": 10, "n_warmup_steps": 1, "interval_steps": 3},
-        ),
-        (
-            optuna.pruners.SuccessiveHalvingPruner,
-            {"min_resource": 3, "reduction_factor": 5, "min_early_stopping_rate": 1},
-        ),
-        (
-            optuna.pruners.ThresholdPruner,
-            {"lower": 0.0, "upper": 1.0, "n_warmup_steps": 3, "interval_steps": 2},
-        ),
-    ],
-)
-@pytest.mark.parametrize(
-    "input_config_file",
-    [
-        "tests/allennlp_tests/example_with_executor_and_pruner.jsonnet",
-        "tests/allennlp_tests/example_with_executor_and_pruner_distributed.jsonnet",  # noqa: E501
-    ],
-)
-def test_allennlp_pruning_callback_with_executor(
-    pruner_class: type[optuna.pruners.BasePruner],
-    pruner_kwargs: dict[str, int | float],
-    input_config_file: str,
-) -> None:
-    def run_allennlp_executor(pruner: optuna.pruners.BasePruner) -> None:
-        study = optuna.create_study(direction="maximize", pruner=pruner, storage=storage)
-        trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id))
-        trial.suggest_float("DROPOUT", 0.0, 0.5)
-        executor = AllenNLPExecutor(trial, input_config_file, serialization_dir)
-        executor.run()
-
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        pruner_name = pruner_class.__name__
-        os.mkdir(os.path.join(tmp_dir, pruner_name))
-        storage = "sqlite:///" + os.path.join(tmp_dir, pruner_name, "result.db")
-        serialization_dir = os.path.join(tmp_dir, pruner_name, "allennlp")
-
-        pruner = pruner_class(**pruner_kwargs)
-        run_allennlp_executor(pruner)
-        process = psutil.Process()
-        manager = _VariableManager(process.ppid())
-        ret_pruner = _create_pruner(
-            manager.get_value("pruner_class"),
-            manager.get_value("pruner_kwargs"),
-        )
-
-        assert isinstance(ret_pruner, pruner_class)
-        for key, value in pruner_kwargs.items():
-            assert getattr(ret_pruner, "_{}".format(key)) == value
-
-
-def test_allennlp_pruning_callback_with_invalid_executor() -> None:
-    class SomeNewPruner(optuna.pruners.BasePruner):
-        def __init__(self) -> None:
-            pass
-
-        def prune(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> bool:
-            return False
-
-    input_config_file = "tests/allennlp_tests/example_with_executor_and_pruner.jsonnet"
-
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        storage = "sqlite:///" + os.path.join(tmp_dir, "result.db")
-        serialization_dir = os.path.join(tmp_dir, "allennlp")
-        pruner = SomeNewPruner()
-
-        study = optuna.create_study(direction="maximize", pruner=pruner, storage=storage)
-        trial = optuna.trial.Trial(study, study._storage.create_new_trial(study._study_id))
-        trial.suggest_float("DROPOUT", 0.0, 0.5)
-
-        with pytest.raises(ValueError):
-            AllenNLPExecutor(trial, input_config_file, serialization_dir)
diff --git a/tests/allennlp_tests/tiny_single_id.py b/tests/allennlp_tests/tiny_single_id.py
deleted file mode 100644
index 9b32bae8..00000000
--- a/tests/allennlp_tests/tiny_single_id.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from __future__ import annotations
-
-import itertools
-
-from allennlp.data.token_indexers.token_indexer import IndexedTokenList
-from allennlp.data.token_indexers.token_indexer import TokenIndexer
-from allennlp.data.tokenizers.token_class import Token
-from allennlp.data.vocabulary import Vocabulary
-
-
-@TokenIndexer.register("tiny_single_id")
-class SingleIdTokenIndexer(TokenIndexer):
-    """Tiny implementation of SingleIdTokenIndexer.
-
-    This class is based on allennlp SingleIdTokenIndexer.
-    https://github.com/allenai/allennlp/blob/master/
-    allennlp/data/token_indexers/single_id_token_indexer.py
-
-    """
-
-    def __init__(
-        self,
-        lowercase_tokens: bool = False,
-        start_tokens: list[str] | None = None,
-        end_tokens: list[str] | None = None,
-        token_min_padding_length: int = 0,
-    ) -> None:
-        super().__init__(token_min_padding_length)
-        self.lowercase_tokens = lowercase_tokens
-
-        self._start_tokens = [Token(st) for st in (start_tokens or [])]
-        self._end_tokens = [Token(et) for et in (end_tokens or [])]
-
-    def count_vocab_items(self, token: Token, counter: dict[str, dict[str, int]]) -> None:
-        text = token.text
-        assert isinstance(text, str)
-
-        if self.lowercase_tokens:
-            text = text.lower()
-        counter["tokens"][text] += 1
-
-    def tokens_to_indices(
-        self, tokens: list[Token], vocabulary: Vocabulary
-    ) -> dict[str, list[int]]:
-        indices: list[int] = []
-
-        for token in itertools.chain(self._start_tokens, tokens, self._end_tokens):
-            text = token.text
-            assert isinstance(text, str)
-
-            if self.lowercase_tokens:
-                text = text.lower()
-            indices.append(vocabulary.get_token_index(text, "tokens"))
-
-        return {"tokens": indices}
-
-    def get_empty_token_list(self) -> IndexedTokenList:
-        return {"tokens": []}
diff --git a/tests/test_chainer.py b/tests/test_chainer.py
deleted file mode 100644
index e5765f78..00000000
--- a/tests/test_chainer.py
+++ /dev/null
@@ -1,141 +0,0 @@
-from __future__ import annotations
-
-from collections import namedtuple
-import math
-import typing
-from unittest.mock import Mock
-from unittest.mock import patch
-
-import numpy as np
-import optuna
-from optuna.testing.pruners import DeterministicPruner
-import pytest
-
-from optuna_integration._imports import try_import
-from optuna_integration.chainer import ChainerPruningExtension
-
-
-with try_import() as _imports:
-    import chainer
-    from chainer.dataset import DatasetMixin  # type: ignore[attr-defined]
-    import chainer.links as L
-    from chainer.training import triggers
-
-if not _imports.is_successful():
-    DatasetMixin = object  # type: ignore[misc, assignment] # NOQA
-
-
-class FixedValueDataset(DatasetMixin):
-    size = 16
-
-    def __len__(self) -> int:
-        return self.size
-
-    def get_example(self, i: int) -> tuple[np.ndarray, np.signedinteger]:
-        return np.array([1.0], np.float32), np.intc(0)
-
-
-def test_chainer_pruning_extension_trigger() -> None:
-    study = optuna.create_study()
-    trial = study.ask()
-
-    extension = ChainerPruningExtension(trial, "main/loss", (1, "epoch"))
-    assert isinstance(
-        extension._pruner_trigger, triggers.IntervalTrigger  # type: ignore[attr-defined]
-    )
-    extension = ChainerPruningExtension(
-        trial, "main/loss", triggers.IntervalTrigger(1, "epoch")  # type: ignore[attr-defined]
-    )
-    assert isinstance(
-        extension._pruner_trigger, triggers.IntervalTrigger  # type: ignore[attr-defined]
-    )
-    extension = ChainerPruningExtension(
-        trial,
-        "main/loss",
-        triggers.ManualScheduleTrigger(1, "epoch"),  # type: ignore[attr-defined]
-    )
-    assert isinstance(
-        extension._pruner_trigger, triggers.ManualScheduleTrigger  # type: ignore[attr-defined]
-    )
-
-    with pytest.raises(TypeError):
-        ChainerPruningExtension(
-            trial, "main/loss", triggers.TimeTrigger(1.0)  # type: ignore[attr-defined]
-        )
-
-
-def test_chainer_pruning_extension() -> None:
-    @typing.no_type_check
-    def objective(trial: optuna.trial.Trial) -> float:
-        model = L.Classifier(chainer.Sequential(L.Linear(None, 2)))
-        optimizer = chainer.optimizers.Adam()
-        optimizer.setup(model)
-
-        train_iter = chainer.iterators.SerialIterator(FixedValueDataset(), 16)
-        updater = chainer.training.StandardUpdater(train_iter, optimizer)
-        trainer = chainer.training.Trainer(updater, (1, "epoch"))
-        trainer.extend(ChainerPruningExtension(trial, "main/loss", (1, "epoch")))
-
-        trainer.run(show_loop_exception_msg=False)
-        return 1.0
-
-    study = optuna.create_study(pruner=DeterministicPruner(True))
-    study.optimize(objective, n_trials=1)
-    assert study.trials[0].state == optuna.trial.TrialState.PRUNED
-
-    study = optuna.create_study(pruner=DeterministicPruner(False))
-    study.optimize(objective, n_trials=1)
-    assert study.trials[0].state == optuna.trial.TrialState.COMPLETE
-    assert study.trials[0].value == 1.0
-
-
-def test_chainer_pruning_extension_observation_nan() -> None:
-    study = optuna.create_study(pruner=DeterministicPruner(True))
-    trial = study.ask()
-    extension = ChainerPruningExtension(trial, "main/loss", (1, "epoch"))
-
-    MockTrainer = namedtuple("MockTrainer", ("observation", "updater"))
-    MockUpdater = namedtuple("MockUpdater", ("epoch"))
-    trainer = MockTrainer(observation={"main/loss": float("nan")}, updater=MockUpdater(1))
-
-    with patch.object(extension, "_observation_exists", Mock(return_value=True)) as mock:
-        with pytest.raises(optuna.TrialPruned):
-            extension(trainer)
-        assert mock.call_count == 1
-
-
-def test_observation_exists() -> None:
-    study = optuna.create_study()
-    trial = study.ask()
-    MockTrainer = namedtuple("MockTrainer", ("observation",))
-    trainer = MockTrainer(observation={"OK": 0})
-
-    # Trigger is deactivated. Return False whether trainer has observation or not.
-    with patch.object(
-        triggers.IntervalTrigger,  # type: ignore[attr-defined]
-        "__call__",
-        Mock(return_value=False),
-    ) as mock:
-        extension = ChainerPruningExtension(trial, "NG", (1, "epoch"))
-        assert extension._observation_exists(trainer) is False
-        extension = ChainerPruningExtension(trial, "OK", (1, "epoch"))
-        assert extension._observation_exists(trainer) is False
-        assert mock.call_count == 2
-
-    # Trigger is activated. Return True if trainer has observation.
-    with patch.object(
-        triggers.IntervalTrigger, "__call__", Mock(return_value=True)  # type: ignore[attr-defined]
-    ) as mock:
-        extension = ChainerPruningExtension(trial, "NG", (1, "epoch"))
-        assert extension._observation_exists(trainer) is False
-        extension = ChainerPruningExtension(trial, "OK", (1, "epoch"))
-        assert extension._observation_exists(trainer) is True
-        assert mock.call_count == 2
-
-
-def test_get_float_value() -> None:
-    assert 1.0 == ChainerPruningExtension._get_float_value(1.0)
-    assert 1.0 == ChainerPruningExtension._get_float_value(
-        chainer.Variable(np.array([1.0]))  # type: ignore[attr-defined]
-    )
-    assert math.isnan(ChainerPruningExtension._get_float_value(float("nan")))
diff --git a/tests/test_chainermn.py b/tests/test_chainermn.py
deleted file mode 100644
index 3450a97e..00000000
--- a/tests/test_chainermn.py
+++ /dev/null
@@ -1,425 +0,0 @@
-from __future__ import annotations
-
-import gc
-from types import TracebackType
-from typing import Any
-from typing import Callable
-
-from optuna import create_study
-from optuna import distributions
-from optuna import pruners
-from optuna import Study
-from optuna import TrialPruned
-from optuna.pruners import BasePruner
-from optuna.storages import BaseStorage
-from optuna.storages import InMemoryStorage
-from optuna.storages import RDBStorage
-from optuna.testing.pruners import DeterministicPruner
-from optuna.testing.storages import StorageSupplier
-from optuna.trial import TrialState
-import pytest
-
-from optuna_integration.chainermn import ChainerMNStudy
-from optuna_integration.chainermn import ChainerMNTrial
-
-
-try:
-    import chainermn
-    from chainermn.communicators.communicator_base import CommunicatorBase  # NOQA
-
-    _available = True
-except ImportError:
-    _available = False
-
-STORAGE_MODES = ["sqlite"]
-PRUNER_INIT_FUNCS = [lambda: pruners.MedianPruner(), lambda: pruners.SuccessiveHalvingPruner()]
-
-
-class Func:
-    def __init__(self) -> None:
-        self.suggested_values: dict[int, dict[str, Any]] = {}
-
-    def __call__(self, trial: ChainerMNTrial, comm: "CommunicatorBase") -> float:
-        x = trial.suggest_float("x", -10, 10)
-        y = trial.suggest_float("y", 20, 30, log=True)
-        z = trial.suggest_categorical("z", (-1.0, 1.0))
-
-        self.suggested_values[trial.number] = {}
-        self.suggested_values[trial.number]["x"] = x
-        self.suggested_values[trial.number]["y"] = y
-        self.suggested_values[trial.number]["z"] = z
-
-        return (x - 2) ** 2 + (y - 25) ** 2 + z
-
-
-class MultiNodeStorageSupplier(StorageSupplier):
-    def __init__(self, storage_specifier: str, comm: "CommunicatorBase") -> None:
-        super().__init__(storage_specifier)
-        self.comm = comm
-        self.storage: RDBStorage | None = None
-
-    def __enter__(self) -> RDBStorage:
-        if self.comm.rank == 0:
-            storage = super(MultiNodeStorageSupplier, self).__enter__()
-            assert isinstance(storage, RDBStorage)
-            url = str(storage.engine.url)
-        else:
-            url = "dummy_url"
-
-        url = self.comm.mpi_comm.bcast(url)
-        self.storage = RDBStorage(url)
-        return self.storage
-
-    def __exit__(
-        self, exc_type: type[BaseException], exc_val: BaseException, exc_tb: TracebackType
-    ) -> None:
-        # Explicitly call storage's __del__ before sqlite tempfile is deleted.
-        del self.storage
-        gc.collect()
-        self.comm.mpi_comm.barrier()
-
-        if self.comm.rank == 0:
-            super(MultiNodeStorageSupplier, self).__exit__(exc_type, exc_val, exc_tb)
-
-
-@pytest.fixture
-def comm() -> "CommunicatorBase":
-    if not _available:
-        pytest.skip("This test requires ChainerMN.")
-
-    return chainermn.create_communicator("naive")
-
-
-class TestChainerMNStudy:
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_init(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_study = ChainerMNStudy(study, comm)
-
-            assert mn_study.study_name == study.study_name
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_init_with_multiple_study_names(storage_mode: str, comm: "CommunicatorBase") -> None:
-        TestChainerMNStudy._check_multi_node(comm)
-
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            # Create study_name for each rank.
-            name = create_study(storage=storage).study_name
-            study = Study(name, storage)
-
-            with pytest.raises(ValueError):
-                ChainerMNStudy(study, comm)
-
-    @staticmethod
-    def test_init_with_incompatible_storage(comm: "CommunicatorBase") -> None:
-        study = create_study(storage=InMemoryStorage(), study_name="in-memory-study")
-
-        with pytest.raises(ValueError):
-            ChainerMNStudy(study, comm)
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_optimize(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_study = ChainerMNStudy(study, comm)
-
-            # Invoke optimize.
-            n_trials = 20
-            func = Func()
-            mn_study.optimize(func, n_trials=n_trials)
-
-            # Assert trial counts.
-            assert len(mn_study.trials) == n_trials
-
-            # Assert the same parameters have been suggested among all nodes.
-            for trial in mn_study.trials:
-                assert trial.params == func.suggested_values[trial.number]
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    @pytest.mark.parametrize("pruner_init_func", PRUNER_INIT_FUNCS)
-    def test_pruning(
-        storage_mode: str, pruner_init_func: Callable[[], BasePruner], comm: "CommunicatorBase"
-    ) -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            pruner = pruner_init_func()
-            study = TestChainerMNStudy._create_shared_study(storage, comm, pruner=pruner)
-            mn_study = ChainerMNStudy(study, comm)
-
-            def objective(_trial: ChainerMNTrial, _comm: bool) -> float:
-                raise TrialPruned  # Always be pruned.
-
-            # Invoke optimize.
-            n_trials = 20
-            mn_study.optimize(objective, n_trials=n_trials)
-
-            # Assert trial count.
-            assert len(mn_study.trials) == n_trials
-
-            # Assert pruned trial count.
-            pruned_trials = [t for t in mn_study.trials if t.state == TrialState.PRUNED]
-            assert len(pruned_trials) == n_trials
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_failure(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_study = ChainerMNStudy(study, comm)
-
-            def objective(_trial: ChainerMNTrial, _comm: bool) -> float:
-                raise ValueError  # Always fails.
-
-            # Invoke optimize in which `ValueError` is accepted.
-            n_trials = 20
-            mn_study.optimize(objective, n_trials=n_trials, catch=(ValueError,))
-
-            # Assert trial count.
-            assert len(mn_study.trials) == n_trials
-
-            # Assert failed trial count.
-            failed_trials = [t for t in mn_study.trials if t.state == TrialState.FAIL]
-            assert len(failed_trials) == n_trials
-
-            # Synchronize nodes before executing the next optimization.
-            comm.mpi_comm.barrier()
-
-            # Invoke optimize in which no exceptions are accepted.
-            with pytest.raises(ValueError):
-                mn_study.optimize(objective, n_trials=n_trials, catch=())
-
-            # Assert trial count.
-            assert len(mn_study.trials) == n_trials + 1
-
-            # Assert failed trial count.
-            failed_trials = [t for t in mn_study.trials if t.state == TrialState.FAIL]
-            assert len(failed_trials) == n_trials + 1
-
-    @staticmethod
-    def _create_shared_study(
-        storage: BaseStorage,
-        comm: "CommunicatorBase",
-        pruner: BasePruner | None = None,
-    ) -> Study:
-        name_local = create_study(storage=storage).study_name if comm.rank == 0 else None
-        name_bcast = comm.mpi_comm.bcast(name_local)
-
-        return Study(name_bcast, storage, pruner=pruner)
-
-    @staticmethod
-    def _check_multi_node(comm: "CommunicatorBase") -> None:
-        if comm.size < 2:
-            pytest.skip("This test is for multi-node only.")
-
-
-class TestChainerMNTrial:
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_init(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_trial = _create_new_chainermn_trial(study, comm)
-            trial = study.trials[-1]
-
-            assert mn_trial.number == trial.number
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_suggest_float(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            low1 = 0.5
-            high1 = 1.0
-            for _ in range(10):
-                mn_trial = _create_new_chainermn_trial(study, comm)
-
-                x1 = mn_trial.suggest_float("x1", low1, high1)
-                assert low1 <= x1 <= high1
-
-                x2 = mn_trial.suggest_float("x1", low1, high1)
-
-                assert x1 == x2
-
-            with pytest.raises(ValueError):
-                mn_trial.suggest_float("x1", low1, high1, log=True)
-
-            low2 = 1e-7
-            high2 = 1e-2
-            for _ in range(10):
-                mn_trial = _create_new_chainermn_trial(study, comm)
-
-                x3 = mn_trial.suggest_float("x2", low2, high2, log=True)
-                assert low2 <= x3 <= high2
-
-                x4 = mn_trial.suggest_float("x2", low2, high2, log=True)
-                assert x3 == x4
-
-            with pytest.raises(ValueError):
-                mn_trial.suggest_float("x2", low2, high2)
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_suggest_float_with_step(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            low = 0.0
-            high = 10.0
-            step = 1.0
-            for _ in range(10):
-                mn_trial = _create_new_chainermn_trial(study, comm)
-
-                x1 = mn_trial.suggest_float("x", low, high, step=step)
-                assert low <= x1 <= high
-
-                x2 = mn_trial.suggest_float("x", low, high, step=step)
-                assert x1 == x2
-
-            if comm.rank == 0:
-                with pytest.warns(RuntimeWarning):
-                    mn_trial.suggest_float("x", low, high)
-            else:
-                mn_trial.suggest_float("x", low, high)
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    @pytest.mark.parametrize("enable_log", [False, True])
-    def test_suggest_int_step1(
-        storage_mode: str, comm: "CommunicatorBase", enable_log: bool
-    ) -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            low = 1
-            high = 10
-            step = 1
-            for _ in range(10):
-                mn_trial = _create_new_chainermn_trial(study, comm)
-
-                x1 = mn_trial.suggest_int("x", low, high, step=step, log=enable_log)
-                assert low <= x1 <= high
-
-                x2 = mn_trial.suggest_int("x", low, high, step=step, log=enable_log)
-                assert x1 == x2
-
-            with pytest.raises(ValueError):
-                mn_trial.suggest_float("x", low, high)
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_suggest_int_step2(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            low = 1
-            high = 9
-            step = 2
-            for _ in range(10):
-                mn_trial = _create_new_chainermn_trial(study, comm)
-
-                x1 = mn_trial.suggest_int("x", low, high, step=step, log=False)
-                assert low <= x1 <= high
-
-                x2 = mn_trial.suggest_int("x", low, high, step=step, log=False)
-                assert x1 == x2
-
-            with pytest.raises(ValueError):
-                mn_trial.suggest_float("x", low, high)
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_suggest_categorical(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            choices = ("a", "b", "c")
-            for _ in range(10):
-                mn_trial = _create_new_chainermn_trial(study, comm)
-
-                x1 = mn_trial.suggest_categorical("x", choices)
-                assert x1 in choices
-
-                x2 = mn_trial.suggest_categorical("x", choices)
-                assert x1 == x2
-
-            with pytest.raises(ValueError):
-                mn_trial.suggest_float("x", 0.0, 1.0)
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    @pytest.mark.parametrize("is_pruning", [True, False])
-    def test_report_and_should_prune(
-        storage_mode: str, comm: "CommunicatorBase", is_pruning: bool
-    ) -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(
-                storage, comm, DeterministicPruner(is_pruning)
-            )
-            mn_trial = _create_new_chainermn_trial(study, comm)
-            mn_trial.report(1.0, 0)
-            assert mn_trial.should_prune() == is_pruning
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_params(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_trial = _create_new_chainermn_trial(study, comm)
-
-            x = mn_trial.suggest_categorical("x", [1])
-            assert mn_trial.params["x"] == x
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_distributions(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_trial = _create_new_chainermn_trial(study, comm)
-
-            mn_trial.suggest_categorical("x", [1])
-            assert mn_trial.distributions == {
-                "x": distributions.CategoricalDistribution(choices=(1,))
-            }
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_user_attrs(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_trial = _create_new_chainermn_trial(study, comm)
-
-            mn_trial.set_user_attr("data", "MNIST")
-            assert mn_trial.user_attrs["data"] == "MNIST"
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_call_with_mpi(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_trial = _create_new_chainermn_trial(study, comm)
-            with pytest.raises(RuntimeError):
-
-                def func() -> None:
-                    raise RuntimeError
-
-                mn_trial._call_with_mpi(func)
-
-    @staticmethod
-    @pytest.mark.parametrize("storage_mode", STORAGE_MODES)
-    def test_datetime_start(storage_mode: str, comm: "CommunicatorBase") -> None:
-        with MultiNodeStorageSupplier(storage_mode, comm) as storage:
-            study = TestChainerMNStudy._create_shared_study(storage, comm)
-            mn_trial = _create_new_chainermn_trial(study, comm)
-
-            assert mn_trial.datetime_start is not None
-
-
-def _create_new_chainermn_trial(study: Study, comm: "CommunicatorBase") -> ChainerMNTrial:
-    if comm.rank == 0:
-        trial = study.ask()
-        mn_trial = ChainerMNTrial(trial, comm)
-    else:
-        mn_trial = ChainerMNTrial(None, comm)
-
-    comm.mpi_comm.barrier()
-    return mn_trial