From 9f2b9813ed8f5be6fc49e455d52a3f32ca128a47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tha=C3=AFs=20de=20Boisfoss=C3=A9?=
 <4139100+Esadruhn@users.noreply.github.com>
Date: Mon, 20 Jul 2020 11:12:20 +0200
Subject: [PATCH] Make tests run on the local and remote backend (#117)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Aurélien Gasser <aurelien.gasser@owkin.com>
---
 Makefile                             |  7 ++-
 README.md                            | 32 +++++++++--
 ci/run-ci.py                         |  1 +
 local-backend-values.yaml            |  7 +++
 substratest/client.py                |  5 +-
 substratest/factory.py               |  2 +-
 tests/conftest.py                    | 82 +++++++++++++++++++++-------
 tests/settings.py                    | 18 +++++-
 tests/test_execution.py              | 46 +++++++++++-----
 tests/test_execution_compute_plan.py |  5 +-
 tests/test_network.py                |  2 +
 tests/test_permissions.py            | 10 ++++
 tests/test_system.py                 |  4 +-
 13 files changed, 173 insertions(+), 48 deletions(-)
 create mode 100644 local-backend-values.yaml

diff --git a/Makefile b/Makefile
index ef93a202..1e677aee 100644
--- a/Makefile
+++ b/Makefile
@@ -10,12 +10,17 @@ pyclean:
 	find . -type f -name "*.py[co]" -delete
 	find . -type d -name "__pycache__" -delete
 
-test: pyclean
+test: test-remote test-local
+
+test-remote: pyclean
 	pytest tests -rs -v --durations=0
 
 test-minimal: pyclean
 	pytest tests -rs -v --durations=0 -m "not slow"
 
+test-local: pyclean
+	pytest tests -rs -v --durations=0 --local
+
 install:
 	pip3 install -r requirements.txt
 
diff --git a/README.md b/README.md
index 5a8dab23..693baba1 100644
--- a/README.md
+++ b/README.md
@@ -12,8 +12,8 @@ Install tests dependencies:
 pip3 install -r requirements.txt
 ```
 
-The tests suite requires a Substra network up and running. The network can be started
-either with skaffold (Kubernetes), with docker-compose, or manually.
+The tests suite requires a Substra network up and running to test the remote backend.  
+The network can be started either with skaffold (Kubernetes), with docker-compose, or manually.
 
 The substra project is needed for running the tests.
 It can be found [here](https://github.com/SubstraFoundation/substra)
@@ -22,6 +22,15 @@ You will need to install it thanks to the `pip` binary.
 
 # Run the tests
 
+The tests can run both on the remmote backend and the local backend. To run the complete
+test suite on both backend:
+
+```bash
+make test
+```
+
+# Run the tests on the remote backend
+
 The network configuration is described in a yaml file.
 
 Two configuration files are currently available:
@@ -31,16 +40,16 @@ Two configuration files are currently available:
 To run the tests using the default `values.yaml` file:
 
 ```
-make test
+make test-remote
 ```
 
 To run the tests using the provided `local-values.yaml` (or a custom config file):
 
 ```
-SUBSTRA_TESTS_CONFIG_FILEPATH=local-values.yaml make test
+SUBSTRA_TESTS_CONFIG_FILEPATH=local-values.yaml make test-remote
 ```
 
-# Minimal mode
+## Minimal mode
 
 Since tests can take a long time to run, some of them are marked as slow. You can run the "fast" ones with:
 
@@ -52,6 +61,18 @@ Note that `test_compute_plan` from `test_execution_compute_plan.py` is not marke
 seconds to complete. This is because it covers a very basic use case of the platform and is needed to ensure basic
 features aren't broken.
 
+# Run the tests on the local backend
+
+The network configuration is described in a yaml file: `local-backend-values.yaml` and cannot be changed.
+
+To run the tests using on the local backend:
+
+```
+make test-local
+```
+
+Some tests are skipped in this mode as they need the remote backend to run.
+
 # Test design guidelines
 
 When adding or modifying tests, please follow these guidelines:
@@ -68,3 +89,4 @@ When adding or modifying tests, please follow these guidelines:
 1. Each test should not complete before all the tuples it created have been executed. This requirement ensures that the next test will be launched with a substra network ready to execute new tuples
 1. Tests must not use hardcoded network configuration settings/values. Use settings files instead (e.g. `values.yaml`)
 1. Tests should target a substra network with at least 2 organisations
+1. By default, a test must pass on the remote and local backend. If the test is specific to one backend, add the corresponding mark.
diff --git a/ci/run-ci.py b/ci/run-ci.py
index ad195965..f81cc66e 100755
--- a/ci/run-ci.py
+++ b/ci/run-ci.py
@@ -424,6 +424,7 @@ def run_tests():
     print('\n# Run tests')
 
     try:
+        # Run the tests on the remote and local backend
         call(f'kubectl --context {KUBE_CONTEXT} exec {substra_tests_pod} -n substra-tests -- make test')
         return True
     except subprocess.CalledProcessError:
diff --git a/local-backend-values.yaml b/local-backend-values.yaml
new file mode 100644
index 00000000..5a04ef30
--- /dev/null
+++ b/local-backend-values.yaml
@@ -0,0 +1,7 @@
+options:
+  celery_task_max_retries: 0
+  enable_intermediate_model_removal: False
+nodes:
+  - name: 'local-backend'
+    msp_id: 'local-backend'
+    address: ''
diff --git a/substratest/client.py b/substratest/client.py
index 281e2346..05729262 100644
--- a/substratest/client.py
+++ b/substratest/client.py
@@ -14,11 +14,12 @@ class Client:
     Parses responses from server to return Asset instances.
     """
 
-    def __init__(self, node_id, address, user, password):
+    def __init__(self, backend, node_id=None, address=None, user=None, password=None):
         super().__init__()
 
         self.node_id = node_id
-        self._client = substra.Client(url=address, version="0.0", insecure=False)
+        self.backend = backend
+        self._client = substra.Client(backend=backend, url=address, version="0.0", insecure=False)
         self._client.login(user, password)
 
     def add_data_sample(self, spec, *args, **kwargs):
diff --git a/substratest/factory.py b/substratest/factory.py
index 20d2b827..46bb34c9 100644
--- a/substratest/factory.py
+++ b/substratest/factory.py
@@ -479,7 +479,7 @@ def __init__(self, name):
         self._dataset_counter = Counter()
         self._objective_counter = Counter()
         self._algo_counter = Counter()
-        self._workdir = pathlib.Path(tempfile.mkdtemp())
+        self._workdir = pathlib.Path(tempfile.mkdtemp(prefix='/tmp/'))
         self._uuid = name
 
     def __enter__(self):
diff --git a/tests/conftest.py b/tests/conftest.py
index 3d9d1a20..0804f748 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,6 +28,45 @@ def pytest_configure(config):
     config.addinivalue_line(
         "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')",
     )
+    config.addinivalue_line(
+        "markers", "local_only: marks tests as local backend only (deselect with '-m \"not local_only\"')",
+    )
+    config.addinivalue_line(
+        "markers", "remote_only: marks tests as remote backend only (deselect with '-m \"not remote_only\"')",
+    )
+
+
+def pytest_addoption(parser):
+    """Command line arguments to configure the network to be local or remote"""
+    parser.addoption(
+        "--local",
+        action="store_true",
+        help="Run the tests on the local backend only. Otherwise run the tests only on the remote backend."
+    )
+
+
+def pytest_collection_modifyitems(config, items):
+    """Skip the remote tests if local backend and local tests if remote backend.
+    By default, run only on the remote backend.
+    """
+    local = config.getoption("--local")
+    if local:
+        skip_marker = pytest.mark.skip(reason="remove the --local option to run")
+        keyword = "remote_only"
+    else:
+        skip_marker = pytest.mark.skip(reason="need the --local option to run")
+        keyword = "local_only"
+    for item in items:
+        if keyword in item.keywords:
+            item.add_marker(skip_marker)
+
+
+@pytest.fixture(scope="session")
+def backend(request):
+    local = request.config.getoption("--local")
+    if local:
+        return "local"
+    return "remote"
 
 
 class _DataEnv:
@@ -60,21 +99,6 @@ class Network:
     clients: typing.List[sbt.Client] = dataclasses.field(default_factory=list)
 
 
-def _get_network():
-    """Create network instance from settings."""
-    cfg = settings.load()
-    clients = [sbt.Client(
-        node_id=n.msp_id,
-        address=n.address,
-        user=n.user,
-        password=n.password,
-    ) for n in cfg.nodes]
-    return Network(
-        options=cfg.options,
-        clients=clients,
-    )
-
-
 @pytest.fixture
 def factory(request):
     """Factory fixture.
@@ -87,20 +111,37 @@ def factory(request):
         yield f
 
 
-@pytest.fixture
-def network():
+@pytest.fixture(scope="session")
+def network(backend):
     """Network fixture.
 
     Network must started outside of the tests environment and the network is kept
     alive while running all tests.
 
+    Create network instance from settings.
+
     Returns an instance of the `Network` class.
     """
-    return _get_network()
+    if backend == "remote":
+        cfg = settings.load()
+    else:
+        # TODO check what enable_intermediate_model_removal does
+        cfg = settings.load_local_backend()
+    clients = [sbt.Client(
+        backend=backend,
+        node_id=n.msp_id,
+        address=n.address,
+        user=n.user,
+        password=n.password,
+    ) for n in cfg.nodes]
+    return Network(
+        options=cfg.options,
+        clients=clients,
+    )
 
 
 @pytest.fixture(scope="session")
-def default_data_env():
+def default_data_env(network):
     """Fixture with pre-existing assets in all nodes.
 
     The following assets will be created for each node:
@@ -114,7 +155,6 @@ def default_data_env():
 
     Returns the assets created.
     """
-    network = _get_network()
     factory_name = f"{TESTS_RUN_UUID}_global"
 
     with sbt.AssetsFactory(name=factory_name) as f:
@@ -145,7 +185,7 @@ def default_data_env():
             objectives.append(objective)
 
         assets = _DataEnv(datasets=datasets, objectives=objectives)
-        return assets
+        yield assets
 
 
 @pytest.fixture
diff --git a/tests/settings.py b/tests/settings.py
index c3336056..65ff4e9d 100644
--- a/tests/settings.py
+++ b/tests/settings.py
@@ -9,6 +9,8 @@
 DEFAULT_NETWORK_CONFIGURATION_PATH = os.path.join(CURRENT_DIR, '../', 'values.yaml')
 SUBSTRA_TESTS_CONFIG_FILEPATH = os.getenv('SUBSTRA_TESTS_CONFIG_FILEPATH', DEFAULT_NETWORK_CONFIGURATION_PATH)
 
+DEFAULT_NETWORK_LOCAL_CONFIGURATION_PATH = os.path.join(CURRENT_DIR, '../', 'local-backend-values.yaml')
+
 MIN_NODES = 2
 
 
@@ -36,6 +38,7 @@ class Settings:
 
 
 _SETTINGS = None
+_LOCAL_SETTINGS = None
 
 
 def _load_yaml(path):
@@ -43,7 +46,6 @@ def _load_yaml(path):
     with open(path) as f:
         data = yaml.load(f, Loader=yaml.Loader)
     nodes = [NodeCfg(**kw) for kw in data['nodes']]
-    assert len(nodes) >= MIN_NODES, f'not enough nodes: {len(nodes)}'
     return Settings(
         path=path,
         nodes=nodes,
@@ -63,10 +65,24 @@ def load():
         return _SETTINGS
 
     s = _load_yaml(SUBSTRA_TESTS_CONFIG_FILEPATH)
+    assert len(s.nodes) >= MIN_NODES, f'not enough nodes: {len(s.nodes)}'
     _SETTINGS = s
     return _SETTINGS
 
 
+def load_local_backend():
+    """Loads settings static configuration.
+
+    As the configuration is static and immutable, it is loaded only once from the disk.
+
+    Returns an instance of the `Settings` class.
+    """
+    global _LOCAL_SETTINGS
+    if _LOCAL_SETTINGS is None:
+        _LOCAL_SETTINGS = _load_yaml(DEFAULT_NETWORK_LOCAL_CONFIGURATION_PATH)
+    return _LOCAL_SETTINGS
+
+
 # TODO that's a bad idea to expose the static configuration, it has been done to allow
 #      tests parametrization but this won't work for specific tests written with more
 #      nodes
diff --git a/tests/test_execution.py b/tests/test_execution.py
index ceca7583..864b5292 100644
--- a/tests/test_execution.py
+++ b/tests/test_execution.py
@@ -1,4 +1,3 @@
-import math
 import pytest
 
 import substra
@@ -94,6 +93,7 @@ def test_federated_learning_workflow(factory, client, default_datasets):
 
 
 @pytest.mark.slow
+@pytest.mark.remote_only
 def test_tuples_execution_on_different_nodes(factory, client_1, client_2, default_objective_1, default_dataset_2):
     """Execution of a traintuple on node 1 and the following testtuple on node 2."""
     # add test data samples / dataset / objective on node 1
@@ -132,9 +132,13 @@ def test_traintuple_execution_failure(factory, client, default_dataset_1):
         dataset=default_dataset_1,
         data_samples=default_dataset_1.train_data_sample_keys,
     )
-    traintuple = client.add_traintuple(spec).future().wait(raises=False)
-    assert traintuple.status == assets.Status.failed
-    assert traintuple.out_model is None
+    if client.backend == "remote":
+        traintuple = client.add_traintuple(spec).future().wait(raises=False)
+        assert traintuple.status == assets.Status.failed
+        assert traintuple.out_model is None
+    else:
+        with pytest.raises(substra.sdk.backends.local.compute.spawner.ExecutionError):
+            traintuple = client.add_traintuple(spec)
 
 
 @pytest.mark.slow
@@ -149,10 +153,14 @@ def test_composite_traintuple_execution_failure(factory, client, default_dataset
         dataset=default_dataset,
         data_samples=default_dataset.train_data_sample_keys,
     )
-    composite_traintuple = client.add_composite_traintuple(spec).future().wait(raises=False)
-    assert composite_traintuple.status == assets.Status.failed
-    assert composite_traintuple.out_head_model.out_model is None
-    assert composite_traintuple.out_trunk_model.out_model is None
+    if client.backend == "remote":
+        composite_traintuple = client.add_composite_traintuple(spec).future().wait(raises=False)
+        assert composite_traintuple.status == assets.Status.failed
+        assert composite_traintuple.out_head_model.out_model is None
+        assert composite_traintuple.out_trunk_model.out_model is None
+    else:
+        with pytest.raises(substra.sdk.backends.local.compute.spawner.ExecutionError):
+            composite_traintuple = client.add_composite_traintuple(spec)
 
 
 @pytest.mark.slow
@@ -179,12 +187,16 @@ def test_aggregatetuple_execution_failure(factory, client, default_dataset):
         traintuples=composite_traintuples,
         worker=client.node_id,
     )
-    aggregatetuple = client.add_aggregatetuple(spec).future().wait(raises=False)
-    for composite_traintuple in composite_traintuples:
-        composite_traintuple = client.get_composite_traintuple(composite_traintuple.key)
-        assert composite_traintuple.status == assets.Status.done
-    assert aggregatetuple.status == assets.Status.failed
-    assert aggregatetuple.out_model is None
+    if client.backend == "remote":
+        aggregatetuple = client.add_aggregatetuple(spec).future().wait(raises=False)
+        for composite_traintuple in composite_traintuples:
+            composite_traintuple = client.get_composite_traintuple(composite_traintuple.key)
+            assert composite_traintuple.status == assets.Status.done
+        assert aggregatetuple.status == assets.Status.failed
+        assert aggregatetuple.out_model is None
+    else:
+        with pytest.raises(substra.sdk.backends.local.compute.spawner.ExecutionError):
+            aggregatetuple = client.add_aggregatetuple(spec)
 
 
 @pytest.mark.slow
@@ -351,7 +363,11 @@ def test_aggregate_composite_traintuples(factory, network, clients, default_data
             traintuple=traintuple,
         )
         testtuple = clients[0].add_testtuple(spec).future().wait()
-        assert testtuple.dataset.perf == 32
+        if clients[0].backend == 'remote':
+            assert testtuple.dataset.perf == 32
+        else:
+            # There is only one dataset in 'datasets' in local, hence the difference
+            assert testtuple.dataset.perf == 30
 
     if not network.options.enable_intermediate_model_removal:
         return
diff --git a/tests/test_execution_compute_plan.py b/tests/test_execution_compute_plan.py
index 3c751f5b..7283ace2 100644
--- a/tests/test_execution_compute_plan.py
+++ b/tests/test_execution_compute_plan.py
@@ -1,4 +1,3 @@
-import math
 import pytest
 import substra
 import substratest as sbt
@@ -7,6 +6,7 @@
 from substratest import assets
 
 
+@pytest.mark.remote_only
 def test_compute_plan(factory, client_1, client_2, default_dataset_1, default_dataset_2, default_objective_1):
     """Execution of a compute plan containing multiple traintuples:
     - 1 traintuple executed on node 1
@@ -242,6 +242,7 @@ def test_compute_plan_update(factory, client, default_dataset, default_objective
 
 
 @pytest.mark.slow
+@pytest.mark.remote_only
 def test_compute_plan_single_client_failure(factory, client, default_dataset, default_objective):
     """In a compute plan with 3 traintuples, failing the root traintuple
     should cancel its descendents and the associated testtuples"""
@@ -379,6 +380,7 @@ def test_compute_plan_aggregate_composite_traintuples(factory, clients, default_
 
 
 @pytest.mark.slow
+@pytest.mark.remote_only
 def test_compute_plan_remove_intermediary_model(factory, client, default_dataset, default_objective):
     """
     Create a simple compute plan with clean_models at true, see it done and
@@ -456,6 +458,7 @@ def test_compute_plan_circular_dependency_failure(factory, client, default_datas
 
 
 @pytest.mark.slow
+@pytest.mark.remote_only
 def test_execution_compute_plan_canceled(factory, client, default_dataset):
     # XXX A canceled compute plan can be done if the it is canceled while it last tuples
     #     are executing on the workers. The compute plan status will in this case change
diff --git a/tests/test_network.py b/tests/test_network.py
index 6fe323d3..90202bf8 100644
--- a/tests/test_network.py
+++ b/tests/test_network.py
@@ -205,6 +205,7 @@ def test_add_composite_algo(factory, client):
     assert algo == algo_copy
 
 
+@pytest.mark.remote_only  # No node saved in the local backend
 def test_list_nodes(client, network):
     """Nodes are properly registered and list nodes returns expected nodes."""
     nodes = client.list_node()
@@ -241,6 +242,7 @@ def test_list_asset(asset_type, client):
     method()  # should not raise
 
 
+@pytest.mark.remote_only
 @pytest.mark.parametrize(
     'asset_type', sbt.assets.AssetType.can_be_get(),
 )
diff --git a/tests/test_permissions.py b/tests/test_permissions.py
index faee6582..365bdd5b 100644
--- a/tests/test_permissions.py
+++ b/tests/test_permissions.py
@@ -9,6 +9,7 @@
 MSP_IDS = settings.MSP_IDS
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 @pytest.mark.parametrize('is_public', [True, False])
 def test_permission_creation(is_public, factory, client):
     """Test asset creation with simple permission."""
@@ -18,6 +19,7 @@ def test_permission_creation(is_public, factory, client):
     assert dataset.permissions.process.public is is_public
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 @pytest.mark.parametrize('permissions', [
     Permissions(public=True, authorized_ids=[]),
     Permissions(public=False, authorized_ids=[]),
@@ -43,6 +45,7 @@ def test_get_metadata(permissions, factory, clients):
             assert d.permissions.process.public == permissions.public
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 def test_permission_invalid_node_id(factory, client):
     """Test asset creation with invalid permission."""
     invalid_node = 'unknown-node'
@@ -53,6 +56,7 @@ def test_permission_invalid_node_id(factory, client):
     assert "invalid permission input values" in str(exc.value)
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 @pytest.mark.parametrize('permissions', [
     Permissions(public=True, authorized_ids=[]),
     Permissions(public=False, authorized_ids=[MSP_IDS[1]]),
@@ -69,6 +73,7 @@ def test_download_asset_access_granted(permissions, factory, client_1, client_2)
     assert content == spec.read_opener()
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 def test_download_asset_access_restricted(factory, client_1, client_2):
     """Test public asset can be downloaded by all nodes."""
     permissions = Permissions(public=False, authorized_ids=[])
@@ -82,6 +87,7 @@ def test_download_asset_access_restricted(factory, client_1, client_2):
         client_2.download_opener(dataset.key)
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 @pytest.mark.parametrize('permissions_1,permissions_2,expected_permissions', [
     (
         Permissions(public=False, authorized_ids=[MSP_IDS[1]]),
@@ -127,6 +133,7 @@ def test_merge_permissions(permissions_1, permissions_2, expected_permissions,
     assert set(tuple_permissions.authorized_ids) == set(expected_permissions.authorized_ids)
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 def test_permissions_denied_process(factory, client_1, client_2):
     # setup data
 
@@ -159,6 +166,7 @@ def test_permissions_denied_process(factory, client_1, client_2):
         client_1.add_traintuple(spec)
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 @pytest.mark.slow
 @pytest.mark.xfail(reason='permission check not yet implemented in the backend')
 def test_permissions_denied_model_process(factory, client_1, client_2, network):
@@ -207,6 +215,7 @@ def test_permissions_denied_model_process(factory, client_1, client_2, network):
         client_2.add_traintuple(spec)
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 @pytest.mark.skipif(len(MSP_IDS) < 3, reason='requires at least 3 nodes')
 def test_merge_permissions_denied_process(factory, clients):
     """Test to process asset with merged permissions from 2 other nodes
@@ -272,6 +281,7 @@ def test_merge_permissions_denied_process(factory, clients):
             client_3.add_testtuple(spec)
 
 
+@pytest.mark.remote_only  # no check on permissions with the local backend
 def test_permissions_denied_head_model_process(factory, client_1, client_2):
     # setup data
 
diff --git a/tests/test_system.py b/tests/test_system.py
index dc3930c6..ed33d9c1 100644
--- a/tests/test_system.py
+++ b/tests/test_system.py
@@ -4,6 +4,9 @@
 from . import settings
 
 
+# CELERY_TASK_MAX_RETRIES uses the remote backend settings
+# no check on permissions with the local backend
+@pytest.mark.remote_only
 @pytest.mark.parametrize('fail_count,status', (
     (settings.CELERY_TASK_MAX_RETRIES, 'done'),
     (settings.CELERY_TASK_MAX_RETRIES + 1, 'failed'),
@@ -69,4 +72,3 @@ def test_execution_retry_on_fail(fail_count, status, factory, client, default_da
     # - if it fails less than 2 times, it should be marked as "done"
     # - if it fails 2 times or more, it should be marked as "failed"
     assert traintuple.status == status
-