From 1fb81236ac0c32e533070976f142a62443bfcdfa Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 14:05:29 +0200 Subject: [PATCH 1/6] getmodels: use modeldb.science website --- README.md | 3 +- modeldb/config.py | 10 +-- modeldb/modeldb.py | 210 ++++++++++++++++++++++++++++----------------- 3 files changed, 139 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index df70c71..fa44fbd 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,8 @@ The following commands are now available: | MODELDB_RUN_FILE | yaml file containing run instructions for models (required for `runmodels`) | | MODELDB_METADATA_FILE | yaml file containing model info for those downloaded with `getmodels` | | MODELS_ZIP_DIR | location of cache folder for models populated via `getmodels` | - | MDB_NEURON_MODELS_URL | url used to get list of all NEURON model ids (necessary for `getmodels`) | + | MDB_NEURON_MODELS_URL | url template used to get NEURON model IDs and last-updated timestamps (needed for `getmodels`) | + | MDB_MODEL_METADATA_URL | url template used to get metadata about a single NEURON model (needed for `getmodels`) | | MDB_MODEL_DOWNLOAD_URL | url template used for model downloading (cf `{model_id}`) | ## Model Run diff --git a/modeldb/config.py b/modeldb/config.py index 7478fff..594cd12 100644 --- a/modeldb/config.py +++ b/modeldb/config.py @@ -2,17 +2,15 @@ import os -# MDB_NEURON_MODELS_URL = "https://senselab.med.yale.edu/_site/webapi/object.json/?cl=19&oid=1882" MDB_NEURON_MODELS_URL = ( - "http://modeldb.science/api/v1/models?modeling_application=NEURON" -) -MDB_MODEL_DOWNLOAD_URL = ( - "https://senselab.med.yale.edu/_site/webapi/object.json/{model_id}" + "http://modeldb.science/api/v1/models/{model_field}?modeling_application=NEURON" ) +MDB_MODEL_METADATA_URL = "https://modeldb.science/api/v1/models/{model_id}" +MDB_MODEL_DOWNLOAD_URL = "https://modeldb.science/eavBinDown?o={model_id}" ROOT_DIR = os.path.abspath(__file__ + "/../../") MODELS_ZIP_DIR = 
"%s/cache" % ROOT_DIR MODELDB_ROOT_DIR = "%s/modeldb" % ROOT_DIR MODELDB_METADATA_FILE = "%s/modeldb-meta.yaml" % MODELDB_ROOT_DIR -MODELDB_RUN_FILE = "%s/modeldb-run.yaml" % MODELDB_ROOT_DIR \ No newline at end of file +MODELDB_RUN_FILE = "%s/modeldb-run.yaml" % MODELDB_ROOT_DIR diff --git a/modeldb/modeldb.py b/modeldb/modeldb.py index e72fb73..53bc402 100644 --- a/modeldb/modeldb.py +++ b/modeldb/modeldb.py @@ -1,77 +1,91 @@ -from . import config +from .config import * +from .data import Model import logging import multiprocessing -import base64 import os +from pprint import pformat +from .progressbar import ProgressBar import requests import time -from .progressbar import ProgressBar import yaml -from .data import Model -from .config import * -import traceback -from pprint import pformat + def download_model(arg_tuple): - model_id, model_run_info = arg_tuple + model_id, model_run_info, expected_ver_date = arg_tuple try: - model_json = requests.get(MDB_MODEL_DOWNLOAD_URL.format(model_id=model_id)).json() + # Fetch the model metadata from ModelDB. + model_json = requests.get( + MDB_MODEL_METADATA_URL.format(model_id=model_id) + ).json() + # Check that the timestamp matches our expectations. + assert model_json["ver_date"] == expected_ver_date + # Assemble a Model object from the JSON metadata just fetched model = Model( - *( - model_json[key] - for key in ("object_id", "object_name", "object_created", "object_ver_date") - ) + model_json["id"], + model_json["name"], + model_json["created"], + model_json["ver_date"], ) - url = None - for att in model_json["object_attribute_values"]: - if att["attribute_id"] == 23: - url = att["value"] - break - # print(model.id) - model_zip_uri = os.path.join( - MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model.id) - ) - with open(model_zip_uri, "wb+") as zipfile: - zipfile.write(base64.standard_b64decode(url["file_content"])) - + # Now fetch the actual model data .zip file. 
By default this also comes + # from ModelDB, but it can be overridden to come from GitHub instead. if "github" in model_run_info: - # This means we should try to replace the version of the model that - # we downloaded from the ModelDB API just above with a version from - # GitHub + # This means we should try to download the model content from + # GitHub instead of from ModelDB. github = model_run_info["github"] organisation = "ModelDBRepository" - suffix = "" # default branch + suffix = "" # default branch if github == "default": + # Using + # github: "default" + # in modeldb-run.yaml implies that we fetch the HEAD of the + # default branch from ModelDBRepository on GitHub. In general + # this should be the same thing as fetching from ModelDB. pass elif github.startswith("pull/"): + # Using + # github: "pull/4" + # in modeldb-run.yaml implies that we use the branch from pull + # request #4 to ModelDBRepository/{model_id} on GitHub. This is + # used if you want to test updates to models. pr_number = int(github[5:]) suffix = "/pull/{}/head".format(pr_number) - elif github.startswith('/'): - # /org implies that we use the default branch from org/model_id + elif github.startswith("/"): + # Using + # github: "/myname" + # in modeldb-run.yaml implies that we fetch the HEAD of the + # default branch of myname/{model_id} on GitHub. This is useful + # if you need to test changes to a model that does not exist on + # GitHub under the ModelDBRepository organisation. 
organisation = github[1:] else: raise Exception("Invalid value for github key: {}".format(github)) - github_url = "https://api.github.com/repos/{organisation}/{model_id}/zipball{suffix}".format( + url = "https://api.github.com/repos/{organisation}/{model_id}/zipball{suffix}".format( model_id=model_id, organisation=organisation, suffix=suffix ) - # Replace the local file `model_zip_uri` with the zip file we - # downloaded from `github_url` - num_attempts = 3 - status_codes = [] - for _ in range(num_attempts): - github_response = requests.get(github_url) - status_codes.append(github_response.status_code) - if github_response.status_code == requests.codes.ok: - break - time.sleep(5) - else: - raise Exception( - "Failed to download {} with status codes {}".format( - github_url, status_codes - ) - ) - with open(model_zip_uri, "wb+") as zipfile: - zipfile.write(github_response.content) + else: + # Get the .zip file from ModelDB, not from GitHub. + url = MDB_MODEL_DOWNLOAD_URL.format(model_id=model_id) + + # Construct the path we want to save the .zip at locally. + model_zip_uri = os.path.join( + MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model.id) + ) + + # Download the model data from `url`. Retry a few times on failure. 
+ num_attempts = 3 + status_codes = [] + for _ in range(num_attempts): + model_download_response = requests.get(url) + status_codes.append(model_download_response.status_code) + if model_download_response.status_code == requests.codes.ok: + break + time.sleep(5) + else: + raise Exception( + "Failed to download {} with status codes {}".format(url, status_codes) + ) + with open(model_zip_uri, "wb+") as zipfile: + zipfile.write(model_download_response.content) except Exception as e: # noqa model = e @@ -79,6 +93,7 @@ def download_model(arg_tuple): class ModelDB(object): + logger = None metadata = property(lambda self: self._metadata) run_instr = property(lambda self: self._run_instr) @@ -87,38 +102,75 @@ def __init__(self): self._run_instr = {} self._load_run_instructions() + self._setup_logging() try: self._load_metadata() except FileNotFoundError: - logging.warning( - "{} not found!".format(MODELDB_METADATA_FILE) - ) + ModelDB.logger.warning("{} not found!".format(MODELDB_METADATA_FILE)) except yaml.YAMLError as y: - logging.error("Error loading {}: {}".format(MODELDB_METADATA_FILE, y)) + ModelDB.logger.error( + "Error loading {}: {}".format(MODELDB_METADATA_FILE, y) + ) raise y except Exception as e: raise e - def _download_models(self, model_list=None): + def download_models(self, model_list=None): if not os.path.isdir(MODELS_ZIP_DIR): - logging.info("Creating cache directory: {}".format(MODELS_ZIP_DIR)) + ModelDB.logger.info("Creating cache directory: {}".format(MODELS_ZIP_DIR)) os.mkdir(MODELS_ZIP_DIR) - models = requests.get(MDB_NEURON_MODELS_URL).json() if model_list is None else model_list - pool = multiprocessing.Pool() - processed_models = pool.imap_unordered( - download_model, - [(model_id, self._run_instr.get(model_id, {})) for model_id in models], - ) + # Fetch the list of NEURON model IDs, and a list of timestamps for + # those models. We do this even if `model_list` is not None to build + # the model ID -> timestamp mapping. 
+ def query(field): + return requests.get(MDB_NEURON_MODELS_URL.format(model_field=field)).json() + + all_model_ids = query("id") + all_model_timestamps = query("ver_date") + metadata = { + model_id: timestamp + for model_id, timestamp in zip(all_model_ids, all_model_timestamps) + } + # If we were passed a non-None `model_list`, restrict those models now. + if model_list is not None: + metadata = {model_id: metadata[model_id] for model_id in model_list} + # For each model in `metadata`, check if a cached entry exists and is + # up to date. If not, download it. + models_to_download = [] + for model_id, new_ver_date in metadata.items(): + if model_id in self._metadata: + cached_ver_date = self._metadata[model_id]._ver_date + if cached_ver_date == new_ver_date: + ModelDB.logger.debug( + "Model {} cache up to date ({})".format(model_id, new_ver_date) + ) + continue + else: + ModelDB.logger.debug( + "Model {} cache out of date (cached: {}, new: {})".format( + model_id, cached_ver_date, new_ver_date + ) + ) + else: + ModelDB.logger.debug("Model {} not found in cache".format(model_id)) + models_to_download.append( + (model_id, self._run_instr.get(model_id, {}), new_ver_date) + ) + # Download the missing or out of date models in parallel + pool = multiprocessing.Pool(8) + processed_models = pool.imap_unordered(download_model, models_to_download) download_err = {} - for model_id, model in ProgressBar.iter(processed_models, len(models)): + for model_id, model in ProgressBar.iter( + processed_models, len(models_to_download) + ): if not isinstance(model, Exception): self._metadata[model_id] = model else: download_err[model_id] = model if download_err: - logging.error("Error downloading models:") - logging.error(pformat(download_err)) + ModelDB.logger.error("Error downloading models:") + ModelDB.logger.error(pformat(download_err)) self._save_metadata() @@ -134,15 +186,19 @@ def _save_metadata(self): with open(MODELDB_METADATA_FILE, "w+") as meta_file: yaml.dump(self._metadata, 
meta_file, sort_keys=True) - def download_models(self, model_list=None): - if model_list is None: - try: - os.remove(MODELDB_METADATA_FILE) - except OSError: - pass - self._download_models(model_list) - - # TODO -> check/update models - def update_models(self): - pass - + def _setup_logging(self): + if ModelDB.logger is not None: + return + formatter = logging.Formatter( + fmt="%(asctime)s :: %(levelname)-8s :: %(message)s" + ) + consoleHandler = logging.StreamHandler() + consoleHandler.setFormatter(formatter) + consoleHandler.setLevel(logging.INFO) + fileHandler = logging.FileHandler("modeldb.log") + fileHandler.setFormatter(formatter) + fileHandler.setLevel(logging.DEBUG) + ModelDB.logger = logging.getLogger("modeldb") + ModelDB.logger.setLevel(logging.DEBUG) + ModelDB.logger.addHandler(consoleHandler) + ModelDB.logger.addHandler(fileHandler) From 3c742b0dfd7f2a7fda112ddbd270a3f901324465 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 14:08:28 +0200 Subject: [PATCH 2/6] CI: drop monthly cache invalidation --- .github/workflows/nrn-modeldb-ci.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nrn-modeldb-ci.yaml b/.github/workflows/nrn-modeldb-ci.yaml index 9b1e2a7..7646089 100644 --- a/.github/workflows/nrn-modeldb-ci.yaml +++ b/.github/workflows/nrn-modeldb-ci.yaml @@ -138,20 +138,17 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi #Install project in editable mode python -m pip install -e . 
- echo "date=$(date -u "+%Y%m")" >> $GITHUB_OUTPUT - name: Cache ModelDB models - id: cache-models uses: actions/cache@v3 with: path: | cache modeldb/modeldb-meta.yaml - key: models-${{steps.install-deps.outputs.date}} + key: models - name: Get ModelDB models - if: steps.cache-models.outputs.cache-hit != 'true' - run: getmodels + run: getmodels $MODELS_TO_RUN - name: Run Models with NEURON V1 -> ${{ env.NEURON_V1 }} run: | From 7092989e6392e378eb1f815f648679cabda65a11 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 14:15:09 +0200 Subject: [PATCH 3/6] CI: tweak cache key so it misses --- .github/workflows/nrn-modeldb-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nrn-modeldb-ci.yaml b/.github/workflows/nrn-modeldb-ci.yaml index 7646089..2653f35 100644 --- a/.github/workflows/nrn-modeldb-ci.yaml +++ b/.github/workflows/nrn-modeldb-ci.yaml @@ -145,7 +145,7 @@ jobs: path: | cache modeldb/modeldb-meta.yaml - key: models + key: dynamic-models - name: Get ModelDB models run: getmodels $MODELS_TO_RUN From ca6314983f78a19e9f84c78eea63108ea3b70d78 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 16:35:32 +0200 Subject: [PATCH 4/6] Add a better error message --- .github/workflows/nrn-modeldb-ci.yaml | 2 -- modeldb/modeldb.py | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nrn-modeldb-ci.yaml b/.github/workflows/nrn-modeldb-ci.yaml index 2653f35..0619fab 100644 --- a/.github/workflows/nrn-modeldb-ci.yaml +++ b/.github/workflows/nrn-modeldb-ci.yaml @@ -159,7 +159,6 @@ jobs: python -m pip install $NEURON_V1 fi nrn_ver=`python -c "from neuron import __version__ as nrn_ver; print(nrn_ver)"` - ps uxf # debug runmodels --gout --workdir=$nrn_ver $MODELS_TO_RUN # Filter out the gout data before generating HTML reports. The HTML # diff uses the original gout files on disk anyway. 
Compress the large @@ -186,7 +185,6 @@ jobs: python -m pip install $NEURON_V2 fi nrn_ver=`python -c "from neuron import __version__ as nrn_ver; print(nrn_ver)"` - ps uxf # debug runmodels --gout --workdir=$nrn_ver $MODELS_TO_RUN # Filter out the gout data before generating HTML reports. The HTML # diff uses the original gout files on disk anyway. Compress the large diff --git a/modeldb/modeldb.py b/modeldb/modeldb.py index 53bc402..3927d01 100644 --- a/modeldb/modeldb.py +++ b/modeldb/modeldb.py @@ -133,6 +133,13 @@ def query(field): } # If we were passed a non-None `model_list`, restrict those models now. if model_list is not None: + missing_ids = set(model_list) - set(metadata.keys()) + if len(missing_ids): + raise Exception( + "Model IDs {} were explicitly requested, but are not known NEURON models.".format( + missing_ids + ) + ) metadata = {model_id: metadata[model_id] for model_id in model_list} # For each model in `metadata`, check if a cached entry exists and is # up to date. If not, download it. From 8dce11db511fcd473908436b916a9a99f8aa3f6d Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 17:04:20 +0200 Subject: [PATCH 5/6] suggestion from Robert --- modeldb/modeldb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeldb/modeldb.py b/modeldb/modeldb.py index 3927d01..e47e3d0 100644 --- a/modeldb/modeldb.py +++ b/modeldb/modeldb.py @@ -134,7 +134,7 @@ def query(field): # If we were passed a non-None `model_list`, restrict those models now. 
if model_list is not None: missing_ids = set(model_list) - set(metadata.keys()) - if len(missing_ids): + if missing_ids: raise Exception( "Model IDs {} were explicitly requested, but are not known NEURON models.".format( missing_ids From 714d9592eb687b1bfcb254a2bf9d9bb43483331f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 17:12:19 +0200 Subject: [PATCH 6/6] change download url --- modeldb/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeldb/config.py b/modeldb/config.py index 594cd12..493894e 100644 --- a/modeldb/config.py +++ b/modeldb/config.py @@ -6,7 +6,7 @@ "http://modeldb.science/api/v1/models/{model_field}?modeling_application=NEURON" ) MDB_MODEL_METADATA_URL = "https://modeldb.science/api/v1/models/{model_id}" -MDB_MODEL_DOWNLOAD_URL = "https://modeldb.science/eavBinDown?o={model_id}" +MDB_MODEL_DOWNLOAD_URL = "https://modeldb.science/download/{model_id}" ROOT_DIR = os.path.abspath(__file__ + "/../../")