From 1fb81236ac0c32e533070976f142a62443bfcdfa Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 14:05:29 +0200 Subject: [PATCH 1/6] getmodels: use modeldb.science website --- README.md | 3 +- modeldb/config.py | 10 +-- modeldb/modeldb.py | 210 ++++++++++++++++++++++++++++----------------- 3 files changed, 139 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index df70c71..fa44fbd 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,8 @@ The following commands are now available: | MODELDB_RUN_FILE | yaml file containing run instructions for models (required for `runmodels`) | | MODELDB_METADATA_FILE | yaml file containing model info for those downloaded with `getmodels` | | MODELS_ZIP_DIR | location of cache folder for models populated via `getmodels` | - | MDB_NEURON_MODELS_URL | url used to get list of all NEURON model ids (necessary for `getmodels`) | + | MDB_NEURON_MODELS_URL | url template used to get NEURON model IDs and last-updated timestamps (needed for `getmodels`) | + | MDB_MODEL_METADATA_URL | url template used to get metadata about a single NEURON model (needed for `getmodels`) | | MDB_MODEL_DOWNLOAD_URL | url template used for model downloading (cf `{model_id}`) | ## Model Run diff --git a/modeldb/config.py b/modeldb/config.py index 7478fff..594cd12 100644 --- a/modeldb/config.py +++ b/modeldb/config.py @@ -2,17 +2,15 @@ import os -# MDB_NEURON_MODELS_URL = "https://senselab.med.yale.edu/_site/webapi/object.json/?cl=19&oid=1882" MDB_NEURON_MODELS_URL = ( - "http://modeldb.science/api/v1/models?modeling_application=NEURON" -) -MDB_MODEL_DOWNLOAD_URL = ( - "https://senselab.med.yale.edu/_site/webapi/object.json/{model_id}" + "http://modeldb.science/api/v1/models/{model_field}?modeling_application=NEURON" ) +MDB_MODEL_METADATA_URL = "https://modeldb.science/api/v1/models/{model_id}" +MDB_MODEL_DOWNLOAD_URL = "https://modeldb.science/eavBinDown?o={model_id}" ROOT_DIR = os.path.abspath(__file__ + "/../../") MODELS_ZIP_DIR = 
"%s/cache" % ROOT_DIR MODELDB_ROOT_DIR = "%s/modeldb" % ROOT_DIR MODELDB_METADATA_FILE = "%s/modeldb-meta.yaml" % MODELDB_ROOT_DIR -MODELDB_RUN_FILE = "%s/modeldb-run.yaml" % MODELDB_ROOT_DIR \ No newline at end of file +MODELDB_RUN_FILE = "%s/modeldb-run.yaml" % MODELDB_ROOT_DIR diff --git a/modeldb/modeldb.py b/modeldb/modeldb.py index e72fb73..53bc402 100644 --- a/modeldb/modeldb.py +++ b/modeldb/modeldb.py @@ -1,77 +1,91 @@ -from . import config +from .config import * +from .data import Model import logging import multiprocessing -import base64 import os +from pprint import pformat +from .progressbar import ProgressBar import requests import time -from .progressbar import ProgressBar import yaml -from .data import Model -from .config import * -import traceback -from pprint import pformat + def download_model(arg_tuple): - model_id, model_run_info = arg_tuple + model_id, model_run_info, expected_ver_date = arg_tuple try: - model_json = requests.get(MDB_MODEL_DOWNLOAD_URL.format(model_id=model_id)).json() + # Fetch the model metadata from ModelDB. + model_json = requests.get( + MDB_MODEL_METADATA_URL.format(model_id=model_id) + ).json() + # Check that the timestamp matches our expectations. + assert model_json["ver_date"] == expected_ver_date + # Assemble a Model object from the JSON metadata just fetched model = Model( - *( - model_json[key] - for key in ("object_id", "object_name", "object_created", "object_ver_date") - ) + model_json["id"], + model_json["name"], + model_json["created"], + model_json["ver_date"], ) - url = None - for att in model_json["object_attribute_values"]: - if att["attribute_id"] == 23: - url = att["value"] - break - # print(model.id) - model_zip_uri = os.path.join( - MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model.id) - ) - with open(model_zip_uri, "wb+") as zipfile: - zipfile.write(base64.standard_b64decode(url["file_content"])) - + # Now fetch the actual model data .zip file. 
By default this also comes + # from ModelDB, but it can be overridden to come from GitHub instead. if "github" in model_run_info: - # This means we should try to replace the version of the model that - # we downloaded from the ModelDB API just above with a version from - # GitHub + # This means we should try to download the model content from + # GitHub instead of from ModelDB. github = model_run_info["github"] organisation = "ModelDBRepository" - suffix = "" # default branch + suffix = "" # default branch if github == "default": + # Using + # github: "default" + # in modeldb-run.yaml implies that we fetch the HEAD of the + # default branch from ModelDBRepository on GitHub. In general + # this should be the same thing as fetching from ModelDB. pass elif github.startswith("pull/"): + # Using + # github: "pull/4" + # in modeldb-run.yaml implies that we use the branch from pull + # request #4 to ModelDBRepository/{model_id} on GitHub. This is + # used if you want to test updates to models. pr_number = int(github[5:]) suffix = "/pull/{}/head".format(pr_number) - elif github.startswith('/'): - # /org implies that we use the default branch from org/model_id + elif github.startswith("/"): + # Using + # github: "/myname" + # in modeldb-run.yaml implies that we fetch the HEAD of the + # default branch of myname/{model_id} on GitHub. This is useful + # if you need to test changes to a model that does not exist on + # GitHub under the ModelDBRepository organisation. 
organisation = github[1:] else: raise Exception("Invalid value for github key: {}".format(github)) - github_url = "https://api.github.com/repos/{organisation}/{model_id}/zipball{suffix}".format( + url = "https://api.github.com/repos/{organisation}/{model_id}/zipball{suffix}".format( model_id=model_id, organisation=organisation, suffix=suffix ) - # Replace the local file `model_zip_uri` with the zip file we - # downloaded from `github_url` - num_attempts = 3 - status_codes = [] - for _ in range(num_attempts): - github_response = requests.get(github_url) - status_codes.append(github_response.status_code) - if github_response.status_code == requests.codes.ok: - break - time.sleep(5) - else: - raise Exception( - "Failed to download {} with status codes {}".format( - github_url, status_codes - ) - ) - with open(model_zip_uri, "wb+") as zipfile: - zipfile.write(github_response.content) + else: + # Get the .zip file from ModelDB, not from GitHub. + url = MDB_MODEL_DOWNLOAD_URL.format(model_id=model_id) + + # Construct the path we want to save the .zip at locally. + model_zip_uri = os.path.join( + MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model.id) + ) + + # Download the model data from `url`. Retry a few times on failure. 
+ num_attempts = 3 + status_codes = [] + for _ in range(num_attempts): + model_download_response = requests.get(url) + status_codes.append(model_download_response.status_code) + if model_download_response.status_code == requests.codes.ok: + break + time.sleep(5) + else: + raise Exception( + "Failed to download {} with status codes {}".format(url, status_codes) + ) + with open(model_zip_uri, "wb+") as zipfile: + zipfile.write(model_download_response.content) except Exception as e: # noqa model = e @@ -79,6 +93,7 @@ def download_model(arg_tuple): class ModelDB(object): + logger = None metadata = property(lambda self: self._metadata) run_instr = property(lambda self: self._run_instr) @@ -87,38 +102,75 @@ def __init__(self): self._run_instr = {} self._load_run_instructions() + self._setup_logging() try: self._load_metadata() except FileNotFoundError: - logging.warning( - "{} not found!".format(MODELDB_METADATA_FILE) - ) + ModelDB.logger.warning("{} not found!".format(MODELDB_METADATA_FILE)) except yaml.YAMLError as y: - logging.error("Error loading {}: {}".format(MODELDB_METADATA_FILE, y)) + ModelDB.logger.error( + "Error loading {}: {}".format(MODELDB_METADATA_FILE, y) + ) raise y except Exception as e: raise e - def _download_models(self, model_list=None): + def download_models(self, model_list=None): if not os.path.isdir(MODELS_ZIP_DIR): - logging.info("Creating cache directory: {}".format(MODELS_ZIP_DIR)) + ModelDB.logger.info("Creating cache directory: {}".format(MODELS_ZIP_DIR)) os.mkdir(MODELS_ZIP_DIR) - models = requests.get(MDB_NEURON_MODELS_URL).json() if model_list is None else model_list - pool = multiprocessing.Pool() - processed_models = pool.imap_unordered( - download_model, - [(model_id, self._run_instr.get(model_id, {})) for model_id in models], - ) + # Fetch the list of NEURON model IDs, and a list of timestamps for + # those models. We do this even if `model_list` is not None to build + # the model ID -> timestamp mapping. 
+ def query(field): + return requests.get(MDB_NEURON_MODELS_URL.format(model_field=field)).json() + + all_model_ids = query("id") + all_model_timestamps = query("ver_date") + metadata = { + model_id: timestamp + for model_id, timestamp in zip(all_model_ids, all_model_timestamps) + } + # If we were passed a non-None `model_list`, restrict those models now. + if model_list is not None: + metadata = {model_id: metadata[model_id] for model_id in model_list} + # For each model in `metadata`, check if a cached entry exists and is + # up to date. If not, download it. + models_to_download = [] + for model_id, new_ver_date in metadata.items(): + if model_id in self._metadata: + cached_ver_date = self._metadata[model_id]._ver_date + if cached_ver_date == new_ver_date: + ModelDB.logger.debug( + "Model {} cache up to date ({})".format(model_id, new_ver_date) + ) + continue + else: + ModelDB.logger.debug( + "Model {} cache out of date (cached: {}, new: {})".format( + model_id, cached_ver_date, new_ver_date + ) + ) + else: + ModelDB.logger.debug("Model {} not found in cache".format(model_id)) + models_to_download.append( + (model_id, self._run_instr.get(model_id, {}), new_ver_date) + ) + # Download the missing or out of date models in parallel + pool = multiprocessing.Pool(8) + processed_models = pool.imap_unordered(download_model, models_to_download) download_err = {} - for model_id, model in ProgressBar.iter(processed_models, len(models)): + for model_id, model in ProgressBar.iter( + processed_models, len(models_to_download) + ): if not isinstance(model, Exception): self._metadata[model_id] = model else: download_err[model_id] = model if download_err: - logging.error("Error downloading models:") - logging.error(pformat(download_err)) + ModelDB.logger.error("Error downloading models:") + ModelDB.logger.error(pformat(download_err)) self._save_metadata() @@ -134,15 +186,19 @@ def _save_metadata(self): with open(MODELDB_METADATA_FILE, "w+") as meta_file: yaml.dump(self._metadata, 
meta_file, sort_keys=True) - def download_models(self, model_list=None): - if model_list is None: - try: - os.remove(MODELDB_METADATA_FILE) - except OSError: - pass - self._download_models(model_list) - - # TODO -> check/update models - def update_models(self): - pass - + def _setup_logging(self): + if ModelDB.logger is not None: + return + formatter = logging.Formatter( + fmt="%(asctime)s :: %(levelname)-8s :: %(message)s" + ) + consoleHandler = logging.StreamHandler() + consoleHandler.setFormatter(formatter) + consoleHandler.setLevel(logging.INFO) + fileHandler = logging.FileHandler("modeldb.log") + fileHandler.setFormatter(formatter) + fileHandler.setLevel(logging.DEBUG) + ModelDB.logger = logging.getLogger("modeldb") + ModelDB.logger.setLevel(logging.DEBUG) + ModelDB.logger.addHandler(consoleHandler) + ModelDB.logger.addHandler(fileHandler) From 3c742b0dfd7f2a7fda112ddbd270a3f901324465 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 14:08:28 +0200 Subject: [PATCH 2/6] CI: drop monthly cache invalidation --- .github/workflows/nrn-modeldb-ci.yaml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nrn-modeldb-ci.yaml b/.github/workflows/nrn-modeldb-ci.yaml index 9b1e2a7..7646089 100644 --- a/.github/workflows/nrn-modeldb-ci.yaml +++ b/.github/workflows/nrn-modeldb-ci.yaml @@ -138,20 +138,17 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi #Install project in editable mode python -m pip install -e . 
- echo "date=$(date -u "+%Y%m")" >> $GITHUB_OUTPUT - name: Cache ModelDB models - id: cache-models uses: actions/cache@v3 with: path: | cache modeldb/modeldb-meta.yaml - key: models-${{steps.install-deps.outputs.date}} + key: models - name: Get ModelDB models - if: steps.cache-models.outputs.cache-hit != 'true' - run: getmodels + run: getmodels $MODELS_TO_RUN - name: Run Models with NEURON V1 -> ${{ env.NEURON_V1 }} run: | From 7092989e6392e378eb1f815f648679cabda65a11 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 14:15:09 +0200 Subject: [PATCH 3/6] CI: tweak cache key so it misses --- .github/workflows/nrn-modeldb-ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nrn-modeldb-ci.yaml b/.github/workflows/nrn-modeldb-ci.yaml index 7646089..2653f35 100644 --- a/.github/workflows/nrn-modeldb-ci.yaml +++ b/.github/workflows/nrn-modeldb-ci.yaml @@ -145,7 +145,7 @@ jobs: path: | cache modeldb/modeldb-meta.yaml - key: models + key: dynamic-models - name: Get ModelDB models run: getmodels $MODELS_TO_RUN From ca6314983f78a19e9f84c78eea63108ea3b70d78 Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 16:35:32 +0200 Subject: [PATCH 4/6] Add a better error message --- .github/workflows/nrn-modeldb-ci.yaml | 2 -- modeldb/modeldb.py | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nrn-modeldb-ci.yaml b/.github/workflows/nrn-modeldb-ci.yaml index 2653f35..0619fab 100644 --- a/.github/workflows/nrn-modeldb-ci.yaml +++ b/.github/workflows/nrn-modeldb-ci.yaml @@ -159,7 +159,6 @@ jobs: python -m pip install $NEURON_V1 fi nrn_ver=`python -c "from neuron import __version__ as nrn_ver; print(nrn_ver)"` - ps uxf # debug runmodels --gout --workdir=$nrn_ver $MODELS_TO_RUN # Filter out the gout data before generating HTML reports. The HTML # diff uses the original gout files on disk anyway. 
Compress the large @@ -186,7 +185,6 @@ jobs: python -m pip install $NEURON_V2 fi nrn_ver=`python -c "from neuron import __version__ as nrn_ver; print(nrn_ver)"` - ps uxf # debug runmodels --gout --workdir=$nrn_ver $MODELS_TO_RUN # Filter out the gout data before generating HTML reports. The HTML # diff uses the original gout files on disk anyway. Compress the large diff --git a/modeldb/modeldb.py b/modeldb/modeldb.py index 53bc402..3927d01 100644 --- a/modeldb/modeldb.py +++ b/modeldb/modeldb.py @@ -133,6 +133,13 @@ def query(field): } # If we were passed a non-None `model_list`, restrict those models now. if model_list is not None: + missing_ids = set(model_list) - set(metadata.keys()) + if len(missing_ids): + raise Exception( + "Model IDs {} were explicitly requested, but are not known NEURON models.".format( + missing_ids + ) + ) metadata = {model_id: metadata[model_id] for model_id in model_list} # For each model in `metadata`, check if a cached entry exists and is # up to date. If not, download it. From 8dce11db511fcd473908436b916a9a99f8aa3f6d Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 17:04:20 +0200 Subject: [PATCH 5/6] suggestion from Robert --- modeldb/modeldb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeldb/modeldb.py b/modeldb/modeldb.py index 3927d01..e47e3d0 100644 --- a/modeldb/modeldb.py +++ b/modeldb/modeldb.py @@ -134,7 +134,7 @@ def query(field): # If we were passed a non-None `model_list`, restrict those models now. 
if model_list is not None: missing_ids = set(model_list) - set(metadata.keys()) - if len(missing_ids): + if missing_ids: raise Exception( "Model IDs {} were explicitly requested, but are not known NEURON models.".format( missing_ids From 714d9592eb687b1bfcb254a2bf9d9bb43483331f Mon Sep 17 00:00:00 2001 From: Olli Lupton Date: Wed, 21 Jun 2023 17:12:19 +0200 Subject: [PATCH 6/6] change download url --- modeldb/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeldb/config.py b/modeldb/config.py index 594cd12..493894e 100644 --- a/modeldb/config.py +++ b/modeldb/config.py @@ -6,7 +6,7 @@ "http://modeldb.science/api/v1/models/{model_field}?modeling_application=NEURON" ) MDB_MODEL_METADATA_URL = "https://modeldb.science/api/v1/models/{model_id}" -MDB_MODEL_DOWNLOAD_URL = "https://modeldb.science/eavBinDown?o={model_id}" +MDB_MODEL_DOWNLOAD_URL = "https://modeldb.science/download/{model_id}" ROOT_DIR = os.path.abspath(__file__ + "/../../")