Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

getmodels: download from GitHub #72

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/nrn-modeldb-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ jobs:
NEURON_V1: ${{ github.event.inputs.neuron_v1 || inputs.neuron_v1 || 'neuron' }}
NEURON_V2: ${{ github.event.inputs.neuron_v2 || inputs.neuron_v2 || 'neuron-nightly' }}
MODELS_TO_RUN: ${{ github.event.inputs.models_to_run || inputs.models_to_run || '' }}
GH_TOKEN: ${{ github.token }}

steps:

Expand Down Expand Up @@ -130,6 +131,8 @@ jobs:
id: install-deps
run: |
set
# Install GH cli
sudo apt-get install gh
# Set up Xvfb
sudo apt-get install xvfb
sudo /usr/bin/Xvfb $DISPLAY -screen 0 1600x1200x24 -noreset -nolock -shmem & # run in bg
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,6 @@ The following commands are now available:
| MODELDB_RUN_FILE | yaml file containing run instructions for models (required for `runmodels`) |
| MODELDB_METADATA_FILE | yaml file containing model info for those downloaded with `getmodels` |
| MODELS_ZIP_DIR | location of cache folder for models populated via `getmodels` |
| MDB_NEURON_MODELS_URL | url used to get list of all NEURON model ids (necessary for `getmodels`) |
| MDB_MODEL_DOWNLOAD_URL | url template used for model downloading (cf `{model_id}`) |

## Model Run

Expand Down
2 changes: 1 addition & 1 deletion modeldb/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def modeldb_config(args=None):

Examples
modeldb-config
modeldb-config --item=MDB_NEURON_MODELS_URL
modeldb-config --item=MODELS_ZIP_DIR
"""
options = docopt(modeldb_config.__doc__, args)
item = options.pop("--item", None)
Expand Down
8 changes: 0 additions & 8 deletions modeldb/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,6 @@

import os

# MDB_NEURON_MODELS_URL = "https://senselab.med.yale.edu/_site/webapi/object.json/?cl=19&oid=1882"
MDB_NEURON_MODELS_URL = (
"http://modeldb.science/api/v1/models?modeling_application=NEURON"
)
MDB_MODEL_DOWNLOAD_URL = (
"https://senselab.med.yale.edu/_site/webapi/object.json/{model_id}"
)

ROOT_DIR = os.path.abspath(__file__ + "/../../")

MODELS_ZIP_DIR = "%s/cache" % ROOT_DIR
Expand Down
14 changes: 0 additions & 14 deletions modeldb/data.py

This file was deleted.

104 changes: 41 additions & 63 deletions modeldb/modeldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,75 +7,37 @@
import time
from .progressbar import ProgressBar
import yaml
from .data import Model
from .config import *
import traceback
from pprint import pformat


def download_model(arg_tuple):
model_id, model_run_info = arg_tuple
try:
model_json = requests.get(MDB_MODEL_DOWNLOAD_URL.format(model_id=model_id)).json()
model = Model(
*(
model_json[key]
for key in ("object_id", "object_name", "object_created", "object_ver_date")
)
)
url = None
for att in model_json["object_attribute_values"]:
if att["attribute_id"] == 23:
url = att["value"]
break
# print(model.id)
model_zip_uri = os.path.join(
MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model.id)
MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model_id))

suffix = model_run_info["github"] if "github" in model_run_info else "master"
github_url = "https://github.com/ModelDBRepository/{model_id}/archive/refs/heads/{suffix}.zip".format(
model_id=model_id, suffix=suffix
)
with open(model_zip_uri, "wb+") as zipfile:
zipfile.write(base64.standard_b64decode(url["file_content"]))

if "github" in model_run_info:
# This means we should try to replace the version of the model that
# we downloaded from the ModelDB API just above with a version from
# GitHub
github = model_run_info["github"]
organisation = "ModelDBRepository"
suffix = "" # default branch
if github == "default":
pass
elif github.startswith("pull/"):
pr_number = int(github[5:])
suffix = "/pull/{}/head".format(pr_number)
elif github.startswith('/'):
# /org implies that we use the default branch from org/model_id
organisation = github[1:]
else:
raise Exception("Invalid value for github key: {}".format(github))
github_url = "https://api.github.com/repos/{organisation}/{model_id}/zipball{suffix}".format(
model_id=model_id, organisation=organisation, suffix=suffix
)
# Replace the local file `model_zip_uri` with the zip file we
# downloaded from `github_url`
num_attempts = 3
status_codes = []
for _ in range(num_attempts):
github_response = requests.get(github_url)
status_codes.append(github_response.status_code)
if github_response.status_code == requests.codes.ok:
break
time.sleep(5)
else:
raise Exception(
"Failed to download {} with status codes {}".format(
github_url, status_codes
)
)
with open(model_zip_uri, "wb+") as zipfile:
zipfile.write(github_response.content)

# download github_url to model_zip_uri
logging.info("Downloading model {} from {}".format(model_id, github_url))
response = requests.get(github_url, stream=True)
if response.status_code != 200:
raise Exception("Failed to download model: {}".format(response.text))
with open(model_zip_uri, "wb") as f:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
f.flush()
logging.info("Downloaded model {} to {}".format(model_id, model_zip_uri))
except Exception as e: # noqa
model = e
github_url = e

return model_id, model
return model_id, github_url


class ModelDB(object):
Expand All @@ -99,22 +61,38 @@ def __init__(self):
except Exception as e:
raise e

def _gh_cli_get_neuron_simulator_repositories(self):
    """Return the ids of the NEURON model repositories listed by the GitHub CLI.

    Runs ``gh repo list modeldbrepository --topic neuron-simulator --json name
    -L 2000`` and converts every returned repository name to an ``int``.

    Returns:
        list[int]: one id per repository whose name is a valid integer.

    Raises:
        FileNotFoundError: if the ``gh`` executable cannot be found.
        subprocess.CalledProcessError: if ``gh`` exits with a non-zero status.
        json.JSONDecodeError: if ``gh`` prints something that is not JSON.
    """
    # Imports are kept local to the method, as in the original implementation.
    import json
    import logging
    import subprocess

    # `-L 2000` caps the number of repositories gh returns; `--json name`
    # restricts the output to a JSON array of {"name": ...} objects.
    command = [
        "gh", "repo", "list", "modeldbrepository",
        "--topic", "neuron-simulator",
        "--json", "name",
        "-L", "2000",
    ]
    output = subprocess.check_output(command, text=True)

    model_ids = []
    for repository in json.loads(output):
        name = repository["name"]
        try:
            model_ids.append(int(name))
        except ValueError:
            # A single repository whose name is not a number must not abort
            # the whole listing; report it and carry on with the rest.
            logging.warning(
                "Skipping repository with non-numeric name: %s", name
            )
    return model_ids

def _download_models(self, model_list=None):
if not os.path.isdir(MODELS_ZIP_DIR):
logging.info("Creating cache directory: {}".format(MODELS_ZIP_DIR))
os.mkdir(MODELS_ZIP_DIR)
models = requests.get(MDB_NEURON_MODELS_URL).json() if model_list is None else model_list
models = self._gh_cli_get_neuron_simulator_repositories() if model_list is None else model_list
pool = multiprocessing.Pool()
processed_models = pool.imap_unordered(
download_model,
[(model_id, self._run_instr.get(model_id, {})) for model_id in models],
)
download_err = {}
for model_id, model in ProgressBar.iter(processed_models, len(models)):
if not isinstance(model, Exception):
self._metadata[model_id] = model
for model_id, model_url in ProgressBar.iter(processed_models, len(models)):

if not isinstance(model_url, Exception):
model_meta = {}
model_meta["id"] = model_id
model_meta["url"] = model_url
self._metadata[model_id] = model_meta
else:
download_err[model_id] = model
download_err[model_id] = model_url

if download_err:
logging.error("Error downloading models:")
Expand Down
5 changes: 2 additions & 3 deletions modeldb/modelrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import platform
import sys
from .progressbar import ProgressBar
from .data import Model
from . import modeldb
from .config import *
from .hocscripts import *
Expand Down Expand Up @@ -35,7 +34,7 @@ def is_dir_non_empty(directory):
class ModelRun(dict):
def __init__(self, model, working_dir, clean=False, norun=False, inplace=False):
super().__init__()
self._model = model
super().update(model)
self._working_dir = os.path.abspath(working_dir)
self._logs = []
self._gout = []
Expand Down Expand Up @@ -88,7 +87,7 @@ def _fetch_model(self):
run_time = property(lambda self: self._run_time)
run_times = property(lambda self: self._run_times)

id = property(lambda self: self._model.id)
id = property(lambda self: self["id"])


def curate_log_string(model, logstr):
Expand Down
5 changes: 3 additions & 2 deletions modeldb/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ def _speedup(a, b):
runtime_dict[k][runkey] = _speedup(data_a[k]["run_times"][runkey], data_b[k]["run_times"][runkey])

# compare gout
gout_a_file = os.path.join(data_a[k]["run_info"]["start_dir"], "gout")
gout_b_file = os.path.join(data_b[k]["run_info"]["start_dir"], "gout")
gout_a_file = os.path.join(start_dir_a, "gout")
gout_b_file = os.path.join(start_dir_b, "gout")

# gout may be missing in one of the paths. `diff -N` treats non-existent files as empty.
if os.path.isfile(gout_a_file) or os.path.isfile(gout_b_file):
# https://stackoverflow.com/questions/1180606/using-subprocess-popen-for-process-with-large-output
Expand Down