getmodels: download from GitHub

alexsavulescu · alexsavulescu · commit 7e2ee23ba6b8 · 2023-01-09T09:54:25.000+01:00
diff --git a/modeldb/config.py b/modeldb/config.py
@@ -2,13 +2,9 @@
 
 import os
 
-# MDB_NEURON_MODELS_URL = "https://senselab.med.yale.edu/_site/webapi/object.json/?cl=19&oid=1882"
 MDB_NEURON_MODELS_URL = (
     "http://modeldb.science/api/v1/models?modeling_application=NEURON"
 )
-MDB_MODEL_DOWNLOAD_URL = (
-    "https://senselab.med.yale.edu/_site/webapi/object.json/{model_id}"
-)
 
 ROOT_DIR = os.path.abspath(__file__ + "/../../")
 
diff --git a/modeldb/data.py b/modeldb/data.py
diff --git a/modeldb/modeldb.py b/modeldb/modeldb.py
@@ -7,70 +7,36 @@
 import time
 from .progressbar import ProgressBar
 import yaml
-from .data import Model
 from .config import *
 import traceback
 from pprint import pformat
 
 def download_model(arg_tuple):
     model_id, model_run_info = arg_tuple
     try:
-        model_json = requests.get(MDB_MODEL_DOWNLOAD_URL.format(model_id=model_id)).json()
-        model = Model(
-            *(
-                model_json[key]
-                for key in ("object_id", "object_name", "object_created", "object_ver_date")
-            )
-        )
-        url = None
-        for att in model_json["object_attribute_values"]:
-            if att["attribute_id"] == 23:
-                url = att["value"]
-                break
-        # print(model.id)
         model_zip_uri = os.path.join(
-            MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model.id)
+            MODELS_ZIP_DIR, "{model_id}.zip".format(model_id=model_id))
+
+        suffix = model_run_info["github"] if "github" in model_run_info else "master"
+        github_url = "https://github.com/ModelDBRepository/{model_id}/archive/refs/heads/{suffix}.zip".format(
+            model_id=model_id, suffix=suffix
         )
-        with open(model_zip_uri, "wb+") as zipfile:
-            zipfile.write(base64.standard_b64decode(url["file_content"]))
-
-        if "github" in model_run_info:
-            # This means we should try to replace the version of the model that
-            # we downloaded from the ModelDB API just above with a version from
-            # GitHub
-            github = model_run_info["github"]
-            if github == "default":
-                suffix = ""
-            elif github.startswith("pull/"):
-                pr_number = int(github[5:])
-                suffix = "/pull/{}/head".format(pr_number)
-            else:
-                raise Exception("Invalid value for github key: {}".format(github))
-            github_url = "https://api.github.com/repos/ModelDBRepository/{model_id}/zipball{suffix}".format(
-                model_id=model_id, suffix=suffix
-            )
-            # Replace the local file `model_zip_uri` with the zip file we
-            # downloaded from `github_url`
-            num_attempts = 3
-            status_codes = []
-            for _ in range(num_attempts):
-                github_response = requests.get(github_url)
-                status_codes.append(github_response.status_code)
-                if github_response.status_code == requests.codes.ok:
-                    break
-                time.sleep(5)
-            else:
-                raise Exception(
-                    "Failed to download {} with status codes {}".format(
-                        github_url, status_codes
-                    )
-                )
-            with open(model_zip_uri, "wb+") as zipfile:
-                zipfile.write(github_response.content)
+      
+        # download github_url to model_zip_uri
+        logging.info("Downloading model {} from {}".format(model_id, github_url))
+        response = requests.get(github_url, stream=True)
+        if response.status_code != 200:
+            raise Exception("Failed to download model: {}".format(response.text))
+        with open(model_zip_uri, "wb") as f:
+            for chunk in response.iter_content(chunk_size=1024):
+                if chunk:
+                    f.write(chunk)
+                    f.flush()
+        logging.info("Downloaded model {} to {}".format(model_id, model_zip_uri))
     except Exception as e:  #  noqa
-        model = e
+        github_url = e
 
-    return model_id, model
+    return model_id, github_url
 
 
 class ModelDB(object):
@@ -105,11 +71,15 @@ def _download_models(self, model_list=None):
             [(model_id, self._run_instr.get(model_id, {})) for model_id in models],
         )
         download_err = {}
-        for model_id, model in ProgressBar.iter(processed_models, len(models)):
-            if not isinstance(model, Exception):
-                self._metadata[model_id] = model
+        for model_id, model_url in ProgressBar.iter(processed_models, len(models)):
+            
+            if not isinstance(model_url, Exception):
+                model_meta = {}
+                model_meta["id"] = model_id
+                model_meta["url"] = model_url   
+                self._metadata[model_id] = model_meta
             else:
-                download_err[model_id] = model
+                download_err[model_id] = model_url
 
         if download_err:
             logging.error("Error downloading models:")
diff --git a/modeldb/modelrun.py b/modeldb/modelrun.py
@@ -4,7 +4,6 @@
 import platform
 import sys
 from .progressbar import ProgressBar
-from .data import Model
 from . import modeldb
 from .config import *
 from .hocscripts import *
@@ -35,7 +34,7 @@ def is_dir_non_empty(directory):
 class ModelRun(dict):
     def __init__(self, model, working_dir, clean=False, norun=False, inplace=False):
         super().__init__()
-        self._model = model
+        super().update(model)
         self._working_dir = os.path.abspath(working_dir)
         self._logs = []
         self._gout = []
@@ -88,7 +87,7 @@ def _fetch_model(self):
     run_time = property(lambda self: self._run_time)
     run_times = property(lambda self: self._run_times)
 
-    id = property(lambda self: self._model.id)
+    id = property(lambda self: self["id"])
 
 
 def curate_log_string(model, logstr):
diff --git a/modeldb/report.py b/modeldb/report.py
@@ -98,8 +98,9 @@ def _speedup(a, b):
                         runtime_dict[k][runkey] = _speedup(data_a[k]["run_times"][runkey], data_b[k]["run_times"][runkey])
                 
                 # compare gout
-                gout_a_file = os.path.join(data_a[k]["run_info"]["start_dir"], "gout")
-                gout_b_file = os.path.join(data_b[k]["run_info"]["start_dir"], "gout")
+                gout_a_file = os.path.join(start_dir_a, "gout")
+                gout_b_file = os.path.join(start_dir_b, "gout")
+
                 # gout may be missing in one of the paths. `diff -N` treats non-existent files as empty.
                 if os.path.isfile(gout_a_file) or os.path.isfile(gout_b_file):
                     diff_out = subprocess.getoutput(