implemented a cache for already downloaded models #39
Changes from 3 commits
```diff
@@ -1,11 +1,13 @@
 import logging
+import os
 from pathlib import Path
 from typing import Dict, Optional

 import numpy as np
 import onnx
 import onnxruntime as ort
 import requests
+from diskcache import Cache
 from giza import API_HOST
 from giza.client import ApiClient, EndpointsClient, ModelsClient, VersionsClient
 from giza.utils.enums import Framework, VersionStatus
```
```diff
@@ -79,12 +81,14 @@ def __init__(
         self._get_credentials()
         self.model = self._get_model(id)
         self.version = self._get_version(version)
-        self.session = self._set_session()
         self.framework = self.version.framework
         self.uri = self._retrieve_uri()
         self.endpoint_id = self._get_endpoint_id()
-        self._download_model(output_path)
+        if output_path:
+            self.session = self._set_session(output_path)
+            self._download_model(output_path)
+        self._cache = Cache(os.path.join(os.getcwd(), "tmp", "cachedir"))

     def _get_endpoint_id(self):
         """
```
**Review comment** on `if output_path:`

Having to pass `output_path` makes it effectively mandatory, which is not what we are aiming for. An example of the init could be:

```python
def __init__(
    self,
    model_path: Optional[str] = None,
    id: Optional[int] = None,
    version: Optional[int] = None,
    output_path: Optional[str] = None,
):
    ...
    if model_path:
        self.session = ort.InferenceSession(model_path)
    elif id and version:
        self.model_id = id
        self.version_id = version
        self.model_client = ModelsClient(API_HOST)
        self.version_client = VersionsClient(API_HOST)
        self.api_client = ApiClient(API_HOST)
        self.endpoints_client = EndpointsClient(API_HOST)
        self._get_credentials()
        self.model = self._get_model(id)
        self.version = self._get_version(version)
        self.session = self._set_session()
        self.framework = self.version.framework
        self.uri = self._retrieve_uri()
        self.endpoint_id = self._get_endpoint_id()
        if output_path is not None:
            self._output_path = output_path
        else:
            # Note: this needs `import tempfile` at module level.
            self._output_path = os.path.join(
                tempfile.gettempdir(),
                f"{self.model_id}_{self.version_id}_{self.model.name}",
            )
        # Now this internally uses self._output_path.
        # As we are using the cache, hitting this function should not be problematic.
        self._download_model()
```
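To make the suggestion concrete, construction under the proposed signature could look like this. This is a hypothetical usage sketch: the class name `GizaModel` and all argument values are assumptions, not code from this PR.

```python
# Hypothetical usage of the proposed __init__; values are illustrative only.
local_model = GizaModel(model_path="linear_regression.onnx")  # wraps a local ONNX file

# Remote model: downloaded on first use, then served from the disk cache.
remote_model = GizaModel(id=1, version=2)

# Optionally pin where the ONNX file lands instead of the tempdir default.
pinned_model = GizaModel(id=1, version=2, output_path="./models")
```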
**Review comment** on `self._cache = Cache(os.path.join(os.getcwd(), "tmp", "cachedir"))`

The cache should be initialized before `_set_session` and `_download_model` are called, since both of them now read and write `self._cache`. This is making the two previously existing tests fail.
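For context on why ordering is the whole fix here: `diskcache.Cache` behaves like a dict persisted on disk, so whatever `_download_model` stores under a key survives across runs and processes. A minimal, self-contained illustration of the calls this diff relies on (the key and path values are made up):

```python
import os
from diskcache import Cache

# Open (or create) a cache directory; this is the object __init__ must build
# before any method tries to index into it.
cache = Cache(os.path.join(os.getcwd(), "tmp", "cachedir"))

key = "1_2_model"  # e.g. f"{model_id}_{version}_model"
if key not in cache:                # membership test, as used in _download_model
    cache[key] = "/tmp/model.onnx"  # store the path of the downloaded file

print(cache.get(key))  # -> "/tmp/model.onnx", even in a later run
```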
```diff
@@ -149,7 +153,7 @@ def _get_version(self, version_id: int):
         """
         return self.version_client.get(self.model.id, version_id)

-    def _set_session(self):
+    def _set_session(self, output_path: str):
         """
         Set onnxruntime session for the model specified by model id.

```
```diff
@@ -163,9 +167,13 @@ def _set_session(self):
         )

         try:
-            onnx_model = self.version_client.download_original(
-                self.model.id, self.version.version
-            )
+            cache_str = f"{self.model.id}_{self.version.version}_model"
+            self._download_model(output_path)
+
+            if cache_str in self._cache:
+                file_path = Path(self._cache.get(cache_str))
+                with open(file_path, "rb") as f:
+                    onnx_model = f.read()

             return ort.InferenceSession(onnx_model)
```

**Review comment** on `cache_str = f"{self.model.id}_{self.version.version}_model"`

Let's remove this and use `self._output_path` so cache keys are more consistent.

**Review comment** on `if cache_str in self._cache:`

With the proposed changes this would become:

```python
if self._output_path in self._cache:
    file_path = Path(self._cache.get(self._output_path))
    with open(file_path, "rb") as f:
        onnx_model = f.read()
```
```diff
@@ -189,21 +197,28 @@ def _download_model(self, output_path: str):
                 f"Model version status is not completed {self.version.status}"
             )

-        onnx_model = self.version_client.download_original(
-            self.model.id, self.version.version
-        )
-
-        logger.info("ONNX model is ready, downloading! ✅")
+        cache_str = f"{self.model.id}_{self.version.version}_model"

-        if ".onnx" in output_path:
-            save_path = Path(output_path)
-        else:
-            save_path = Path(f"{output_path}/{self.model.name}.onnx")
+        logger.info("ONNX model is ready, downloading! ✅")
+        if cache_str not in self._cache:
+            onnx_model = self.version_client.download_original(
+                self.model.id, self.version.version
+            )

-        with open(save_path, "wb") as f:
-            f.write(onnx_model)
+            if ".onnx" in output_path:
+                save_path = Path(output_path)
+            else:
+                save_path = Path(f"{output_path}/{self.model.name}.onnx")

-        logger.info(f"ONNX model saved at: {save_path} ✅")
+            with open(save_path, "wb") as f:
+                f.write(onnx_model)
+            self._cache[cache_str] = save_path
+
+            logger.info(f"ONNX model saved at: {save_path} ✅")
+        else:
+            logger.info(f"ONNX model already downloaded at: {output_path} ✅.")

     def _get_credentials(self):
         """
```

**Review comment** on `cache_str = f"{self.model.id}_{self.version.version}_model"`

As in the previous comment, let's remove this and use `self._output_path` so cache keys are more consistent.

**Review comment** on `if ".onnx" in output_path:`

With the proposed changes this should be `if ".onnx" in self._output_path:`.
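Pulling the reviewer's two comments together, `_download_model` would take no path argument and key the cache on `self._output_path`. Below is a sketch of how the method body might look under that proposal; it reflects the reviewer's direction rather than code from the PR, and it assumes `self._output_path` is set in `__init__` as suggested earlier.

```python
def _download_model(self) -> None:
    # Key the cache on self._output_path so every call site agrees on the key.
    if self._output_path not in self._cache:
        onnx_model = self.version_client.download_original(
            self.model.id, self.version.version
        )
        logger.info("ONNX model is ready, downloading! ✅")

        if ".onnx" in self._output_path:
            save_path = Path(self._output_path)
        else:
            save_path = Path(f"{self._output_path}/{self.model.name}.onnx")

        with open(save_path, "wb") as f:
            f.write(onnx_model)
        self._cache[self._output_path] = save_path

        logger.info(f"ONNX model saved at: {save_path} ✅")
    else:
        logger.info(f"ONNX model already downloaded at: {self._output_path} ✅")
```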
```diff
@@ -221,6 +236,7 @@ def predict(
         custom_output_dtype: Optional[str] = None,
         job_size: str = "M",
         dry_run: bool = False,
+        output_path: Optional[str] = None,
     ):
         """
         Makes a prediction using either a local ONNX session or a remote deployed model, depending on the
```

**Review comment** on `output_path: Optional[str] = None,`

Remove this as it is not needed.
```diff
@@ -272,7 +288,7 @@ def predict(
             logger.info("Serialized: %s", serialized_output)

             if custom_output_dtype is None:
-                output_dtype = self._get_output_dtype()
+                output_dtype = self._get_output_dtype(output_path)
             else:
                 output_dtype = custom_output_dtype

```

**Review comment** on `output_dtype = self._get_output_dtype(output_path)`

This should be as it was before.
```diff
@@ -388,17 +404,21 @@ def _parse_cairo_response(self, response, data_type: str):
         """
         return deserialize(response, data_type)

-    def _get_output_dtype(self):
+    def _get_output_dtype(self, output_path: str):
         """
         Retrieve the Cairo output data type base on the operator type of the final node.

         Returns:
             The output dtype as a string.
         """

-        file = self.version_client.download_original(
-            self.model.id, self.version.version
-        )
+        cache_str = f"{self.model.id}_{self.version.version}_model"
+        self._download_model(output_path)
+
+        if cache_str in self._cache:
+            file_path = Path(self._cache.get(cache_str))
+            with open(file_path, "rb") as f:
+                file = f.read()

         model = onnx.load_model_from_string(file)
         graph = model.graph
```

**Review comment** on `cache_str = f"{self.model.id}_{self.version.version}_model"`

With the proposed changes this should be `self._output_path`.
**Review comment**

This `if` will make the prediction fail when `verifiable=False` if it is not provided, making it mandatory, which is not what we are aiming for.