Merge pull request #22 from gizatechxyz/feature/integrate-ezkl
Add EZKL integration to actions
Gonmeso authored Feb 26, 2024
2 parents bacedd8 + 9b433b1 commit 0ce4d0a
Showing 8 changed files with 451 additions and 181 deletions.
117 changes: 117 additions & 0 deletions examples/ezkl/linear_regression/README.md
@@ -0,0 +1,117 @@
# Train a Linear Regression Using the EZKL Backend

This example demonstrates how to train a linear regression model using the EZKL backend.

First, install the `torch`, `hummingbird-ml` and `scikit-learn` packages by running the following command:

```bash
pip install torch hummingbird-ml scikit-learn
```

This example uses the `scikit-learn` package to train a linear regression model and the `hummingbird-ml` package to convert the trained model to `torch` and then to ONNX, which maximizes compatibility with `ezkl`.

The full code can be found in the [train_linear_regression.py](train_linear_regression.py) file; each step is explained below.

## Train a Linear Regression Model

The following code trains a linear regression model using the `scikit-learn` package:

```python
import numpy as np
from sklearn.linear_model import LinearRegression

# Create a dataset
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3

# Train a linear regression model
model = LinearRegression().fit(X, y)
```

## Convert the Trained Model to `torch`

The following code converts the trained model to `torch` using the `hummingbird-ml` package:

```python
import hummingbird.ml

# Convert the trained model to `torch`
hb_model = hummingbird.ml.convert(model, "torch")
```

More information about the `hummingbird-ml` package can be found [here](https://github.com/microsoft/hummingbird).
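
As a quick sanity check, the converted container keeps a scikit-learn-style interface, so you can compare its predictions against the original model (a minimal sketch, assuming the `model`, `hb_model`, and `X` defined above):

```python
# The converted torch model should reproduce the scikit-learn predictions
# on the training data (up to floating-point error).
print(model.predict(X))
print(hb_model.predict(X))
```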

## Convert the Trained Model to ONNX

Now that we have a `torch` model, we can export it to ONNX using the standard utilities in the `torch` package. Note that the underlying `torch.nn.Module` is available as `hb_model.model`:

```python
import torch

# Example input to the model
sample = np.array([7, 2])
shape = sample.shape
x = torch.rand(1, *shape, requires_grad=True)

# Export the converted torch model (the underlying module of `hb_model`) to ONNX
torch.onnx.export(
    hb_model.model,
    x,
    "network.onnx",
    export_params=True,
    opset_version=10,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)
```

## Create an `input.json` file for transpilation

The transpilation step needs an example of the input data; here we use the `sample` variable defined above to create the `input.json` file:

```python
import json

# Write the sample shape and data that the transpiler expects
with open("input.json", "w") as f:
    f.write(
        json.dumps(
            {
                "input_shapes": [sample.shape],
                "input_data": [sample.tolist()],
            }
        )
    )
```
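
With the `sample` defined above (`np.array([7, 2])`), the resulting file should look like this:

```json
{"input_shapes": [[2]], "input_data": [[7, 2]]}
```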

## Deploy the verifiable model using EZKL framework

The first step is to use the `giza` CLI to transpile the model and create a version job. Once this job finishes, we will be able to deploy the model as a service.

```bash
giza transpile --framework EZKL --input-data input.json network.onnx
```

The next step is to deploy the model as a service.

```bash
giza deployments deploy --framework EZKL --model-id <model_id> --version-id <version_id>
```

## Perform a prediction

Add the generated `model_id` and `version_id` to the [predict_action.py](predict_action.py) file, then run the following command:

```bash
python predict_action.py
```

This will start the action that performs the prediction. It consists of two tasks; the first shows how to perform a prediction using the `GizaModel`:

```python
model = GizaModel(id=MODEL_ID, version=VERSION)

result, request_id = model.predict(input_feed=[7, 2], verifiable=True, job_size="S")

print(f"Result: {result}, request_id: {request_id}")
```

The second task takes the request ID and waits for the proof to be created; check the script for [more information](predict_action.py).
76 changes: 76 additions & 0 deletions examples/ezkl/linear_regression/predict_action.py
@@ -0,0 +1,76 @@
import time

import requests
from giza import API_HOST
from giza.client import DeploymentsClient

from giza_actions.action import Action, action
from giza_actions.model import GizaModel
from giza_actions.task import task

MODEL_ID = ... # The ID of the model
VERSION = ... # The version of the model


def get_deployment_id():
"""
Retrieve the deployment ID for the model and version.
Returns:
int: The ID of the deployment.
"""
client = DeploymentsClient(API_HOST)
return client.list(MODEL_ID, VERSION).__root__[0].id


@task
def predict():
"""
Predict using the model and version for a linear regression model.
Returns:
tuple: The result of the prediction and the request ID.
"""
model = GizaModel(id=MODEL_ID, version=VERSION)

result, request_id = model.predict(input_feed=[7, 2], verifiable=True, job_size="S")

print(f"Result: {result}, request_id: {request_id}")
return result, request_id


@task
def wait_for_proof(request_id):
"""
Wait for the proof associated with the request ID. For 240 seconds, it will attempt to retrieve the proof every 5 seconds.
Args:
request_id (str): The ID of the request.
"""
print(f"Waiting for proof for request_id: {request_id}")
client = DeploymentsClient(API_HOST)

timeout = time.time() + 240
while True:
now = time.time()
if now > timeout:
print("Proof retrieval timed out")
break
try:
proof = client.get_proof(MODEL_ID, VERSION, get_deployment_id(), request_id)
print(f"Proof: {proof.json(exclude_unset=True)}")
break
except requests.exceptions.HTTPError:
print("Proof retrieval failing, sleeping for 5 seconds")
time.sleep(5)


@action(log_prints=True)
def inference():
result, request_id = predict()
wait_for_proof(request_id)


if __name__ == "__main__":
action_deploy = Action(entrypoint=inference, name="ezkl-linear-regression")
action_deploy.serve(name="ezkl-linear-regression")
71 changes: 71 additions & 0 deletions examples/ezkl/linear_regression/train_linear_regression.py
@@ -0,0 +1,71 @@
import json

import numpy as np
import torch
from hummingbird.ml import convert
from sklearn.linear_model import LinearRegression

from giza_actions.action import Action, action
from giza_actions.task import task


@task
def train():
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])

y = np.dot(X, np.array([1, 2])) + 3
reg = LinearRegression().fit(X, y)

return reg


@task
def convert_to_torch(linear_regression, sample):
return convert(linear_regression, "torch", sample).model


@task
def convert_to_onnx(model, sample):
# Input to the model
shape = sample.shape
x = torch.rand(1, *shape, requires_grad=True)

# Export the model
torch.onnx.export(
model,
x,
"network.onnx",
export_params=True,
opset_version=10,
do_constant_folding=True,
input_names=["input"],
output_names=["output"],
dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)


@task
def create_input_file(sample: np.ndarray):
with open("input.json", "w") as f:
f.write(
json.dumps(
{
"input_shapes": [sample.shape],
"input_data": [sample.tolist()],
}
)
)


@action(log_prints=True)
def model_to_onnx():
lr = train()
sample = np.array([7, 2])
model = convert_to_torch(lr, sample)
convert_to_onnx(model, sample)
create_input_file(sample)


if __name__ == "__main__":
action_deploy = Action(entrypoint=model_to_onnx, name="linear-regression-to-onnx")
action_deploy.serve(name="linear-regression-to-onnx")