From 53305b8a44519924b786b1da61c5aba6cab86323 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Sat, 15 Feb 2025 13:33:22 -0800
Subject: [PATCH 01/16] support to launch Aqua services without jupyterlab

---
 ads/aqua/extension/base_handler.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ads/aqua/extension/base_handler.py b/ads/aqua/extension/base_handler.py
index 19dda9ce5..80af4fc44 100644
--- a/ads/aqua/extension/base_handler.py
+++ b/ads/aqua/extension/base_handler.py
@@ -37,6 +37,10 @@ def __init__(
         except Exception:
             pass
 
+    def prepare(self, *args, **kwargs):
+        """The base class prepare is not required for Aqua"""
+        pass
+        
     @staticmethod
     def serialize(obj: Any):
         """Serialize the object.

From 8be1a973e17dcef69b1c24860fe4ab26b3519ddc Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Tue, 18 Feb 2025 13:36:17 -0800
Subject: [PATCH 02/16] Ability to start aqua api server

---
 ads/aqua/server/__init__.py               |    5 +
 ads/aqua/server/__main__.py               |   25 +
 ads/aqua/server/app.py                    |   48 +
 ads/aqua/server/aqua_spec.yml             | 1291 +++++++++++++++++++++
 docs/source/index.rst                     |    7 +
 docs/source/user_guide/aqua/apiserver.rst |   84 ++
 pyproject.toml                            |    8 +
 7 files changed, 1468 insertions(+)
 create mode 100644 ads/aqua/server/__init__.py
 create mode 100644 ads/aqua/server/__main__.py
 create mode 100644 ads/aqua/server/app.py
 create mode 100644 ads/aqua/server/aqua_spec.yml
 create mode 100644 docs/source/user_guide/aqua/apiserver.rst

diff --git a/ads/aqua/server/__init__.py b/ads/aqua/server/__init__.py
new file mode 100644
index 000000000..3d8af46df
--- /dev/null
+++ b/ads/aqua/server/__init__.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
diff --git a/ads/aqua/server/__main__.py b/ads/aqua/server/__main__.py
new file mode 100644
index 000000000..1f8126dac
--- /dev/null
+++ b/ads/aqua/server/__main__.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+from logging import getLogger
+
+from dotenv import load_dotenv
+
+from ads.aqua.server.app import start_server
+
+logger = getLogger(__name__)
+config_location = os.path.join(os.getcwd(), ".env")
+if os.path.exists(config_location):
+    logger.info(f"Loading environment variables from {config_location}")
+    load_dotenv(dotenv_path=config_location)
+    logger.info("Environment variables loaded successfully")
+else:
+    logger.info(
+        f"{config_location} not found. Conside using `.env` file to setup defalut environment variables"
+    )
+
+start_server()
diff --git a/ads/aqua/server/app.py b/ads/aqua/server/app.py
new file mode 100644
index 000000000..41e7a53e9
--- /dev/null
+++ b/ads/aqua/server/app.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import os
+from logging import getLogger
+
+import tornado.ioloop
+import tornado.web
+
+from ads.aqua.extension import __handlers__
+
+logger = getLogger(__name__)
+AQUA_PORT = "AQUA_PORT"
+AQUA_HOST = "AQUA_HOST"
+AQUA_PROCESS_COUNT = "AQUA_PROCESS_COUNT"
+AQUA_CORS_ENABLE = "AQUA_CORS_ENABLE"
+
+URL_PATTERN = r"/aqua/"
+
+
+def prepare(self):
+    self.set_header("Access-Control-Allow-Origin", "*")
+
+
+def make_app():
+    # Patch the prepare method to allow CORS request
+    if os.environ.get(AQUA_CORS_ENABLE, "0") == "1":
+        for _, handler in __handlers__:
+            handler.prepare = prepare
+    handlers = [(URL_PATTERN + url, handler) for url, handler in __handlers__]
+    # logger.debug(handlers)
+    return tornado.web.Application(handlers)
+
+
+def start_server():
+    app = make_app()
+    server = tornado.httpserver.HTTPServer(app)
+    port = int(os.environ.get(AQUA_PORT, 8080))
+    host = os.environ.get(AQUA_HOST, "0.0.0.0")
+    processes = int(os.environ.get(AQUA_PROCESS_COUNT, 0))
+    server.bind(port=port, address=host)
+    server.start(processes)
+    logger.info(f"Starting the server from directory: {os.getcwd()}")
+    logger.info(f"Aqua API server running on http://{host}:{port}")
+    tornado.ioloop.IOLoop.current().start()
diff --git a/ads/aqua/server/aqua_spec.yml b/ads/aqua/server/aqua_spec.yml
new file mode 100644
index 000000000..672d84507
--- /dev/null
+++ b/ads/aqua/server/aqua_spec.yml
@@ -0,0 +1,1291 @@
+openapi: '3.0.3'
+info:
+  title: AI Quick Actions
+  version: '1.0'
+servers:
+  - url: "http://localhost:8080/aqua"
+  - url: http://{host}:{port}/aqua
+paths:
+  /model:
+    get:
+      summary: "List Models"
+      description: "Returns array of AquaModel object. To fetch finetuned model set the compartment_id in the query param"
+      operationId: "listModels"
+      parameters:
+        - name: compartment_id
+          in: query
+          description: "compartment id where model exists. Set this for registered model or fine tuned model"
+          required: false
+          schema:
+            type: string
+        - name: model_type
+          in: query
+          description: "Type of the model"
+          required: false
+          schema:
+            type: string
+            enum:
+              - BASE
+              - FT
+            example: "FT"  # Only applicable if compartment_id is provided
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AquaModelList'
+        '404':
+          description: "Model not found"
+
+    post:
+      summary: "Register Model"
+      description: "Register model into data science service."
+      operationId: "registerModel"
+      requestBody:
+        description: "User object that needs to be added"
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/ImportModelDetails'
+            examples:
+              verifiedModelOrCachedmodel:
+                summary: "Verfied or Cached Model"
+                description: "The inference container and finetuning container is automatically set"
+                value:
+                  model: "meta-llama/Llama-3.2-1B-Instruct"
+                  os_path: "oci://<bucketname>@<namespace>/models"
+                  download_from_hf: true
+                  cleanup_model_cache: true
+                  ignore_patterns: ["original/*"]
+              registerFromObjectStorageLocation:
+                summary: "Register from object storage"
+                description: "If the model is already available on the object storage, set the os_path to the model artifact path. Set the `download_from_hf` to false"
+                value:
+                  model: "meta-llama/Llama-3.2-1B-Instruct"
+                  os_path: "oci://<bucketname>@<namespace>/models/meta-llama/Llama-3.2-1B-Instruct"
+                  download_from_hf: false
+                  cleanup_model_cache: true
+                  ignore_patterns: ["original/*"]
+              unverifiedModel:
+                summary: "Unverfied models"
+                description: "In case of unverified model, explicity provide the inference container and the finetuning container"
+                value:
+                  model: "meta-llama/Llama-3.3-70B-Instruct"
+                  os_path: "oci://<bucketname>@<namespace>/models"
+                  download_from_hf: true
+                  cleanup_model_cache: true
+                  ignore_patterns: ["original/*"]
+                  inference_container: "dsmc://odsc-vllm-serving:0.6.4.post1.1"
+                  finetuning_container: "dsmc://odsc-llm-fine-tuning:2.2.62.70"
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AquaModel'
+        '404':
+          description: "Model not found"
+  /model/{model_id}:
+     get:
+      summary: "Retrieve a model"
+      description: "Returns a AquaModel if model_id is base model. If it is fine tuned model it returns AquaFineTuneModel. To fetch finetuned model set the compartment_id in the query param"
+      operationId: "getModel"
+      parameters:
+        - name: model_id
+          in: path
+          description: "ID of the model to retrieve"
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: "Model object"
+          content:
+            application/json:
+              schema:
+                oneOf:
+                  - $ref: '#/components/schemas/AquaModel'
+                  - $ref: "#/components/schemas/AquaFineTuneModel"
+        '404':
+          description: "Model not found"
+  /finetuning:
+    post:
+      summary: "Create a fine-tuning job"
+      operationId: createFineTuningJob
+      requestBody:
+        description: "Input data for creating a fine-tuning job."
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateFineTuningDetails"
+            example:
+              "ft_source_id": "ocid1.datasciencemodel.oc1..<UNIQUEID>"
+              "ft_name": "My FineTuning Job"
+              "dataset_path": "oci://bucket@namespace/path/to/my-data.jsonl"
+              "report_path": "oci://bucket@namespace/path/to/"
+              "ft_parameters":
+                "epochs": 10
+                "learning_rate": 0.001
+              "shape_name": "VM.GPU.A10.2"
+              "replica": 1
+              "validation_set_size": 0.2
+              "ft_description": "API Testing."
+              "compartment_id": "ocid1.compartment.oc1..<UNIQUEID>"
+              "experiment_name": "API Testing"
+              "experiment_description": "Testing API"
+              "log_group_id": "ocid1.loggroup.oc1..<UNIQUEID>"
+              "log_id": "ocid1.log.oc1..<UNIQUEID>"
+      responses:
+        "200":
+          description: "Fine-tuning job created successfully."
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/AquaFineTuneModel"
+        "400":
+          description: "Invalid input data."
+          content:
+            application/json:
+              schema:
+                type: object
+  /finetuning/config/{model_id}:
+    get:
+      summary: "Get fine-tuning configuration for a base model"
+      description: "Fetches the shapes and default replica and batch size for different supported shapes."
+      operationId: getFinetuningConfig
+      parameters:
+        - name: model_id
+          in: path
+          description: "The base model id for which to retrieve the fine-tuning configuration."
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: "Fine-tuning configuration retrieved successfully."
+          content:
+            application/json:
+              schema:
+                type: object
+                additionalProperties: true
+        "400":
+          description: "Invalid request. Model id is required or the request is invalid."
+          content:
+            application/json:
+              schema:
+                type: object
+  /finetuning/{model_id}/params:
+    get:
+      summary: "Get default fine-tuning parameters for a model"
+      description: "Fetches the fine tuning parameter defaults set for fine tuning"
+      operationId: getFinetuningDefaultParams
+      parameters:
+        - name: model_id
+          in: path
+          description: "The base model id for which to get the default fine-tuning parameters."
+          required: true
+          schema:
+            type: string
+      responses:
+        "200":
+          description: "Default fine-tuning parameters retrieved successfully."
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/AquaFineTuneValidation"
+        "400":
+          description: "Invalid request or missing model id."
+          content:
+            application/json:
+              schema:
+                type: object
+    post:
+      summary: "Validate fine-tuning parameters for a model"
+      operationId: validateFineTuningParams
+      parameters:
+        - name: model_id
+          in: path
+          description: "The model id for which to validate fine-tuning parameters."
+          required: true
+          schema:
+            type: string
+      requestBody:
+        description: "JSON object containing the 'params' to be validated."
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                params:
+                  type: object
+                  description: "Fine-tuning parameters to validate."
+      responses:
+        "200":
+          description: "Fine-tuning parameters validated successfully."
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/AquaFineTuneValidation"
+        "400":
+          description: "Invalid input data."
+          content:
+            application/json:
+              schema:
+                type: object
+  /deployments:
+    get:
+      summary: "Lists all the AI Quick Actions deployment in the compartment"
+      description: "Lists all the deployments."
+      operationId: listDeployment
+      parameters:
+        - name: compartment_id
+          in: query
+          description: "If not provided, default compartment set at the API server will be used."
+          required: false
+          schema:
+            type: string
+            example: "ocid1.compartment.oc1..<UNIQUEID>"
+        - name: project_id
+          in: query
+          description: "If not provided, default project_id set at the API server will be used."
+          required: false
+          schema:
+            type: string
+            example: "ocid1.datascienceproject.oc1..<UNIQUEID>"
+      responses:
+        "200":
+          description: "Deployment details retrieved successfully."
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/AquaDeploymentList"
+        "400":
+          description: "Invalid deployment id provided."
+          content:
+            application/json:
+              schema:
+                type: object
+    post:
+      summary: "Create a new model deployment"
+      description: "Creates a new Aqua model deployment with the provided configuration."
+      operationId: createDeployment
+      requestBody:
+        required: true
+        description: "Deployment configuration parameters."
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/AquaCreateDeployment"
+      responses:
+        "200":
+          description: "Deployment created successfully."
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/AquaDeployment"
+        "400":
+          description: "Invalid input data."
+          content:
+            application/json:
+              schema:
+                type: object
+  /deployments/{deployment_id}:
+    get:
+      summary: "Retrieve a model deployment"
+      description: "Retrieves the details of a specific Aqua model deployment by its deployment_id."
+      operationId: getDeployment
+      parameters:
+        - name: deployment_id
+          in: path
+          description: "The unique identifier of the deployment."
+          required: true
+          schema:
+            type: string
+            example: "ocid1.datasciencemodeldeployment.oc1..<UNIQUEID>"
+      responses:
+        "200":
+          description: "Deployment details retrieved successfully."
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/AquaDeploymentDetail"
+        "400":
+          description: "Invalid deployment id provided."
+          content:
+            application/json:
+              schema:
+                type: object
+  /deployment/delete/{deployment_id}:
+    delete:
+      summary: "Delete a model deployment"
+      description: "Deletes the Aqua model deployment specified by deployment_id."
+      operationId: deleteDeployment
+      parameters:
+        - name: deployment_id
+          in: path
+          description: "The unique identifier of the deployment to be deleted."
+          required: true
+          schema:
+            type: string
+            example: "deployment-ocid-001"
+      responses:
+        "200":
+          description: "Deployment deleted successfully."
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  message:
+                    type: string
+                    example: "Deployment deleted successfully."
+        "400":
+          description: "Invalid deployment id provided."
+          content:
+            application/json:
+              schema:
+                type: object
+  /evaluation/config:
+      get:
+        summary: "Retrieve evaluation configuration"
+        description: "Returns the evaluation configuration for a given evaluation id. If the evaluation id is missing, a 400 error is returned."
+        operationId: getEvaluationConfig
+        responses:
+          "200":
+            description: "Evaluation configuration retrieved successfully."
+            content:
+              application/json:
+                schema:
+                  type: object
+                  additionalProperties: true
+          "400":
+            description: "Invalid evaluation id provided."
+            content:
+              application/json:
+                schema:
+                  type: object
+  /evaluation/{eval_id}:
+    get:
+      summary: "Get Evaluation Run details"
+      operationId: getEvaluation
+      parameters:
+        - name: eval_id
+          in: path
+          description: "ID of the evaluation model to retrieve"
+          required: true
+          schema:
+            type: string
+      responses:
+        '200':
+          description: "Model object"
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AquaEvaluationSummary'
+        '404':
+          description: "Model not found"
+
+  /evaluation:
+    get:
+      summary: "List Evaluation"
+      description: "Returns array of AquaEvaluationSummary object."
+      operationId: "listEvaluation"
+      parameters:
+        - name: compartment_id
+          in: query
+          description: "compartment id where evaluation run exists. If not set, will use the default set at the API server"
+          required: false
+          schema:
+            type: string
+      responses:
+        '200':
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/AquaEvaluationSummary'
+        '404':
+          description: "Evaluation Run not found"
+    post:
+      summary: "Create evaluation job"
+      description: "Creates a new evaluation job using the provided evaluation details."
+      operationId: createEvaluationJob
+      requestBody:
+        description: "Payload for creating a new evaluation job."
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: "#/components/schemas/CreateEvaluationDetails"
+      responses:
+        "200":
+          description: "Evaluation job created successfully."
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/AquaEvaluationSummary"
+        "400":
+          description: "Invalid input data."
+          content:
+            application/json:
+              schema:
+                type: object
+components:
+  schemas:
+    FineTuningShapeInfo:
+      type: object
+      properties:
+        instance_shape:
+          type: string
+          description: "Shape of the instance"
+        replica:
+          type: integer
+          description: "Replica count"
+      example:
+        instance_shape: "VM.Standard2.1"
+        replica: 1
+
+    AquaFineTuneValidation:
+      type: object
+      properties:
+        type:
+          type: string
+          default: "Automatic split"
+          description: "Type of validation used"
+        value:
+          type: string
+          description: "Validation value"
+      example:
+        type: "Automatic split"
+        value: "80:20"
+
+    ModelFormat:
+      type: string
+      description: "Model format enumeration (external type placeholder)"
+      example: "SAFETENSORS"
+
+    ModelValidationResult:
+      type: object
+      properties:
+        model_file:
+          type: string
+          description: "Path or identifier of the model file"
+        model_formats:
+          type: array
+          items:
+            $ref: '#/components/schemas/ModelFormat'
+          description: "List of supported model formats"
+        telemetry_model_name:
+          type: string
+          description: "Telemetry name of the model"
+        tags:
+          type: object
+          description: "Arbitrary tags"
+          additionalProperties: true
+      example:
+        model_file: "model.pkl"
+        model_formats: ["ONNX"]
+        telemetry_model_name: "MyModelTelemetry"
+        tags:
+          environment: "production"
+
+    AquaFineTuningMetric:
+      type: object
+      properties:
+        name:
+          type: string
+          description: "Metric name"
+        category:
+          type: string
+          description: "Metric category"
+        scores:
+          type: array
+          items:
+            type: number
+          description: "List of metric scores"
+      example:
+        name: "accuracy"
+        category: "performance"
+        scores: [0.95, 0.96]
+
+    AquaModelLicense:
+      type: object
+      properties:
+        id:
+          type: string
+          description: "License identifier"
+        license:
+          type: string
+          description: "License text or reference"
+      example:
+        id: "lic-123"
+        license: "Apache-2.0"
+
+    AquaModelSummary:
+      type: object
+      properties:
+        compartment_id:
+          type: string
+        icon:
+          type: string
+        id:
+          type: string
+        is_fine_tuned_model:
+          type: boolean
+        license:
+          type: string
+        name:
+          type: string
+        organization:
+          type: string
+        project_id:
+          type: string
+        tags:
+          type: object
+          additionalProperties: true
+        task:
+          type: string
+        time_created:
+          type: string
+        console_link:
+          type: string
+        search_text:
+          type: string
+        ready_to_deploy:
+          type: boolean
+          default: true
+        ready_to_finetune:
+          type: boolean
+          default: false
+        ready_to_import:
+          type: boolean
+          default: false
+        nvidia_gpu_supported:
+          type: boolean
+          default: false
+        arm_cpu_supported:
+          type: boolean
+          default: false
+        model_file:
+          type: string
+        model_formats:
+          type: array
+          items:
+            $ref: '#/components/schemas/ModelFormat'
+      example:
+        compartment_id: "comp-123"
+        id: "model-456"
+        is_fine_tuned_model: false
+        name: "Example Model"
+        ready_to_deploy: true
+        model_formats: ["ONNX"]
+
+    AquaModel:
+      allOf:
+        - $ref: '#/components/schemas/AquaModelSummary'
+        - type: object
+          properties:
+            model_card:
+              type: string
+            inference_container:
+              type: string
+            inference_container_uri:
+              type: string
+            finetuning_container:
+              type: string
+            evaluation_container:
+              type: string
+            artifact_location:
+              type: string
+          example:
+            model_card: "http://example.com/model-card"
+            inference_container: "inference-container:latest"
+            inference_container_uri: "oci://inference_container_uri"
+            finetuning_container: "finetuning-container:latest"
+            evaluation_container: "evaluation-container:latest"
+            artifact_location: "/path/to/artifact"
+    AquaModelList:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: "#/components/schemas/AquaModel"
+    HFModelContainerInfo:
+      type: object
+      properties:
+        inference_container:
+          type: string
+        finetuning_container:
+          type: string
+      example:
+        inference_container: "inference-container:latest"
+        finetuning_container: "finetuning-container:latest"
+
+    HFModelSummary:
+      type: object
+      properties:
+        model_info:
+          type: object
+          description: "Hugging Face ModelInfo object (external type placeholder)"
+          example:
+            modelId: "hf-model-001"
+        aqua_model_info:
+          $ref: '#/components/schemas/AquaModelSummary'
+      example:
+        model_info:
+          modelId: "hf-model-001"
+        aqua_model_info:
+          id: "model-456"
+          name: "Example Model"
+
+    AquaResourceIdentifier:
+      type: object
+      description: "A resource identifier containing id, name, and url."
+      properties:
+        id:
+          type: string
+        name:
+          type: string
+        url:
+          type: string
+      example:
+        id: "res-789"
+        name: "Resource Name"
+        url: "http://example.com/resource"
+
+    AquaEvalFTCommon:
+      type: object
+      properties:
+        lifecycle_state:
+          type: string
+        lifecycle_details:
+          type: string
+        job:
+          $ref: '#/components/schemas/AquaResourceIdentifier'
+        source:
+          $ref: '#/components/schemas/AquaResourceIdentifier'
+        experiment:
+          $ref: '#/components/schemas/AquaResourceIdentifier'
+        log_group:
+          $ref: '#/components/schemas/AquaResourceIdentifier'
+        log:
+          $ref: '#/components/schemas/AquaResourceIdentifier'
+      example:
+        lifecycle_state: "SUCCEEDED"
+        lifecycle_details: "Completed successfully"
+        job:
+          id: "job-001"
+          name: "Job One"
+          url: "http://example.com/job/001"
+        source:
+          id: "src-002"
+          name: "Source Two"
+          url: "http://example.com/src/002"
+        experiment:
+          id: "exp-003"
+          name: "Experiment Three"
+          url: "http://example.com/exp/003"
+        log_group:
+          id: "lg-004"
+          name: "Log Group Four"
+          url: "http://example.com/lg/004"
+        log:
+          id: "log-005"
+          name: "Log Five"
+          url: "http://example.com/log/005"
+
+    AquaFineTuneModel:
+      allOf:
+        - $ref: '#/components/schemas/AquaModel'
+        - $ref: '#/components/schemas/AquaEvalFTCommon'
+        - type: object
+          properties:
+            dataset:
+              type: string
+            validation:
+              $ref: '#/components/schemas/AquaFineTuneValidation'
+            shape_info:
+              $ref: '#/components/schemas/FineTuningShapeInfo'
+            metrics:
+              type: array
+              items:
+                $ref: '#/components/schemas/AquaFineTuningMetric'
+          example:
+            dataset: "training_data.csv"
+            validation:
+              type: "Automatic split"
+              value: "20%"
+            shape_info:
+              instance_shape: "VM.Standard2.1"
+              replica: 2
+            metrics:
+              - name: "accuracy"
+                category: "performance"
+                scores: [0.95]
+    AquaFineTuneModelList:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: "#/components/schemas/AquaFineTuneModel"
+
+    ImportModelDetails:
+      type: object
+      properties:
+        model:
+          type: string
+          description: "Name of the model"
+        os_path:
+          type: string
+          description: "Path to the model in the objet storage. This is of the format oci://namesapace@bucket_name/path/to/model. "
+        download_from_hf:
+          type: boolean
+          default: true
+          description: "Will download the model from the huggingface and uploaded to the provided object storage path"
+        local_dir:
+          type: string
+          description: "Local directory where the model will be downloaded to from huggingface"
+        cleanup_model_cache:
+          type: boolean
+          default: false
+          description: "Cleanup local path"
+        inference_container:
+          type: string
+          description: "The inference container to use for model deployment"
+        finetuning_container:
+          type: string
+          description: "The Fine tuning container to use for model deployment"
+        compartment_id:
+          type: string
+        project_id:
+          type: string
+        model_file:
+          type: string
+          description: "Required if the model is of type GGUF"
+        inference_container_uri:
+          type: string
+        allow_patterns:
+          description: "Artifact file patterns that should be included while downloading from HuggingFace"
+          type: array
+          items:
+            type: string
+        ignore_patterns:
+          description: "Artifact file patterns that should be ignored while downloading from HuggingFace"
+          type: array
+          items:
+            type: string
+        freeform_tags:
+          description: "Freeform tags to be added to the model created"
+          type: object
+          additionalProperties: true
+        defined_tags:
+          type: object
+          additionalProperties: true
+        ignore_model_artifact_check:
+          description: "Flag to ignore checking config.json or safetensor file presence in the huggingface repo"
+          type: boolean
+          default: false
+      required:
+        - model
+        - os_path
+
+    AquaFineTuningParams:
+      type: object
+      description: "Class for maintaining Aqua fine-tuning model parameters"
+      properties:
+        epochs:
+          type: integer
+          nullable: true
+          example: 10
+        learning_rate:
+          type: number
+          format: float
+          nullable: true
+          example: 0.001
+        sample_packing:
+          oneOf:
+            - type: boolean
+            - type: string
+              enum: ["auto"]
+          default: "auto"
+          example: "auto"
+        batch_size:
+          type: integer
+          nullable: true
+          description: "Internally used as micro_batch_size"
+          example: 32
+        sequence_len:
+          type: integer
+          nullable: true
+          example: 128
+        pad_to_sequence_len:
+          type: boolean
+          nullable: true
+          example: true
+        lora_r:
+          type: integer
+          nullable: true
+          example: 4
+        lora_alpha:
+          type: integer
+          nullable: true
+          example: 16
+        lora_dropout:
+          type: number
+          format: float
+          nullable: true
+          example: 0.1
+        lora_target_linear:
+          type: boolean
+          nullable: true
+          example: false
+        lora_target_modules:
+          type: array
+          items:
+            type: string
+          nullable: true
+          example: ["module1", "module2"]
+        early_stopping_patience:
+          type: integer
+          nullable: true
+          example: 3
+        early_stopping_threshold:
+          type: number
+          format: float
+          nullable: true
+          example: 0.01
+      additionalProperties: true
+
+    AquaFineTuningSummary:
+      type: object
+      description: "Represents a summary of Aqua FineTuning job."
+      properties:
+        id:
+          type: string
+          example: "ft-job-1234"
+        name:
+          type: string
+          example: "Fine Tuning Job Name"
+        console_url:
+          type: string
+          example: "https://console.example.com/jobs/ft-job-1234"
+        lifecycle_state:
+          type: string
+          example: "SUCCEEDED"
+        lifecycle_details:
+          type: string
+          example: "Completed successfully"
+        time_created:
+          type: string
+          example: "2025-01-01T12:00:00Z"
+        tags:
+          type: object
+          additionalProperties: true
+          example: { "env": "prod" }
+        experiment:
+          $ref: "#/components/schemas/AquaResourceIdentifier"
+        source:
+          $ref: "#/components/schemas/AquaResourceIdentifier"
+        job:
+          $ref: "#/components/schemas/AquaResourceIdentifier"
+        parameters:
+          $ref: "#/components/schemas/AquaFineTuningParams"
+    CreateFineTuningDetails:
+      type: object
+      description: "Class to create aqua model fine-tuning instance."
+      properties:
+        ft_source_id:
+          type: string
+          example: "ocid.datasciencemodel.......<UNIQUEID>"
+        ft_name:
+          type: string
+          example: "My FineTuning Job"
+        dataset_path:
+          type: string
+          example: "oci://bucket@namespace/path/to/dataset.jsonl"
+        report_path:
+          type: string
+          example: "oci://bucket@namespace/path/to/report/"
+        ft_parameters:
+          type: object
+          description: "Parameters for fine-tuning job"
+          example:
+            epochs: 10
+            learning_rate: 0.001
+        shape_name:
+          type: string
+          example: "VM.GPU.A10.2"
+        replica:
+          type: integer
+          example: 2
+        validation_set_size:
+          type: number
+          format: float
+          example: 0.2
+        ft_description:
+          type: string
+          nullable: true
+          example: "A description for the fine-tuning job."
+        compartment_id:
+          type: string
+          nullable: true
+          example: "ocid.compartment.......<UNIQUEID>"
+        project_id:
+          type: string
+          nullable: true
+          example: "ocid.datascienceproject.......<UNIQUEID>"
+        experiment_id:
+          type: string
+          nullable: true
+          example: "experiment-9012"
+        experiment_name:
+          type: string
+          nullable: true
+          example: "Experiment Name"
+        experiment_description:
+          type: string
+          nullable: true
+          example: "Description of the experiment"
+        block_storage_size:
+          type: integer
+          nullable: true
+          example: 256
+        subnet_id:
+          type: string
+          nullable: true
+          example: "subnet-3456"
+        log_group_id:
+          type: string
+          nullable: true
+          example: "loggroup-7890"
+        log_id:
+          type: string
+          nullable: true
+          example: "log-1234"
+        watch_logs:
+          type: boolean
+          nullable: true
+          default: false
+          example: false
+        force_overwrite:
+          type: boolean
+          nullable: true
+          default: false
+          example: false
+        freeform_tags:
+          type: object
+          additionalProperties: true
+          nullable: true
+          example: { "key": "value" }
+        defined_tags:
+          type: object
+          additionalProperties: true
+          nullable: true
+          example: { "namespace": { "key": "value" } }
+      required:
+        - ft_source_id
+        - ft_name
+        - dataset_path
+        - report_path
+        - ft_parameters
+        - shape_name
+        - replica
+        - validation_set_size
+
+    ModelParams:
+      type: object
+      description: "Parameters for model deployment."
+      properties:
+        max_tokens:
+          type: integer
+          nullable: true
+          example: 256
+        temperature:
+          type: number
+          format: float
+          nullable: true
+          example: 0.7
+        top_k:
+          type: number
+          format: float
+          nullable: true
+          example: 50
+        top_p:
+          type: number
+          format: float
+          nullable: true
+          example: 0.9
+        model:
+          type: string
+          nullable: true
+          example: "gpt-3.5-turbo"
+      additionalProperties: false
+
+    ShapeInfo:
+      type: object
+      description: "Information about the shape configuration for the deployment."
+      properties:
+        instance_shape:
+          type: string
+          nullable: true
+          example: "VM.Standard2.1"
+        instance_count:
+          type: integer
+          nullable: true
+          example: 2
+        ocpus:
+          type: number
+          format: float
+          nullable: true
+          example: 1.0
+        memory_in_gbs:
+          type: number
+          format: float
+          nullable: true
+          example: 15.0
+      additionalProperties: false
+
+    AquaDeployment:
+      type: object
+      description: "Represents an Aqua Model Deployment."
+      properties:
+        id:
+          type: string
+          example: "deployment-ocid-001"
+        display_name:
+          type: string
+          example: "My Model Deployment"
+        aqua_service_model:
+          type: boolean
+          example: true
+        model_id:
+          type: string
+          example: "model-ocid-1234"
+        aqua_model_name:
+          type: string
+          example: "Aqua Model Name"
+        state:
+          type: string
+          example: "ACTIVE"
+        description:
+          type: string
+          nullable: true
+          example: "Deployment description"
+        created_on:
+          type: string
+          format: date-time
+          example: "2025-01-01T12:00:00Z"
+        created_by:
+          type: string
+          example: "user@example.com"
+        endpoint:
+          type: string
+          example: "https://endpoint.example.com"
+        private_endpoint_id:
+          type: string
+          nullable: true
+          example: "private-endpoint-ocid-001"
+        console_link:
+          type: string
+          example: "https://console.example.com/model-deployments/deployment-ocid-001"
+        lifecycle_details:
+          type: string
+          nullable: true
+          example: "All systems go"
+        shape_info:
+          $ref: "#/components/schemas/ShapeInfo"
+        tags:
+          type: object
+          additionalProperties: true
+          example: { "env": "prod", "project": "example" }
+        environment_variables:
+          type: object
+          additionalProperties: true
+          example: { "VAR1": "value1", "VAR2": "value2" }
+        cmd:
+          type: array
+          items:
+            type: string
+          example: ["python", "serve.py"]
+      required:
+        - id
+        - display_name
+        - aqua_service_model
+        - model_id
+        - state
+        - created_on
+        - created_by
+        - endpoint
+
+    AquaDeploymentList:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            $ref: "#/components/schemas/AquaDeployment"
+
+    AquaCreateDeployment:
+      type: object
+      properties:
+        compartment_id:
+          type: string
+          example: "ocid1.compartment.oc1..<UNIQUEID>"
+        project_id:
+          type: string
+          example: "ocid1.datascienceproject.oc1..<UNIQUEID>"
+        log_group_id:
+          type: string
+          example: "ocid1.loggroup.oc1..<UNIQUEID>"
+        access_log_id:
+          type: string
+          example: "ocid1.log.oc1..<UNIQUEID>"
+        predict_log_id:
+          type: string
+          example: "ocid1.log.oc1..<UNIQUEID>"
+        description:
+          type: string
+          example: "llama model deployment"
+        instance_count:
+          type: integer
+          example: 1
+        bandwidth_mbps:
+          type: integer
+          example: 10
+        env_var:
+          type: object
+          example: {"key": "value"}
+        container_family:
+          type: string
+          example: "odsc-vllm-serving"
+        ocpus:
+          type: number
+          description: "applicable when CPU shape is chose. Not applicable for GPU shape"
+          example: 40
+        memory_in_gbs:
+          type: number
+          description: "applicable when CPU shape is chose. Not applicable for GPU shape"
+          example: 256
+        model_file:
+          type: string
+          example: "my_model.gguf"
+          description: "Applicable when there are multiple gguf files."
+        private_endpoint_id:
+          type: string
+          example: "ocid1.datasciencemodeldeploymentprivatendpoint.oc1..<UNIQUEID>"
+        cmd_var:
+          type: string
+          example: "ocid1.compartment.oc1..<UNIQUEID>"
+
+    AquaDeploymentDetail:
+      allOf:
+        - $ref: "#/components/schemas/AquaDeployment"
+        - type: object
+          description: "Represents details of an Aqua deployment with logging information."
+          properties:
+            log_group:
+              $ref: "#/components/schemas/AquaResourceIdentifier"
+            log:
+              $ref: "#/components/schemas/AquaResourceIdentifier"
+    AquaEvaluationMetric:
+      type: object
+      description: "Represents a single evaluation metric for a model."
+      properties:
+        name:
+          type: string
+          description: "Name of the evaluation metric."
+          example: "accuracy"
+        value:
+          type: number
+          format: float
+          description: "Numeric value of the evaluation metric."
+          example: 0.95
+        threshold:
+          type: number
+          format: float
+          description: "Threshold value for evaluation comparison."
+          example: 0.90
+      required:
+        - name
+        - value
+
+    AquaEvaluationSummary:
+      type: object
+      description: "Represents a summary of model evaluation results."
+      properties:
+        id:
+          type: string
+          description: "Unique identifier of the evaluation job."
+          example: "eval-ocid-001"
+        model_id:
+          type: string
+          description: "Identifier of the evaluated model."
+          example: "model-ocid-1234"
+        evaluator:
+          type: string
+          description: "Name or identifier of the evaluator."
+          example: "AutoEvaluator"
+        evaluation_metrics:
+          type: array
+          description: "List of evaluation metrics."
+          items:
+            $ref: "#/components/schemas/AquaEvaluationMetric"
+        status:
+          type: string
+          description: "Evaluation status (e.g. SUCCEEDED, FAILED)."
+          example: "SUCCEEDED"
+        created_on:
+          type: string
+          format: date-time
+          description: "Timestamp when the evaluation was created."
+          example: "2025-01-15T10:30:00Z"
+        comments:
+          type: string
+          description: "Optional comments regarding the evaluation."
+          example: "Evaluation completed successfully."
+      required:
+        - id
+        - model_id
+        - evaluator
+        - evaluation_metrics
+        - status
+        - created_on
+
+    CreateEvaluationDetails:
+      type: object
+      description: "Payload for creating a new model evaluation job."
+      properties:
+        model_id:
+          type: string
+          description: "Identifier of the model to be evaluated."
+          example: "model-ocid-1234"
+        dataset_path:
+          type: string
+          description: "Path to the dataset used for evaluation. This can be a local or object storage path."
+          example: "oci://bucket/path/to/evaluation_data.csv"
+        evaluator:
+          type: string
+          description: "Evaluator name or identifier."
+          example: "AutoEvaluator"
+        evaluation_params:
+          type: object
+          description: "Additional evaluation parameters."
+          additionalProperties: true
+          example:
+            batch_size: 32
+            metric: "accuracy"
+        comments:
+          type: string
+          description: "Optional comments or description for the evaluation job."
+          example: "Evaluating model performance on test dataset."
+      required:
+        - model_id
+        - dataset_path
+        - evaluator
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7d3aaec12..0cd9cdeda 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -35,6 +35,13 @@ Oracle Accelerated Data Science (ADS)
    user_guide/cli/opctl/configure
    user_guide/cli/opctl/local-development-setup
 
+.. toctree::
+   :hidden:
+   :maxdepth: 5
+   :caption: AI Quick Actions API Server
+
+   user_guide/aqua/apiserver
+
 .. toctree::
    :hidden:
    :maxdepth: 5
diff --git a/docs/source/user_guide/aqua/apiserver.rst b/docs/source/user_guide/aqua/apiserver.rst
new file mode 100644
index 000000000..ebb34813e
--- /dev/null
+++ b/docs/source/user_guide/aqua/apiserver.rst
@@ -0,0 +1,84 @@
+===========================
+AI Quick Actions API Server
+===========================
+
+AI Quick Actions is accessible through the Python SDK APIs and CLI. If the CLI or Python SDK doesn't work for you, you can host the Aqua API server and integrate with it. We also provide you with the OpenAPI 3.0 specification so that you can autogenerate client bindings for the language of your choice.
+
+**Prerequisite**
+
+1. Install oracle-ads - ``pip install "oracle-ads[aquaapi]"``
+2. Set up AI Quick Actions `policies <https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/policies/README.md>`_
+
+API Specification
+=================
+
+Access API specification from `aqua_spec.yaml <https://github.com/oracle/accelerated-data-science/blob/main/ads/aqua/server/aqua_spec.yaml>`_ 
+
+
+Configuration
+=============
+
+The preferred way to set up the configuration is to create a file named ``.env`` and set up your preferences. Here is a sample content of ``.env``:
+
+.. code-block:: shell
+
+    OCI_IAM_TYPE=security_token
+    OCI_CONFIG_PROFILE=aqua
+    HF_TOKEN=<your token>
+
+Authentication
+--------------
+
+AI Quick Actions will need to connect to OCI services to accomplish different functionalities. You can use api_key, security_token, resource_principal, instance_principal, etc. 
+
+You can set up the preferred authentication mechanism through the following environment variables - 
+
+.. code-block:: shell
+
+    # set this to api_key/resource_principal/instance_principal/security_token
+    OCI_IAM_TYPE=security_token
+    # Optional Profile name
+    OCI_CONFIG_PROFILE=<profile-name>
+
+Set up Hugging Face token, if you will be registering the model with the download from Hugging Face option - 
+
+.. code-block:: shell
+
+    HF_TOKEN=<your token>
+
+
+Default Settings
+----------------
+
+You can set up the following default values to avoid having to input them during API calls - 
+
+.. code-block:: shell
+
+    NB_SESSION_COMPARTMENT_OCID=ocid1.compartment...<UNIQUEID>
+    PROJECT_OCID=ocid1.datascienceproject.oc1...<UNIQUEID>
+
+    # Optional - If you are on a dev tenancy, you may be restricted from using default network settings. In that case, set up AQUA_JOB_SUBJECT_ID to the preferred subnet ID. This is required only while launching FineTuning jobs
+    AQUA_JOB_SUBNET_ID=ocid1.subnet.oc1...<UNIQUEID>
+
+Webserver Settings
+------------------
+
+.. code-block:: shell
+
+    # Default value is 8080
+    AQUA_PORT=8080
+    # Default value is 0.0.0.0
+    AQUA_HOST="0.0.0.0"
+    # Default value is 0. Set the number of processes you wish to start to handle requests
+    AQUA_PROCESS_COUNT=1 
+    # If you face CORS related issues while accessing the API, set - 
+    AQUA_CORS_ENABLE=1
+
+Starting the server
+===================
+
+Once you have the ``.env`` file ready, you can start the server from the same folder as ``.env`` by running - 
+
+.. code-block:: shell
+
+    python -m ads.aqua.server
diff --git a/pyproject.toml b/pyproject.toml
index 7cbe8e1ce..ae4c60bb7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -207,6 +207,14 @@ pii = [
 ]
 llm = ["langchain>=0.2", "langchain-community", "langchain_openai", "pydantic>=2,<3", "evaluate>=0.4.0"]
 aqua = ["jupyter_server"]
+aquaapi = [
+  "tornado",
+  "notebook~=6.5",
+  "fire",
+  "cachetools",
+  "huggingface_hub",
+  "python-dotenv"
+]
 
 # To reduce backtracking (decrese deps install time) during test/dev env setup reducing number of versions pip is
 # trying to use. Ref - https://pip.pypa.io/en/stable/topics/dependency-resolution/#possible-ways-to-reduce-backtracking.

From 81809502bba67a699d2075f74a194d33d01352b1 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Tue, 18 Feb 2025 13:37:12 -0800
Subject: [PATCH 03/16] Ability to start local server

---
 ads/aqua/server/__init__.py | 1 -
 ads/aqua/server/__main__.py | 1 -
 ads/aqua/server/app.py      | 1 -
 3 files changed, 3 deletions(-)

diff --git a/ads/aqua/server/__init__.py b/ads/aqua/server/__init__.py
index 3d8af46df..0fb4f1549 100644
--- a/ads/aqua/server/__init__.py
+++ b/ads/aqua/server/__init__.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
 # Copyright (c) 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
diff --git a/ads/aqua/server/__main__.py b/ads/aqua/server/__main__.py
index 1f8126dac..afccd11ee 100644
--- a/ads/aqua/server/__main__.py
+++ b/ads/aqua/server/__main__.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
 # Copyright (c) 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
diff --git a/ads/aqua/server/app.py b/ads/aqua/server/app.py
index 41e7a53e9..8d2a0eab6 100644
--- a/ads/aqua/server/app.py
+++ b/ads/aqua/server/app.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*--
 
 # Copyright (c) 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

From 2d4e990a798f98a3727573f67969c02403691f0f Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Thu, 20 Feb 2025 16:20:08 -0800
Subject: [PATCH 04/16] fix typo

---
 ads/aqua/server/__main__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ads/aqua/server/__main__.py b/ads/aqua/server/__main__.py
index afccd11ee..10a709e50 100644
--- a/ads/aqua/server/__main__.py
+++ b/ads/aqua/server/__main__.py
@@ -17,8 +17,8 @@
     load_dotenv(dotenv_path=config_location)
     logger.info("Environment variables loaded successfully")
 else:
-    logger.info(
-        f"{config_location} not found. Conside using `.env` file to setup defalut environment variables"
+    logger.warning(
+        f"{config_location} not found. Consider using `.env` file to setup default environment variables"
     )
 
 start_server()

From de823122e32e0fe1cce5776ec0a8e58b76cd5720 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Thu, 20 Feb 2025 16:31:12 -0800
Subject: [PATCH 05/16] Update pyproject.toml

---
 pyproject.toml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ae4c60bb7..75ef1183d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -206,8 +206,7 @@ pii = [
   "report-creator>=1.0.32",
 ]
 llm = ["langchain>=0.2", "langchain-community", "langchain_openai", "pydantic>=2,<3", "evaluate>=0.4.0"]
-aqua = ["jupyter_server"]
-aquaapi = [
+aqua = [
   "tornado",
   "notebook~=6.5",
   "fire",

From f6c8c65d5f4e3845faf5a13f0e885c0c2af11bef Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Thu, 20 Feb 2025 16:18:02 -0800
Subject: [PATCH 06/16] fix typo

---
 ads/aqua/extension/aqua_ws_msg_handler.py   |   4 +
 ads/aqua/extension/model_handler.py         | 110 ++++++++++++++++----
 ads/aqua/extension/models/ws_models.py      |  14 +++
 ads/aqua/extension/models_ws_msg_handler.py |  43 +++++++-
 ads/aqua/extension/ui_websocket_handler.py  |  33 +++---
 ads/aqua/model/entities.py                  |   3 +-
 ads/aqua/model/model.py                     |  34 +++++-
 ads/aqua/model/utils.py                     |  24 +++++
 ads/aqua/server/app.py                      |  15 ++-
 9 files changed, 240 insertions(+), 40 deletions(-)
 create mode 100644 ads/aqua/model/utils.py

diff --git a/ads/aqua/extension/aqua_ws_msg_handler.py b/ads/aqua/extension/aqua_ws_msg_handler.py
index 1fcbbf946..373fdd154 100644
--- a/ads/aqua/extension/aqua_ws_msg_handler.py
+++ b/ads/aqua/extension/aqua_ws_msg_handler.py
@@ -10,6 +10,7 @@
 from typing import List
 
 from tornado.web import HTTPError
+from tornado.websocket import WebSocketHandler
 
 from ads.aqua import logger
 from ads.aqua.common.decorator import handle_exceptions
@@ -53,6 +54,9 @@ def process(self) -> BaseResponse:
         """
         pass
 
+    def set_ws_connection(self, con: WebSocketHandler):
+        self.ws_connection = con
+
     def write_error(self, status_code, **kwargs):
         """AquaWSMSGhandler errors are JSON, not human pages."""
         reason = kwargs.get("reason")
diff --git a/ads/aqua/extension/model_handler.py b/ads/aqua/extension/model_handler.py
index a5b89f8d1..36ba08705 100644
--- a/ads/aqua/extension/model_handler.py
+++ b/ads/aqua/extension/model_handler.py
@@ -2,10 +2,16 @@
 # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
-from typing import Optional
+import threading
+from functools import partial
+from logging import getLogger
+from typing import Dict, List, Optional
 from urllib.parse import urlparse
+from uuid import uuid4
 
+from tornado.ioloop import IOLoop
 from tornado.web import HTTPError
+from tornado.websocket import WebSocketHandler
 
 from ads.aqua.common.decorator import handle_exceptions
 from ads.aqua.common.enums import (
@@ -19,10 +25,16 @@
 )
 from ads.aqua.extension.base_handler import AquaAPIhandler
 from ads.aqua.extension.errors import Errors
+from ads.aqua.extension.models_ws_msg_handler import (
+    REGISTRATION_STATUS,
+    AquaModelWSMsgHandler,
+)
 from ads.aqua.model import AquaModelApp
-from ads.aqua.model.entities import AquaModelSummary, HFModelSummary
+from ads.aqua.model.entities import AquaModel, AquaModelSummary, HFModelSummary
 from ads.aqua.ui import ModelFormat
 
+logger = getLogger(__name__)
+
 
 class AquaModelHandler(AquaAPIhandler):
     """Handler for Aqua Model REST APIs."""
@@ -108,6 +120,7 @@ def post(self, *args, **kwargs):  # noqa: ARG002
         HTTPError
             Raises HTTPError if inputs are missing or are invalid
         """
+        job_id = str(uuid4())
         try:
             input_data = self.get_json_body()
         except Exception as ex:
@@ -145,27 +158,80 @@ def post(self, *args, **kwargs):  # noqa: ARG002
             str(input_data.get("ignore_model_artifact_check", "false")).lower()
             == "true"
         )
-
-        return self.finish(
-            AquaModelApp().register(
-                model=model,
-                os_path=os_path,
-                download_from_hf=download_from_hf,
-                local_dir=local_dir,
-                cleanup_model_cache=cleanup_model_cache,
-                inference_container=inference_container,
-                finetuning_container=finetuning_container,
-                compartment_id=compartment_id,
-                project_id=project_id,
-                model_file=model_file,
-                inference_container_uri=inference_container_uri,
-                allow_patterns=allow_patterns,
-                ignore_patterns=ignore_patterns,
-                freeform_tags=freeform_tags,
-                defined_tags=defined_tags,
-                ignore_model_artifact_check=ignore_model_artifact_check,
+        async_mode = input_data.get("async_mode", False)
+
+        def model_register_progress_callback(register_id: str, status: Dict[str, str]):
+            """Callback method to track the model register progress"""
+            logger.info(f"Progress for {register_id}: {status}")
+            subscribers: List[WebSocketHandler] = (
+                AquaModelWSMsgHandler.status_subscriber.get(REGISTRATION_STATUS, {})
+                .get(register_id, {})
+                .get("subscriber", [])
             )
-        )
+            for subscriber in subscribers:
+                if (
+                    subscriber
+                    and subscriber.ws_connection
+                    and subscriber.ws_connection.stream.socket
+                ):
+                    try:
+                        subscriber.write_message(status)
+                    except Exception as e:
+                        print(e)
+                        IOLoop.current().add_callback(
+                            lambda: subscriber.write_message(status)
+                        )
+            if len(subscribers) == 0:
+                AquaModelWSMsgHandler.register_status[register_id] = status
+
+        def register_model(callback=None) -> AquaModel:
+            """Wrapper method to help initialize callback in case of async mode"""
+            try:
+                registered_model = AquaModelApp().register(
+                    model=model,
+                    os_path=os_path,
+                    download_from_hf=download_from_hf,
+                    local_dir=local_dir,
+                    cleanup_model_cache=cleanup_model_cache,
+                    inference_container=inference_container,
+                    finetuning_container=finetuning_container,
+                    compartment_id=compartment_id,
+                    project_id=project_id,
+                    model_file=model_file,
+                    inference_container_uri=inference_container_uri,
+                    allow_patterns=allow_patterns,
+                    ignore_patterns=ignore_patterns,
+                    freeform_tags=freeform_tags,
+                    defined_tags=defined_tags,
+                    ignore_model_artifact_check=ignore_model_artifact_check,
+                    callback=callback,
+                )
+            except Exception as e:
+                if async_mode:
+                    model_register_progress_callback(
+                        register_id=job_id,
+                        status={"state": "FAILED", "message": str(e)},
+                    )
+                    raise
+                else:
+                    raise
+            return registered_model
+
+        if async_mode:
+            t = threading.Thread(
+                target=register_model,
+                args=(partial(model_register_progress_callback, register_id=job_id),),
+                daemon=True,
+            )
+            t.start()
+            output = {
+                "state": "ACCEPTED",
+                "job_id": job_id,
+                "progress_url": f"ws://host:port/aqua/ws/{job_id}",
+            }
+        else:
+            output = register_model()
+        return self.finish(output)
 
     @handle_exceptions
     def put(self, id):
diff --git a/ads/aqua/extension/models/ws_models.py b/ads/aqua/extension/models/ws_models.py
index 38432e22b..c5ec06c6c 100644
--- a/ads/aqua/extension/models/ws_models.py
+++ b/ads/aqua/extension/models/ws_models.py
@@ -23,6 +23,7 @@ class RequestResponseType(ExtendedEnum):
     AdsVersion = "AdsVersion"
     CompatibilityCheck = "CompatibilityCheck"
     Error = "Error"
+    RegisterModelStatus = "RegisterModelStatus"
 
 
 @dataclass
@@ -141,3 +142,16 @@ class AquaWsError(DataClassSerializable):
 class ErrorResponse(BaseResponse):
     data: AquaWsError
     kind = RequestResponseType.Error
+
+
+@dataclass
+class RequestStatus(DataClassSerializable):
+    status: str
+    message: str
+
+
+@dataclass
+class ModelRegisterRequest(DataClassSerializable):
+    status: str
+    job_id: str
+    message: str = ""
diff --git a/ads/aqua/extension/models_ws_msg_handler.py b/ads/aqua/extension/models_ws_msg_handler.py
index 8df4a0232..d2ba0a8be 100644
--- a/ads/aqua/extension/models_ws_msg_handler.py
+++ b/ads/aqua/extension/models_ws_msg_handler.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright (c) 2024 Oracle and/or its affiliates.
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import json
@@ -11,18 +11,28 @@
 from ads.aqua.extension.models.ws_models import (
     ListModelsResponse,
     ModelDetailsResponse,
+    ModelRegisterRequest,
     RequestResponseType,
 )
 from ads.aqua.model import AquaModelApp
 
+REGISTRATION_STATUS = "registration_status"
+
 
 class AquaModelWSMsgHandler(AquaWSMsgHandler):
+    status_subscriber = {}
+    register_status = {}  # Not threadsafe
+
     def __init__(self, message: Union[str, bytes]):
         super().__init__(message)
 
     @staticmethod
     def get_message_types() -> List[RequestResponseType]:
-        return [RequestResponseType.ListModels, RequestResponseType.ModelDetails]
+        return [
+            RequestResponseType.ListModels,
+            RequestResponseType.ModelDetails,
+            RequestResponseType.RegisterModelStatus,
+        ]
 
     @handle_exceptions
     def process(self) -> Union[ListModelsResponse, ModelDetailsResponse]:
@@ -47,3 +57,32 @@ def process(self) -> Union[ListModelsResponse, ModelDetailsResponse]:
                 kind=RequestResponseType.ModelDetails,
                 data=response,
             )
+        elif request.get("kind") == "RegisterModelStatus":
+            job_id = request.get("job_id")
+            if REGISTRATION_STATUS not in AquaModelWSMsgHandler.status_subscriber:
+                AquaModelWSMsgHandler.status_subscriber = {
+                    REGISTRATION_STATUS: {job_id: {"subscriber": []}}
+                }
+            if REGISTRATION_STATUS in AquaModelWSMsgHandler.status_subscriber:
+                if (
+                    job_id
+                    in AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS]
+                ):
+                    AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
+                        job_id
+                    ]["subscriber"].append(self.ws_connection)
+                else:
+                    AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
+                        job_id
+                    ] = {"subscriber": [self.ws_connection]}
+            print(AquaModelWSMsgHandler.register_status)
+            if "state" in AquaModelWSMsgHandler.register_status.get(job_id, {}):
+                return ModelRegisterRequest(
+                    status=AquaModelWSMsgHandler.register_status[job_id]["state"],
+                    message=AquaModelWSMsgHandler.register_status[job_id]["message"],
+                    job_id=job_id,
+                )
+            else:
+                return ModelRegisterRequest(
+                    status="SUBSCRIBED", job_id=job_id, message=""
+                )
diff --git a/ads/aqua/extension/ui_websocket_handler.py b/ads/aqua/extension/ui_websocket_handler.py
index 77dfc301d..6b95bf866 100644
--- a/ads/aqua/extension/ui_websocket_handler.py
+++ b/ads/aqua/extension/ui_websocket_handler.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Copyright (c) 2024 Oracle and/or its affiliates.
+# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 import concurrent.futures
 from asyncio.futures import Future
@@ -46,10 +45,12 @@ def get_aqua_internal_error_response(message_id: str) -> ErrorResponse:
 class AquaUIWebSocketHandler(WebSocketHandler):
     """Handler for Aqua Websocket."""
 
-    _handlers_: List[Type[AquaWSMsgHandler]] = [AquaEvaluationWSMsgHandler,
-                                                AquaDeploymentWSMsgHandler,
-                                                AquaModelWSMsgHandler,
-                                                AquaCommonWsMsgHandler]
+    _handlers_: List[Type[AquaWSMsgHandler]] = [
+        AquaEvaluationWSMsgHandler,
+        AquaDeploymentWSMsgHandler,
+        AquaModelWSMsgHandler,
+        AquaCommonWsMsgHandler,
+    ]
 
     thread_pool: ThreadPoolExecutor
 
@@ -98,10 +99,17 @@ def on_message(self, message: Union[str, bytes]):
             raise ValueError(f"No handler found for message type {request.kind}")
         else:
             message_handler = handler(message)
+            message_handler.set_ws_connection(self)
             future: Future = self.thread_pool.submit(message_handler.process)
             self.future_message_map[future] = request
             future.add_done_callback(self.on_message_processed)
 
+    def on_close(self) -> None:
+        self.thread_pool.shutdown()
+        logger.info("AQUA WebSocket closed")
+
+
+class AquaAsyncRequestProgressWebSocketHandler(AquaUIWebSocketHandler):
     def on_message_processed(self, future: concurrent.futures.Future):
         """Callback function to handle the response from the various AquaWSMsgHandlers."""
         try:
@@ -120,11 +128,12 @@ def on_message_processed(self, future: concurrent.futures.Future):
         finally:
             self.future_message_map.pop(future)
             # Send the response back to the client on the event thread
-            IOLoop.current().run_sync(lambda: self.write_message(response.to_json()))
-
-    def on_close(self) -> None:
-        self.thread_pool.shutdown()
-        logger.info("AQUA WebSocket closed")
+            IOLoop.current().add_callback(
+                lambda: self.write_message(response.to_json())
+            )
 
 
-__handlers__ = [("ws?([^/]*)", AquaUIWebSocketHandler)]
+__handlers__ = [
+    ("ws?([^/]*)", AquaUIWebSocketHandler),
+    ("ws?/progress([^/]*)", AquaAsyncRequestProgressWebSocketHandler),
+]
diff --git a/ads/aqua/model/entities.py b/ads/aqua/model/entities.py
index 991c67b54..5528c85bf 100644
--- a/ads/aqua/model/entities.py
+++ b/ads/aqua/model/entities.py
@@ -11,7 +11,7 @@
 
 import re
 from dataclasses import InitVar, dataclass, field
-from typing import List, Optional
+from typing import Callable, List, Optional
 
 import oci
 from huggingface_hub import hf_api
@@ -295,6 +295,7 @@ class ImportModelDetails(CLIBuilderMixin):
     freeform_tags: Optional[dict] = None
     defined_tags: Optional[dict] = None
     ignore_model_artifact_check: Optional[bool] = None
+    callback: Optional[Callable] = None
 
     def __post_init__(self):
         self._command = "model register"
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index fff23578f..571deb945 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -5,7 +5,7 @@
 import pathlib
 from datetime import datetime, timedelta
 from threading import Lock
-from typing import Dict, List, Optional, Set, Union
+from typing import Callable, Dict, List, Optional, Set, Union
 
 import oci
 from cachetools import TTLCache
@@ -76,6 +76,7 @@
     ModelFormat,
     ModelValidationResult,
 )
+from ads.aqua.model.utils import HFModelProgressTracker
 from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem
 from ads.common.auth import default_signer
 from ads.common.oci_resource import SEARCH_TYPE, OCIResource
@@ -1132,10 +1133,12 @@ def _validate_model(
 
         hf_download_config_present = False
 
+        logger.info("Getting files from huggingface")
         if import_model_details.download_from_hf:
             safetensors_model_files = self.get_hf_model_files(
                 model_name, ModelFormat.SAFETENSORS
             )
+            logger.info(f"following files found {safetensors_model_files}")
             if (
                 safetensors_model_files
                 and AQUA_MODEL_ARTIFACT_CONFIG in safetensors_model_files
@@ -1403,6 +1406,7 @@ def _download_model_from_hf(
         local_dir: str = None,
         allow_patterns: List[str] = None,
         ignore_patterns: List[str] = None,
+        callback: Callable = None,
     ) -> str:
         """This helper function downloads the model artifact from Hugging Face to a local folder, then uploads
         to object storage location.
@@ -1428,24 +1432,34 @@ def _download_model_from_hf(
             local_dir = os.path.join(local_dir, model_name)
             os.makedirs(local_dir, exist_ok=True)
 
+        def tqdm_callback(self, status):  # noqa: ARG001
+            callback(status)
+
         # if local_dir is not set, the return value points to the cached data folder
+        tqdm = HFModelProgressTracker
+        tqdm.callback = tqdm_callback
+        logger.info(f"callback is {tqdm.callback}")
         local_dir = snapshot_download(
             repo_id=model_name,
             local_dir=local_dir,
             allow_patterns=allow_patterns,
             ignore_patterns=ignore_patterns,
+            tqdm_class=tqdm,
         )
+        callback({"state": "Model download complete"})
         # Upload to object storage and skip .cache/huggingface/ folder
         logger.debug(
             f"Uploading local artifacts from local directory {local_dir} to {os_path}."
         )
         # Upload to object storage
+        callback({"status": "Object Storage upload started"})
         model_artifact_path = upload_folder(
             os_path=os_path,
             local_dir=local_dir,
             model_name=model_name,
             exclude_pattern=f"{HF_METADATA_FOLDER}*",
         )
+        callback({"state": f"Uploaded model to {os_path}"})
 
         return model_artifact_path
 
@@ -1479,6 +1493,13 @@ def register(
         if not import_model_details:
             import_model_details = ImportModelDetails(**kwargs)
 
+        def publish_status(status):
+            """Invoke callback with the status"""
+            if import_model_details.callback:
+                import_model_details.callback(status=status)
+            else:
+                logger.info("No callback registered")
+
         # If OCID of a model is passed, we need to copy the defaults for Tags and metadata from the service model.
         verified_model: Optional[DataScienceModel] = None
         if (
@@ -1497,6 +1518,7 @@ def register(
                     f"Found service model for {import_model_details.model}: {model_service_id}"
                 )
                 verified_model = DataScienceModel.from_id(model_service_id)
+                logger.info("fetched model from service catalog")
 
         # Copy the model name from the service model if `model` is ocid
         model_name = (
@@ -1511,15 +1533,18 @@ def register(
             model_name=model_name,
             verified_model=verified_model,
         )
+        publish_status({"state": "Model validation complete"})
 
         # download model from hugginface if indicates
         if import_model_details.download_from_hf:
+            publish_status({"state": "Downloading model from huggingface"})
             artifact_path = self._download_model_from_hf(
                 model_name=model_name,
                 os_path=import_model_details.os_path,
                 local_dir=import_model_details.local_dir,
                 allow_patterns=import_model_details.allow_patterns,
                 ignore_patterns=import_model_details.ignore_patterns,
+                callback=publish_status,
             ).rstrip("/")
         else:
             artifact_path = import_model_details.os_path.rstrip("/")
@@ -1537,6 +1562,9 @@ def register(
             freeform_tags=import_model_details.freeform_tags,
             defined_tags=import_model_details.defined_tags,
         )
+        publish_status(
+            {"state": "Model Created", "message": f"Model id is: {ds_model.id}"}
+        )
         # registered model will always have inference and evaluation container, but
         # fine-tuning container may be not set
         inference_container = ds_model.custom_metadata_list.get(
@@ -1587,7 +1615,9 @@ def register(
             cleanup_local_hf_model_artifact(
                 model_name=model_name, local_dir=import_model_details.local_dir
             )
-
+        publish_status(
+            {"state": "SUCCESS", "description": f"Model id is: {ds_model.id}"}
+        )
         return AquaModel(**aqua_model_attributes)
 
     def _if_show(self, model: DataScienceModel) -> bool:
diff --git a/ads/aqua/model/utils.py b/ads/aqua/model/utils.py
new file mode 100644
index 000000000..2320d479a
--- /dev/null
+++ b/ads/aqua/model/utils.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from tqdm import tqdm
+
+
+class HFModelProgressTracker(tqdm):
+    def __init__(self, *args, **kwargs):
+        """
+        A custom tqdm class that calls `callback` each time progress is updated.
+
+        """
+        super().__init__(*args, **kwargs)
+
+    def callback(self, *args, **kwargs):
+        pass
+
+    def update(self, n=1):
+        # Perform the standard progress update
+        super().update(n)
+        # Invoke the callback with the current progress value (self.n)
+        self.callback({"status": f"{self.n} of {self.total} files downloaded"})
diff --git a/ads/aqua/server/app.py b/ads/aqua/server/app.py
index 8d2a0eab6..61db24a48 100644
--- a/ads/aqua/server/app.py
+++ b/ads/aqua/server/app.py
@@ -4,7 +4,7 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import os
-from logging import getLogger
+from logging import DEBUG, getLogger
 
 import tornado.ioloop
 import tornado.web
@@ -35,7 +35,20 @@ def make_app():
 
 
 def start_server():
+    access_log = getLogger("tornado.access")
+    # Set the logging level to DEBUG
+    access_log.setLevel(DEBUG)
     app = make_app()
+    logger.info("Endpoints:")
+    for rule in app.wildcard_router.rules:
+        # Depending on the rule type, the route may be stored in different properties.
+        # If the rule has a regex matcher, you can get its pattern.
+        regex = (
+            rule.matcher.regex.pattern
+            if hasattr(rule.matcher, "regex")
+            else str(rule.matcher)
+        )
+        print(f"\t\t{regex}")
     server = tornado.httpserver.HTTPServer(app)
     port = int(os.environ.get(AQUA_PORT, 8080))
     host = os.environ.get(AQUA_HOST, "0.0.0.0")

From b6cf7ba7c53febe564ee94176b064692e648c169 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 21 Feb 2025 11:34:08 -0800
Subject: [PATCH 07/16] tqdm callback fix

---
 ads/aqua/model/utils.py                     | 15 +++++--
 tests/unitary/with_extras/aqua/test_tqdm.py | 49 +++++++++++++++++++++
 2 files changed, 61 insertions(+), 3 deletions(-)
 create mode 100644 tests/unitary/with_extras/aqua/test_tqdm.py

diff --git a/ads/aqua/model/utils.py b/ads/aqua/model/utils.py
index 2320d479a..ec8b136ef 100644
--- a/ads/aqua/model/utils.py
+++ b/ads/aqua/model/utils.py
@@ -7,6 +7,8 @@
 
 
 class HFModelProgressTracker(tqdm):
+    hooks = []
+
     def __init__(self, *args, **kwargs):
         """
         A custom tqdm class that calls `callback` each time progress is updated.
@@ -14,11 +16,18 @@ def __init__(self, *args, **kwargs):
         """
         super().__init__(*args, **kwargs)
 
-    def callback(self, *args, **kwargs):
-        pass
+    @staticmethod
+    def register_hooks(hook):
+        HFModelProgressTracker.hooks.append(hook)
 
     def update(self, n=1):
         # Perform the standard progress update
         super().update(n)
         # Invoke the callback with the current progress value (self.n)
-        self.callback({"status": f"{self.n} of {self.total} files downloaded"})
+        for hook in HFModelProgressTracker.hooks:
+            hook({"status": f"{self.n} of {self.total} files downloaded"})
+
+    def close(self):
+        for hook in HFModelProgressTracker.hooks:
+            hook({"status": f"{self.n} of {self.total} files downloaded"})
+        super().close()
diff --git a/tests/unitary/with_extras/aqua/test_tqdm.py b/tests/unitary/with_extras/aqua/test_tqdm.py
new file mode 100644
index 000000000..b6039481d
--- /dev/null
+++ b/tests/unitary/with_extras/aqua/test_tqdm.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*--
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from unittest.mock import MagicMock
+
+from tqdm.contrib.concurrent import thread_map
+
+from ads.aqua.model.utils import HFModelProgressTracker
+
+
+def test_custom_tqdm_thread_map():
+    def process(item):
+        import time
+
+        time.sleep(0.1)
+        return item
+
+    items = list(range(0, 10))
+    callback = MagicMock()
+
+    HFModelProgressTracker.register_hooks(callback)
+    # print(HFModelProgressTracker.hooks)
+    thread_map(
+        process,
+        items,
+        desc=f"Fetching {len(items)} items",
+        tqdm_class=HFModelProgressTracker,
+        max_workers=3,
+    )
+    callback.assert_called()
+
+
+def test_custom_tqdm():
+    callback = MagicMock()
+    HFModelProgressTracker.register_hooks(callback)
+    with HFModelProgressTracker(range(10), desc="Processing") as bar:
+        for _ in bar:
+            # Simulate work
+            import time
+
+            time.sleep(0.01)
+    callback.assert_called()
+
+
+if __name__ == "__main__":
+    test_custom_tqdm_thread_map()

From a244dc5144388227a2bcdea836862747df63d1be Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 21 Feb 2025 14:15:45 -0800
Subject: [PATCH 08/16] fix deps

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 15fad7980..e4936a6a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -205,6 +205,7 @@ pii = [
 ]
 llm = ["langchain>=0.2", "langchain-community", "langchain_openai", "pydantic>=2,<3", "evaluate>=0.4.0"]
 aqua = [
+  "jupyter_server",
   "tornado",
   "notebook~=6.5",
   "fire",

From 99871a19b09cb768cbc5709240c0eca89e4d2b9c Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 21 Feb 2025 14:14:33 -0800
Subject: [PATCH 09/16] code cleanup

---
 ads/aqua/model/entities.py | 6 ++++++
 ads/aqua/model/model.py    | 4 +---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/ads/aqua/model/entities.py b/ads/aqua/model/entities.py
index 5528c85bf..aeeb92650 100644
--- a/ads/aqua/model/entities.py
+++ b/ads/aqua/model/entities.py
@@ -299,3 +299,9 @@ class ImportModelDetails(CLIBuilderMixin):
 
     def __post_init__(self):
         self._command = "model register"
+
+
+@dataclass
+class TaskStatus(DataClassSerializable):
+    state: str
+    message: str
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index 571deb945..01a5c6c2b 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -1133,12 +1133,10 @@ def _validate_model(
 
         hf_download_config_present = False
 
-        logger.info("Getting files from huggingface")
         if import_model_details.download_from_hf:
             safetensors_model_files = self.get_hf_model_files(
                 model_name, ModelFormat.SAFETENSORS
             )
-            logger.info(f"following files found {safetensors_model_files}")
             if (
                 safetensors_model_files
                 and AQUA_MODEL_ARTIFACT_CONFIG in safetensors_model_files
@@ -1452,7 +1450,7 @@ def tqdm_callback(self, status):  # noqa: ARG001
             f"Uploading local artifacts from local directory {local_dir} to {os_path}."
         )
         # Upload to object storage
-        callback({"status": "Object Storage upload started"})
+        callback({"state": "Object Storage upload started"})
         model_artifact_path = upload_folder(
             os_path=os_path,
             local_dir=local_dir,

From 7ca41c5c183a6c6cd74e93059bfa990fe2cfd50e Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 21 Feb 2025 15:55:21 -0800
Subject: [PATCH 10/16] code refactor

---
 ads/aqua/model/model.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index 01a5c6c2b..1929cbc08 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -75,6 +75,7 @@
     ImportModelDetails,
     ModelFormat,
     ModelValidationResult,
+    TaskStatus,
 )
 from ads.aqua.model.utils import HFModelProgressTracker
 from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem
@@ -1491,7 +1492,7 @@ def register(
         if not import_model_details:
             import_model_details = ImportModelDetails(**kwargs)
 
-        def publish_status(status):
+        def publish_status(status: TaskStatus):
             """Invoke callback with the status"""
             if import_model_details.callback:
                 import_model_details.callback(status=status)
@@ -1531,11 +1532,21 @@ def publish_status(status):
             model_name=model_name,
             verified_model=verified_model,
         )
-        publish_status({"state": "Model validation complete"})
+        publish_status(
+            TaskStatus(
+                state="MODEL_VALIDATION_SUCCESSFUL",
+                message="Model information validated",
+            )
+        )
 
         # download model from hugginface if indicates
         if import_model_details.download_from_hf:
-            publish_status({"state": "Downloading model from huggingface"})
+            publish_status(
+                TaskStatus(
+                    state="MODEL_DOWNLOAD_BEGIN",
+                    message=f"Downloading {model_name} from Hugging Face",
+                )
+            )
             artifact_path = self._download_model_from_hf(
                 model_name=model_name,
                 os_path=import_model_details.os_path,

From 722595c9fa842f184774c5d789ea66fec065fe4d Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 21 Feb 2025 16:37:17 -0800
Subject: [PATCH 11/16] code cleanup

---
 ads/aqua/extension/model_handler.py | 22 +++++++++-----
 ads/aqua/model/entities.py          | 14 ++++++++-
 ads/aqua/model/model.py             | 45 +++++++++++++++++++++++------
 3 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/ads/aqua/extension/model_handler.py b/ads/aqua/extension/model_handler.py
index 36ba08705..d20c818a6 100644
--- a/ads/aqua/extension/model_handler.py
+++ b/ads/aqua/extension/model_handler.py
@@ -5,7 +5,7 @@
 import threading
 from functools import partial
 from logging import getLogger
-from typing import Dict, List, Optional
+from typing import List, Optional
 from urllib.parse import urlparse
 from uuid import uuid4
 
@@ -30,7 +30,13 @@
     AquaModelWSMsgHandler,
 )
 from ads.aqua.model import AquaModelApp
-from ads.aqua.model.entities import AquaModel, AquaModelSummary, HFModelSummary
+from ads.aqua.model.entities import (
+    AquaModel,
+    AquaModelSummary,
+    HFModelSummary,
+    TaskStatus,
+    TaskStatusEnum,
+)
 from ads.aqua.ui import ModelFormat
 
 logger = getLogger(__name__)
@@ -160,7 +166,7 @@ def post(self, *args, **kwargs):  # noqa: ARG002
         )
         async_mode = input_data.get("async_mode", False)
 
-        def model_register_progress_callback(register_id: str, status: Dict[str, str]):
+        def model_register_progress_callback(register_id: str, status: TaskStatus):
             """Callback method to track the model register progress"""
             logger.info(f"Progress for {register_id}: {status}")
             subscribers: List[WebSocketHandler] = (
@@ -175,14 +181,14 @@ def model_register_progress_callback(register_id: str, status: Dict[str, str]):
                     and subscriber.ws_connection.stream.socket
                 ):
                     try:
-                        subscriber.write_message(status)
+                        subscriber.write_message(status.to_json())
                     except Exception as e:
                         print(e)
                         IOLoop.current().add_callback(
-                            lambda: subscriber.write_message(status)
+                            lambda: subscriber.write_message(status.to_json())
                         )
             if len(subscribers) == 0:
-                AquaModelWSMsgHandler.register_status[register_id] = status
+                AquaModelWSMsgHandler.register_status[register_id] = status.to_json()
 
         def register_model(callback=None) -> AquaModel:
             """Wrapper method to help initialize callback in case of async mode"""
@@ -210,7 +216,9 @@ def register_model(callback=None) -> AquaModel:
                 if async_mode:
                     model_register_progress_callback(
                         register_id=job_id,
-                        status={"state": "FAILED", "message": str(e)},
+                        status=TaskStatus(
+                            state=TaskStatusEnum.REGISTRATION_FAILED, message=str(e)
+                        ),
                     )
                     raise
                 else:
diff --git a/ads/aqua/model/entities.py b/ads/aqua/model/entities.py
index aeeb92650..55b519ed3 100644
--- a/ads/aqua/model/entities.py
+++ b/ads/aqua/model/entities.py
@@ -24,6 +24,7 @@
 from ads.aqua.model.enums import FineTuningDefinedMetadata
 from ads.aqua.training.exceptions import exit_code_dict
 from ads.aqua.ui import ModelFormat
+from ads.common.extended_enum import ExtendedEnum
 from ads.common.serializer import DataClassSerializable
 from ads.common.utils import get_log_links
 from ads.model.datascience_model import DataScienceModel
@@ -301,7 +302,18 @@ def __post_init__(self):
         self._command = "model register"
 
 
+class TaskStatusEnum(ExtendedEnum):
+    MODEL_VALIDATION_SUCCESSFUL = "MODEL_VALIDATION_SUCCESSFUL"
+    MODEL_DOWNLOAD_STARTED = "MODEL_DOWNLOAD_STARTED"
+    MODEL_DOWNLOAD_SUCCESSFUL = "MODEL_DOWNLOAD_SUCCESSFUL"
+    MODEL_UPLOAD_STARTED = "MODEL_UPLOAD_STARTED"
+    MODEL_UPLOAD_SUCCESSFUL = "MODEL_UPLOAD_SUCCESSFUL"
+    DATASCIENCE_MODEL_CREATED = "DATASCIENCE_MODEL_CREATED"
+    MODEL_REGISTRATION_SUCCESSFUL = "MODEL_REGISTRATION_SUCCESSFUL"
+    REGISTRATION_FAILED = "REGISTRATION_FAILED"
+
+
 @dataclass
 class TaskStatus(DataClassSerializable):
-    state: str
+    state: TaskStatusEnum
     message: str
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index 1929cbc08..031a0dd72 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -76,6 +76,7 @@
     ModelFormat,
     ModelValidationResult,
     TaskStatus,
+    TaskStatusEnum,
 )
 from ads.aqua.model.utils import HFModelProgressTracker
 from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem
@@ -1434,10 +1435,15 @@ def _download_model_from_hf(
         def tqdm_callback(self, status):  # noqa: ARG001
             callback(status)
 
+        def publish_status(status: TaskStatus):
+            """wrapper to avoid repeated null check"""
+            if callback:
+                callback(status)
+
         # if local_dir is not set, the return value points to the cached data folder
         tqdm = HFModelProgressTracker
-        tqdm.callback = tqdm_callback
-        logger.info(f"callback is {tqdm.callback}")
+        if callback:
+            tqdm.register_hooks(tqdm_callback)
         local_dir = snapshot_download(
             repo_id=model_name,
             local_dir=local_dir,
@@ -1445,20 +1451,35 @@ def tqdm_callback(self, status):  # noqa: ARG001
             ignore_patterns=ignore_patterns,
             tqdm_class=tqdm,
         )
-        callback({"state": "Model download complete"})
+        publish_status(
+            TaskStatus(
+                state=TaskStatusEnum.MODEL_DOWNLOAD_SUCCESSFUL,
+                message="Model download complete",
+            )
+        )
         # Upload to object storage and skip .cache/huggingface/ folder
         logger.debug(
             f"Uploading local artifacts from local directory {local_dir} to {os_path}."
         )
         # Upload to object storage
-        callback({"state": "Object Storage upload started"})
+        publish_status(
+            TaskStatus(
+                state=TaskStatusEnum.MODEL_UPLOAD_STARTED,
+                message=f"Uploading model to Object Storage: {os_path}",
+            )
+        )
         model_artifact_path = upload_folder(
             os_path=os_path,
             local_dir=local_dir,
             model_name=model_name,
             exclude_pattern=f"{HF_METADATA_FOLDER}*",
         )
-        callback({"state": f"Uploaded model to {os_path}"})
+        publish_status(
+            TaskStatus(
+                state=TaskStatusEnum.MODEL_UPLOAD_SUCCESSFUL,
+                message=f"Model uploaded successfully to {os_path}",
+            )
+        )
 
         return model_artifact_path
 
@@ -1534,7 +1555,7 @@ def publish_status(status: TaskStatus):
         )
         publish_status(
             TaskStatus(
-                state="MODEL_VALIDATION_SUCCESSFUL",
+                state=TaskStatusEnum.MODEL_VALIDATION_SUCCESSFUL,
                 message="Model information validated",
             )
         )
@@ -1543,7 +1564,7 @@ def publish_status(status: TaskStatus):
         if import_model_details.download_from_hf:
             publish_status(
                 TaskStatus(
-                    state="MODEL_DOWNLOAD_BEGIN",
+                    state=TaskStatusEnum.MODEL_DOWNLOAD_STARTED,
                     message=f"Downloading {model_name} from Hugging Face",
                 )
             )
@@ -1572,7 +1593,10 @@ def publish_status(status: TaskStatus):
             defined_tags=import_model_details.defined_tags,
         )
         publish_status(
-            {"state": "Model Created", "message": f"Model id is: {ds_model.id}"}
+            TaskStatus(
+                TaskStatusEnum.DATASCIENCE_MODEL_CREATED,
+                message=f"DataScience model created. Model id is: {ds_model.id}",
+            )
         )
         # registered model will always have inference and evaluation container, but
         # fine-tuning container may be not set
@@ -1625,7 +1649,10 @@ def publish_status(status: TaskStatus):
                 model_name=model_name, local_dir=import_model_details.local_dir
             )
         publish_status(
-            {"state": "SUCCESS", "description": f"Model id is: {ds_model.id}"}
+            TaskStatus(
+                state=TaskStatusEnum.MODEL_REGISTRATION_SUCCESSFUL,
+                message=f"Model {model_name} successfully registered. Model id is: {ds_model.id}",
+            )
         )
         return AquaModel(**aqua_model_attributes)
 

From fd3051eb58457fb52e296cdfe035492336324628 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Mon, 24 Feb 2025 15:17:43 -0800
Subject: [PATCH 12/16] code refactoring

---
 ads/aqua/model/entities.py |  1 +
 ads/aqua/model/model.py    |  3 ++-
 ads/aqua/model/utils.py    | 16 ++++++++++++++--
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/ads/aqua/model/entities.py b/ads/aqua/model/entities.py
index 55b519ed3..584a05a13 100644
--- a/ads/aqua/model/entities.py
+++ b/ads/aqua/model/entities.py
@@ -311,6 +311,7 @@ class TaskStatusEnum(ExtendedEnum):
     DATASCIENCE_MODEL_CREATED = "DATASCIENCE_MODEL_CREATED"
     MODEL_REGISTRATION_SUCCESSFUL = "MODEL_REGISTRATION_SUCCESSFUL"
     REGISTRATION_FAILED = "REGISTRATION_FAILED"
+    MODEL_DOWNLOAD_INPROGRESS = "MODEL_DOWNLOAD_INPROGRESS"
 
 
 @dataclass
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index 031a0dd72..b35147ed3 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -1432,7 +1432,7 @@ def _download_model_from_hf(
             local_dir = os.path.join(local_dir, model_name)
             os.makedirs(local_dir, exist_ok=True)
 
-        def tqdm_callback(self, status):  # noqa: ARG001
+        def tqdm_callback(status: TaskStatus):  # noqa: ARG001
             callback(status)
 
         def publish_status(status: TaskStatus):
@@ -1754,6 +1754,7 @@ def _find_matching_aqua_model(self, model_id: str) -> Optional[str]:
         aqua_model_list = self.list()
 
         for aqua_model_summary in aqua_model_list:
+            print(aqua_model_summary.name.lower())
             if aqua_model_summary.name.lower() == model_id_lower:
                 logger.info(
                     f"Found matching verified model id {aqua_model_summary.id} for the model {model_id}"
diff --git a/ads/aqua/model/utils.py b/ads/aqua/model/utils.py
index ec8b136ef..2caa21720 100644
--- a/ads/aqua/model/utils.py
+++ b/ads/aqua/model/utils.py
@@ -5,6 +5,8 @@
 
 from tqdm import tqdm
 
+from ads.aqua.model.entities import TaskStatus, TaskStatusEnum
+
 
 class HFModelProgressTracker(tqdm):
     hooks = []
@@ -25,9 +27,19 @@ def update(self, n=1):
         super().update(n)
         # Invoke the callback with the current progress value (self.n)
         for hook in HFModelProgressTracker.hooks:
-            hook({"status": f"{self.n} of {self.total} files downloaded"})
+            hook(
+                TaskStatus(
+                    state=TaskStatusEnum.MODEL_DOWNLOAD_INPROGRESS,
+                    message=f"{self.n} of {self.total} files downloaded",
+                )
+            )
 
     def close(self):
         for hook in HFModelProgressTracker.hooks:
-            hook({"status": f"{self.n} of {self.total} files downloaded"})
+            hook(
+                TaskStatus(
+                    state=TaskStatusEnum.MODEL_DOWNLOAD_INPROGRESS,
+                    message=f"{self.n} of {self.total} files downloaded",
+                )
+            )
         super().close()

From 876f826e2d70df5e5e405c39082272e8281c3585 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Tue, 25 Feb 2025 16:42:38 -0800
Subject: [PATCH 13/16] code refactoring

---
 ads/aqua/common/task_status.py              |  27 +++
 ads/aqua/extension/model_handler.py         |  63 ++----
 ads/aqua/extension/models/ws_models.py      |   2 +-
 ads/aqua/extension/models_ws_msg_handler.py |  70 ++++---
 ads/aqua/extension/status_manager.py        | 200 ++++++++++++++++++++
 ads/aqua/model/entities.py                  |  19 --
 ads/aqua/model/model.py                     |   6 +-
 ads/aqua/model/utils.py                     |   2 +-
 8 files changed, 301 insertions(+), 88 deletions(-)
 create mode 100644 ads/aqua/common/task_status.py
 create mode 100644 ads/aqua/extension/status_manager.py

diff --git a/ads/aqua/common/task_status.py b/ads/aqua/common/task_status.py
new file mode 100644
index 000000000..6427844c5
--- /dev/null
+++ b/ads/aqua/common/task_status.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+from dataclasses import dataclass
+
+from ads.common.extended_enum import ExtendedEnum
+from ads.common.serializer import DataClassSerializable
+
+
+class TaskStatusEnum(ExtendedEnum):
+    MODEL_VALIDATION_SUCCESSFUL = "MODEL_VALIDATION_SUCCESSFUL"
+    MODEL_DOWNLOAD_STARTED = "MODEL_DOWNLOAD_STARTED"
+    MODEL_DOWNLOAD_SUCCESSFUL = "MODEL_DOWNLOAD_SUCCESSFUL"
+    MODEL_UPLOAD_STARTED = "MODEL_UPLOAD_STARTED"
+    MODEL_UPLOAD_SUCCESSFUL = "MODEL_UPLOAD_SUCCESSFUL"
+    DATASCIENCE_MODEL_CREATED = "DATASCIENCE_MODEL_CREATED"
+    MODEL_REGISTRATION_SUCCESSFUL = "MODEL_REGISTRATION_SUCCESSFUL"
+    REGISTRATION_FAILED = "REGISTRATION_FAILED"
+    MODEL_DOWNLOAD_INPROGRESS = "MODEL_DOWNLOAD_INPROGRESS"
+
+
+@dataclass
+class TaskStatus(DataClassSerializable):
+    state: TaskStatusEnum
+    message: str
diff --git a/ads/aqua/extension/model_handler.py b/ads/aqua/extension/model_handler.py
index d20c818a6..b14ea0d00 100644
--- a/ads/aqua/extension/model_handler.py
+++ b/ads/aqua/extension/model_handler.py
@@ -3,21 +3,19 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import threading
-from functools import partial
 from logging import getLogger
-from typing import List, Optional
+from typing import Optional
 from urllib.parse import urlparse
 from uuid import uuid4
 
-from tornado.ioloop import IOLoop
 from tornado.web import HTTPError
-from tornado.websocket import WebSocketHandler
 
 from ads.aqua.common.decorator import handle_exceptions
 from ads.aqua.common.enums import (
     CustomInferenceContainerTypeFamily,
 )
 from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
+from ads.aqua.common.task_status import TaskStatus, TaskStatusEnum
 from ads.aqua.common.utils import (
     get_hf_model_info,
     is_valid_ocid,
@@ -25,17 +23,16 @@
 )
 from ads.aqua.extension.base_handler import AquaAPIhandler
 from ads.aqua.extension.errors import Errors
-from ads.aqua.extension.models_ws_msg_handler import (
-    REGISTRATION_STATUS,
-    AquaModelWSMsgHandler,
+from ads.aqua.extension.status_manager import (
+    RegistrationStatus,
+    StatusTracker,
+    TaskNameEnum,
 )
 from ads.aqua.model import AquaModelApp
 from ads.aqua.model.entities import (
     AquaModel,
     AquaModelSummary,
     HFModelSummary,
-    TaskStatus,
-    TaskStatusEnum,
 )
 from ads.aqua.ui import ModelFormat
 
@@ -126,7 +123,7 @@ def post(self, *args, **kwargs):  # noqa: ARG002
         HTTPError
             Raises HTTPError if inputs are missing or are invalid
         """
-        job_id = str(uuid4())
+        task_id = str(uuid4())
         try:
             input_data = self.get_json_body()
         except Exception as ex:
@@ -166,30 +163,6 @@ def post(self, *args, **kwargs):  # noqa: ARG002
         )
         async_mode = input_data.get("async_mode", False)
 
-        def model_register_progress_callback(register_id: str, status: TaskStatus):
-            """Callback method to track the model register progress"""
-            logger.info(f"Progress for {register_id}: {status}")
-            subscribers: List[WebSocketHandler] = (
-                AquaModelWSMsgHandler.status_subscriber.get(REGISTRATION_STATUS, {})
-                .get(register_id, {})
-                .get("subscriber", [])
-            )
-            for subscriber in subscribers:
-                if (
-                    subscriber
-                    and subscriber.ws_connection
-                    and subscriber.ws_connection.stream.socket
-                ):
-                    try:
-                        subscriber.write_message(status.to_json())
-                    except Exception as e:
-                        print(e)
-                        IOLoop.current().add_callback(
-                            lambda: subscriber.write_message(status.to_json())
-                        )
-            if len(subscribers) == 0:
-                AquaModelWSMsgHandler.register_status[register_id] = status.to_json()
-
         def register_model(callback=None) -> AquaModel:
             """Wrapper method to help initialize callback in case of async mode"""
             try:
@@ -214,11 +187,13 @@ def register_model(callback=None) -> AquaModel:
                 )
             except Exception as e:
                 if async_mode:
-                    model_register_progress_callback(
-                        register_id=job_id,
-                        status=TaskStatus(
-                            state=TaskStatusEnum.REGISTRATION_FAILED, message=str(e)
-                        ),
+                    StatusTracker.add_status(
+                        RegistrationStatus(
+                            task_id=task_id,
+                            task_status=TaskStatus(
+                                state=TaskStatusEnum.REGISTRATION_FAILED, message=str(e)
+                            ),
+                        )
                     )
                     raise
                 else:
@@ -228,14 +203,18 @@ def register_model(callback=None) -> AquaModel:
         if async_mode:
             t = threading.Thread(
                 target=register_model,
-                args=(partial(model_register_progress_callback, register_id=job_id),),
+                args=(
+                    StatusTracker.prepare_status_callback(
+                        TaskNameEnum.REGISTRATION_STATUS, task_id=task_id
+                    ),
+                ),
                 daemon=True,
             )
             t.start()
             output = {
                 "state": "ACCEPTED",
-                "job_id": job_id,
-                "progress_url": f"ws://host:port/aqua/ws/{job_id}",
+                "task_id": task_id,
+                "progress_url": f"ws://host:port/aqua/ws/{task_id}",
             }
         else:
             output = register_model()
diff --git a/ads/aqua/extension/models/ws_models.py b/ads/aqua/extension/models/ws_models.py
index c5ec06c6c..ef6420e81 100644
--- a/ads/aqua/extension/models/ws_models.py
+++ b/ads/aqua/extension/models/ws_models.py
@@ -153,5 +153,5 @@ class RequestStatus(DataClassSerializable):
 @dataclass
 class ModelRegisterRequest(DataClassSerializable):
     status: str
-    job_id: str
+    task_id: str
     message: str = ""
diff --git a/ads/aqua/extension/models_ws_msg_handler.py b/ads/aqua/extension/models_ws_msg_handler.py
index d2ba0a8be..8d9c7bc39 100644
--- a/ads/aqua/extension/models_ws_msg_handler.py
+++ b/ads/aqua/extension/models_ws_msg_handler.py
@@ -4,6 +4,7 @@
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
 import json
+from logging import getLogger
 from typing import List, Union
 
 from ads.aqua.common.decorator import handle_exceptions
@@ -14,8 +15,15 @@
     ModelRegisterRequest,
     RequestResponseType,
 )
+from ads.aqua.extension.status_manager import (
+    RegistrationSubscriber,
+    StatusTracker,
+    TaskNameEnum,
+)
 from ads.aqua.model import AquaModelApp
 
+logger = getLogger(__name__)
+
 REGISTRATION_STATUS = "registration_status"
 
 
@@ -58,31 +66,47 @@ def process(self) -> Union[ListModelsResponse, ModelDetailsResponse]:
                 data=response,
             )
         elif request.get("kind") == "RegisterModelStatus":
-            job_id = request.get("job_id")
-            if REGISTRATION_STATUS not in AquaModelWSMsgHandler.status_subscriber:
-                AquaModelWSMsgHandler.status_subscriber = {
-                    REGISTRATION_STATUS: {job_id: {"subscriber": []}}
-                }
-            if REGISTRATION_STATUS in AquaModelWSMsgHandler.status_subscriber:
-                if (
-                    job_id
-                    in AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS]
-                ):
-                    AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
-                        job_id
-                    ]["subscriber"].append(self.ws_connection)
-                else:
-                    AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
-                        job_id
-                    ] = {"subscriber": [self.ws_connection]}
-            print(AquaModelWSMsgHandler.register_status)
-            if "state" in AquaModelWSMsgHandler.register_status.get(job_id, {}):
+            task_id = request.get("task_id")
+            StatusTracker.add_subscriber(
+                subscriber=RegistrationSubscriber(
+                    task_id=task_id, subscriber=self.ws_connection
+                ),
+                notify_latest_status=False,
+            )
+            # if REGISTRATION_STATUS not in AquaModelWSMsgHandler.status_subscriber:
+            #     AquaModelWSMsgHandler.status_subscriber = {
+            #         REGISTRATION_STATUS: {job_id: {"subscriber": []}}
+            #     }
+            # if REGISTRATION_STATUS in AquaModelWSMsgHandler.status_subscriber:
+            #     if (
+            #         job_id
+            #         in AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS]
+            #     ):
+            #         AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
+            #             job_id
+            #         ]["subscriber"].append(self.ws_connection)
+            #     else:
+            #         AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
+            #             job_id
+            #         ] = {"subscriber": [self.ws_connection]}
+
+            latest_status = StatusTracker.get_latest_status(
+                TaskNameEnum.REGISTRATION_STATUS, task_id=task_id
+            )
+            logger.info(latest_status)
+            # if "state" in AquaModelWSMsgHandler.register_status.get(job_id, {}):
+            #     return ModelRegisterRequest(
+            #         status=AquaModelWSMsgHandler.register_status[job_id]["state"],
+            #         message=AquaModelWSMsgHandler.register_status[job_id]["message"],
+            #         job_id=job_id,
+            #     )
+            if latest_status:
                 return ModelRegisterRequest(
-                    status=AquaModelWSMsgHandler.register_status[job_id]["state"],
-                    message=AquaModelWSMsgHandler.register_status[job_id]["message"],
-                    job_id=job_id,
+                    status=latest_status.state,
+                    message=latest_status.message,
+                    task_id=task_id,
                 )
             else:
                 return ModelRegisterRequest(
-                    status="SUBSCRIBED", job_id=job_id, message=""
+                    status="SUBSCRIBED", task_id=task_id, message=""
                 )
diff --git a/ads/aqua/extension/status_manager.py b/ads/aqua/extension/status_manager.py
new file mode 100644
index 000000000..b989745f2
--- /dev/null
+++ b/ads/aqua/extension/status_manager.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+
+# Copyright (c) 2025 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+
+import threading
+from dataclasses import dataclass, field
+from functools import partial
+from logging import getLogger
+from typing import Callable, Dict, List, Union
+
+from tornado.ioloop import IOLoop
+from tornado.websocket import WebSocketHandler
+
+from ads.aqua.common.task_status import TaskStatus
+from ads.common.extended_enum import ExtendedEnum
+
+logger = getLogger(__name__)
+
+
+class TaskNameEnum(ExtendedEnum):
+    REGISTRATION_STATUS = "REGISTRATION_STATUS"
+
+
+@dataclass
+class Task:
+    task_name: TaskNameEnum = None
+    task_id: str = None
+
+
+@dataclass
+class Status(Task):
+    task_status: TaskStatus = None
+
+
+@dataclass
+class Subscriber(Task):
+    subscriber: WebSocketHandler = None
+
+
+@dataclass
+class RegistrationStatus(Status):
+    task_name: TaskNameEnum = TaskNameEnum.REGISTRATION_STATUS
+
+
+@dataclass
+class RegistrationSubscriber(Subscriber):
+    task_name: TaskNameEnum = TaskNameEnum.REGISTRATION_STATUS
+
+
+@dataclass
+class StatusSubscription:
+    task_status_list: List[TaskStatus] = field(default_factory=list)
+    subscribers: List[Subscriber] = field(default_factory=list)
+
+
+class StatusTracker:
+    lock = threading.RLock()
+    """
+    Maintains a mapping of task statuses and subscribers for notifications.
+    Example:
+        {
+            "REGISTRATION_STATUS": {
+                "sample-task-id": StatusSubscription(
+                    task_status_list=[TaskStatus(state="MODEL_DOWNLOAD_INPROGRESS", message="1 out of 10 files downloaded")],
+                    subscribers=[Subscriber(subscriber=websocket123)]
+                )
+            }
+        }
+    """
+    status: Dict[TaskNameEnum, Dict[str, StatusSubscription]] = {}
+
+    @staticmethod
+    def get_latest_status(
+        task_name: TaskNameEnum, task_id: str
+    ) -> Union[TaskStatus, None]:
+        """Returns latest task status if availble, else returns None"""
+        task_list = []
+        logger.info(f"Status dump: {StatusTracker.status}")
+        with StatusTracker.lock:
+            task_list = (
+                StatusTracker.status.get(task_name, {})
+                .get(task_id, StatusSubscription())
+                .task_status_list
+            )
+        return task_list[-1] if task_list else None
+
+    @staticmethod
+    def get_statuses(task_name: TaskNameEnum, task_id: str) -> Union[TaskStatus, None]:
+        """Returns latest task status if availble, else returns None"""
+        with StatusTracker.lock:
+            return (
+                StatusTracker.status.get(task_name, {})
+                .get(task_id, StatusSubscription())
+                .task_status_list
+            )
+
+    @staticmethod
+    def add_status(status: Status, notify=True):
+        """Appends to the status list. Notifies the status to all the subcribers"""
+        logger.info(f"status: {status}")
+        with StatusTracker.lock:
+            if status.task_name not in StatusTracker.status:
+                StatusTracker.status[status.task_name] = {
+                    status.task_id: StatusSubscription(
+                        task_status_list=[status.task_status]
+                    )
+                }
+            elif status.task_id in StatusTracker.status[status.task_name]:
+                StatusTracker.status[status.task_name][
+                    status.task_id
+                ].task_status_list.append(status.task_status)
+            else:
+                StatusTracker.status[status.task_name][status.task_id] = (
+                    StatusSubscription(task_status_list=[status.task_status])
+                )
+        # Since there is a task id, Notify subscribers if any
+        if notify:
+            StatusTracker.notify_latest_to_all(
+                task_name=status.task_name, task_id=status.task_id
+            )
+
+    @staticmethod
+    def notify_latest_to_all(task_name: TaskNameEnum, task_id: str):
+        """Notify the latest task status to all the subscribers"""
+        task_status = StatusTracker.get_latest_status(
+            task_name=task_name, task_id=task_id
+        )
+        logger.info(f"status: {task_status}")
+        subscribers = []
+        with StatusTracker.lock:
+            subscribers = (
+                StatusTracker.status.get(task_name, {})
+                .get(task_id, StatusSubscription())
+                .subscribers
+            )
+        for subscriber in subscribers:
+            StatusTracker.send_message(status=task_status, subscriber=subscriber)
+
+    @staticmethod
+    def notify(task_name: TaskNameEnum, subscriber: Subscriber, latest_only=True):
+        """Notify the subscriber of all the status"""
+        if latest_only:
+            task_status = StatusTracker.get_latest_status(
+                task_name=task_name, task_id=subscriber.task_id
+            )
+        else:
+            task_status = StatusTracker.get_statuses(
+                task_name=task_name, task_id=subscriber.task_id
+            )
+        logger.info(task_status)
+        StatusTracker.send_message(status=task_status, subscriber=subscriber)
+
+    @staticmethod
+    def send_message(status: TaskStatus, subscriber: Subscriber):
+        if (
+            subscriber
+            and subscriber.ws_connection
+            and subscriber.ws_connection.stream.socket
+        ):
+            try:
+                subscriber.write_message(status.to_json())
+            except Exception as e:
+                print(e)
+                IOLoop.current().add_callback(
+                    lambda: subscriber.write_message(status.to_json())
+                )
+
+    @staticmethod
+    def add_subscriber(subscriber: Subscriber, notify_latest_status=True):
+        """Appends to the subscriber list"""
+        with StatusTracker.lock:
+            if subscriber.task_name not in StatusTracker.status:
+                StatusTracker.status[subscriber.task_name] = {
+                    subscriber.task_id: StatusSubscription(
+                        subscribers=[subscriber.subscriber]
+                    )
+                }
+            elif subscriber.task_id in StatusTracker.status[subscriber.task_name]:
+                StatusTracker.status[subscriber.task_name][
+                    subscriber.task_id
+                ].subscribers.append(subscriber.subscriber)
+            else:
+                StatusTracker.status[subscriber.task_name][subscriber.task_id] = (
+                    StatusSubscription(subscribers=[subscriber.subscriber])
+                )
+        if notify_latest_status:
+            StatusTracker.notify(
+                task_name=subscriber.task_name, task_id=subscriber.task_id
+            )
+
+    @staticmethod
+    def prepare_status_callback(
+        task_name: TaskNameEnum, task_id: str
+    ) -> Callable[[TaskStatus], None]:
+        def callback(task_name: TaskNameEnum, task_id: str, status: TaskStatus):
+            st = Status(task_name=task_name, task_id=task_id, task_status=status)
+            StatusTracker.add_status(st)
+
+        return partial(callback, task_name=task_name, task_id=task_id)
diff --git a/ads/aqua/model/entities.py b/ads/aqua/model/entities.py
index 584a05a13..5528c85bf 100644
--- a/ads/aqua/model/entities.py
+++ b/ads/aqua/model/entities.py
@@ -24,7 +24,6 @@
 from ads.aqua.model.enums import FineTuningDefinedMetadata
 from ads.aqua.training.exceptions import exit_code_dict
 from ads.aqua.ui import ModelFormat
-from ads.common.extended_enum import ExtendedEnum
 from ads.common.serializer import DataClassSerializable
 from ads.common.utils import get_log_links
 from ads.model.datascience_model import DataScienceModel
@@ -300,21 +299,3 @@ class ImportModelDetails(CLIBuilderMixin):
 
     def __post_init__(self):
         self._command = "model register"
-
-
-class TaskStatusEnum(ExtendedEnum):
-    MODEL_VALIDATION_SUCCESSFUL = "MODEL_VALIDATION_SUCCESSFUL"
-    MODEL_DOWNLOAD_STARTED = "MODEL_DOWNLOAD_STARTED"
-    MODEL_DOWNLOAD_SUCCESSFUL = "MODEL_DOWNLOAD_SUCCESSFUL"
-    MODEL_UPLOAD_STARTED = "MODEL_UPLOAD_STARTED"
-    MODEL_UPLOAD_SUCCESSFUL = "MODEL_UPLOAD_SUCCESSFUL"
-    DATASCIENCE_MODEL_CREATED = "DATASCIENCE_MODEL_CREATED"
-    MODEL_REGISTRATION_SUCCESSFUL = "MODEL_REGISTRATION_SUCCESSFUL"
-    REGISTRATION_FAILED = "REGISTRATION_FAILED"
-    MODEL_DOWNLOAD_INPROGRESS = "MODEL_DOWNLOAD_INPROGRESS"
-
-
-@dataclass
-class TaskStatus(DataClassSerializable):
-    state: TaskStatusEnum
-    message: str
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index b35147ed3..714a51234 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -26,6 +26,7 @@
     AquaRuntimeError,
     AquaValueError,
 )
+from ads.aqua.common.task_status import TaskStatus, TaskStatusEnum
 from ads.aqua.common.utils import (
     LifecycleStatus,
     _build_resource_identifier,
@@ -75,8 +76,6 @@
     ImportModelDetails,
     ModelFormat,
     ModelValidationResult,
-    TaskStatus,
-    TaskStatusEnum,
 )
 from ads.aqua.model.utils import HFModelProgressTracker
 from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem
@@ -1515,6 +1514,9 @@ def register(
 
         def publish_status(status: TaskStatus):
             """Invoke callback with the status"""
+            logger.info(
+                f"Publishing status using callback: {import_model_details.callback}"
+            )
             if import_model_details.callback:
                 import_model_details.callback(status=status)
             else:
diff --git a/ads/aqua/model/utils.py b/ads/aqua/model/utils.py
index 2caa21720..c16303fcf 100644
--- a/ads/aqua/model/utils.py
+++ b/ads/aqua/model/utils.py
@@ -5,7 +5,7 @@
 
 from tqdm import tqdm
 
-from ads.aqua.model.entities import TaskStatus, TaskStatusEnum
+from ads.aqua.common.task_status import TaskStatus, TaskStatusEnum
 
 
 class HFModelProgressTracker(tqdm):

From 16658b5abc282bfec6e40d88fd4dd1bb9aaa8c11 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 28 Feb 2025 09:17:42 -0800
Subject: [PATCH 14/16] Make download tracker threadsafe

---
 ads/aqua/extension/models_ws_msg_handler.py | 22 -------------------
 ads/aqua/model/model.py                     |  6 +++---
 ads/aqua/model/utils.py                     | 24 +++++++++++++++------
 tests/unitary/with_extras/aqua/test_tqdm.py | 11 +++++-----
 4 files changed, 26 insertions(+), 37 deletions(-)

diff --git a/ads/aqua/extension/models_ws_msg_handler.py b/ads/aqua/extension/models_ws_msg_handler.py
index 8d9c7bc39..e18fee2ed 100644
--- a/ads/aqua/extension/models_ws_msg_handler.py
+++ b/ads/aqua/extension/models_ws_msg_handler.py
@@ -73,33 +73,11 @@ def process(self) -> Union[ListModelsResponse, ModelDetailsResponse]:
                 ),
                 notify_latest_status=False,
             )
-            # if REGISTRATION_STATUS not in AquaModelWSMsgHandler.status_subscriber:
-            #     AquaModelWSMsgHandler.status_subscriber = {
-            #         REGISTRATION_STATUS: {job_id: {"subscriber": []}}
-            #     }
-            # if REGISTRATION_STATUS in AquaModelWSMsgHandler.status_subscriber:
-            #     if (
-            #         job_id
-            #         in AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS]
-            #     ):
-            #         AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
-            #             job_id
-            #         ]["subscriber"].append(self.ws_connection)
-            #     else:
-            #         AquaModelWSMsgHandler.status_subscriber[REGISTRATION_STATUS][
-            #             job_id
-            #         ] = {"subscriber": [self.ws_connection]}
 
             latest_status = StatusTracker.get_latest_status(
                 TaskNameEnum.REGISTRATION_STATUS, task_id=task_id
             )
             logger.info(latest_status)
-            # if "state" in AquaModelWSMsgHandler.register_status.get(job_id, {}):
-            #     return ModelRegisterRequest(
-            #         status=AquaModelWSMsgHandler.register_status[job_id]["state"],
-            #         message=AquaModelWSMsgHandler.register_status[job_id]["message"],
-            #         job_id=job_id,
-            #     )
             if latest_status:
                 return ModelRegisterRequest(
                     status=latest_status.state,
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index 714a51234..d1a6c3370 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -77,7 +77,7 @@
     ModelFormat,
     ModelValidationResult,
 )
-from ads.aqua.model.utils import HFModelProgressTracker
+from ads.aqua.model.utils import prepare_progress_tracker_with_callback
 from ads.aqua.ui import AquaContainerConfig, AquaContainerConfigItem
 from ads.common.auth import default_signer
 from ads.common.oci_resource import SEARCH_TYPE, OCIResource
@@ -1440,9 +1440,9 @@ def publish_status(status: TaskStatus):
                 callback(status)
 
         # if local_dir is not set, the return value points to the cached data folder
-        tqdm = HFModelProgressTracker
+        tqdm = None
         if callback:
-            tqdm.register_hooks(tqdm_callback)
+            tqdm = prepare_progress_tracker_with_callback(tqdm_callback)
         local_dir = snapshot_download(
             repo_id=model_name,
             local_dir=local_dir,
diff --git a/ads/aqua/model/utils.py b/ads/aqua/model/utils.py
index c16303fcf..af7564d32 100644
--- a/ads/aqua/model/utils.py
+++ b/ads/aqua/model/utils.py
@@ -3,12 +3,16 @@
 # Copyright (c) 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
+from typing import Callable, List, Union
+
 from tqdm import tqdm
 
 from ads.aqua.common.task_status import TaskStatus, TaskStatusEnum
 
 
 class HFModelProgressTracker(tqdm):
+    """snapshot_download method from huggingface_hub library is used to download the models. This class provides a way to register for callbacks as the downloads of different files are complete."""
+
     hooks = []
 
     def __init__(self, *args, **kwargs):
@@ -18,15 +22,11 @@ def __init__(self, *args, **kwargs):
         """
         super().__init__(*args, **kwargs)
 
-    @staticmethod
-    def register_hooks(hook):
-        HFModelProgressTracker.hooks.append(hook)
-
     def update(self, n=1):
         # Perform the standard progress update
         super().update(n)
         # Invoke the callback with the current progress value (self.n)
-        for hook in HFModelProgressTracker.hooks:
+        for hook in self.hooks:
             hook(
                 TaskStatus(
                     state=TaskStatusEnum.MODEL_DOWNLOAD_INPROGRESS,
@@ -35,7 +35,7 @@ def update(self, n=1):
             )
 
     def close(self):
-        for hook in HFModelProgressTracker.hooks:
+        for hook in self.hooks:
             hook(
                 TaskStatus(
                     state=TaskStatusEnum.MODEL_DOWNLOAD_INPROGRESS,
@@ -43,3 +43,15 @@ def close(self):
                 )
             )
         super().close()
+
+
+def prepare_progress_tracker_with_callback(
+    hook: Union[Callable, List[Callable]],
+) -> "HFModelProgressTrackerWithHook":  # type: ignore  # noqa: F821
+    """Provide a list of callables or single callable to be invoked upon download progress. snapshot_download only allows to pass in class, does not allow for tqdm_kwargs supported by thread_map.
+    This class provides a thread safe way to use hooks"""
+
+    class HFModelProgressTrackerWithHook(HFModelProgressTracker):
+        hooks = hook if isinstance(hook, list) else [hook]
+
+    return HFModelProgressTrackerWithHook
diff --git a/tests/unitary/with_extras/aqua/test_tqdm.py b/tests/unitary/with_extras/aqua/test_tqdm.py
index b6039481d..95a29bb99 100644
--- a/tests/unitary/with_extras/aqua/test_tqdm.py
+++ b/tests/unitary/with_extras/aqua/test_tqdm.py
@@ -8,7 +8,7 @@
 
 from tqdm.contrib.concurrent import thread_map
 
-from ads.aqua.model.utils import HFModelProgressTracker
+from ads.aqua.model.utils import prepare_progress_tracker_with_callback
 
 
 def test_custom_tqdm_thread_map():
@@ -21,13 +21,12 @@ def process(item):
     items = list(range(0, 10))
     callback = MagicMock()
 
-    HFModelProgressTracker.register_hooks(callback)
-    # print(HFModelProgressTracker.hooks)
+    clz = prepare_progress_tracker_with_callback(callback)
     thread_map(
         process,
         items,
         desc=f"Fetching {len(items)} items",
-        tqdm_class=HFModelProgressTracker,
+        tqdm_class=clz,
         max_workers=3,
     )
     callback.assert_called()
@@ -35,8 +34,8 @@ def process(item):
 
 def test_custom_tqdm():
     callback = MagicMock()
-    HFModelProgressTracker.register_hooks(callback)
-    with HFModelProgressTracker(range(10), desc="Processing") as bar:
+    clz = prepare_progress_tracker_with_callback(callback)
+    with clz(range(10), desc="Processing") as bar:
         for _ in bar:
             # Simulate work
             import time

From 4813195095b47b65fd5fdda1c6c6c6f07dcf6b96 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 28 Feb 2025 09:22:59 -0800
Subject: [PATCH 15/16] add oci cli deps

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index e4936a6a6..f3b28279a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -205,6 +205,7 @@ pii = [
 ]
 llm = ["langchain>=0.2", "langchain-community", "langchain_openai", "pydantic>=2,<3", "evaluate>=0.4.0"]
 aqua = [
+  "oci-cli",,
   "jupyter_server",
   "tornado",
   "notebook~=6.5",

From 8de4ad863f41a7b1079abe1951a8aa4d659aba00 Mon Sep 17 00:00:00 2001
From: Mayoor Rao <mayoor.rao@oracle.com>
Date: Fri, 28 Feb 2025 10:04:58 -0800
Subject: [PATCH 16/16] add oci cli deps

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index f3b28279a..49686bb8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -205,7 +205,7 @@ pii = [
 ]
 llm = ["langchain>=0.2", "langchain-community", "langchain_openai", "pydantic>=2,<3", "evaluate>=0.4.0"]
 aqua = [
-  "oci-cli",,
+  "oci-cli",
   "jupyter_server",
   "tornado",
   "notebook~=6.5",