diff --git a/README-development.md b/README-development.md index 9163b629a..fdecd7596 100644 --- a/README-development.md +++ b/README-development.md @@ -248,7 +248,7 @@ All the unit tests can be found [here](https://github.com/oracle/accelerated-dat The following commands detail how the unit tests can be run. ``` # Run all tests in AQUA project -python -m pytest -q tests/unitary/with_extras/aqua/test_deployment.py +python -m pytest -q tests/unitary/with_extras/aqua/* # Run all tests specific to a module within in AQUA project (ex. test_deployment.py, test_model.py, etc.) python -m pytest -q tests/unitary/with_extras/aqua/test_deployment.py diff --git a/ads/aqua/app.py b/ads/aqua/app.py index 480bfd544..72a7e29e1 100644 --- a/ads/aqua/app.py +++ b/ads/aqua/app.py @@ -6,9 +6,11 @@ import os import traceback from dataclasses import fields +from datetime import datetime, timedelta from typing import Any, Dict, Optional, Union import oci +from cachetools import TTLCache, cached from oci.data_science.models import UpdateModelDetails, UpdateModelProvenanceDetails from ads import set_auth @@ -269,6 +271,7 @@ def if_artifact_exist(self, model_id: str, **kwargs) -> bool: logger.info(f"Artifact not found in model {model_id}.") return False + @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=1), timer=datetime.now)) def get_config( self, model_id: str, @@ -337,6 +340,9 @@ def get_config( config_file_path = os.path.join(config_path, config_file_name) if is_path_exists(config_file_path): try: + logger.debug( + f"Loading config: `{config_file_name}` from `{config_path}`" + ) config = load_config( config_path, config_file_name=config_file_name, diff --git a/ads/aqua/common/entities.py b/ads/aqua/common/entities.py index 13ebf294a..bd7b2ede8 100644 --- a/ads/aqua/common/entities.py +++ b/ads/aqua/common/entities.py @@ -2,10 +2,14 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import re from typing import Any, Dict, Optional from oci.data_science.models import Model -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator + +from ads.aqua import logger +from ads.aqua.config.utils.serializer import Serializable class ContainerSpec: @@ -25,7 +29,6 @@ class ContainerSpec: class ModelConfigResult(BaseModel): """ Represents the result of getting the AQUA model configuration. - Attributes: model_details (Dict[str, Any]): A dictionary containing model details extracted from OCI. config (Dict[str, Any]): A dictionary of the loaded configuration. @@ -42,3 +45,222 @@ class Config: extra = "ignore" arbitrary_types_allowed = True protected_namespaces = () + + +class GPUSpecs(Serializable): + """ + Represents the GPU specifications for a compute instance. + """ + + gpu_memory_in_gbs: Optional[int] = Field( + default=None, description="The amount of GPU memory available (in GB)." + ) + gpu_count: Optional[int] = Field( + default=None, description="The number of GPUs available." + ) + gpu_type: Optional[str] = Field( + default=None, description="The type of GPU (e.g., 'V100, A100, H100')." + ) + + +class GPUShapesIndex(Serializable): + """ + Represents the index of GPU shapes. + + Attributes + ---------- + shapes (Dict[str, GPUSpecs]): A mapping of compute shape names to their GPU specifications. + """ + + shapes: Dict[str, GPUSpecs] = Field( + default_factory=dict, + description="Mapping of shape names to GPU specifications.", + ) + + +class ComputeShapeSummary(Serializable): + """ + Represents the specifications of a compute instance's shape. + """ + + core_count: Optional[int] = Field( + default=None, description="The number of CPU cores available." + ) + memory_in_gbs: Optional[int] = Field( + default=None, description="The amount of memory (in GB) available." 
+    )
+    name: Optional[str] = Field(
+        default=None, description="The name identifier of the compute shape."
+    )
+    shape_series: Optional[str] = Field(
+        default=None, description="The series or category of the compute shape."
+    )
+    gpu_specs: Optional[GPUSpecs] = Field(
+        default=None,
+        description="The GPU specifications associated with the compute shape.",
+    )
+
+    @model_validator(mode="after")
+    @classmethod
+    def set_gpu_specs(cls, model: "ComputeShapeSummary") -> "ComputeShapeSummary":
+        """
+        Validates and populates GPU specifications if the shape_series indicates a GPU-based shape.
+
+        - If the shape_series contains "GPU", the validator first checks if the shape name exists
+        in the GPU_SPECS dictionary. If found, it creates a GPUSpecs instance with the corresponding data.
+        - If the shape is not found in the GPU_SPECS, it attempts to extract the GPU count from the shape name
+        using a regex pattern (looking for a number following a dot at the end of the name).
+
+        The information about shapes is taken from: https://docs.oracle.com/en-us/iaas/data-science/using/supported-shapes.htm
+
+        Returns:
+            ComputeShapeSummary: The updated instance with gpu_specs populated if applicable.
+        """
+        try:
+            if (
+                model.shape_series
+                and "GPU" in model.shape_series.upper()
+                and model.name
+                and not model.gpu_specs
+            ):
+                # Try to extract gpu_count from the shape name using a regex (e.g., "VM.GPU3.2" -> gpu_count=2)
+                match = re.search(r"\.(\d+)$", model.name)
+                if match:
+                    gpu_count = int(match.group(1))
+                    model.gpu_specs = GPUSpecs(gpu_count=gpu_count)
+        except Exception as err:
+            logger.debug(
+                f"Error occurred in attempt to extract GPU specification for the {model.name}. "
+                f"Details: {err}"
+            )
+        return model
+
+
+class AquaMultiModelRef(Serializable):
+    """
+    Lightweight model descriptor used for multi-model deployment.
+
+    This class only contains essential details
+    required to fetch complete model metadata and deploy models.
+ + Attributes + ---------- + model_id : str + The unique identifier of the model. + model_name : Optional[str] + The name of the model. + gpu_count : Optional[int] + Number of GPUs required for deployment. + env_var : Optional[Dict[str, Any]] + Optional environment variables to override during deployment. + artifact_location : Optional[str] + Artifact path of model in the multimodel group. + """ + + model_id: str = Field(..., description="The model OCID to deploy.") + model_name: Optional[str] = Field(None, description="The name of model.") + gpu_count: Optional[int] = Field( + None, description="The gpu count allocation for the model." + ) + env_var: Optional[dict] = Field( + default_factory=dict, description="The environment variables of the model." + ) + artifact_location: Optional[str] = Field( + None, description="Artifact path of model in the multimodel group." + ) + + class Config: + extra = "ignore" + protected_namespaces = () + + +class ContainerPath(Serializable): + """ + Represents a parsed container path, extracting the path, name, and version. + + This model is designed to parse a container path string of the format + ':'. It extracts the following components: + - `path`: The full path up to the version. + - `name`: The last segment of the path, representing the image name. + - `version`: The version number following the final colon. + + Example Usage: + -------------- + >>> container = ContainerPath(full_path="iad.ocir.io/ociodscdev/odsc-llm-evaluate:0.1.2.9") + >>> container.path + 'iad.ocir.io/ociodscdev/odsc-llm-evaluate' + >>> container.name + 'odsc-llm-evaluate' + >>> container.version + '0.1.2.9' + + >>> container = ContainerPath(full_path="custom-scheme://path/to/versioned-model:2.5.1") + >>> container.path + 'custom-scheme://path/to/versioned-model' + >>> container.name + 'versioned-model' + >>> container.version + '2.5.1' + + Attributes + ---------- + full_path : str + The complete container path string to be parsed. 
+    path : Optional[str]
+        The full path up to the version (e.g., 'iad.ocir.io/ociodscdev/odsc-llm-evaluate').
+    name : Optional[str]
+        The image name, which is the last segment of `path` (e.g., 'odsc-llm-evaluate').
+    version : Optional[str]
+        The version number following the final colon in the path (e.g., '0.1.2.9').
+
+    Methods
+    -------
+    validate(values: Any) -> Any
+        Validates and parses the `full_path`, extracting `path`, `name`, and `version`.
+    """
+
+    full_path: str
+    path: Optional[str] = None
+    name: Optional[str] = None
+    version: Optional[str] = None
+
+    @model_validator(mode="before")
+    @classmethod
+    def validate(cls, values: Any) -> Any:
+        """
+        Validates and parses the full container path, extracting the image path, image name, and version.
+
+        Parameters
+        ----------
+        values : dict
+            The dictionary of values being validated, containing 'full_path'.
+
+        Returns
+        -------
+        dict
+            Updated values dictionary with extracted 'path', 'name', and 'version'.
+        """
+        full_path = values.get("full_path", "").strip()
+
+        # Regex to parse <image_path>:<image_version>
+        match = re.match(
+            r"^(?P<image_path>.+?)(?::(?P<image_version>[\w\.]+))?$", full_path
+        )
+
+        if not match:
+            raise ValueError(
+                "Invalid container path format. 
Expected format: ':'" + ) + + # Extract image_path and version + values["path"] = match.group("image_path") + values["version"] = match.group("image_version") + + # Extract image_name as the last segment of image_path + values["name"] = values["path"].split("/")[-1] + + return values + + class Config: + extra = "ignore" + protected_namespaces = () diff --git a/ads/aqua/common/enums.py b/ads/aqua/common/enums.py index 16686ac9f..6144877ee 100644 --- a/ads/aqua/common/enums.py +++ b/ads/aqua/common/enums.py @@ -25,6 +25,7 @@ class Tags(ExtendedEnum): AQUA_TAG = "OCI_AQUA" AQUA_SERVICE_MODEL_TAG = "aqua_service_model" AQUA_FINE_TUNED_MODEL_TAG = "aqua_fine_tuned_model" + AQUA_MODEL_ID_TAG = "aqua_model_id" AQUA_MODEL_NAME_TAG = "aqua_model_name" AQUA_EVALUATION = "aqua_evaluation" AQUA_FINE_TUNING = "aqua_finetuning" @@ -34,6 +35,7 @@ class Tags(ExtendedEnum): AQUA_EVALUATION_MODEL_ID = "evaluation_model_id" MODEL_FORMAT = "model_format" MODEL_ARTIFACT_FILE = "model_file" + MULTIMODEL_TYPE_TAG = "aqua_multimodel" class InferenceContainerType(ExtendedEnum): @@ -44,6 +46,7 @@ class InferenceContainerType(ExtendedEnum): class InferenceContainerTypeFamily(ExtendedEnum): AQUA_VLLM_CONTAINER_FAMILY = "odsc-vllm-serving" + AQUA_VLLM_V1_CONTAINER_FAMILY = "odsc-vllm-serving-v1" AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving" AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving" diff --git a/ads/aqua/common/utils.py b/ads/aqua/common/utils.py index 1272b4329..c36fc3cab 100644 --- a/ads/aqua/common/utils.py +++ b/ads/aqua/common/utils.py @@ -17,8 +17,9 @@ from functools import wraps from pathlib import Path from string import Template -from typing import List, Union +from typing import Any, Dict, List, Optional, Union +import fsspec import oci from cachetools import TTLCache, cached from huggingface_hub.constants import HF_HUB_CACHE @@ -32,8 +33,9 @@ ) from oci.data_science.models import JobRun, Model from oci.object_storage.models import ObjectSummary -from pydantic import 
ValidationError +from pydantic import BaseModel, ValidationError +from ads.aqua.common.entities import GPUShapesIndex from ads.aqua.common.enums import ( InferenceContainerParamType, InferenceContainerType, @@ -61,6 +63,7 @@ VLLM_INFERENCE_RESTRICTED_PARAMS, ) from ads.aqua.data import AquaResourceIdentifier +from ads.common import auth as authutil from ads.common.auth import AuthState, default_signer from ads.common.decorator.threaded import threaded from ads.common.extended_enum import ExtendedEnum @@ -76,6 +79,7 @@ from ads.config import ( AQUA_MODEL_DEPLOYMENT_FOLDER, AQUA_SERVICE_MODELS_BUCKET, + CONDA_BUCKET_NAME, CONDA_BUCKET_NS, TENANCY_OCID, ) @@ -247,7 +251,7 @@ def load_config(file_path: str, config_file_name: str, **kwargs) -> dict: return config -def list_os_files_with_extension(oss_path: str, extension: str) -> [str]: +def list_os_files_with_extension(oss_path: str, extension: str) -> List[str]: """ List files in the specified directory with the given extension. @@ -934,6 +938,25 @@ def get_combined_params(params1: str = None, params2: str = None) -> str: return " ".join(combined_params) +def build_params_string(params: dict) -> str: + """Builds params string from params dict + + Parameters + ---------- + params: + Parameter dict with key-value pairs + + Returns + ------- + A params string. + """ + return ( + " ".join(f"{name} {value}" for name, value in params.items()).strip() + if params + else UNKNOWN + ) + + def copy_model_config(artifact_path: str, os_path: str, auth: dict = None): """Copies the aqua model config folder from the artifact path to the user provided object storage path. The config folder is overwritten if the files already exist at the destination path. 
@@ -1214,3 +1237,80 @@ def build_pydantic_error_message(ex: ValidationError): for e in ex.errors() if "loc" in e and e["loc"] } or "; ".join(e["msg"] for e in ex.errors()) + + +def is_pydantic_model(obj: object) -> bool: + """ + Returns True if obj is a Pydantic model class or an instance of a Pydantic model. + + Args: + obj: The object or class to check. + + Returns: + bool: True if obj is a subclass or instance of BaseModel, False otherwise. + """ + cls = obj if isinstance(obj, type) else type(obj) + return issubclass(cls, BaseModel) + + +@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now)) +def load_gpu_shapes_index( + auth: Optional[Dict] = None, +) -> GPUShapesIndex: + """ + Loads the GPU shapes index from Object Storage or a local resource folder. + + The function first attempts to load the file from an Object Storage bucket using fsspec. + If the loading fails (due to connection issues, missing file, etc.), it falls back to + loading the index from a local file. + + Parameters + ---------- + auth: (Dict, optional). Defaults to None. + The default authentication is set using `ads.set_auth` API. If you need to override the + default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate + authentication signer and kwargs required to instantiate IdentityClient object. + + Returns + ------- + GPUShapesIndex: The parsed GPU shapes index. + + Raises + ------ + FileNotFoundError: If the GPU shapes index cannot be found in either Object Storage or locally. + json.JSONDecodeError: If the JSON is malformed. + """ + file_name = "gpu_shapes_index.json" + data: Dict[str, Any] = {} + + # Check if the CONDA_BUCKET_NS environment variable is set. + if CONDA_BUCKET_NS: + try: + auth = auth or authutil.default_signer() + # Construct the object storage path. Adjust bucket name and path as needed. 
+ storage_path = f"oci://{CONDA_BUCKET_NAME}@{CONDA_BUCKET_NS}/{file_name}/1" + logger.debug("Loading GPU shapes index from Object Storage") + with fsspec.open(storage_path, mode="r", **auth) as file_obj: + data = json.load(file_obj) + logger.debug("Successfully loaded GPU shapes index.") + except Exception as ex: + logger.debug( + f"Failed to load GPU shapes index from Object Storage. Details: {ex}" + ) + + # If loading from Object Storage failed, load from the local resource folder. + if not data: + try: + local_path = os.path.join( + os.path.dirname(__file__), "../resources", file_name + ) + logger.debug(f"Loading GPU shapes index from {local_path}.") + with open(local_path) as file_obj: + data = json.load(file_obj) + logger.debug("Successfully loaded GPU shapes index.") + except Exception as e: + logger.debug( + f"Failed to load GPU shapes index from {local_path}. Details: {e}" + ) + + return GPUShapesIndex(**data) diff --git a/ads/aqua/config/container_config.py b/ads/aqua/config/container_config.py index 80a668c7b..01e2f0d50 100644 --- a/ads/aqua/config/container_config.py +++ b/ads/aqua/config/container_config.py @@ -8,6 +8,13 @@ from ads.aqua.common.entities import ContainerSpec from ads.aqua.config.utils.serializer import Serializable +from ads.common.extended_enum import ExtendedEnum + + +class Usage(ExtendedEnum): + INFERENCE = "inference" + BATCH_INFERENCE = "batch_inference" + MULTI_MODEL = "multi_model" class AquaContainerConfigSpec(Serializable): @@ -86,6 +93,7 @@ class AquaContainerConfigItem(Serializable): class Config: extra = "allow" + protected_namespaces = () class AquaContainerConfig(Serializable): diff --git a/ads/aqua/constants.py b/ads/aqua/constants.py index 107570478..0f7a501ba 100644 --- a/ads/aqua/constants.py +++ b/ads/aqua/constants.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """This module defines constants used in ads.aqua module.""" @@ -29,14 +29,17 @@ READY_TO_FINE_TUNE_STATUS = "TRUE" PRIVATE_ENDPOINT_TYPE = "MODEL_DEPLOYMENT" AQUA_GA_LIST = ["id19sfcrra6z"] +AQUA_MULTI_MODEL_CONFIG = "MULTI_MODEL_CONFIG" AQUA_MODEL_TYPE_SERVICE = "service" AQUA_MODEL_TYPE_CUSTOM = "custom" +AQUA_MODEL_TYPE_MULTI = "multi_model" AQUA_MODEL_ARTIFACT_CONFIG = "config.json" AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME = "_name_or_path" AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE = "model_type" AQUA_MODEL_ARTIFACT_FILE = "model_file" HF_METADATA_FOLDER = ".cache/" HF_LOGIN_DEFAULT_TIMEOUT = 2 +MODEL_NAME_DELIMITER = ";" TRAINING_METRICS_FINAL = "training_metrics_final" VALIDATION_METRICS_FINAL = "validation_metrics_final" diff --git a/ads/aqua/evaluation/entities.py b/ads/aqua/evaluation/entities.py index bb165edd8..7c6651931 100644 --- a/ads/aqua/evaluation/entities.py +++ b/ads/aqua/evaluation/entities.py @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ """ @@ -91,6 +91,11 @@ class CreateAquaEvaluationDetails(Serializable): force_overwrite: Optional[bool] = False freeform_tags: Optional[dict] = None defined_tags: Optional[dict] = None + container_image_uri: Optional[str] = Field( + None, + description="Image URI for evaluation container runtime. 
" + "The service managed container will be used by default.", + ) class Config: extra = "ignore" diff --git a/ads/aqua/evaluation/evaluation.py b/ads/aqua/evaluation/evaluation.py index 721bb6c89..877030459 100644 --- a/ads/aqua/evaluation/evaluation.py +++ b/ads/aqua/evaluation/evaluation.py @@ -24,6 +24,7 @@ from ads.aqua import logger from ads.aqua.app import AquaApp from ads.aqua.common import utils +from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import ( DataScienceResource, Resource, @@ -76,6 +77,7 @@ CreateAquaEvaluationDetails, ) from ads.aqua.evaluation.errors import EVALUATION_JOB_EXIT_CODE_MESSAGE +from ads.aqua.model.constants import ModelCustomMetadataFields from ads.common.auth import default_signer from ads.common.object_storage_details import ObjectStorageDetails from ads.common.utils import UNKNOWN, get_console_link, get_files, get_log_links @@ -96,6 +98,7 @@ from ads.model.model_metadata import ( MetadataTaxonomyKeys, ModelCustomMetadata, + ModelCustomMetadataItem, ModelProvenanceMetadata, ModelTaxonomyMetadata, ) @@ -138,13 +141,62 @@ def create( create_aqua_evaluation_details: CreateAquaEvaluationDetails = None, **kwargs, ) -> "AquaEvaluationSummary": - """Creates Aqua evaluation for resource. + """Creates Aqua evaluation for resource.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#model-evaluation Parameters ---------- create_aqua_evaluation_details: CreateAquaEvaluationDetails The CreateAquaEvaluationDetails data class which contains all required and optional fields to create the aqua evaluation. + kwargs: + evaluation_source_id: str + The evaluation source id. Must be either model or model deployment ocid. + evaluation_name: str + The name for evaluation. + dataset_path: str + The dataset path for the evaluation. Could be either a local path from notebook session + or an object storage path. 
+ report_path: str + The report path for the evaluation. Must be an object storage path. + model_parameters: dict + The parameters for the evaluation. + shape_name: str + The shape name for the evaluation job infrastructure. + memory_in_gbs: float + The memory in gbs for the shape selected. + ocpus: float + The ocpu count for the shape selected. + block_storage_size: int + The storage for the evaluation job infrastructure. + compartment_id: (str, optional). Defaults to `None`. + The compartment id for the evaluation. + project_id: (str, optional). Defaults to `None`. + The project id for the evaluation. + evaluation_description: (str, optional). Defaults to `None`. + The description for evaluation + experiment_id: (str, optional). Defaults to `None`. + The evaluation model version set id. If provided, + evaluation model will be associated with it. + experiment_name: (str, optional). Defaults to `None`. + The evaluation model version set name. If provided, + the model version set with the same name will be used if exists, + otherwise a new model version set will be created with the name. + experiment_description: (str, optional). Defaults to `None`. + The description for the evaluation model version set. + log_group_id: (str, optional). Defaults to `None`. + The log group id for the evaluation job infrastructure. + log_id: (str, optional). Defaults to `None`. + The log id for the evaluation job infrastructure. + metrics: (list, optional). Defaults to `None`. + The metrics for the evaluation. + force_overwrite: (bool, optional). Defaults to `False`. + Whether to force overwrite the existing file in object storage. + freeform_tags: (dict, optional) + Freeform tags for the evaluation model + defined_tags: (dict, optional) + Defined tags for the evaluation model + kwargs: The kwargs for creating CreateAquaEvaluationDetails instance if no create_aqua_evaluation_details provided. 
@@ -183,6 +235,23 @@ def create( evaluation_source = ModelDeployment.from_id( create_aqua_evaluation_details.evaluation_source_id ) + + if Tags.MULTIMODEL_TYPE_TAG in evaluation_source.freeform_tags: + multi_model_id = evaluation_source.freeform_tags.get( + Tags.AQUA_MODEL_ID_TAG, UNKNOWN + ) + + if not multi_model_id: + raise AquaRuntimeError( + f"Invalid multi model deployment {multi_model_id}." + f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment." + ) + + aqua_model = DataScienceModel.from_id(multi_model_id) + AquaEvaluationApp.validate_model_name( + aqua_model, create_aqua_evaluation_details + ) + try: if ( evaluation_source.runtime.type @@ -413,8 +482,11 @@ def create( JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING ) - container_image = self._get_evaluation_container( - create_aqua_evaluation_details.evaluation_source_id + container_image = ( + create_aqua_evaluation_details.container_image_uri + or self._get_evaluation_container( + create_aqua_evaluation_details.evaluation_source_id + ) ) evaluation_job.with_runtime( @@ -432,9 +504,7 @@ def create( metrics=create_aqua_evaluation_details.metrics, inference_configuration=eval_inference_configuration or {}, ) - ).create( - **kwargs - ) ## TODO: decide what parameters will be needed + ).create(**kwargs) ## TODO: decide what parameters will be needed logger.debug( f"Successfully created evaluation job {evaluation_job.id} for {create_aqua_evaluation_details.evaluation_source_id}." ) @@ -551,6 +621,120 @@ def create( parameters=AquaEvalParams(), ) + @staticmethod + def validate_model_name( + evaluation_source: DataScienceModel, + create_aqua_evaluation_details: CreateAquaEvaluationDetails, + ) -> None: + """ + Validates the user input for the model name when creating an Aqua evaluation. + + This function verifies that: + - The model group is not empty. + - The model multi metadata is present in the DataScienceModel metadata. + - The user provided a non-empty model name. 
+ - The provided model name exists in the DataScienceModel metadata. + - The deployment configuration contains core metadata required for validation. + + Parameters + ---------- + evaluation_source : DataScienceModel + The DataScienceModel object containing metadata about each model in the deployment. + create_aqua_evaluation_details : CreateAquaEvaluationDetails + Contains required and optional fields for creating the Aqua evaluation. + + Raises + ------ + AquaValueError + If the user fails to provide a model name or if the provided model name does not match + any of the valid model names in the deployment metadata. + AquaRuntimeError + If the metadata is missing the model group count or if the model group count is invalid. + """ + user_model_parameters = create_aqua_evaluation_details.model_parameters + custom_metadata_list = evaluation_source.custom_metadata_list + user_model_name = user_model_parameters.get("model") + + # Ensure that a non-empty model name was provided. + if not user_model_name: + error_message = ( + "No model name was provided for evaluation. For multi-model deployment, " + "a model must be specified in the model parameters." + ) + logger.debug(error_message) + raise AquaValueError(error_message) + + # Retrieve and convert the model group count from metadata. + model_count = custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT + ) + try: + model_group_count = int(model_count.value) + except Exception as ex: + error_message = ( + "Missing or invalid `MULTIMODEL_GROUP_COUNT` " + f"in custom metadata for evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}'. " + f"Details: {ex}" + ) + logger.error(error_message) + + if model_group_count < 1: + logger.error( + f"Invalid model group count: {model_group_count} for evaluation source ID " + f"'{create_aqua_evaluation_details.evaluation_source_id}'. A valid multi-model deployment " + f"requires at least one model." 
+ ) + raise AquaRuntimeError( + f"Unable to retrieve details for the multi-model deployment evaluation. A valid multi-model deployment " + f"must include at least one model. However, the provided evaluation source ID " + f"'{create_aqua_evaluation_details.evaluation_source_id}' does not contain any information about deployed models." + ) + + multi_model_metadata_value = custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_METADATA, + ModelCustomMetadataItem(key=ModelCustomMetadataFields.MULTIMODEL_METADATA), + ).value + + if not multi_model_metadata_value: + error_message = ( + f"Required model metadata is missing for evaluation source ID: {evaluation_source.id}. " + f"A valid multi-model deployment requires {ModelCustomMetadataFields.MULTIMODEL_METADATA}. " + "Please recreate the model deployment and retry the evaluation, as an issue occurred during the initialization of the model group." + ) + logger.debug(error_message) + raise AquaRuntimeError(error_message) + + try: + multi_model_metadata = json.loads( + evaluation_source.dsc_model.get_custom_metadata_artifact( + metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA + ).decode("utf-8") + ) + except Exception as ex: + error_message = ( + f"Error fetching {ModelCustomMetadataFields.MULTIMODEL_METADATA} " + f"from custom metadata for evaluation source ID '{evaluation_source.id}'. " + f"Details: {ex}" + ) + logger.error(error_message) + raise AquaRuntimeError(error_message) from ex + + # Build the list of valid model names from custom metadata. + model_names = [ + AquaMultiModelRef(**metadata).model_name + for metadata in multi_model_metadata + ] + + # Check if the provided model name is among the valid names. + if user_model_name not in model_names: + error_message = ( + f"Provided model name '{user_model_name}' does not match any valid model names {model_names} " + f"for evaluation source ID '{create_aqua_evaluation_details.evaluation_source_id}'. 
" + "Please provide the correct model name." + ) + logger.debug(error_message) + raise AquaValueError(error_message) + def _build_evaluation_runtime( self, evaluation_id: str, @@ -1393,7 +1577,7 @@ def _fetch_jobrun( ) except Exception as e: logger.debug( - f"Failed to retreive job run: {jobrun_id}. " f"DEBUG INFO: {str(e)}" + f"Failed to retreive job run: {jobrun_id}. DEBUG INFO: {str(e)}" ) jobrun = None diff --git a/ads/aqua/extension/base_handler.py b/ads/aqua/extension/base_handler.py index 80af4fc44..f56e4bf36 100644 --- a/ads/aqua/extension/base_handler.py +++ b/ads/aqua/extension/base_handler.py @@ -15,6 +15,7 @@ from tornado.web import Application, HTTPError from ads.aqua import logger +from ads.aqua.common.utils import is_pydantic_model from ads.config import AQUA_TELEMETRY_BUCKET, AQUA_TELEMETRY_BUCKET_NS from ads.telemetry.client import TelemetryClient @@ -40,7 +41,7 @@ def __init__( def prepare(self, *args, **kwargs): """The base class prepare is not required for Aqua""" pass - + @staticmethod def serialize(obj: Any): """Serialize the object. @@ -52,6 +53,9 @@ def serialize(obj: Any): if is_dataclass(obj): return asdict(obj) + if is_pydantic_model(obj): + return obj.model_dump() + return str(obj) def finish(self, payload=None): # pylint: disable=W0221 diff --git a/ads/aqua/extension/deployment_handler.py b/ads/aqua/extension/deployment_handler.py index 2a3e827c3..4c4fc2ac5 100644 --- a/ads/aqua/extension/deployment_handler.py +++ b/ads/aqua/extension/deployment_handler.py @@ -1,7 +1,8 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +from typing import List, Union from urllib.parse import urlparse from tornado.web import HTTPError @@ -11,7 +12,7 @@ from ads.aqua.extension.errors import Errors from ads.aqua.modeldeployment import AquaDeploymentApp, MDInferenceResponse from ads.aqua.modeldeployment.entities import ModelParams -from ads.config import COMPARTMENT_OCID, PROJECT_OCID +from ads.config import COMPARTMENT_OCID class AquaDeploymentHandler(AquaAPIhandler): @@ -20,7 +21,7 @@ class AquaDeploymentHandler(AquaAPIhandler): Methods ------- - get(self, id="") + get(self, id: Union[str, List[str]]) Retrieves a list of AQUA deployments or model info or logs by ID. post(self, *args, **kwargs) Creates a new AQUA deployment. @@ -30,6 +31,8 @@ class AquaDeploymentHandler(AquaAPIhandler): Lists all the AQUA deployments. get_deployment_config(self, model_id) Gets the deployment config for Aqua model. + list_shapes(self) + Lists the valid model deployment shapes. Raises ------ @@ -37,16 +40,23 @@ class AquaDeploymentHandler(AquaAPIhandler): """ @handle_exceptions - def get(self, id=""): + def get(self, id: Union[str, List[str]] = None): """Handle GET request.""" url_parse = urlparse(self.request.path) paths = url_parse.path.strip("/") if paths.startswith("aqua/deployments/config"): - if not id: + if not id or not isinstance(id, str): raise HTTPError( - 400, f"The request {self.request.path} requires model id." + 400, + f"Invalid request format for {self.request.path}. 
" + "Expected a single model ID or a comma-separated list of model IDs.", ) - return self.get_deployment_config(id) + id = id.replace(" ", "") + return self.get_deployment_config( + model_id=id.split(",") if "," in id else id + ) + elif paths.startswith("aqua/deployments/shapes"): + return self.list_shapes() elif paths.startswith("aqua/deployments"): if not id: return self.list() @@ -98,71 +108,7 @@ def post(self, *args, **kwargs): # noqa: ARG002 if not input_data: raise HTTPError(400, Errors.NO_INPUT_DATA) - # required input parameters - display_name = input_data.get("display_name") - if not display_name: - raise HTTPError( - 400, Errors.MISSING_REQUIRED_PARAMETER.format("display_name") - ) - instance_shape = input_data.get("instance_shape") - if not instance_shape: - raise HTTPError( - 400, Errors.MISSING_REQUIRED_PARAMETER.format("instance_shape") - ) - model_id = input_data.get("model_id") - if not model_id: - raise HTTPError(400, Errors.MISSING_REQUIRED_PARAMETER.format("model_id")) - - compartment_id = input_data.get("compartment_id", COMPARTMENT_OCID) - project_id = input_data.get("project_id", PROJECT_OCID) - log_group_id = input_data.get("log_group_id") - access_log_id = input_data.get("access_log_id") - predict_log_id = input_data.get("predict_log_id") - description = input_data.get("description") - instance_count = input_data.get("instance_count") - bandwidth_mbps = input_data.get("bandwidth_mbps") - web_concurrency = input_data.get("web_concurrency") - server_port = input_data.get("server_port") - health_check_port = input_data.get("health_check_port") - env_var = input_data.get("env_var") - container_family = input_data.get("container_family") - ocpus = input_data.get("ocpus") - memory_in_gbs = input_data.get("memory_in_gbs") - model_file = input_data.get("model_file") - private_endpoint_id = input_data.get("private_endpoint_id") - container_image_uri = input_data.get("container_image_uri") - cmd_var = input_data.get("cmd_var") - freeform_tags = 
input_data.get("freeform_tags") - defined_tags = input_data.get("defined_tags") - - self.finish( - AquaDeploymentApp().create( - compartment_id=compartment_id, - project_id=project_id, - model_id=model_id, - display_name=display_name, - description=description, - instance_count=instance_count, - instance_shape=instance_shape, - log_group_id=log_group_id, - access_log_id=access_log_id, - predict_log_id=predict_log_id, - bandwidth_mbps=bandwidth_mbps, - web_concurrency=web_concurrency, - server_port=server_port, - health_check_port=health_check_port, - env_var=env_var, - container_family=container_family, - ocpus=ocpus, - memory_in_gbs=memory_in_gbs, - model_file=model_file, - private_endpoint_id=private_endpoint_id, - container_image_uri=container_image_uri, - cmd_var=cmd_var, - freeform_tags=freeform_tags, - defined_tags=defined_tags, - ) - ) + self.finish(AquaDeploymentApp().create(**input_data)) def read(self, id): """Read the information of an Aqua model deployment.""" @@ -181,9 +127,52 @@ def list(self): ) ) - def get_deployment_config(self, model_id): - """Gets the deployment config for Aqua model.""" - return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id)) + def get_deployment_config(self, model_id: Union[str, List[str]]): + """ + Retrieves the deployment configuration for one or more Aqua models. + + Parameters + ---------- + model_id : Union[str, List[str]] + A single model ID (str) or a list of model IDs (List[str]). + + Returns + ------- + None + The function sends the deployment configuration as a response. 
+ """ + app = AquaDeploymentApp() + + compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID) + + if isinstance(model_id, list): + # Handle multiple model deployment + primary_model_id = self.get_argument("primary_model_id", default=None) + deployment_config = app.get_multimodel_deployment_config( + model_ids=model_id, + primary_model_id=primary_model_id, + compartment_id=compartment_id, + ) + else: + # Handle single model deployment + deployment_config = app.get_deployment_config(model_id=model_id) + + return self.finish(deployment_config) + + def list_shapes(self): + """ + Lists the valid model deployment shapes. + + Returns + ------- + List[ComputeShapeSummary]: + The list of the model deployment shapes. + """ + compartment_id = self.get_argument("compartment_id", default=COMPARTMENT_OCID) + + return self.finish( + AquaDeploymentApp().list_shapes(compartment_id=compartment_id) + ) class AquaDeploymentInferenceHandler(AquaAPIhandler): @@ -259,9 +248,10 @@ class AquaDeploymentParamsHandler(AquaAPIhandler): def get(self, model_id): """Handle GET request.""" instance_shape = self.get_argument("instance_shape") + gpu_count = self.get_argument("gpu_count", default=None) return self.finish( AquaDeploymentApp().get_deployment_default_params( - model_id=model_id, instance_shape=instance_shape + model_id=model_id, instance_shape=instance_shape, gpu_count=gpu_count ) ) @@ -300,6 +290,7 @@ def post(self, *args, **kwargs): # noqa: ARG002 __handlers__ = [ ("deployments/?([^/]*)/params", AquaDeploymentParamsHandler), ("deployments/config/?([^/]*)", AquaDeploymentHandler), + ("deployments/shapes/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)", AquaDeploymentHandler), ("deployments/?([^/]*)/activate", AquaDeploymentHandler), ("deployments/?([^/]*)/deactivate", AquaDeploymentHandler), diff --git a/ads/aqua/finetuning/finetuning.py b/ads/aqua/finetuning/finetuning.py index c1e8945bd..5d8be4c36 100644 --- a/ads/aqua/finetuning/finetuning.py +++ 
b/ads/aqua/finetuning/finetuning.py @@ -87,13 +87,62 @@ class AquaFineTuningApp(AquaApp): def create( self, create_fine_tuning_details: CreateFineTuningDetails = None, **kwargs ) -> "AquaFineTuningSummary": - """Creates Aqua fine tuning for model. + """Creates Aqua fine tuning for model.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/f271ca63d12e3c256718f23a14d93da4b4fc086b/ai-quick-actions/cli-tips.md#create-fine-tuned-model Parameters ---------- create_fine_tuning_details: CreateFineTuningDetails The CreateFineTuningDetails data class which contains all required and optional fields to create the aqua fine tuning. + kwargs: + ft_source_id: str The fine tuning source id. Must be model OCID. + ft_name: str + The name for fine tuning. + dataset_path: str + The dataset path for fine tuning. Could be either a local path from notebook session + or an object storage path. + report_path: str + The report path for fine tuning. Must be an object storage path. + ft_parameters: dict + The parameters for fine tuning. + shape_name: str + The shape name for fine tuning job infrastructure. + replica: int + The replica for fine tuning job runtime. + validation_set_size: float + The validation set size for fine tuning job. Must be a float in between [0,1). + ft_description: (str, optional). Defaults to `None`. + The description for fine tuning. + compartment_id: (str, optional). Defaults to `None`. + The compartment id for fine tuning. + project_id: (str, optional). Defaults to `None`. + The project id for fine tuning. + experiment_id: (str, optional). Defaults to `None`. + The fine tuning model version set id. If provided, + fine tuning model will be associated with it. + experiment_name: (str, optional). Defaults to `None`. + The fine tuning model version set name. 
If provided, + the fine tuning version set with the same name will be used if exists, + otherwise a new model version set will be created with the name. + experiment_description: (str, optional). Defaults to `None`. + The description for fine tuning model version set. + block_storage_size: (int, optional). Defaults to 256. + The storage for fine tuning job infrastructure. + subnet_id: (str, optional). Defaults to `None`. + The custom egress for fine tuning job. + log_group_id: (str, optional). Defaults to `None`. + The log group id for fine tuning job infrastructure. + log_id: (str, optional). Defaults to `None`. + The log id for fine tuning job infrastructure. + watch_logs: (bool, optional). Defaults to `False`. + The flag to watch the job run logs when a fine-tuning job is created. + force_overwrite: (bool, optional). Defaults to `False`. + Whether to force overwrite the existing file in object storage. + freeform_tags: (dict, optional) + Freeform tags for the fine-tuning model + defined_tags: (dict, optional) + Defined tags for the fine-tuning model kwargs: The kwargs for creating CreateFineTuningDetails instance if no create_fine_tuning_details provided. 
diff --git a/ads/aqua/model/constants.py b/ads/aqua/model/constants.py index 4b9b7e585..9c5859671 100644 --- a/ads/aqua/model/constants.py +++ b/ads/aqua/model/constants.py @@ -18,6 +18,8 @@ class ModelCustomMetadataFields(ExtendedEnum): EVALUATION_CONTAINER = "evaluation-container" FINETUNE_CONTAINER = "finetune-container" DEPLOYMENT_CONTAINER_URI = "deployment-container-uri" + MULTIMODEL_GROUP_COUNT = "model_group_count" + MULTIMODEL_METADATA = "multi_model_metadata" class ModelTask(ExtendedEnum): @@ -34,6 +36,7 @@ class FineTuningMetricCategories(ExtendedEnum): class ModelType(ExtendedEnum): FT = "FT" # Fine Tuned Model BASE = "BASE" # Base model + MULTIMODEL = "MULTIMODEL" # TODO: merge metadata key used in create FT diff --git a/ads/aqua/model/enums.py b/ads/aqua/model/enums.py index 7e61001c6..1a21adabc 100644 --- a/ads/aqua/model/enums.py +++ b/ads/aqua/model/enums.py @@ -23,3 +23,8 @@ class FineTuningCustomMetadata(ExtendedEnum): VALIDATION_METRICS_FINAL = "val_metrics_final" TRAINING_METRICS_EPOCH = "train_metrics_epoch" VALIDATION_METRICS_EPOCH = "val_metrics_epoch" + + +class MultiModelSupportedTaskType(ExtendedEnum): + TEXT_GENERATION = "text-generation" + TEXT_GENERATION_ALT = "text_generation" diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py index b224112fe..b225b3745 100644 --- a/ads/aqua/model/model.py +++ b/ads/aqua/model/model.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import json import os import pathlib from datetime import datetime, timedelta @@ -14,6 +15,7 @@ from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID, logger from ads.aqua.app import AquaApp +from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import ( ConfigFolder, CustomInferenceContainerTypeFamily, @@ -42,7 +44,7 @@ read_file, upload_folder, ) -from ads.aqua.config.container_config import AquaContainerConfig +from ads.aqua.config.container_config import AquaContainerConfig, Usage from ads.aqua.constants import ( AQUA_MODEL_ARTIFACT_CONFIG, AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME, @@ -77,6 +79,7 @@ ImportModelDetails, ModelValidationResult, ) +from ads.aqua.model.enums import MultiModelSupportedTaskType from ads.common.auth import default_signer from ads.common.oci_resource import SEARCH_TYPE, OCIResource from ads.common.utils import UNKNOWN, get_console_link @@ -91,6 +94,7 @@ TENANCY_OCID, ) from ads.model import DataScienceModel +from ads.model.common.utils import MetadataArtifactPathType from ads.model.model_metadata import ( MetadataCustomCategory, ModelCustomMetadata, @@ -135,40 +139,45 @@ class AquaModelApp(AquaApp): @telemetry(entry_point="plugin=model&action=create", name="aqua") def create( self, - model_id: str, - project_id: str, - compartment_id: str = None, - freeform_tags: Optional[dict] = None, - defined_tags: Optional[dict] = None, + model_id: Union[str, AquaMultiModelRef], + project_id: Optional[str] = None, + compartment_id: Optional[str] = None, + freeform_tags: Optional[Dict] = None, + defined_tags: Optional[Dict] = None, **kwargs, ) -> DataScienceModel: - """Creates custom aqua model from service model. + """ + Creates a custom Aqua model from a service model. Parameters ---------- - model_id: str - The service model id. - project_id: str - The project id for custom model. 
- compartment_id: str - The compartment id for custom model. Defaults to None. - If not provided, compartment id will be fetched from environment variables. - freeform_tags: dict - Freeform tags for the model - defined_tags: dict - Defined tags for the model + model_id : Union[str, AquaMultiModelRef] + The model ID as a string or a AquaMultiModelRef instance to be deployed. + project_id : Optional[str] + The project ID for the custom model. + compartment_id : Optional[str] + The compartment ID for the custom model. Defaults to None. + If not provided, the compartment ID will be fetched from environment variables. + freeform_tags : Optional[Dict] + Freeform tags for the model. + defined_tags : Optional[Dict] + Defined tags for the model. + Returns ------- - DataScienceModel: + DataScienceModel The instance of DataScienceModel. """ + model_id = ( + model_id.model_id if isinstance(model_id, AquaMultiModelRef) else model_id + ) service_model = DataScienceModel.from_id(model_id) target_project = project_id or PROJECT_OCID target_compartment = compartment_id or COMPARTMENT_OCID if service_model.compartment_id != ODSC_MODEL_COMPARTMENT_OCID: logger.info( - f"Aqua Model {model_id} already exists in user's compartment." + f"Aqua Model {model_id} already exists in the user's compartment." "Skipped copying." ) return service_model @@ -195,14 +204,13 @@ def create( .with_custom_metadata_list(service_model.custom_metadata_list) .with_defined_metadata_list(service_model.defined_metadata_list) .with_provenance_metadata(service_model.provenance_metadata) - # TODO: decide what kwargs will be needed. .create(model_by_reference=True, **kwargs) ) logger.info( f"Aqua Model {custom_model.id} created with the service model {model_id}." 
) - # tracks unique models that were created in the user compartment + # Track unique models that were created in the user's compartment self.telemetry.record_event_async( category="aqua/service/model", action="create", @@ -211,6 +219,204 @@ def create( return custom_model + @telemetry(entry_point="plugin=model&action=create", name="aqua") + def create_multi( + self, + models: List[AquaMultiModelRef], + project_id: Optional[str] = None, + compartment_id: Optional[str] = None, + freeform_tags: Optional[Dict] = None, + defined_tags: Optional[Dict] = None, + **kwargs, # noqa: ARG002 + ) -> DataScienceModel: + """ + Creates a multi-model grouping using the provided model list. + + Parameters + ---------- + models : List[AquaMultiModelRef] + List of AquaMultiModelRef instances for creating a multi-model group. + project_id : Optional[str] + The project ID for the multi-model group. + compartment_id : Optional[str] + The compartment ID for the multi-model group. + freeform_tags : Optional[Dict] + Freeform tags for the model. + defined_tags : Optional[Dict] + Defined tags for the model. + + Returns + ------- + DataScienceModel + Instance of DataScienceModel object. + """ + + if not models: + raise AquaValueError( + "Model list cannot be empty. Please provide at least one model for deployment." + ) + + artifact_list = [] + display_name_list = [] + model_custom_metadata = ModelCustomMetadata() + + # Get container config + container_config = get_container_config() + + service_inference_containers = AquaContainerConfig.from_container_index_json( + config=container_config + ).inference.values() + + supported_container_families = [ + container_config_item.family + for container_config_item in service_inference_containers + if Usage.MULTI_MODEL in container_config_item.usages + ] + + if not supported_container_families: + raise AquaValueError( + "Currently, there are no containers that support multi-model deployment." 
+ ) + + selected_models_deployment_containers = set() + + # Process each model + for model in models: + source_model = DataScienceModel.from_id(model.model_id) + display_name = source_model.display_name + # Update model name in user's input model + model.model_name = model.model_name or display_name + + # TODO Uncomment the section below, if only service models should be allowed for multi-model deployment + # if not source_model.freeform_tags.get(Tags.AQUA_SERVICE_MODEL_TAG, UNKNOWN): + # raise AquaValueError( + # f"Invalid selected model {display_name}. " + # "Currently only service models are supported for multi model deployment." + # ) + + if ( + source_model.freeform_tags.get(Tags.TASK, UNKNOWN).lower() + not in MultiModelSupportedTaskType + ): + raise AquaValueError( + f"Invalid or missing {Tags.TASK} tag for selected model {display_name}. " + f"Currently only `{MultiModelSupportedTaskType.values()}` models are supported for multi model deployment." + ) + + display_name_list.append(display_name) + + # Retrieve model artifact + model_artifact_path = source_model.artifact + if not model_artifact_path: + raise AquaValueError( + f"Model '{display_name}' (ID: {model.model_id}) has no artifacts. " + "Please register the model first." + ) + + # Update model artifact location in user's input model + model.artifact_location = model_artifact_path + + artifact_list.append(model_artifact_path) + + # Validate deployment container consistency + deployment_container = source_model.custom_metadata_list.get( + ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, + ModelCustomMetadataItem( + key=ModelCustomMetadataFields.DEPLOYMENT_CONTAINER + ), + ).value + + if deployment_container not in supported_container_families: + raise AquaValueError( + f"Unsupported deployment container '{deployment_container}' for model '{source_model.id}'. " + f"Only '{supported_container_families}' are supported for multi-model deployments." 
+ ) + + selected_models_deployment_containers.add(deployment_container) + + # Check if the all models in the group shares same container family + if len(selected_models_deployment_containers) > 1: + raise AquaValueError( + "The selected models are associated with different container families: " + f"{list(selected_models_deployment_containers)}." + "For multi-model deployment, all models in the group must share the same container family." + ) + + deployment_container = selected_models_deployment_containers.pop() + + # Generate model group details + timestamp = datetime.now().strftime("%Y%m%d") + model_group_display_name = f"model_group_{timestamp}" + combined_models = ", ".join(display_name_list) + model_group_description = f"Multi-model grouping using {combined_models}." + + # Add global metadata + model_custom_metadata.add( + key=ModelCustomMetadataFields.DEPLOYMENT_CONTAINER, + value=deployment_container, + description=f"Inference container mapping for {model_group_display_name}", + category="Other", + ) + model_custom_metadata.add( + key=ModelCustomMetadataFields.MULTIMODEL_GROUP_COUNT, + value=str(len(models)), + description="Number of models in the group.", + category="Other", + ) + + # Combine tags. The `Tags.AQUA_TAG` has been excluded, because we don't want to show + # the models created for multi-model purpose in the AQUA models list. 
+ tags = { + # Tags.AQUA_TAG: "active", + Tags.MULTIMODEL_TYPE_TAG: "true", + **(freeform_tags or {}), + } + + # Create multi-model group + custom_model = ( + DataScienceModel() + .with_compartment_id(compartment_id) + .with_project_id(project_id) + .with_display_name(model_group_display_name) + .with_description(model_group_description) + .with_freeform_tags(**tags) + .with_defined_tags(**(defined_tags or {})) + .with_custom_metadata_list(model_custom_metadata) + ) + + # Attach artifacts + for artifact in artifact_list: + custom_model.add_artifact(uri=artifact) + + # Finalize creation + custom_model.create(model_by_reference=True) + + logger.info( + f"Aqua Model '{custom_model.id}' created with models: {', '.join(display_name_list)}." + ) + + # Create custom metadata for multi model metadata + custom_model.create_custom_metadata_artifact( + metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA, + artifact_path_or_content=json.dumps( + [model.model_dump() for model in models] + ).encode(), + path_type=MetadataArtifactPathType.CONTENT, + ) + + logger.debug( + f"Multi model metadata uploaded for Aqua model: {custom_model.id}." + ) + + # Track telemetry event + self.telemetry.record_event_async( + category="aqua/multimodel", + action="create", + detail=combined_models, + ) + + return custom_model + @telemetry(entry_point="plugin=model&action=get", name="aqua") def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaModel": """Gets the information of an Aqua model. @@ -1448,8 +1654,9 @@ def register( self, import_model_details: ImportModelDetails = None, **kwargs ) -> AquaModel: """Loads the model from object storage and registers as Model in Data Science Model catalog - The inference container and finetuning container could be of type Service Manged Container(SMC) or custom. - If it is custom, full container URI is expected. If it of type SMC, only the container family name is expected. 
+ The inference container and finetuning container could be of type Service Managed Container(SMC) or custom. + If it is custom, full container URI is expected. If it of type SMC, only the container family name is expected.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#register-model Args: import_model_details (ImportModelDetails): Model details for importing the model. @@ -1609,6 +1816,8 @@ def _rqs(self, compartment_id: str, model_type="FT", **kwargs): filter_tag = Tags.AQUA_FINE_TUNED_MODEL_TAG elif model_type == ModelType.BASE: filter_tag = Tags.BASE_MODEL_CUSTOM + # elif model_type == ModelType.MULTIMODEL: + # filter_tag = Tags.MULTIMODEL_TYPE_TAG else: raise AquaValueError( f"Model of type {model_type} is unknown. The values should be in {ModelType.values()}" diff --git a/ads/aqua/modeldeployment/deployment.py b/ads/aqua/modeldeployment/deployment.py index 4ec0d0d93..0695c9374 100644 --- a/ads/aqua/modeldeployment/deployment.py +++ b/ads/aqua/modeldeployment/deployment.py @@ -2,14 +2,27 @@ # Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +import json import shlex +from datetime import datetime, timedelta from typing import Dict, List, Optional, Union +from cachetools import TTLCache, cached +from oci.data_science.models import ModelDeploymentShapeSummary +from pydantic import ValidationError + from ads.aqua.app import AquaApp, logger -from ads.aqua.common.entities import ContainerSpec +from ads.aqua.common.entities import ( + AquaMultiModelRef, + ComputeShapeSummary, + ContainerPath, + ContainerSpec, +) from ads.aqua.common.enums import InferenceContainerTypeFamily, ModelFormat, Tags from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.common.utils import ( + build_params_string, + build_pydantic_error_message, get_combined_params, get_container_config, get_container_image, @@ -20,19 +33,34 @@ get_params_list, get_resource_name, get_restricted_params_by_container, + load_gpu_shapes_index, validate_cmd_var, ) +from ads.aqua.config.container_config import AquaContainerConfig, Usage from ads.aqua.constants import ( AQUA_MODEL_ARTIFACT_FILE, AQUA_MODEL_TYPE_CUSTOM, + AQUA_MODEL_TYPE_MULTI, AQUA_MODEL_TYPE_SERVICE, + AQUA_MULTI_MODEL_CONFIG, MODEL_BY_REFERENCE_OSS_PATH_KEY, + MODEL_NAME_DELIMITER, UNKNOWN_DICT, ) from ads.aqua.data import AquaResourceIdentifier from ads.aqua.finetuning.finetuning import FineTuneCustomMetadata from ads.aqua.model import AquaModelApp -from ads.aqua.modeldeployment.entities import AquaDeployment, AquaDeploymentDetail +from ads.aqua.model.constants import ModelCustomMetadataFields +from ads.aqua.modeldeployment.entities import ( + AquaDeployment, + AquaDeploymentConfig, + AquaDeploymentDetail, + ConfigurationItem, + ConfigValidationError, + CreateModelDeploymentDetails, + ModelDeploymentConfigSummary, +) +from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from ads.common.object_storage_details import 
ObjectStorageDetails from ads.common.utils import UNKNOWN, get_log_links from ads.config import ( @@ -41,6 +69,7 @@ AQUA_DEPLOYMENT_CONTAINER_URI_METADATA_NAME, AQUA_MODEL_DEPLOYMENT_CONFIG, COMPARTMENT_OCID, + PROJECT_OCID, ) from ads.model.datascience_model import DataScienceModel from ads.model.deployment import ( @@ -49,6 +78,7 @@ ModelDeploymentInfrastructure, ModelDeploymentMode, ) +from ads.model.model_metadata import ModelCustomMetadataItem from ads.telemetry import telemetry @@ -66,8 +96,13 @@ class AquaDeploymentApp(AquaApp): Retrieves details of an Aqua model deployment by its unique identifier. list(**kwargs) -> List[AquaModelSummary]: Lists all Aqua deployments within a specified compartment and/or project. - get_deployment_config(self, model_id: str) -> Dict: + get_deployment_config(self, model_id: str) -> AquaDeploymentConfig: Gets the deployment config of given Aqua model. + get_multimodel_deployment_config(self, model_ids: List[str],...) -> ModelDeploymentConfigSummary: + Retrieves the deployment configuration for multiple Aqua models and calculates + the GPU allocations for all compatible shapes. + list_shapes(self, **kwargs) -> List[Dict]: + Lists the valid model deployment shapes. Note: Use `ads aqua deployment --help` to get more details on the parameters available. 
@@ -79,103 +114,225 @@ class AquaDeploymentApp(AquaApp): @telemetry(entry_point="plugin=deployment&action=create", name="aqua") def create( self, - model_id: str, - instance_shape: str, - display_name: str, - instance_count: int = None, - log_group_id: str = None, - access_log_id: str = None, - predict_log_id: str = None, - compartment_id: str = None, - project_id: str = None, - description: str = None, - bandwidth_mbps: int = None, - web_concurrency: int = None, - server_port: int = None, - health_check_port: int = None, - env_var: Dict = None, - container_family: str = None, - memory_in_gbs: Optional[float] = None, - ocpus: Optional[float] = None, - model_file: Optional[str] = None, - private_endpoint_id: Optional[str] = None, - container_image_uri: Optional[None] = None, - cmd_var: List[str] = None, - freeform_tags: Optional[dict] = None, - defined_tags: Optional[dict] = None, + create_deployment_details: Optional[CreateModelDeploymentDetails] = None, + **kwargs, ) -> "AquaDeployment": """ - Creates a new Aqua deployment + Creates a new Aqua model deployment.\n + For detailed information about CLI flags see: https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/cli-tips.md#create-model-deployment + + Args: + create_deployment_details : CreateModelDeploymentDetails, optional + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + kwargs: + instance_shape (str): The instance shape used for deployment. + display_name (str): The name of the model deployment. + compartment_id (Optional[str]): The compartment OCID. + project_id (Optional[str]): The project OCID. + description (Optional[str]): The description of the deployment. + model_id (Optional[str]): The model OCID to deploy. + models (Optional[List[AquaMultiModelRef]]): List of models for multimodel deployment. + instance_count (int): Number of instances used for deployment. 
+ log_group_id (Optional[str]): OCI logging group ID for logs. + access_log_id (Optional[str]): OCID for access logs. + predict_log_id (Optional[str]): OCID for prediction logs. + bandwidth_mbps (Optional[int]): Bandwidth limit on the load balancer in Mbps. + web_concurrency (Optional[int]): Number of worker processes/threads for handling requests. + server_port (Optional[int]): Server port for the Docker container image. + health_check_port (Optional[int]): Health check port for the Docker container image. + env_var (Optional[Dict[str, str]]): Environment variables for deployment. + container_family (Optional[str]): Image family of the model deployment container runtime. + memory_in_gbs (Optional[float]): Memory (in GB) for the selected shape. + ocpus (Optional[float]): OCPU count for the selected shape. + model_file (Optional[str]): File used for model deployment. + private_endpoint_id (Optional[str]): Private endpoint ID for model deployment. + container_image_uri (Optional[str]): Image URI for model deployment container runtime. + cmd_var (Optional[List[str]]): Command variables for the container runtime. + freeform_tags (Optional[Dict]): Freeform tags for model deployment. + defined_tags (Optional[Dict]): Defined tags for model deployment. + + Returns + ------- + AquaDeployment + An Aqua deployment instance. + """ + # Build deployment details from kwargs if not explicitly provided. + if create_deployment_details is None: + try: + create_deployment_details = CreateModelDeploymentDetails(**kwargs) + except ValidationError as ex: + custom_errors = build_pydantic_error_message(ex) + raise AquaValueError( + f"Invalid parameters for creating a model deployment. Error details: {custom_errors}." + ) from ex + + if not (create_deployment_details.model_id or create_deployment_details.models): + raise AquaValueError( + "Invalid parameters for creating a model deployment. Either `model_id` or `models` must be provided." 
+ ) + + # Set defaults for compartment and project if not provided. + compartment_id = create_deployment_details.compartment_id or COMPARTMENT_OCID + project_id = create_deployment_details.project_id or PROJECT_OCID + freeform_tags = create_deployment_details.freeform_tags + defined_tags = create_deployment_details.defined_tags + + # validate instance shape availability in compartment + available_shapes = [ + shape.name.lower() + for shape in self.list_shapes( + compartment_id=create_deployment_details.compartment_id + ) + ] + + if create_deployment_details.instance_shape.lower() not in available_shapes: + raise AquaValueError( + f"Invalid Instance Shape. The selected shape '{create_deployment_details.instance_shape}' " + f"is not available in the {self.region} region. Please choose another shape to deploy the model." + ) + + # Get container config + container_config = get_container_config() + + # Create an AquaModelApp instance once to perform the deployment creation. + model_app = AquaModelApp() + if create_deployment_details.model_id: + logger.debug( + f"Single model ({create_deployment_details.model_id}) provided. " + "Delegating to single model creation method." 
+ ) + aqua_model = model_app.create( + model_id=create_deployment_details.model_id, + compartment_id=compartment_id, + project_id=project_id, + freeform_tags=freeform_tags, + defined_tags=defined_tags, + ) + return self._create( + aqua_model=aqua_model, + create_deployment_details=create_deployment_details, + container_config=container_config, + ) + else: + model_ids = [model.model_id for model in create_deployment_details.models] + try: + model_config_summary = self.get_multimodel_deployment_config( + model_ids=model_ids, compartment_id=compartment_id + ) + if not model_config_summary.gpu_allocation: + raise AquaValueError(model_config_summary.error_message) + create_deployment_details.validate_multimodel_deployment_feasibility( + models_config_summary=model_config_summary + ) + except ConfigValidationError as err: + raise AquaValueError(f"{err}") from err + + service_inference_containers = ( + AquaContainerConfig.from_container_index_json( + config=container_config + ).inference.values() + ) + + supported_container_families = [ + container_config_item.family + for container_config_item in service_inference_containers + if Usage.MULTI_MODEL in container_config_item.usages + ] + + if not supported_container_families: + raise AquaValueError( + "Currently, there are no containers that support multi-model deployment." + ) + + # Check if provided container family supports multi-model deployment + if ( + create_deployment_details.container_family + and create_deployment_details.container_family + not in supported_container_families + ): + raise AquaValueError( + f"Unsupported deployment container '{create_deployment_details.container_family}'. " + f"Only {supported_container_families} families are supported for multi-model deployments." + ) + + # Verify if it matches one of the registered containers and attempt to + # extract the container family from there. 
+ # If the container is not recognized, we can only issue a warning that + # the provided container may not support multi-model deployment. + if create_deployment_details.container_image_uri: + selected_container_name = ContainerPath( + full_path=create_deployment_details.container_image_uri + ).name + + container_config_item = next( + ( + container_config_item + for container_config_item in service_inference_containers + if ContainerPath( + full_path=f"{container_config_item.name}:{container_config_item.version}" + ).name.upper() + == selected_container_name.upper() + ), + None, + ) + + if ( + container_config_item + and container_config_item.family not in supported_container_families + ): + raise AquaValueError( + f"Unsupported deployment container '{create_deployment_details.container_image_uri}'. " + f"Only {supported_container_families} families are supported for multi-model deployments." + ) + + if not container_config_item: + logger.warning( + f"The provided container `{create_deployment_details.container_image_uri}` may not support multi-model deployment. " + f"Only the following container families are supported: {supported_container_families}." + ) + + logger.debug( + f"Multi models ({model_ids}) provided. Delegating to multi model creation method." + ) + + aqua_model = model_app.create_multi( + models=create_deployment_details.models, + compartment_id=compartment_id, + project_id=project_id, + freeform_tags=freeform_tags, + defined_tags=defined_tags, + ) + return self._create_multi( + aqua_model=aqua_model, + model_config_summary=model_config_summary, + create_deployment_details=create_deployment_details, + container_config=container_config, + ) + + def _create( + self, + aqua_model: DataScienceModel, + create_deployment_details: CreateModelDeploymentDetails, + container_config: Dict, + ) -> AquaDeployment: + """Builds the configurations required by single model deployment and creates the deployment. 
Parameters ---------- - model_id: str - The model OCID to deploy. - compartment_id: str - The compartment OCID - project_id: str - Target project to list deployments from. - display_name: str - The name of model deployment. - description: str - The description of the deployment. - instance_count: (int, optional). Defaults to 1. - The number of instance used for deployment. - instance_shape: (str). - The shape of the instance used for deployment. - log_group_id: (str) - The oci logging group id. The access log and predict log share the same log group. - access_log_id: (str). - The access log OCID for the access logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm - predict_log_id: (str). - The predict log OCID for the predict logs. https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm - bandwidth_mbps: (int). Defaults to 10. - The bandwidth limit on the load balancer in Mbps. - web_concurrency: str - The number of worker processes/threads to handle incoming requests - with_bucket_uri(bucket_uri) - Sets the bucket uri when uploading large size model. - server_port: (int). - The server port for docker container image. - health_check_port: (int). - The health check port for docker container image. - env_var : dict, optional - Environment variable for the deployment, by default None. - container_family: str - The image family of model deployment container runtime. - memory_in_gbs: float - The memory in gbs for the shape selected. - ocpus: float - The ocpu count for the shape selected. - model_file: str - The file used for model deployment. - private_endpoint_id: str - The private endpoint id of model deployment. - container_image_uri: str - The image of model deployment container runtime, ignored for service managed containers. - Required parameter for BYOC based deployments if this parameter was not set during model registration. - cmd_var: List[str] - The cmd of model deployment container runtime. 
- freeform_tags: dict - Freeform tags for the model deployment - defined_tags: dict - Defined tags for the model deployment + aqua_model : DataScienceModel + An instance of Aqua data science model. + create_deployment_details : CreateModelDeploymentDetails + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + container_config: Dict + Container config dictionary. + Returns ------- AquaDeployment - An Aqua deployment instance - + An Aqua deployment instance. """ - # TODO validate if the service model has no artifact and if it requires import step before deployment. - # Create a model catalog entry in the user compartment - aqua_model = AquaModelApp().create( - model_id=model_id, - compartment_id=compartment_id, - project_id=project_id, - freeform_tags=freeform_tags, - defined_tags=defined_tags, - ) - tags = {} for tag in [ Tags.AQUA_SERVICE_MODEL_TAG, @@ -189,7 +346,7 @@ def create( tags.update({Tags.TASK: aqua_model.freeform_tags.get(Tags.TASK, UNKNOWN)}) # Set up info to get deployment config - config_source_id = model_id + config_source_id = create_deployment_details.model_id model_name = aqua_model.display_name is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in aqua_model.freeform_tags @@ -209,10 +366,8 @@ def create( ) from err # set up env and cmd var - if not env_var: - env_var = {} - if not cmd_var: - cmd_var = [] + env_var = create_deployment_details.env_var or {} + cmd_var = create_deployment_details.cmd_var or [] try: model_path_prefix = aqua_model.custom_metadata_list.get( @@ -245,11 +400,13 @@ def create( env_var.update({"FT_MODEL": f"{fine_tune_output_path}"}) container_type_key = self._get_container_type_key( - model=aqua_model, container_family=container_family + model=aqua_model, + container_family=create_deployment_details.container_family, ) - container_image_uri = container_image_uri or get_container_image( - container_type=container_type_key + 
container_image_uri = ( + create_deployment_details.container_image_uri + or get_container_image(container_type=container_type_key) ) if not container_image_uri: try: @@ -295,6 +452,7 @@ def create( and container_type_key.lower() == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY ): + model_file = create_deployment_details.model_file if model_file is not None: logger.info( f"Overriding {model_file} as model_file for model {aqua_model.id}." @@ -318,27 +476,24 @@ def create( # Fetch the startup cli command for the container # container_index.json will have "containerSpec" section which will provide the cli params for # a given container family - container_config = get_container_config() container_spec = container_config.get(ContainerSpec.CONTAINER_SPEC, {}).get( container_type_key, {} ) # these params cannot be overridden for Aqua deployments params = container_spec.get(ContainerSpec.CLI_PARM, "") - server_port = server_port or container_spec.get( + server_port = create_deployment_details.server_port or container_spec.get( ContainerSpec.SERVER_PORT - ) # Give precendece to the input parameter - health_check_port = health_check_port or container_spec.get( - ContainerSpec.HEALTH_CHECK_PORT - ) # Give precendece to the input parameter - - deployment_config = self.get_deployment_config(config_source_id) - - config_params = ( - deployment_config.get("configuration", UNKNOWN_DICT) - .get(instance_shape, UNKNOWN_DICT) - .get("parameters", UNKNOWN_DICT) - .get(get_container_params_type(container_type_key), UNKNOWN) - ) + ) # Give precedence to the input parameter + health_check_port = ( + create_deployment_details.health_check_port + or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT) + ) # Give precedence to the input parameter + + deployment_config = self.get_deployment_config(model_id=config_source_id) + + config_params = deployment_config.configuration.get( + create_deployment_details.instance_shape, ConfigurationItem() + 
).parameters.get(get_container_params_type(container_type_key), UNKNOWN) # validate user provided params user_params = env_var.get("PARAMS", UNKNOWN) @@ -372,36 +527,250 @@ def create( for env in container_spec.get(ContainerSpec.ENV_VARS, []): if isinstance(env, dict): - for key, _items in env.items(): + for key, _ in env.items(): if key not in env_var: env_var.update(env) logger.info(f"Env vars used for deploying {aqua_model.id} :{env_var}") + tags = {**tags, **(create_deployment_details.freeform_tags or {})} + model_type = ( + AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE + ) + + return self._create_deployment( + create_deployment_details=create_deployment_details, + aqua_model_id=aqua_model.id, + model_name=model_name, + model_type=model_type, + container_image_uri=container_image_uri, + server_port=server_port, + health_check_port=health_check_port, + env_var=env_var, + tags=tags, + cmd_var=cmd_var, + ) + + def _create_multi( + self, + aqua_model: DataScienceModel, + model_config_summary: ModelDeploymentConfigSummary, + create_deployment_details: CreateModelDeploymentDetails, + container_config: Dict, + ) -> AquaDeployment: + """Builds the environment variables required by multi deployment container and creates the deployment. + + Parameters + ---------- + model_config_summary : model_config_summary + Summary Model Deployment configuration for the group of models. + aqua_model : DataScienceModel + An instance of Aqua data science model. + create_deployment_details : CreateModelDeploymentDetails + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + container_config: Dict + Container config dictionary. + Returns + ------- + AquaDeployment + An Aqua deployment instance. 
+ """ + model_config = [] + model_name_list = [] + env_var = {**(create_deployment_details.env_var or UNKNOWN_DICT)} + + container_type_key = self._get_container_type_key( + model=aqua_model, + container_family=create_deployment_details.container_family, + ) + container_spec = container_config.get( + ContainerSpec.CONTAINER_SPEC, UNKNOWN_DICT + ).get(container_type_key, UNKNOWN_DICT) + + container_params = container_spec.get(ContainerSpec.CLI_PARM, UNKNOWN).strip() + + for model in create_deployment_details.models: + user_params = build_params_string(model.env_var) + if user_params: + restricted_params = self._find_restricted_params( + container_params, user_params, container_type_key + ) + if restricted_params: + selected_model = model.model_name or model.model_id + raise AquaValueError( + f"Parameters {restricted_params} are set by Aqua " + f"and cannot be overridden or are invalid." + f"Select other parameters for model {selected_model}." + ) + + # replaces `--served-model-name`` with user's model name + container_params_dict = get_params_dict(container_params) + container_params_dict.update({"--served-model-name": model.model_name}) + # replaces `--tensor-parallel-size` with model gpu count + container_params_dict.update({"--tensor-parallel-size": model.gpu_count}) + params = build_params_string(container_params_dict) + + deployment_config = model_config_summary.deployment_config.get( + model.model_id, AquaDeploymentConfig() + ).configuration.get( + create_deployment_details.instance_shape, ConfigurationItem() + ) + + # finds the corresponding deployment parameters based on the gpu count + # and combines them with user's parameters. Existing deployment parameters + # will be overriden by user's parameters. 
+ params_found = False + for item in deployment_config.multi_model_deployment: + if ( + model.gpu_count + and item.gpu_count + and item.gpu_count == model.gpu_count + ): + config_parameters = item.parameters.get( + get_container_params_type(container_type_key), UNKNOWN + ) + params = f"{params} {get_combined_params(config_parameters, user_params)}".strip() + params_found = True + break + + if not params_found and deployment_config.parameters: + config_parameters = deployment_config.parameters.get( + get_container_params_type(container_type_key), UNKNOWN + ) + params = f"{params} {get_combined_params(config_parameters, user_params)}".strip() + params_found = True + + # if no config parameters found, append user parameters directly. + if not params_found: + params = f"{params} {user_params}".strip() + + artifact_path_prefix = model.artifact_location.rstrip("/") + if ObjectStorageDetails.is_oci_path(artifact_path_prefix): + os_path = ObjectStorageDetails.from_path(artifact_path_prefix) + artifact_path_prefix = os_path.filepath.rstrip("/") + + model_config.append({"params": params, "model_path": artifact_path_prefix}) + model_name_list.append(model.model_name) + + env_var.update({AQUA_MULTI_MODEL_CONFIG: json.dumps({"models": model_config})}) + + for env in container_spec.get(ContainerSpec.ENV_VARS, []): + if isinstance(env, dict): + for key, _ in env.items(): + if key not in env_var: + env_var.update(env) + + logger.info(f"Env vars used for deploying {aqua_model.id} : {env_var}.") + + container_image_uri = ( + create_deployment_details.container_image_uri + or get_container_image(container_type=container_type_key) + ) + server_port = create_deployment_details.server_port or container_spec.get( + ContainerSpec.SERVER_PORT + ) + health_check_port = ( + create_deployment_details.health_check_port + or container_spec.get(ContainerSpec.HEALTH_CHECK_PORT) + ) + tags = { + Tags.AQUA_MODEL_ID_TAG: aqua_model.id, + Tags.MULTIMODEL_TYPE_TAG: "true", + Tags.AQUA_TAG: "active", + 
**(create_deployment_details.freeform_tags or UNKNOWN_DICT), + } + + model_name = f"{MODEL_NAME_DELIMITER} ".join(model_name_list) + + aqua_deployment = self._create_deployment( + create_deployment_details=create_deployment_details, + aqua_model_id=aqua_model.id, + model_name=model_name, + model_type=AQUA_MODEL_TYPE_MULTI, + container_image_uri=container_image_uri, + server_port=server_port, + health_check_port=health_check_port, + env_var=env_var, + tags=tags, + ) + aqua_deployment.models = create_deployment_details.models + return aqua_deployment + + def _create_deployment( + self, + create_deployment_details: CreateModelDeploymentDetails, + aqua_model_id: str, + model_name: str, + model_type: str, + container_image_uri: str, + server_port: str, + health_check_port: str, + env_var: dict, + tags: dict, + cmd_var: Optional[dict] = None, + ): + """Creates data science model deployment. + + Parameters + ---------- + create_deployment_details : CreateModelDeploymentDetails + An instance of CreateModelDeploymentDetails containing all required and optional + fields for creating a model deployment via Aqua. + aqua_model_id: str + The id of the aqua model to be deployed. + model_name: str + The name of the aqua model to be deployed. If it's multi model deployment, it is a list of model names. + model_type: str + The type of aqua model to be deployed. Allowed values are: `custom`, `service` and `multi_model`. + container_image_uri: str + The container image uri to deploy the model. + server_port: str + The service port of the container image. + health_check_port: str + The health check port of the container image. + env_var: dict + The environment variables input for the deployment. + tags: dict + The tags input for the deployment. + cmd_var: dict, optional + The cmd arguments input for the deployment. + + Returns + ------- + AquaDeployment + An Aqua deployment instance. 
+ """ # Start model deployment # configure model deployment infrastructure infrastructure = ( ModelDeploymentInfrastructure() - .with_project_id(project_id) - .with_compartment_id(compartment_id) - .with_shape_name(instance_shape) - .with_bandwidth_mbps(bandwidth_mbps) - .with_replica(instance_count) - .with_web_concurrency(web_concurrency) - .with_private_endpoint_id(private_endpoint_id) + .with_project_id(create_deployment_details.project_id or PROJECT_OCID) + .with_compartment_id( + create_deployment_details.compartment_id or COMPARTMENT_OCID + ) + .with_shape_name(create_deployment_details.instance_shape) + .with_bandwidth_mbps(create_deployment_details.bandwidth_mbps) + .with_replica(create_deployment_details.instance_count) + .with_web_concurrency(create_deployment_details.web_concurrency) + .with_private_endpoint_id(create_deployment_details.private_endpoint_id) .with_access_log( - log_group_id=log_group_id, - log_id=access_log_id, + log_group_id=create_deployment_details.log_group_id, + log_id=create_deployment_details.access_log_id, ) .with_predict_log( - log_group_id=log_group_id, - log_id=predict_log_id, + log_group_id=create_deployment_details.log_group_id, + log_id=create_deployment_details.predict_log_id, ) ) - if memory_in_gbs and ocpus and infrastructure.shape_name.endswith("Flex"): + if ( + create_deployment_details.memory_in_gbs + and create_deployment_details.ocpus + and infrastructure.shape_name.endswith("Flex") + ): infrastructure.with_shape_config_details( - ocpus=ocpus, - memory_in_gbs=memory_in_gbs, + ocpus=create_deployment_details.ocpus, + memory_in_gbs=create_deployment_details.memory_in_gbs, ) # configure model deployment runtime container_runtime = ( @@ -411,7 +780,7 @@ def create( .with_health_check_port(health_check_port) .with_env(env_var) .with_deployment_mode(ModelDeploymentMode.HTTPS) - .with_model_uri(aqua_model.id) + .with_model_uri(aqua_model_id) .with_region(self.region) .with_overwrite_existing_artifact(True) 
.with_remove_existing_artifact(True) @@ -419,24 +788,20 @@ def create( if cmd_var: container_runtime.with_cmd(cmd_var) - tags = {**tags, **(freeform_tags or {})} # configure model deployment and deploy model on container runtime deployment = ( ModelDeployment() - .with_display_name(display_name) - .with_description(description) + .with_display_name(create_deployment_details.display_name) + .with_description(create_deployment_details.description) .with_freeform_tags(**tags) - .with_defined_tags(**(defined_tags or {})) + .with_defined_tags(**(create_deployment_details.defined_tags or {})) .with_infrastructure(infrastructure) .with_runtime(container_runtime) ).deploy(wait_for_completion=False) - deployment_id = deployment.dsc_model_deployment.id + deployment_id = deployment.id logger.info( - f"Aqua model deployment {deployment_id} created for model {aqua_model.id}." - ) - model_type = ( - AQUA_MODEL_TYPE_CUSTOM if is_fine_tuned_model else AQUA_MODEL_TYPE_SERVICE + f"Aqua model deployment {deployment_id} created for model {aqua_model_id}." ) # we arbitrarily choose last 8 characters of OCID to identify MD in telemetry @@ -453,7 +818,7 @@ def create( self.telemetry.record_event_async( category=f"aqua/{model_type}/deployment/create", action="shape", - detail=instance_shape, + detail=create_deployment_details.instance_shape, value=model_name, ) @@ -627,10 +992,43 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": source_id=model_deployment.id, ) + aqua_deployment = AquaDeployment.from_oci_model_deployment( + model_deployment, self.region + ) + + if Tags.MULTIMODEL_TYPE_TAG in model_deployment.freeform_tags: + aqua_model_id = model_deployment.freeform_tags.get( + Tags.AQUA_MODEL_ID_TAG, UNKNOWN + ) + if not aqua_model_id: + raise AquaRuntimeError( + f"Invalid multi model deployment {model_deployment_id}." + f"Make sure the {Tags.AQUA_MODEL_ID_TAG} tag is added to the deployment." 
+ ) + aqua_model = DataScienceModel.from_id(aqua_model_id) + custom_metadata_list = aqua_model.custom_metadata_list + multi_model_metadata_value = custom_metadata_list.get( + ModelCustomMetadataFields.MULTIMODEL_METADATA, + ModelCustomMetadataItem( + key=ModelCustomMetadataFields.MULTIMODEL_METADATA + ), + ).value + if not multi_model_metadata_value: + raise AquaRuntimeError( + f"Invalid multi-model deployment: {model_deployment_id}. " + f"Ensure that the required custom metadata `{ModelCustomMetadataFields.MULTIMODEL_METADATA}` is added to the AQUA multi-model `{aqua_model.display_name}` ({aqua_model.id})." + ) + multi_model_metadata = json.loads( + aqua_model.dsc_model.get_custom_metadata_artifact( + metadata_key_name=ModelCustomMetadataFields.MULTIMODEL_METADATA + ).decode("utf-8") + ) + aqua_deployment.models = [ + AquaMultiModelRef(**metadata) for metadata in multi_model_metadata + ] + return AquaDeploymentDetail( - **vars( - AquaDeployment.from_oci_model_deployment(model_deployment, self.region) - ), + **vars(aqua_deployment), log_group=AquaResourceIdentifier( log_group_id, log_group_name, log_group_url ), @@ -640,7 +1038,7 @@ def get(self, model_deployment_id: str, **kwargs) -> "AquaDeploymentDetail": @telemetry( entry_point="plugin=deployment&action=get_deployment_config", name="aqua" ) - def get_deployment_config(self, model_id: str) -> Dict: + def get_deployment_config(self, model_id: str) -> AquaDeploymentConfig: """Gets the deployment config of given Aqua model. Parameters @@ -650,20 +1048,83 @@ def get_deployment_config(self, model_id: str) -> Dict: Returns ------- - Dict: - A dict of allowed deployment configs. + AquaDeploymentConfig: + An instance of AquaDeploymentConfig. """ config = self.get_config(model_id, AQUA_MODEL_DEPLOYMENT_CONFIG).config if not config: logger.debug( f"Deployment config for custom model: {model_id} is not available. Use defaults." 
) - return config + return AquaDeploymentConfig(**(config or UNKNOWN_DICT)) + + @telemetry( + entry_point="plugin=deployment&action=get_multimodel_deployment_config", + name="aqua", + ) + def get_multimodel_deployment_config( + self, + model_ids: List[str], + primary_model_id: Optional[str] = None, + **kwargs: Dict, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves the deployment configuration for multiple models and calculates + GPU allocations across all compatible shapes. + + More details: + https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#get_multimodel_deployment_config + + CLI example: + ads aqua deployment get_multimodel_deployment_config --model_ids '["ocid1.datasciencemodel.oc1.iad.OCID"]' + + If a primary model ID is provided, GPU allocation will prioritize that model + when selecting compatible shapes. + + Example: + Assume all three models: A, B, and C, support the same shape: "BM.GPU.H100.8" and each supports the following GPU counts for that shape: 1, 2, 4, 8. + If `no` primary model is specified, valid allocations could be: [2, 4, 2], [2, 2, 4], or [4, 2, 2] + If `B` is set as the primary model, the allocation will be: [2, 4, 2], where B receives the maximum available GPU count + + Parameters + ---------- + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str] + The OCID of the primary Aqua model. If provided, GPU allocation will prioritize + this model. Otherwise, GPUs will be evenly allocated. + **kwargs: Dict + - compartment_id: str + The compartment OCID to retrieve the model deployment shapes. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the model deployment configurations and GPU allocations. + """ + if not model_ids: + raise AquaValueError( + "Model IDs were not provided. Please provide a valid list of model IDs to retrieve the multi-model deployment configuration." 
+ ) + + compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) + + # Get the all model deployment available shapes in a given compartment + available_shapes = self.list_shapes(compartment_id=compartment_id) + + return MultiModelDeploymentConfigLoader( + deployment_app=self, + ).load( + shapes=available_shapes, + model_ids=model_ids, + primary_model_id=primary_model_id, + ) def get_deployment_default_params( self, model_id: str, instance_shape: str, + gpu_count: int = None, ) -> List[str]: """Gets the default params set in the deployment configs for the given model and instance shape. @@ -675,6 +1136,9 @@ def get_deployment_default_params( instance_shape: (str). The shape of the instance used for deployment. + gpu_count: (int, optional). + The number of GPUs used by the Aqua model. Defaults to None. + Returns ------- List[str]: @@ -683,6 +1147,7 @@ def get_deployment_default_params( """ default_params = [] + config_params = {} model = DataScienceModel.from_id(model_id) try: container_type_key = model.custom_metadata_list.get( @@ -699,12 +1164,26 @@ def get_deployment_default_params( and container_type_key in InferenceContainerTypeFamily.values() ): deployment_config = self.get_deployment_config(model_id) - config_params = ( - deployment_config.get("configuration", UNKNOWN_DICT) - .get(instance_shape, UNKNOWN_DICT) - .get("parameters", UNKNOWN_DICT) - .get(get_container_params_type(container_type_key), UNKNOWN) + + instance_shape_config = deployment_config.configuration.get( + instance_shape, ConfigurationItem() ) + + if instance_shape_config.multi_model_deployment and gpu_count: + gpu_params = instance_shape_config.multi_model_deployment + + for gpu_config in gpu_params: + if gpu_config.gpu_count == gpu_count: + config_params = gpu_config.parameters.get( + get_container_params_type(container_type_key), UNKNOWN + ) + break + + else: + config_params = instance_shape_config.parameters.get( + get_container_params_type(container_type_key), UNKNOWN + ) + if 
config_params: params_list = get_params_list(config_params) restricted_params_set = get_restricted_params_by_container( @@ -799,3 +1278,39 @@ def _find_restricted_params( restricted_params.append(key.lstrip("-")) return restricted_params + + @telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua") + @cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now)) + def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]: + """Lists the valid model deployment shapes. + + Parameters + ---------- + kwargs + Keyword arguments, such as compartment_id + for `list_call_get_all_results `_ + + Returns + ------- + List[ComputeShapeSummary]: + The list of the model deployment shapes. + """ + compartment_id = kwargs.pop("compartment_id", COMPARTMENT_OCID) + oci_shapes: list[ModelDeploymentShapeSummary] = self.list_resource( + self.ds_client.list_model_deployment_shapes, + compartment_id=compartment_id, + **kwargs, + ) + + gpu_specs = load_gpu_shapes_index() + + return [ + ComputeShapeSummary( + core_count=oci_shape.core_count, + memory_in_gbs=oci_shape.memory_in_gbs, + shape_series=oci_shape.shape_series, + name=oci_shape.name, + gpu_specs=gpu_specs.shapes.get(oci_shape.name), + ) + for oci_shape in oci_shapes + ] diff --git a/ads/aqua/modeldeployment/entities.py b/ads/aqua/modeldeployment/entities.py index 8e34f986d..5899e5b2f 100644 --- a/ads/aqua/modeldeployment/entities.py +++ b/ads/aqua/modeldeployment/entities.py @@ -1,60 +1,123 @@ #!/usr/bin/env python -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ -from dataclasses import dataclass, field -from typing import List, Optional, Union +from typing import Any, Dict, List, Optional, Union -from oci.data_science.models import ( - ModelDeployment, - ModelDeploymentSummary, -) +from oci.data_science.models import ModelDeployment, ModelDeploymentSummary +from pydantic import BaseModel, Field, model_validator +from ads.aqua import logger +from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import Tags +from ads.aqua.config.utils.serializer import Serializable from ads.aqua.constants import UNKNOWN_DICT from ads.aqua.data import AquaResourceIdentifier from ads.common.serializer import DataClassSerializable from ads.common.utils import UNKNOWN, get_console_link -@dataclass -class ModelParams: - max_tokens: int = None - temperature: float = None - top_k: float = None - top_p: float = None - model: str = None +class ConfigValidationError(Exception): + """Exception raised for config validation.""" + def __init__( + self, + message: str = ( + "Validation failed: The provided model group configuration is incompatible " + "with the selected instance shape. Please verify the GPU count per model and ensure " + "multi-model deployment is supported for the chosen instance shape." + ), + ): + super().__init__(message) -@dataclass -class ShapeInfo: - instance_shape: str = None - instance_count: int = None - ocpus: float = None - memory_in_gbs: float = None +class ShapeInfo(Serializable): + """ + Represents the configuration details for a compute instance shape. + """ -@dataclass(repr=False) -class AquaDeployment(DataClassSerializable): + instance_shape: Optional[str] = Field( + default=None, + description="The identifier of the compute instance shape (e.g., VM.Standard2.1)", + ) + instance_count: Optional[int] = Field( + default=None, description="The number of instances for the given shape." 
+ ) + ocpus: Optional[float] = Field( + default=None, + description="The number of Oracle CPUs allocated for the instance.", + ) + memory_in_gbs: Optional[float] = Field( + default=None, + description="The total memory allocated for the instance, in gigabytes.", + ) + + +class ModelParams(Serializable): + max_tokens: Optional[int] = None + temperature: Optional[float] = None + top_k: Optional[float] = None + top_p: Optional[float] = None + model: Optional[str] = None + + class Config: + extra = "allow" + protected_namespaces = () + + +class AquaDeployment(Serializable): """Represents an Aqua Model Deployment""" - id: str = None - display_name: str = None - aqua_service_model: bool = None - model_id: str = None - aqua_model_name: str = None - state: str = None - description: str = None - created_on: str = None - created_by: str = None - endpoint: str = None - private_endpoint_id: str = None - console_link: str = None - lifecycle_details: str = None - shape_info: Optional[ShapeInfo] = None - tags: dict = None - environment_variables: dict = None - cmd: List[str] = None + id: Optional[str] = Field(None, description="The model deployment OCID.") + display_name: Optional[str] = Field( + None, description="The name of the model deployment." + ) + aqua_service_model: Optional[bool] = Field( + False, description="The bool value to indicate if it's aqua service model." + ) + model_id: str = Field(..., description="The model OCID to deploy.") + models: Optional[List[AquaMultiModelRef]] = Field( + default_factory=list, description="List of models for multi model deployment." + ) + aqua_model_name: Optional[str] = Field( + None, description="The name of the aqua model." + ) + state: Optional[str] = Field(None, description="The state of the model deployment.") + description: Optional[str] = Field( + None, description="The description of the model deployment." + ) + created_on: Optional[str] = Field( + None, description="The creation time of the model deployment." 
+ ) + created_by: Optional[str] = Field( + None, description="The OCID that creates the model deployment." + ) + endpoint: Optional[str] = Field( + None, description="The endpoint of the model deployment." + ) + private_endpoint_id: Optional[str] = Field( + None, description="The private endpoint id of the model deployment." + ) + console_link: Optional[str] = Field( + None, description="The console link of the model deployment." + ) + lifecycle_details: Optional[str] = Field( + None, description="The lifecycle details of the model deployment." + ) + shape_info: Optional[ShapeInfo] = Field( + default_factory=ShapeInfo, + description="The shape information of the model deployment.", + ) + tags: Optional[dict] = Field( + default_factory=dict, description="The tags of the model deployment." + ) + environment_variables: Optional[dict] = Field( + default_factory=dict, + description="The environment variables of the model deployment.", + ) + cmd: Optional[List[str]] = Field( + default_factory=list, description="The cmd of the model deployment." + ) @classmethod def from_oci_model_deployment( @@ -135,10 +198,456 @@ def from_oci_model_deployment( cmd=cmd, ) + class Config: + extra = "ignore" + protected_namespaces = () + -@dataclass(repr=False) class AquaDeploymentDetail(AquaDeployment, DataClassSerializable): """Represents a details of Aqua deployment.""" - log_group: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier) - log: AquaResourceIdentifier = field(default_factory=AquaResourceIdentifier) + log_group: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) + log: AquaResourceIdentifier = Field(default_factory=AquaResourceIdentifier) + + class Config: + extra = "allow" + + +class ShapeInfoConfig(Serializable): + """Describes how many memory and cpu to this model for specific shape. + + Attributes: + memory_in_gbs (float, optional): The number of memory in gbs to this model of the shape. 
+ ocpu (float, optional): The number of ocpus to this model of the shape. + """ + + memory_in_gbs: Optional[float] = Field( + None, + description="The number of memory in gbs to this model of the shape.", + ) + ocpu: Optional[float] = Field( + None, + description="The number of ocpus to this model of the shape.", + ) + + class Config: + extra = "allow" + + +class DeploymentShapeInfo(Serializable): + """Describes the shape information to this model for specific shape. + + Attributes: + configs (List[ShapeInfoConfig], optional): A list of memory and cpu number details to this model of the shape. + type (str, optional): The type of the shape. + """ + + configs: Optional[List[ShapeInfoConfig]] = Field( + default_factory=list, + description="A list of memory and cpu number details to this model of the shape.", + ) + type: Optional[str] = Field( + default_factory=str, description="The type of the shape." + ) + + class Config: + extra = "allow" + + +class MultiModelConfig(Serializable): + """Describes how many GPUs and the parameters of specific shape for multi model deployment. + + Attributes: + gpu_count (int, optional): Number of GPUs count to this model of this shape. + parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to + configure the behavior of a particular GPU shape. + """ + + gpu_count: Optional[int] = Field( + default_factory=int, description="The number of GPUs allocated to the model." + ) + parameters: Optional[Dict[str, str]] = Field( + default_factory=dict, + description="Key-value pairs for GPU shape parameters (e.g., VLLM_PARAMS).", + ) + + class Config: + extra = "allow" + + +class ConfigurationItem(Serializable): + """Holds key-value parameter pairs for a specific GPU or CPU shape. + + Attributes: + parameters (Dict[str, str], optional): A dictionary of parameters (e.g., VLLM_PARAMS) to + configure the behavior of a particular GPU shape. 
+ multi_model_deployment (List[MultiModelConfig], optional): A list of multi model configuration details. + shape_info (DeploymentShapeInfo, optional): The shape information to this model for specific CPU shape. + """ + + parameters: Optional[Dict[str, str]] = Field( + default_factory=dict, + description="Key-value pairs for shape parameters.", + ) + multi_model_deployment: Optional[List[MultiModelConfig]] = Field( + default_factory=list, description="A list of multi model configuration details." + ) + shape_info: Optional[DeploymentShapeInfo] = Field( + default_factory=DeploymentShapeInfo, + description="The shape information to this model for specific shape", + ) + + class Config: + extra = "allow" + + +class AquaDeploymentConfig(Serializable): + """Represents multi model's shape list and detailed configuration. + + Attributes: + shape (List[str], optional): A list of shape names (e.g., BM.GPU.A10.4). + configuration (Dict[str, ConfigurationItem], optional): Maps each shape to its configuration details. + """ + + shape: Optional[List[str]] = Field( + default_factory=list, description="List of supported shapes for the model." + ) + configuration: Optional[Dict[str, ConfigurationItem]] = Field( + default_factory=dict, description="Configuration details keyed by shape." + ) + + class Config: + extra = "allow" + + +class GPUModelAllocation(Serializable): + """Describes how many GPUs are allocated to a particular model. + + Attributes: + ocid (str, optional): The unique identifier of the model. + gpu_count (int, optional): Number of GPUs allocated to this model. + """ + + ocid: Optional[str] = Field( + default_factory=str, description="The unique model OCID." + ) + gpu_count: Optional[int] = Field( + default_factory=int, description="The number of GPUs allocated to the model." + ) + + class Config: + extra = "allow" + + +class GPUShapeAllocation(Serializable): + """ + Allocation details for a specific GPU shape. 
+ + Attributes: + models (List[GPUModelAllocation], optional): List of model GPU allocations for this shape. + total_gpus_available (int, optional): The total number of GPUs available for this shape. + """ + + models: Optional[List[GPUModelAllocation]] = Field( + default_factory=list, description="List of model allocations for this shape." + ) + total_gpus_available: Optional[int] = Field( + default_factory=int, description="Total GPUs available for this shape." + ) + + class Config: + extra = "allow" + + +class ModelDeploymentConfigSummary(Serializable): + """Top-level configuration model for OCI-based deployments. + + Attributes: + deployment_config (Dict[str, AquaDeploymentConfig], optional): Deployment configurations + keyed by model OCID. + gpu_allocation (Dict[str, GPUShapeAllocation], optional): GPU allocations keyed by GPU shape. + error_message (str, optional): Error message if GPU allocation is not possible. + """ + + deployment_config: Optional[Dict[str, AquaDeploymentConfig]] = Field( + default_factory=dict, + description=( + "Deployment configuration details for each model, including supported shapes " + "and shape-specific parameters." + ), + ) + gpu_allocation: Optional[Dict[str, GPUShapeAllocation]] = Field( + default_factory=dict, + description=( + "Details on how GPUs are allocated per shape, including the total " + "GPUs available for each shape." + ), + ) + error_message: Optional[str] = Field( + default=None, description="Error message if GPU allocation is not possible." + ) + + class Config: + extra = "allow" + + +class CreateModelDeploymentDetails(BaseModel): + """Class for creating Aqua model deployments.""" + + instance_shape: str = Field( + ..., description="The instance shape used for deployment." 
+ ) + display_name: str = Field(..., description="The name of the model deployment.") + compartment_id: Optional[str] = Field(None, description="The compartment OCID.") + project_id: Optional[str] = Field(None, description="The project OCID.") + description: Optional[str] = Field( + None, description="The description of the deployment." + ) + model_id: Optional[str] = Field(None, description="The model OCID to deploy.") + models: Optional[List[AquaMultiModelRef]] = Field( + None, description="List of models for multimodel deployment." + ) + instance_count: int = Field( + None, description="Number of instances used for deployment." + ) + log_group_id: Optional[str] = Field( + None, description="OCI logging group ID for logs." + ) + access_log_id: Optional[str] = Field( + None, + description="OCID for access logs. " + "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", + ) + predict_log_id: Optional[str] = Field( + None, + description="OCID for prediction logs." + "https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm", + ) + bandwidth_mbps: Optional[int] = Field( + None, description="Bandwidth limit on the load balancer in Mbps." + ) + web_concurrency: Optional[int] = Field( + None, description="Number of worker processes/threads for handling requests." + ) + server_port: Optional[int] = Field( + None, description="Server port for the Docker container image." + ) + health_check_port: Optional[int] = Field( + None, description="Health check port for the Docker container image." + ) + env_var: Optional[Dict[str, str]] = Field( + default_factory=dict, description="Environment variables for deployment." + ) + container_family: Optional[str] = Field( + None, description="Image family of the model deployment container runtime." + ) + memory_in_gbs: Optional[float] = Field( + None, description="Memory (in GB) for the selected shape." 
+ ) + ocpus: Optional[float] = Field( + None, description="OCPU count for the selected shape." + ) + model_file: Optional[str] = Field( + None, description="File used for model deployment." + ) + private_endpoint_id: Optional[str] = Field( + None, description="Private endpoint ID for model deployment." + ) + container_image_uri: Optional[str] = Field( + None, + description="Image URI for model deployment container runtime " + "(ignored for service-managed containers). " + "Required parameter for BYOC based deployments if this parameter was not set during " + "model registration.", + ) + cmd_var: Optional[List[str]] = Field( + None, description="Command variables for the container runtime." + ) + freeform_tags: Optional[Dict] = Field( + None, description="Freeform tags for model deployment." + ) + defined_tags: Optional[Dict] = Field( + None, description="Defined tags for model deployment." + ) + + @model_validator(mode="before") + @classmethod + def validate(cls, values: Any) -> Any: + """Ensures exactly one of `model_id` or `models` is provided.""" + model_id = values.get("model_id") + models = values.get("models") + if bool(model_id) == bool(models): # Both set or both unset + raise ValueError( + "Exactly one of `model_id` or `models` must be provided to create a model deployment." + ) + return values + + def validate_multimodel_deployment_feasibility( + self, models_config_summary: ModelDeploymentConfigSummary + ) -> None: + """ + Validates whether the selected model group is feasible for a multi-model deployment + on the chosen instance shape. + + Validation Criteria: + - Ensures that the model group is not empty. + - Verifies that the selected instance shape is supported by the GPU allocation. + - Confirms that each model in the group has a corresponding deployment configuration. + - Ensures that each model's user-specified GPU allocation is allowed by its deployment configuration. 
+ - Checks that the total GPUs requested by the model group does not exceed the available GPU capacity + for the selected instance shape. + + Parameters + ---------- + models_config_summary : ModelDeploymentConfigSummary + Contains GPU allocations and deployment configuration for models. + + Raises + ------ + ConfigValidationError: + - If the model group is empty. + - If the selected instance shape is not supported. + - If any model is missing from the deployment configuration. + - If a model's GPU allocation does not match any valid configuration. + - If the total requested GPUs exceed the instance shape’s capacity. + """ + # Ensure that at least one model is provided. + if not self.models: + logger.error("No models provided in the model group.") + raise ConfigValidationError( + "Multi-model deployment requires at least one model. Please provide one or more models." + ) + + selected_shape = self.instance_shape + + if models_config_summary.error_message: + logger.error(models_config_summary.error_message) + raise ConfigValidationError(models_config_summary.error_message) + + # Verify that the selected shape is supported by the GPU allocation. + if selected_shape not in models_config_summary.gpu_allocation: + supported_shapes = list(models_config_summary.gpu_allocation.keys()) + error_message = ( + f"The model group is not compatible with the selected instance shape `{selected_shape}`. " + f"Supported shapes: {supported_shapes}." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + + total_available_gpus: int = models_config_summary.gpu_allocation[ + selected_shape + ].total_gpus_available + model_deployment_config = models_config_summary.deployment_config + + # Verify that every model in the group has a corresponding deployment configuration. 
+ required_model_ids = {model.model_id for model in self.models} + missing_model_ids = required_model_ids - set(model_deployment_config.keys()) + if missing_model_ids: + error_message = ( + f"Missing deployment configuration for models: {list(missing_model_ids)}. " + "Ensure all selected models are properly configured. If you are deploying custom " + "models that lack AQUA service configuration, refer to the deployment guidelines here: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + + sum_model_gpus = 0 + is_single_model = len(self.models) == 1 + + # Validate each model's GPU allocation against its deployment configuration. + for model in self.models: + sum_model_gpus += model.gpu_count + aqua_deployment_config = model_deployment_config[model.model_id] + + # Skip validation for models without deployment configuration details. + if not aqua_deployment_config.configuration: + error_message = ( + f"Missing deployment configuration for model `{model.model_id}`. " + "Please verify that the model is correctly configured. If you are deploying custom models without AQUA service configuration, " + "refer to the guidelines at: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" + ) + + logger.error(error_message) + raise ConfigValidationError(error_message) + + allowed_shapes = ( + list( + set(aqua_deployment_config.configuration.keys()).union( + set(aqua_deployment_config.shape or []) + ) + ) + if is_single_model + else list(aqua_deployment_config.configuration.keys()) + ) + + if selected_shape not in allowed_shapes: + error_message = ( + f"Model `{model.model_id}` is not compatible with the selected instance shape `{selected_shape}`. " + f"Select a different instance shape from allowed shapes {allowed_shapes}." 
+ ) + logger.error(error_message) + raise ConfigValidationError(error_message) + + # Retrieve valid GPU counts for the selected shape. + multi_model_configs = aqua_deployment_config.configuration.get( + selected_shape, ConfigurationItem() + ).multi_model_deployment + + valid_gpu_configurations = [cfg.gpu_count for cfg in multi_model_configs] + + if model.gpu_count not in valid_gpu_configurations: + valid_gpu_str = valid_gpu_configurations or [] + + if is_single_model: + # If total GPU allocation is not supported by selected model + if selected_shape not in aqua_deployment_config.shape: + error_message = ( + f"Model `{model.model_id}` is configured with {model.gpu_count} GPU(s), " + f"which is invalid. The allowed GPU configurations are: {valid_gpu_str}." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + + if model.gpu_count != total_available_gpus: + error_message = ( + f"Model '{model.model_id}' is configured to use {model.gpu_count} GPU(s), " + f"which not fully utilize the selected instance shape with {total_available_gpus} available GPU(s). " + "Consider adjusting the GPU allocation to better utilize the available resources and maximize performance." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + + else: + error_message = ( + f"Model `{model.model_id}` is configured with {model.gpu_count} GPU(s), which is invalid. " + f"Valid GPU configurations are: {valid_gpu_str}. Please adjust the GPU allocation " + f"or choose an instance shape that supports a higher GPU count." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + + if sum_model_gpus < total_available_gpus: + error_message = ( + f"Selected models are configured to use {sum_model_gpus} GPU(s), " + f"which not fully utilize the selected instance shape with {total_available_gpus} available GPU(s). " + "This configuration may lead to suboptimal performance for a multi-model deployment. 
" + "Consider adjusting the GPU allocation to better utilize the available resources and maximize performance." + ) + logger.warning(error_message) + # raise ConfigValidationError(error_message) + + # Check that the total GPU count for the model group does not exceed the instance capacity. + if sum_model_gpus > total_available_gpus: + error_message = ( + f"The selected instance shape `{selected_shape}` provides `{total_available_gpus}` GPU(s), " + f"but the total GPU allocation required by the model group is `{sum_model_gpus}` GPU(s). " + "Please adjust the GPU allocation per model or choose an instance shape with greater GPU capacity." + ) + logger.error(error_message) + raise ConfigValidationError(error_message) + + class Config: + extra = "allow" + protected_namespaces = () diff --git a/ads/aqua/modeldeployment/inference.py b/ads/aqua/modeldeployment/inference.py index 02f9bb408..e5812ad25 100644 --- a/ads/aqua/modeldeployment/inference.py +++ b/ads/aqua/modeldeployment/inference.py @@ -1,15 +1,14 @@ #!/usr/bin/env python -# -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import json -from dataclasses import asdict, dataclass, field +from dataclasses import dataclass, field import requests -from ads.aqua.app import AquaApp, logger +from ads.aqua.app import AquaApp from ads.aqua.modeldeployment.entities import ModelParams from ads.common.auth import default_signer from ads.telemetry import telemetry @@ -63,7 +62,7 @@ def get_model_deployment_response(self, endpoint): model_response_content """ - params_dict = asdict(self.model_params) + params_dict = self.model_params.to_dict() params_dict = { key: value for key, value in params_dict.items() if value is not None } diff --git a/ads/aqua/modeldeployment/utils.py b/ads/aqua/modeldeployment/utils.py new file mode 100644 index 000000000..ac93ed23f --- /dev/null +++ b/ads/aqua/modeldeployment/utils.py @@ -0,0 +1,525 @@ +#!/usr/bin/env python +# Copyright (c) 2025 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ +"""AQUA model deployment utils""" + +import copy +import itertools +import logging +import math +from concurrent.futures import ThreadPoolExecutor +from typing import Dict, List, Optional + +from ads.aqua.app import AquaApp +from ads.aqua.common.entities import ComputeShapeSummary +from ads.aqua.modeldeployment.entities import ( + AquaDeploymentConfig, + ConfigurationItem, + GPUModelAllocation, + GPUShapeAllocation, + ModelDeploymentConfigSummary, + MultiModelConfig, +) +from ads.config import AQUA_MODEL_DEPLOYMENT_CONFIG + +logger = logging.getLogger("ads.aqua") + + +class MultiModelDeploymentConfigLoader: + """ + Processes multiple model deployment configurations to determine compatible GPU shapes + and calculate optimal GPU allocations. 
+ """ + + MAX_WORKERS = 10 # Number of workers for asynchronous models detail loading + + def __init__(self, deployment_app: AquaApp): + """ + Initializes the processor with a reference to the `AquaDeploymentApp` to fetch model configurations. + + Parameters + ---------- + deployment_app : AquaDeploymentApp + An instance of AquaDeploymentApp used to fetch model deployment configurations. + """ + self.deployment_app = deployment_app + + def load( + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + primary_model_id: Optional[str] = None, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configurations for multiple/single model and calculates compatible GPU allocations. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str], optional + The OCID of the primary Aqua model. If provided, GPU allocation prioritizes this model. + Otherwise, GPUs are evenly allocated. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + return self._load_multi_model_deployment_configuration( + shapes=shapes, model_ids=model_ids, primary_model_id=primary_model_id + ) + + def _load_multi_model_deployment_configuration( + self, + shapes: List[ComputeShapeSummary], + model_ids: List[str], + primary_model_id: Optional[str] = None, + ) -> ModelDeploymentConfigSummary: + """ + Retrieves deployment configurations for multiple models and calculates compatible GPU allocations. + + Parameters + ---------- + shapes : List[ComputeShapeSummary] + Model deployment available shapes. + model_ids : List[str] + A list of OCIDs for the Aqua models. + primary_model_id : Optional[str], optional + The OCID of the primary Aqua model. 
If provided, GPU allocation prioritizes this model. + Otherwise, GPUs are evenly allocated. + + Returns + ------- + ModelDeploymentConfigSummary + A summary of the deployment configurations and GPU allocations. If GPU allocation + cannot be determined, an appropriate error message is included in the summary. + """ + model_shape_gpu, available_shapes, summary = self._fetch_model_shape_gpu( + shapes=shapes, model_ids=model_ids + ) + + # Identify common deployment shapes among all models. + common_shapes, empty_configs = self._get_common_shapes(model_shape_gpu) + logger.debug(f"Common Shapes: {common_shapes} from: {model_shape_gpu}") + + # If all models' shape configs are empty, use default deployment shapes instead + common_shapes = ( + available_shapes + if empty_configs + else [ + shape_name + for shape_name in common_shapes + if shape_name.upper() in available_shapes + ] + ) + logger.debug(f"Available Common Shapes: {common_shapes}") + + if not common_shapes: + summary.error_message = ( + "The selected models do not share any available common deployment shapes. " + "Please ensure that all chosen models are compatible for multi-model deployment." + ) + logger.debug( + f"No common deployment shapes found among selected models: {model_ids}" + ) + return summary + + # Compute GPU allocations based on the common shapes and optionally prioritize a primary model. + gpu_allocation = self._compute_gpu_allocation( + shapes=shapes, + common_shapes=common_shapes, + model_shape_gpu=model_shape_gpu, + primary_model_id=primary_model_id, + ) + + logger.debug(f"GPU Allocation: {gpu_allocation}") + + if not gpu_allocation: + summary.error_message = ( + "The selected models do not have a valid GPU allocation based on their current configurations. " + "Please select a different model group. 
If you are deploying custom models that lack AQUA service configuration, " + "refer to the deployment guidelines here: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" + ) + + logger.debug( + f"GPU allocation computation failed for selected models: {model_ids}" + ) + + return summary + + summary.gpu_allocation = gpu_allocation + return summary + + def _fetch_model_shape_gpu( + self, shapes: List[ComputeShapeSummary], model_ids: List[str] + ): + """Fetches dict of model shape and gpu, list of available shapes and builds `ModelDeploymentConfigSummary` instance.""" + # Fetch deployment configurations concurrently. + logger.debug(f"Loading model deployment configuration for models: {model_ids}") + deployment_configs = self._fetch_deployment_configs_concurrently(model_ids) + + logger.debug(f"Loaded config: {deployment_configs}") + model_shape_gpu, deployment = self._extract_model_shape_gpu( + deployment_configs=deployment_configs, shapes=shapes + ) + + # Initialize the summary result with the deployment configurations. 
+ summary = ModelDeploymentConfigSummary(deployment_config=deployment) + + # Filter out not available shapes + available_shapes = [item.name.upper() for item in shapes] + logger.debug(f"Service Available Shapes: {available_shapes}") + + return model_shape_gpu, available_shapes, summary + + def _fetch_deployment_configs_concurrently( + self, model_ids: List[str] + ) -> Dict[str, AquaDeploymentConfig]: + """Fetches deployment configurations in parallel using ThreadPoolExecutor.""" + with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor: + results = executor.map( + lambda model_id: self.deployment_app.get_config( + model_id, AQUA_MODEL_DEPLOYMENT_CONFIG + ).config, + model_ids, + ) + + return { + model_id: AquaDeploymentConfig(**config) + for model_id, config in zip(model_ids, results) + } + + def _extract_model_shape_gpu( + self, + deployment_configs: Dict[str, AquaDeploymentConfig], + shapes: List[ComputeShapeSummary], + ): + """Extracts shape and GPU count details from deployment configurations. + Supported shapes for multi model deployment will be collected from `configuration` entry in deployment config. + Supported shapes for single model deployment will be collected from `shape` entry in deployment config. + """ + model_shape_gpu = {} + deployment = {} + is_single_model = len(deployment_configs) == 1 + + for model_id, config in deployment_configs.items(): + # For multi model deployment, we cannot rely on .shape because some models, like Falcon-7B, can only be deployed on a single GPU card (A10.1). + # However, Falcon can also be deployed on a single card in other A10 shapes, such as A10.2. + # Our current configuration does not support this flexibility. + # For single model deployment, we use `config.shape` to find the available shapes. 
+ multi_deployment_shape = ( + list(set(config.configuration.keys()).union(set(config.shape or []))) + if is_single_model + else list(config.configuration.keys()) + ) + + shape_total_gpus_available_map = { + deployment_shape.name.upper(): deployment_shape.gpu_specs.gpu_count + or None + for deployment_shape in shapes + if deployment_shape and deployment_shape.gpu_specs + } + + model_shape_gpu[model_id] = { + shape.upper(): [ + item.gpu_count + for item in config.configuration.get( + shape, + ConfigurationItem( + multi_model_deployment=( + [ + MultiModelConfig( + gpu_count=shape_total_gpus_available_map.get( + shape.upper() + ) + ) + ] + if is_single_model + else [] + ) + ), + ).multi_model_deployment + ] + for shape in multi_deployment_shape + } + + # For single-model deployments: if the shape is listed in the `shapes` section of the config, + # we include the maximum available GPU count for that shape in the allocation consideration. + if is_single_model: + for shape in model_shape_gpu[model_id]: + shape_total_gpu_count = shape_total_gpus_available_map.get( + shape.upper() + ) + if ( + shape in config.shape + and shape_total_gpu_count + and shape_total_gpu_count + not in model_shape_gpu[model_id][shape] + ): + model_shape_gpu[model_id][shape].append(shape_total_gpu_count) + + deployment[model_id] = { + "shape": [shape.upper() for shape in config.shape], + "configuration": { + shape.upper(): config.configuration.get(shape, ConfigurationItem()) + for shape in multi_deployment_shape + }, + } + + return model_shape_gpu, deployment + + def _get_common_shapes( + self, model_shape_gpu: Dict[str, Dict[str, List[int]]] + ) -> tuple: + """Finds common shapes across all models.""" + common_shapes_set = [] + empty_configs = True + for shapes in model_shape_gpu.values(): + if shapes: + common_shapes_set.append(set(shapes.keys())) + empty_configs = False + if not common_shapes_set: + return [], empty_configs + return list(set.intersection(*(common_shapes_set))), empty_configs + 
+ def _compute_gpu_allocation( + self, + shapes: List[ComputeShapeSummary], + common_shapes: List[str], + model_shape_gpu: Dict[str, Dict[str, List[int]]], + primary_model_id: Optional[str], + ) -> Dict[str, GPUShapeAllocation]: + """Computes GPU allocation for common shapes.""" + + gpu_allocation = {} + + for common_shape in common_shapes: + total_gpus_available = 0 + + # search the shape in the available shapes list + shape_summary = next( + (shape for shape in shapes if shape.name.upper() == common_shape), + None, + ) + if shape_summary and shape_summary.gpu_specs: + total_gpus_available = shape_summary.gpu_specs.gpu_count + + # generate a list of possible gpu count from `total_gpus_available` for custom models + # without multi model deployment config + # model_gpu = { + # model: ( + # shape_gpu[common_shape] + # if shape_gpu.get(common_shape, UNKNOWN) + # else self._generate_gpu_list(total_gpus_available) + # ) + # for model, shape_gpu in model_shape_gpu.items() + # } + + model_gpu = { + model: (shape_gpu.get(common_shape, []) or []) + for model, shape_gpu in model_shape_gpu.items() + } + + is_compatible, combination = self._verify_compatibility( + total_gpus_available=total_gpus_available, + model_gpu_dict=model_gpu, + primary_model_id=primary_model_id, + ) + + if is_compatible: + gpu_allocation[common_shape] = GPUShapeAllocation( + models=combination, total_gpus_available=total_gpus_available + ) + + return gpu_allocation + + @staticmethod + def _generate_gpu_list(total_gpus_available: int) -> list[int]: + """Generates a list of powers of 2 that's smaller than or equal to `total_gpus_available`. + + Example + ------- + input: 8 + output: [1,2,4,8] + + Parameters + ---------- + total_gpus_available : int + Total GPU available + + Returns + ------- + list + A list of powers of 2. 
+ """ + if total_gpus_available < 1: + return [] + return [2**i for i in range(int(math.log2(total_gpus_available)) + 1)] + + def _verify_compatibility( + self, + total_gpus_available: int, + model_gpu_dict: Dict, + primary_model_id: str = None, + ) -> tuple: + """Calculates the gpu allocations for all compatible shapes. + If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated. + If provided, gpu count for each compatible shape will be prioritized for primary model. + + Example + ------- + + Case 1: + There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + + A - BM.GPU.H100.8 - 1, 2, 4, 8 + B - BM.GPU.H100.8 - 1, 2, 4, 8 + C - BM.GPU.H100.8 - 1, 2, 4, 8 + + If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2] + If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count. + + Case 2: + There is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below: + + A - BM.GPU.H100.8 - 1 + B - BM.GPU.H100.8 - 1, 2, 4 + C - BM.GPU.H100.8 - 1, 2, 4 + + If no primary model is provided, the gpu allocation for A, B, C could be [1, 1, 2] or [1, 2, 1] + If C is the primary model, the gpu allocation is [1, 1, 2] as C always gets the maximum gpu count. + + Parameters + ---------- + model_gpu_dict: Dict + A dict of Aqua model and its gpu counts. + primary_model_id: str + The OCID of the primary Aqua model + + Returns + ------- + tuple: + A tuple of gpu count allocation result. 
+ """ + model_gpu_dict_copy = copy.deepcopy(model_gpu_dict) + # minimal gpu count needed to satisfy all models + minimal_gpus_needed = len(model_gpu_dict) + if primary_model_id and minimal_gpus_needed > 1: + primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id)) + primary_model_gpu_list.reverse() + combinations = self.get_combinations(model_gpu_dict_copy) + for gpu_count in primary_model_gpu_list: + current_gpus_available = total_gpus_available + while ( + current_gpus_available >= minimal_gpus_needed + # or current_gpus_available == 1 + ): + for combination in combinations: + if ( + len(combination) == len(model_gpu_dict_copy) + and sum(combination.values()) + == current_gpus_available - gpu_count + ): + combination[primary_model_id] = gpu_count + return ( + True, + [ + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) + for ocid, gpu_count in combination.items() + ], + ) + + current_gpus_available -= 1 + # current_gpus_available = ( + # 1 if current_gpus_available == 0 else current_gpus_available + # ) + else: + combinations = self.get_combinations(model_gpu_dict_copy) + current_gpus_available = total_gpus_available + while ( + current_gpus_available >= minimal_gpus_needed + # or current_gpus_available == 1 + ): + minimal_difference = float("inf") # gets the positive infinity + optimal_combination = [] + for combination in combinations: + if ( + len(combination) == len(model_gpu_dict_copy) + and sum(combination.values()) == current_gpus_available + ): + difference = max(combination.values()) - min( + combination.values() + ) + if difference < minimal_difference: + minimal_difference = difference + optimal_combination = combination + + # find the optimal combination, no need to continue + if minimal_difference == 0: + break + + if optimal_combination: + return ( + True, + [ + GPUModelAllocation(ocid=ocid, gpu_count=gpu_count) + for ocid, gpu_count in optimal_combination.items() + ], + ) + + current_gpus_available -= 1 + # 
current_gpus_available = ( + # 1 if current_gpus_available == 0 else current_gpus_available + # ) + + return (False, []) + + @staticmethod + def get_combinations(input_dict: dict): + """Finds all unique combinations within input dict. + + The input is a dict of {model:[gpu_count]} on a specific shape and this method will + return a list of all unique combinations of gpu allocation of each model. + + For example: + + input: {'model_a': [2, 4], 'model_b': [1, 2, 4], 'model_c': [1, 2, 8]} + output: + [ + {'model_a': 2, 'model_b': 1, 'model_c': 1}, + {'model_a': 2, 'model_b': 1, 'model_c': 2}, + {'model_a': 2, 'model_b': 1, 'model_c': 8}, + {'model_a': 2, 'model_b': 2, 'model_c': 1}, + {'model_a': 2, 'model_b': 2, 'model_c': 2}, + {'model_a': 2, 'model_b': 2, 'model_c': 8}, + {'model_a': 2, 'model_b': 4, 'model_c': 1}, + {'model_a': 2, 'model_b': 4, 'model_c': 2}, + {'model_a': 2, 'model_b': 4, 'model_c': 8}, + {'model_a': 4, 'model_b': 1, 'model_c': 1}, + {'model_a': 4, 'model_b': 1, 'model_c': 2}, + {'model_a': 4, 'model_b': 1, 'model_c': 8}, + {'model_a': 4, 'model_b': 2, 'model_c': 1}, + {'model_a': 4, 'model_b': 2, 'model_c': 2}, + {'model_a': 4, 'model_b': 2, 'model_c': 8}, + {'model_a': 4, 'model_b': 4, 'model_c': 1}, + {'model_a': 4, 'model_b': 4, 'model_c': 2}, + {'model_a': 4, 'model_b': 4, 'model_c': 8} + ] + + Parameters + ---------- + input_dict: dict + A dict of {model:[gpu_count]} on a specific shape + + Returns + ------- + list: + A list of all unique combinations of gpu allocation of each model. 
+ """ + keys, values = zip(*input_dict.items()) + return [dict(zip(keys, v)) for v in itertools.product(*values)] diff --git a/ads/aqua/resources/gpu_shapes_index.json b/ads/aqua/resources/gpu_shapes_index.json new file mode 100644 index 000000000..c88155e45 --- /dev/null +++ b/ads/aqua/resources/gpu_shapes_index.json @@ -0,0 +1,94 @@ +{ + "shapes": { + "BM.GPU.A10.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "BM.GPU.A100-V2.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 640, + "gpu_type": "A100" + }, + "BM.GPU.B4.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, + "BM.GPU.H100.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 640, + "gpu_type": "H100" + }, + "BM.GPU.H200.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 1128, + "gpu_type": "H200" + }, + "BM.GPU.L40S-NC.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 192, + "gpu_type": "L40S" + }, + "BM.GPU.L40S.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 192, + "gpu_type": "L40S" + }, + "BM.GPU.MI300X.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 1536, + "gpu_type": "MI300X" + }, + "BM.GPU2.2": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "P100" + }, + "BM.GPU3.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 128, + "gpu_type": "V100" + }, + "BM.GPU4.8": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, + "VM.GPU.A10.1": { + "gpu_count": 1, + "gpu_memory_in_gbs": 24, + "gpu_type": "A10" + }, + "VM.GPU.A10.2": { + "gpu_count": 2, + "gpu_memory_in_gbs": 48, + "gpu_type": "A10" + }, + "VM.GPU.A10.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "VM.GPU2.1": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "P100" + }, + "VM.GPU3.1": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "V100" + }, + "VM.GPU3.2": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "V100" + }, + "VM.GPU3.4": { + "gpu_count": 4, + "gpu_memory_in_gbs": 64, + "gpu_type": "V100" + } + } +} 
diff --git a/tests/unitary/with_extras/aqua/test_common_entities.py b/tests/unitary/with_extras/aqua/test_common_entities.py new file mode 100644 index 000000000..778c07ff1 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_common_entities.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*-- + +# Copyright (c) 2025 Oracle and/or its affiliates. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ + +import pytest + +from ads.aqua.common.entities import ComputeShapeSummary, ContainerPath + + +class TestComputeShapeSummary: + @pytest.mark.parametrize( + "input_data, expected_gpu_specs", + [ + # Case 1: Shape is present in GPU_SPECS. + ( + { + "core_count": 32, + "memory_in_gbs": 512, + "name": "VM.GPU2.1", + "shape_series": "GPU", + "gpu_specs": { + "gpu_type": "P100", + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + }, + }, + {"gpu_type": "P100", "gpu_count": 1, "gpu_memory_in_gbs": 16}, + ), + # Case 2: Not in GPU_SPECS; fallback extraction should yield gpu_count. + ( + { + "core_count": 16, + "memory_in_gbs": 256, + "name": "VM.GPU.UNKNOWN.4", + "shape_series": "GPU", + }, + {"gpu_type": None, "gpu_count": 4, "gpu_memory_in_gbs": None}, + ), + # Case 3: Non-GPU shape should not populate GPU specs. + ( + { + "core_count": 8, + "memory_in_gbs": 64, + "name": "VM.Standard2.1", + "shape_series": "STANDARD", + }, + None, + ), + ], + ) + def test_set_gpu_specs(self, input_data, expected_gpu_specs): + shape = ComputeShapeSummary(**input_data) + if expected_gpu_specs is None: + assert shape.gpu_specs is None + else: + assert shape.gpu_specs is not None + # Verify GPU type, count, and memory. 
+ assert shape.gpu_specs.gpu_type == expected_gpu_specs.get("gpu_type") + assert shape.gpu_specs.gpu_count == expected_gpu_specs.get("gpu_count") + assert shape.gpu_specs.gpu_memory_in_gbs == expected_gpu_specs.get( + "gpu_memory_in_gbs" + ) + + +class TestContainerPath: + """The unit tests for ContainerPath.""" + + @pytest.mark.parametrize( + "image_path, expected_result", + [ + ( + "iad.ocir.io/ociodscdev/odsc-llm-evaluate:0.1.2.9", + { + "full_path": "iad.ocir.io/ociodscdev/odsc-llm-evaluate:0.1.2.9", + "path": "iad.ocir.io/ociodscdev/odsc-llm-evaluate", + "name": "odsc-llm-evaluate", + "version": "0.1.2.9", + }, + ), + ( + "dsmc://model-with-version:0.2.78.0", + { + "full_path": "dsmc://model-with-version:0.2.78.0", + "path": "dsmc://model-with-version", + "name": "model-with-version", + "version": "0.2.78.0", + }, + ), + ( + "oci://my-custom-model-version:1.0.0", + { + "full_path": "oci://my-custom-model-version:1.0.0", + "path": "oci://my-custom-model-version", + "name": "my-custom-model-version", + "version": "1.0.0", + }, + ), + ( + "custom-scheme://path/to/versioned-model:2.5.1", + { + "full_path": "custom-scheme://path/to/versioned-model:2.5.1", + "path": "custom-scheme://path/to/versioned-model", + "name": "versioned-model", + "version": "2.5.1", + }, + ), + ( + "custom-scheme://path/to/versioned-model", + { + "full_path": "custom-scheme://path/to/versioned-model", + "path": "custom-scheme://path/to/versioned-model", + "name": "versioned-model", + "version": None, + }, + ), + ], + ) + def test_positive(self, image_path, expected_result): + assert ContainerPath(full_path=image_path).model_dump() == expected_result diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml index 29d0db8c3..2d394adfa 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml +++ 
b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_gguf_deployment.yaml @@ -18,7 +18,7 @@ spec: shapeName: "VM.Standard.A1.Flex" shapeConfigDetails: memoryInGBs: 60.0 - ocpus: 10 + ocpus: 10.0 type: datascienceModelDeployment lifecycleState: CREATING modelDeploymentUrl: "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml new file mode 100644 index 000000000..9e33aaa6f --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_create_multi_deployment.yaml @@ -0,0 +1,35 @@ +kind: deployment +spec: + createdBy: ocid1.user.oc1.. + displayName: multi-model-deployment-name + freeformTags: + OCI_AQUA: active + aqua_multimodel: "true" + aqua_model_id: model-id + id: "ocid1.datasciencemodeldeployment.oc1.." + infrastructure: + kind: infrastructure + spec: + bandwidthMbps: 10 + compartmentId: ocid1.compartment.oc1.. + deploymentType: SINGLE_MODEL + policyType: FIXED_SIZE + projectId: ocid1.datascienceproject.oc1.iad. + replica: 1 + shapeName: "BM.GPU.A10.4" + type: datascienceModelDeployment + lifecycleState: CREATING + modelDeploymentUrl: "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." 
+ runtime: + kind: runtime + spec: + env: + MODEL_DEPLOY_PREDICT_ENDPOINT: /v1/completions + MULTI_MODEL_CONFIG: '{"models": [{"params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}' + healthCheckPort: 8080 + image: "dsmc://image-name:1.0.0.0" + modelUri: "ocid1.datasciencemodeldeployment.oc1.." + serverPort: 8080 + type: container + timeCreated: 2024-01-01T00:00:00.000000+00:00 +type: modelDeployment diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_deployment_shapes.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_deployment_shapes.json new file mode 100644 index 000000000..66f3d3aaa --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_deployment_shapes.json @@ -0,0 +1,288 @@ +{ + "shapes": [ + { + "core_count": 1, + "gpu_specs": null, + "memory_in_gbs": 15, + "name": "VM.Standard2.1", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 2, + "gpu_specs": null, + "memory_in_gbs": 30, + "name": "VM.Standard2.2", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 4, + "gpu_specs": null, + "memory_in_gbs": 30, + "name": "VM.Standard2.4", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 8, + "gpu_specs": null, + "memory_in_gbs": 120, + "name": "VM.Standard2.8", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 16, + "gpu_specs": null, + "memory_in_gbs": 240, + "name": "VM.Standard2.16", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 24, + "gpu_specs": null, + "memory_in_gbs": 320, + "name": "VM.Standard2.24", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 64, + "gpu_specs": null, + 
"memory_in_gbs": 1024, + "name": "VM.Standard.E3.Flex", + "shape_series": "AMD_ROME" + }, + { + "core_count": 64, + "gpu_specs": null, + "memory_in_gbs": 1024, + "name": "VM.Standard.E4.Flex", + "shape_series": "AMD_ROME" + }, + { + "core_count": 94, + "gpu_specs": null, + "memory_in_gbs": 1049, + "name": "VM.Standard.E5.Flex", + "shape_series": "AMD_ROME" + }, + { + "core_count": 32, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard3.Flex", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 18, + "gpu_specs": null, + "memory_in_gbs": 256, + "name": "VM.Optimized3.Flex", + "shape_series": "INTEL_SKYLAKE" + }, + { + "core_count": 80, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.A1.Flex", + "shape_series": "ARM" + }, + { + "core_count": 78, + "gpu_specs": null, + "memory_in_gbs": 946, + "name": "VM.Standard.A2.Flex", + "shape_series": "ARM" + }, + { + "core_count": 12, + "gpu_specs": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "P100" + }, + "memory_in_gbs": 72, + "name": "VM.GPU2.1", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 6, + "gpu_specs": { + "gpu_count": 1, + "gpu_memory_in_gbs": 16, + "gpu_type": "V100" + }, + "memory_in_gbs": 90, + "name": "VM.GPU3.1", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 12, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "V100" + }, + "memory_in_gbs": 180, + "name": "VM.GPU3.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 24, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 64, + "gpu_type": "V100" + }, + "memory_in_gbs": 360, + "name": "VM.GPU3.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 28, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": 32, + "gpu_type": "P100" + }, + "memory_in_gbs": 192, + "name": "BM.GPU2.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 52, + "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 128, + "gpu_type": "V100" + }, + 
"memory_in_gbs": 768, + "name": "BM.GPU3.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, + "memory_in_gbs": 2048, + "name": "BM.GPU4.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 320, + "gpu_type": "A100" + }, + "memory_in_gbs": 2048, + "name": "BM.GPU.A100-v2.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 112, + "gpu_specs": { + "gpu_count": 8, + "gpu_memory_in_gbs": 1128, + "gpu_type": "H200" + }, + "memory_in_gbs": 2048, + "name": "BM.GPU.H100.8", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 32, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": null, + "gpu_type": null + }, + "memory_in_gbs": 1024, + "name": "BM.GPU.T1.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "memory_in_gbs": 1024, + "name": "BM.GPU.A10.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 96, + "gpu_type": "A10" + }, + "memory_in_gbs": 1024, + "name": "VM.GPU.A10.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 112, + "gpu_specs": { + "gpu_count": 4, + "gpu_memory_in_gbs": 192, + "gpu_type": "L40S" + }, + "memory_in_gbs": 1024, + "name": "BM.GPU.L40S-NC.4", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 15, + "gpu_specs": { + "gpu_count": 1, + "gpu_memory_in_gbs": 24, + "gpu_type": "A10" + }, + "memory_in_gbs": 240, + "name": "VM.GPU.A10.1", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 30, + "gpu_specs": { + "gpu_count": 2, + "gpu_memory_in_gbs": 48, + "gpu_type": "A10" + }, + "memory_in_gbs": 480, + "name": "VM.GPU.A10.2", + "shape_series": "NVIDIA_GPU" + }, + { + "core_count": 64, + "gpu_specs": null, + "memory_in_gbs": 1024, + "name": "VM.Standard.AMD.Generic", + 
"shape_series": "GENERIC" + }, + { + "core_count": 32, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.Intel.Generic", + "shape_series": "GENERIC" + }, + { + "core_count": 80, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.Ampere.Generic", + "shape_series": "GENERIC" + }, + { + "core_count": 32, + "gpu_specs": null, + "memory_in_gbs": 512, + "name": "VM.Standard.x86.Generic", + "shape_series": "GENERIC" + } + ] +} diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml new file mode 100644 index 000000000..7a498d035 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model.yaml @@ -0,0 +1,73 @@ +kind: datascienceModel +spec: + artifact: oci://models@namespace/models/model-name/artifact + compartmentId: ocid1.compartment.oc1.. + customMetadataList: + data: + - category: null + description: null + key: multi_model_metadata + value: Uploaded + - category: Other + description: Inference container mapping for multi_model + key: deployment-container + value: odsc-vllm-serving + - category: Other + description: Number of models in the group. + key: model_group_count + value: 3 + definedTags: {} + description: Multi-model grouping using model_one, model_two, model_three. + displayName: multi_model + freeformTags: + OCI_AQUA: active + aqua_multimodel: "true" + license: Apache 2.0 + organization: Organization + ready_to_fine_tune: false + id: ocid1.datasciencemodel.oc1.iad. 
+ lifecycleState: ACTIVE + modelDescription: + models: + - bucketName: models + namespace: namespace + objects: + - name: models/model_one/artifact/README.md + sizeInBytes: 10317 + version: 450a8124-f5ca-4ee6-b4cf-c1dc05b13d46 + - name: models/model_one/artifact/config.json + sizeInBytes: 950 + version: 3ace781b-4a48-4e89-88b6-61f0db6d51ad + prefix: models/model_one/artifact + - bucketName: models + namespace: namespace + objects: + - name: models/model_two/artifact/README.md + sizeInBytes: 10317 + version: 450a8124-f5ca-4ee6-b4cf-c1dc05b13d46 + - name: models/model_two/artifact/config.json + sizeInBytes: 950 + version: 3ace781b-4a48-4e89-88b6-61f0db6d51ad + prefix: models/model_two/artifact + - bucketName: models + namespace: namespace + objects: + - name: models/model_three/artifact/README.md + sizeInBytes: 10317 + version: 450a8124-f5ca-4ee6-b4cf-c1dc05b13d46 + - name: models/model_three/artifact/config.json + sizeInBytes: 950 + version: 3ace781b-4a48-4e89-88b6-61f0db6d51ad + prefix: models/model_three/artifact + type: modelOSSReferenceDescription + version: '1.0' + projectId: ocid1.datascienceproject.oc1.iad. 
+ provenanceMetadata: + artifact_dir: null + git_branch: null + git_commit: 123456 + repository_url: https://model-name-url.com + training_id: null + training_script_path: null + timeCreated: 2024-01-01T00:00:00.000000+00:00 +type: dataScienceModel diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json new file mode 100644 index 000000000..ac197f726 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_multi_model_deployment_config.json @@ -0,0 +1,77 @@ +{ + "configuration": { + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1 + }, + { + "gpu_count": 2 + }, + { + "gpu_count": 8 + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ] + }, + "VM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4 + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "VM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] +} diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json 
b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json new file mode 100644 index 000000000..ae5ed7b72 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model.json @@ -0,0 +1,313 @@ +{ + "deployment_config": { + "model_a": { + "configuration": { + "BM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] + }, + "model_b": { + "configuration": { + "BM.GPU.A10.4": { + 
"multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] + }, + "model_c": { + "configuration": { + "BM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + 
"parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.H100.8" + ] + } + }, + "error_message": null, + "gpu_allocation": { + "BM.GPU.A100-v2.8": { + "models": [ + { + "gpu_count": 2, + "ocid": "model_a" + }, + { + "gpu_count": 2, + "ocid": "model_b" + }, + { + "gpu_count": 2, + "ocid": "model_c" + } + ], + "total_gpus_available": 8 + }, + "BM.GPU.H100.8": { + "models": [ + { + "gpu_count": 2, + "ocid": "model_a" + }, + { + "gpu_count": 2, + "ocid": "model_b" + }, + { + "gpu_count": 2, + "ocid": "model_c" + } + ], + "total_gpus_available": 8 + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json new file mode 100644 index 000000000..491049250 --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/aqua_summary_multi_model_single.json @@ -0,0 +1,145 @@ +{ + "deployment_config": { + "model_a": { + "configuration": { + "BM.GPU.A10.4": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 4, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.A100-v2.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + 
"VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "BM.GPU.H100.8": { + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": {} + }, + { + "gpu_count": 2, + "parameters": {} + }, + { + "gpu_count": 4, + "parameters": {} + }, + { + "gpu_count": 8, + "parameters": {} + } + ], + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "shape_info": { + "configs": [], + "type": "" + } + }, + "VM.GPU.A10.2": { + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + } + } + ], + "parameters": {}, + "shape_info": { + "configs": [], + "type": "" + } + } + }, + "shape": [ + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.A100-v2.8", + "BM.GPU.H100.8" + ] + } + }, + "error_message": null, + "gpu_allocation": { + "BM.GPU.A10.4": { + "models": [ + { + "gpu_count": 4, + "ocid": "model_a" + } + ], + "total_gpus_available": 4 + }, + "BM.GPU.A100-v2.8": { + "models": [ + { + "gpu_count": 8, + "ocid": "model_a" + } + ], + "total_gpus_available": 8 + }, + "BM.GPU.H100.8": { + "models": [ + { + "gpu_count": 8, + "ocid": "model_a" + } + ], + "total_gpus_available": 8 + }, + "VM.GPU.A10.2": { + "models": [ + { + "gpu_count": 2, + "ocid": "model_a" + } + ], + "total_gpus_available": 2 + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json index b60178403..824fa8541 100644 --- a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json +++ 
b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_config.json @@ -1,6 +1,6 @@ { "configuration": { - "VM.GPU.A10.1": { + "VM.GPU.A10.4": { "parameters": { "TGI_PARAMS": "--max-stop-sequences 6", "VLLM_PARAMS": "--max-model-len 4096" @@ -24,7 +24,7 @@ } }, "shape": [ - "VM.GPU.A10.1", + "VM.GPU.A10.4", "VM.Standard.A1.Flex" ] } diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json new file mode 100644 index 000000000..8764c354b --- /dev/null +++ b/tests/unitary/with_extras/aqua/test_data/deployment/deployment_gpu_config.json @@ -0,0 +1,43 @@ +{ + "shape": [ + "VM.GPU.A10.1", + "VM.GPU.A10.2", + "BM.GPU.A10.4", + "BM.GPU.L40S-NC.4" + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1 + } + ] + }, + "BM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1 + }, + { + "gpu_count": 2 + } + ] + }, + "BM.GPU.L40S-NC.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2 + } + ] + } + } +} diff --git a/tests/unitary/with_extras/aqua/test_data/deployment/model_deployment_config_summary.json b/tests/unitary/with_extras/aqua/test_data/deployment/model_deployment_config_summary.json new file mode 100644 index 000000000..e69de29bb diff --git a/tests/unitary/with_extras/aqua/test_data/ui/container_index.json b/tests/unitary/with_extras/aqua/test_data/ui/container_index.json index e0689d17e..a3f4dbe77 100644 --- a/tests/unitary/with_extras/aqua/test_data/ui/container_index.json +++ b/tests/unitary/with_extras/aqua/test_data/ui/container_index.json @@ -130,6 +130,9 @@ "NVIDIA_GPU" ], "type": "inference", + "usages": [ + "multi_model" + ], 
"version": "0.4.1.3" } ] diff --git a/tests/unitary/with_extras/aqua/test_deployment.py b/tests/unitary/with_extras/aqua/test_deployment.py index 81d005fa1..c82081b5d 100644 --- a/tests/unitary/with_extras/aqua/test_deployment.py +++ b/tests/unitary/with_extras/aqua/test_deployment.py @@ -8,7 +8,6 @@ import json import os import unittest -from dataclasses import asdict from importlib import reload from unittest.mock import MagicMock, patch @@ -16,16 +15,27 @@ import pytest from parameterized import parameterized -from ads.aqua.common.entities import ModelConfigResult +from ads.aqua.common.entities import ( + AquaMultiModelRef, + ComputeShapeSummary, + ModelConfigResult, +) import ads.aqua.modeldeployment.deployment import ads.config +from ads.aqua.common.entities import AquaMultiModelRef +from ads.aqua.common.enums import Tags +from ads.aqua.common.errors import AquaRuntimeError, AquaValueError from ads.aqua.modeldeployment import AquaDeploymentApp, MDInferenceResponse from ads.aqua.modeldeployment.entities import ( AquaDeployment, + AquaDeploymentConfig, AquaDeploymentDetail, + ConfigValidationError, + CreateModelDeploymentDetails, + ModelDeploymentConfigSummary, ModelParams, ) -from ads.aqua.common.errors import AquaRuntimeError, AquaValueError +from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader from ads.model.datascience_model import DataScienceModel from ads.model.deployment.model_deployment import ModelDeployment from ads.model.model_metadata import ModelCustomMetadata @@ -33,15 +43,51 @@ null = None +@pytest.fixture(scope="module", autouse=True) +def set_env(): + os.environ["SERVICE_COMPARTMENT_ID"] = "ocid1.compartment.oc1.." + os.environ["USER_COMPARTMENT_ID"] = "ocid1.compartment.oc1.." + os.environ["USER_PROJECT_ID"] = "ocid1.project.oc1.." + os.environ["COMPARTMENT_ID"] = "ocid1.compartment.oc1.." + + os.environ["PROJECT_COMPARTMENT_OCID"] = ( + "ocid1.compartment.oc1.." 
+ ) + os.environ["NB_SESSION_COMPARTMENT_OCID"] = ( + "ocid1.compartment.oc1.." + ) + os.environ["ODSC_MODEL_COMPARTMENT_OCID"] = ( + "ocid1.compartment.oc1.." + ) + + os.environ["MODEL_DEPLOYMENT_ID"] = ( + "ocid1.datasciencemodeldeployment.oc1.." + ) + os.environ["MODEL_DEPLOYMENT_URL"] = ( + "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." + ) + os.environ["MODEL_ID"] = ( + "ocid1.datasciencemodeldeployment.oc1.." + ) + os.environ["DEPLOYMENT_IMAGE_NAME"] = "dsmc://image-name:1.0.0.0" + os.environ["DEPLOYMENT_SHAPE_NAME"] = "BM.GPU.A10.4" + os.environ["DEPLOYMENT_GPU_COUNT"] = "1" + os.environ["DEPLOYMENT_GPU_COUNT_B"] = "2" + os.environ["DEPLOYMENT_SHAPE_NAME_CPU"] = "VM.Standard.A1.Flex" + + class TestDataset: SERVICE_COMPARTMENT_ID = "ocid1.compartment.oc1.." USER_COMPARTMENT_ID = "ocid1.compartment.oc1.." + USER_PROJECT_ID = "ocid1.project.oc1.." COMPARTMENT_ID = "ocid1.compartment.oc1.." MODEL_DEPLOYMENT_ID = "ocid1.datasciencemodeldeployment.oc1.." MODEL_DEPLOYMENT_URL = "https://modeldeployment.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.." MODEL_ID = "ocid1.datasciencemodeldeployment.oc1.." 
DEPLOYMENT_IMAGE_NAME = "dsmc://image-name:1.0.0.0" - DEPLOYMENT_SHAPE_NAME = "VM.GPU.A10.1" + DEPLOYMENT_SHAPE_NAME = "BM.GPU.A10.4" + DEPLOYMENT_GPU_COUNT = 1 + DEPLOYMENT_GPU_COUNT_B = 2 DEPLOYMENT_SHAPE_NAME_CPU = "VM.Standard.A1.Flex" model_deployment_object = [ @@ -108,11 +154,81 @@ class TestDataset: } ), "model_deployment_url": MODEL_DEPLOYMENT_URL, - "project_id": "ocid1.datascienceproject.oc1..", + "project_id": USER_PROJECT_ID, "time_created": "2024-01-01T00:00:00.000000+00:00", } ] + multi_model_deployment_object = { + "category_log_details": oci.data_science.models.CategoryLogDetails( + **{ + "access": oci.data_science.models.LogDetails( + **{ + "log_group_id": "ocid1.loggroup.oc1..", + "log_id": "ocid1.log.oc1..", + } + ), + "predict": oci.data_science.models.LogDetails( + **{ + "log_group_id": "ocid1.loggroup.oc1..", + "log_id": "ocid1.log.oc1..", + } + ), + } + ), + "compartment_id": "ocid1.compartment.oc1..", + "created_by": "ocid1.user.oc1..", + "defined_tags": {}, + "description": "Mock description", + "display_name": "multi-model-deployment-name", + "freeform_tags": { + "OCI_AQUA": "active", + "aqua_model_id": "model-id", + "aqua_multimodel": "true", + }, + "id": "ocid1.datasciencemodeldeployment.oc1..", + "lifecycle_state": "ACTIVE", + "model_deployment_configuration_details": oci.data_science.models.SingleModelDeploymentConfigurationDetails( + **{ + "deployment_type": "SINGLE_MODEL", + "environment_configuration_details": oci.data_science.models.OcirModelDeploymentEnvironmentConfigurationDetails( + **{ + "cmd": [], + "entrypoint": [], + "environment_configuration_type": "OCIR_CONTAINER", + "environment_variables": { + "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", + "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", 
"model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}', + }, + "health_check_port": 8080, + "image": "dsmc://image-name:1.0.0.0", + "image_digest": "sha256:mock22373c16f2015f6f33c5c8553923cf8520217da0bd9504471c5e53cbc9d", + "server_port": 8080, + } + ), + "model_configuration_details": oci.data_science.models.ModelConfigurationDetails( + **{ + "bandwidth_mbps": 10, + "instance_configuration": oci.data_science.models.InstanceConfiguration( + **{ + "instance_shape_name": DEPLOYMENT_SHAPE_NAME, + "model_deployment_instance_shape_config_details": null, + } + ), + "model_id": "ocid1.datasciencemodel.oc1..", + "scaling_policy": oci.data_science.models.FixedSizeScalingPolicy( + **{"instance_count": 1, "policy_type": "FIXED_SIZE"} + ), + "maximum_bandwidth_mbps": 10, + } + ), + } + ), + "model_deployment_url": MODEL_DEPLOYMENT_URL, + "project_id": USER_PROJECT_ID, + "time_created": "2024-01-01T00:00:00.000000+00:00", + } + model_deployment_object_gguf = [ { "compartment_id": "ocid1.compartment.oc1..", @@ -152,7 +268,7 @@ class TestDataset: "instance_shape_name": DEPLOYMENT_SHAPE_NAME_CPU, "model_deployment_instance_shape_config_details": oci.data_science.models.ModelDeploymentInstanceShapeConfigDetails( **{ - "ocpus": 10, + "ocpus": 10.0, "memory_in_gbs": 60.0, } ), @@ -167,7 +283,7 @@ class TestDataset: } ), "model_deployment_url": MODEL_DEPLOYMENT_URL, - "project_id": "ocid1.datascienceproject.oc1..", + "project_id": USER_PROJECT_ID, "time_created": "2024-01-01T00:00:00.000000+00:00", } ] @@ -239,7 +355,7 @@ class TestDataset: } ), "model_deployment_url": MODEL_DEPLOYMENT_URL, - "project_id": "ocid1.datascienceproject.oc1..", + "project_id": USER_PROJECT_ID, "time_created": "2024-01-01T00:00:00.000000+00:00", } ] @@ -254,7 +370,8 @@ class TestDataset: "created_on": "2024-01-01T00:00:00.000000+00:00", "created_by": 
"ocid1.user.oc1..", "endpoint": MODEL_DEPLOYMENT_URL, - "private_endpoint_id": null, + "private_endpoint_id": None, + "models": [], "model_id": "ocid1.datasciencemodel.oc1..", "environment_variables": { "BASE_MODEL": "service_models/model-name/artifact", @@ -274,6 +391,61 @@ class TestDataset: "tags": {"OCI_AQUA": "active", "aqua_model_name": "model-name"}, } + aqua_multi_deployment_object = { + "id": "ocid1.datasciencemodeldeployment.oc1..", + "display_name": "multi-model-deployment-name", + "aqua_service_model": False, + "aqua_model_name": "", + "state": "ACTIVE", + "description": "Mock description", + "created_on": "2024-01-01T00:00:00.000000+00:00", + "created_by": "ocid1.user.oc1..", + "endpoint": MODEL_DEPLOYMENT_URL, + "private_endpoint_id": None, + "models": [ + { + "env_var": {}, + "gpu_count": 2, + "model_id": "test_model_id_1", + "model_name": "test_model_1", + "artifact_location": "test_location_1", + }, + { + "env_var": {}, + "gpu_count": 2, + "model_id": "test_model_id_2", + "model_name": "test_model_2", + "artifact_location": "test_location_2", + }, + { + "env_var": {}, + "gpu_count": 2, + "model_id": "test_model_id_3", + "model_name": "test_model_3", + "artifact_location": "test_location_3", + }, + ], + "model_id": "ocid1.datasciencemodel.oc1..", + "environment_variables": { + "MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions", + "MULTI_MODEL_CONFIG": '{ "models": [{ "params": "--served-model-name model_one --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_one/5be6479/artifact/"}, {"params": "--served-model-name model_two --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_two/83e9aa1/artifact/"}, {"params": "--served-model-name model_three --tensor-parallel-size 1 --max-model-len 2096", "model_path": "models/model_three/83e9aa1/artifact/"}]}', + }, + "cmd": [], + "console_link": "https://cloud.oracle.com/data-science/model-deployments/ocid1.datasciencemodeldeployment.oc1..?region=region-name", + 
"lifecycle_details": "", + "shape_info": { + "instance_shape": DEPLOYMENT_SHAPE_NAME, + "instance_count": 1, + "ocpus": null, + "memory_in_gbs": null, + }, + "tags": { + "OCI_AQUA": "active", + "aqua_model_id": "model-id", + "aqua_multimodel": "true", + }, + } + aqua_deployment_gguf_env_vars = { "BASE_MODEL": "service_models/model-name/artifact", "BASE_MODEL_FILE": "model-name.gguf", @@ -285,12 +457,12 @@ class TestDataset: aqua_deployment_gguf_shape_info = { "instance_shape": DEPLOYMENT_SHAPE_NAME_CPU, "instance_count": 1, - "ocpus": 10, + "ocpus": 10.0, "memory_in_gbs": 60.0, } aqua_deployment_detail = { - **vars(AquaDeployment(**aqua_deployment_object)), + **(AquaDeployment(**aqua_deployment_object).to_dict()), "log_group": { "id": "ocid1.loggroup.oc1..", "name": "log-group-name", @@ -330,6 +502,409 @@ class TestDataset: "8080", ] + aqua_deployment_multi_model_config_summary = { + "deployment_config": { + "model_a": { + "shape": [ + "VM.GPU.A10.2", + "VM.GPU.A10.4", + "BM.GPU.A100-V2.8", + "BM.GPU.H100.8", + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": {}, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + } + ], + "shape_info": {"configs": [], "type": ""}, + }, + "VM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + {"gpu_count": 4, "parameters": {}}, + ], + "shape_info": {"configs": [], "type": ""}, + }, + "BM.GPU.A100-V2.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + 
"gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + ], + "shape_info": {"configs": [], "type": ""}, + }, + "BM.GPU.H100.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + {"gpu_count": 1, "parameters": {}}, + {"gpu_count": 2, "parameters": {}}, + {"gpu_count": 8, "parameters": {}}, + ], + "shape_info": {"configs": [], "type": ""}, + }, + }, + } + }, + "gpu_allocation": { + "VM.GPU.A10.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "VM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU.A100-V2.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + }, + "error_message": None, + } + + aqua_deployment_multi_model_config_single_custom = { + "deployment_config": {"model_a": {"shape": [], "configuration": {}}}, + "gpu_allocation": { + "VM.GPU2.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU3.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "VM.GPU3.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU2.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU3.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU4.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.A100-V2.8": { + "models": [{"ocid": "model_a", "gpu_count": 8}], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [{"ocid": "model_a", "gpu_count": 
8}], + "total_gpus_available": 8, + }, + "BM.GPU.T1.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + "BM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "BM.GPU.L40S-NC.4": { + "models": [{"ocid": "model_a", "gpu_count": 4}], + "total_gpus_available": 4, + }, + "VM.GPU.A10.1": { + "models": [{"ocid": "model_a", "gpu_count": 1}], + "total_gpus_available": 1, + }, + "VM.GPU.A10.2": { + "models": [{"ocid": "model_a", "gpu_count": 2}], + "total_gpus_available": 2, + }, + }, + "error_message": None, + } + + aqua_deployment_multi_model_config_summary_hybrid = { + "deployment_config": { + "model_a": { + "shape": [ + "BM.GPU.A100-V2.8", + "BM.GPU.H100.8", + "VM.GPU.A10.2", + "VM.GPU.A10.4", + ], + "configuration": { + "VM.GPU.A10.2": { + "parameters": {}, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + } + ], + "shape_info": {"configs": [], "type": ""}, + }, + "VM.GPU.A10.4": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + {"gpu_count": 4, "parameters": {}}, + ], + "shape_info": {"configs": [], "type": ""}, + }, + "BM.GPU.A100-V2.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + { + "gpu_count": 1, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + "gpu_count": 2, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + { + "gpu_count": 8, + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 32000" + }, + }, + ], + "shape_info": {"configs": 
[], "type": ""}, + }, + "BM.GPU.H100.8": { + "parameters": { + "VLLM_PARAMS": "--trust-remote-code --max-model-len 60000" + }, + "multi_model_deployment": [ + {"gpu_count": 1, "parameters": {}}, + {"gpu_count": 2, "parameters": {}}, + {"gpu_count": 8, "parameters": {}}, + ], + "shape_info": {"configs": [], "type": ""}, + }, + }, + }, + "model_b": { + "configuration": {}, + "shape": [], + }, + "model_c": { + "configuration": {}, + "shape": [], + }, + }, + "gpu_allocation": { + "BM.GPU.H100.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "VM.GPU.A10.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 1}, + ], + "total_gpus_available": 4, + }, + "BM.GPU.A100-V2.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + }, + "error_message": None, + } + + aqua_deployment_multi_model_config_summary_all_empty = { + "deployment_config": { + "model_a": { + "configuration": {}, + "shape": [], + }, + "model_b": { + "configuration": {}, + "shape": [], + }, + "model_c": { + "configuration": {}, + "shape": [], + }, + }, + "gpu_allocation": { + "VM.GPU3.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + "BM.GPU3.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "BM.GPU4.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "BM.GPU.A100-V2.8": { + "models": [ + { + "gpu_count": 2, + 
"ocid": "model_a", + }, + { + "gpu_count": 2, + "ocid": "model_b", + }, + { + "gpu_count": 4, + "ocid": "model_c", + }, + ], + "total_gpus_available": 8, + }, + "BM.GPU.H100.8": { + "models": [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "total_gpus_available": 8, + }, + "BM.GPU.A10.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + "VM.GPU.A10.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + "BM.GPU.L40S-NC.4": { + "models": [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "total_gpus_available": 4, + }, + }, + "error_message": None, + } + + model_gpu_dict = {"model_a": [2, 4], "model_b": [1, 2, 4], "model_c": [1, 2, 8]} + incompatible_model_gpu_dict = { + "model_a": [1, 2], + "model_b": [1, 2], + "model_c": [1, 2, 8], + } + + multi_model_deployment_model_attributes = [ + { + "env_var": {"--test_key_one": "test_value_one"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_one", + "artifact_location": "artifact_location_one", + }, + { + "env_var": {"--test_key_two": "test_value_two"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_two", + "artifact_location": "artifact_location_two", + }, + { + "env_var": {"--test_key_three": "test_value_three"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_three", + "artifact_location": "artifact_location_three", + }, + ] + class TestAquaDeployment(unittest.TestCase): def setUp(self): @@ -342,6 +917,7 @@ def setUpClass(cls): os.environ["CONDA_BUCKET_NS"] = "test-namespace" os.environ["ODSC_MODEL_COMPARTMENT_OCID"] = 
TestDataset.SERVICE_COMPARTMENT_ID os.environ["PROJECT_COMPARTMENT_OCID"] = TestDataset.USER_COMPARTMENT_ID + os.environ["PROJECT_OCID"] = TestDataset.USER_PROJECT_ID reload(ads.config) reload(ads.aqua) reload(ads.aqua.modeldeployment.deployment) @@ -352,6 +928,7 @@ def tearDownClass(cls): os.environ.pop("CONDA_BUCKET_NS", None) os.environ.pop("ODSC_MODEL_COMPARTMENT_OCID", None) os.environ.pop("PROJECT_COMPARTMENT_OCID", None) + os.environ.pop("PROJECT_OCID", None) reload(ads.config) reload(ads.aqua) reload(ads.aqua.modeldeployment.deployment) @@ -372,7 +949,7 @@ def test_list_deployments(self): assert len(results) == 1 expected_attributes = AquaDeployment.__annotations__.keys() for r in results: - actual_attributes = asdict(r) + actual_attributes = r.to_dict() assert set(actual_attributes) == set( expected_attributes ), "Attributes mismatch" @@ -403,12 +980,71 @@ def test_get_deployment(self, mock_get_resource_name): expected_attributes = set(AquaDeploymentDetail.__annotations__.keys()) | set( AquaDeployment.__annotations__.keys() ) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" assert actual_attributes == TestDataset.aqua_deployment_detail assert result.log.name == "log-name" assert result.log_group.name == "log-group-name" + @patch( + "ads.model.service.oci_datascience_model.OCIDataScienceModel.get_custom_metadata_artifact" + ) + @patch("ads.model.DataScienceModel.from_id") + @patch("ads.aqua.modeldeployment.deployment.get_resource_name") + def test_get_multi_model_deployment( + self, + mock_get_resource_name, + mock_model_from_id, + mock_get_custom_metadata_artifact, + ): + multi_model_deployment = copy.deepcopy( + TestDataset.multi_model_deployment_object + ) + self.app.ds_client.get_model_deployment = MagicMock( + return_value=oci.response.Response( + status=200, + request=MagicMock(), + headers=MagicMock(), + 
data=oci.data_science.models.ModelDeploymentSummary( + **multi_model_deployment + ), + ) + ) + mock_get_resource_name.side_effect = lambda param: ( + "log-group-name" + if param.startswith("ocid1.loggroup") + else "log-name" + if param.startswith("ocid1.log") + else "" + ) + + aqua_multi_model = os.path.join( + self.curr_dir, "test_data/deployment/aqua_multi_model.yaml" + ) + + mock_model_from_id.return_value = DataScienceModel.from_yaml( + uri=aqua_multi_model + ) + + multi_model_deployment_model_attributes_str = json.dumps( + TestDataset.multi_model_deployment_model_attributes + ).encode("utf-8") + mock_get_custom_metadata_artifact.return_value = ( + multi_model_deployment_model_attributes_str + ) + + result = self.app.get(model_deployment_id=TestDataset.MODEL_DEPLOYMENT_ID) + + expected_attributes = set(AquaDeploymentDetail.__annotations__.keys()) | set( + AquaDeployment.__annotations__.keys() + ) + actual_attributes = result.to_dict() + assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" + assert len(result.models) == 3 + assert [ + model.model_dump() for model in result.models + ] == TestDataset.multi_model_deployment_model_attributes + def test_get_deployment_missing_tags(self): """Test for returning a runtime error if OCI_AQUA tag is missing.""" with pytest.raises( @@ -441,11 +1077,152 @@ def test_get_deployment_config(self): self.app.get_config = MagicMock(return_value=ModelConfigResult(config=config)) result = self.app.get_deployment_config(TestDataset.MODEL_ID) - assert result == config + expected_config = AquaDeploymentConfig(**config) + assert result == expected_config self.app.get_config = MagicMock(return_value=ModelConfigResult(config=None)) result = self.app.get_deployment_config(TestDataset.MODEL_ID) - assert result == None + expected_config = AquaDeploymentConfig(**{}) + assert result == expected_config + + @patch( + 
"ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" + ) + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") + def test_get_multimodel_deployment_config_single( + self, mock_list_shapes, mock_fetch_deployment_configs_concurrently + ): + config_json = os.path.join( + self.curr_dir, + "test_data/deployment/aqua_multi_model_deployment_config.json", + ) + with open(config_json, "r") as _file: + config = json.load(_file) + + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + mock_list_shapes.return_value = shapes + + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig(**config) + } + result = self.app.get_multimodel_deployment_config(["model_a"]) + + assert ( + result.model_dump() + == TestDataset.aqua_deployment_multi_model_config_summary + ) + + # custom model without deployment config + # deployment shape should be collected from `list_shapes`. + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig() + } + result = self.app.get_multimodel_deployment_config(["model_a"]) + + assert result.error_message == ( + "The selected models do not have a valid GPU allocation based on their current configurations. " + "Please select a different model group. 
If you are deploying custom models that lack AQUA service configuration, " + "refer to the deployment guidelines here: " + "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/multimodel-deployment-tips.md#custom_models" + ) + + @patch( + "ads.aqua.modeldeployment.utils.MultiModelDeploymentConfigLoader._fetch_deployment_configs_concurrently" + ) + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.list_shapes") + def test_get_multimodel_deployment_config_hybrid( + self, mock_list_shapes, mock_fetch_deployment_configs_concurrently + ): + config_json = os.path.join( + self.curr_dir, + "test_data/deployment/aqua_multi_model_deployment_config.json", + ) + with open(config_json, "r") as _file: + config = json.load(_file) + + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + mock_list_shapes.return_value = shapes + + mock_fetch_deployment_configs_concurrently.return_value = { + "model_a": AquaDeploymentConfig(**config), + "model_b": AquaDeploymentConfig(), + "model_c": AquaDeploymentConfig(), + } + result = self.app.get_multimodel_deployment_config( + ["model_a", "model_b", "model_c"] + ) + + assert result.error_message != "" + + # assert ( + # result.model_dump() + # == TestDataset.aqua_deployment_multi_model_config_summary_hybrid + # ) + + # # all custom models without deployment config + # # deployment shape should be collected from `list_shapes` and gpu list will be generated by ads sdk. 
+ # mock_fetch_deployment_configs_concurrently.return_value = { + # "model_a": AquaDeploymentConfig(), + # "model_b": AquaDeploymentConfig(), + # "model_c": AquaDeploymentConfig(), + # } + # result = self.app.get_multimodel_deployment_config( + # ["model_a", "model_b", "model_c"] + # ) + + # assert ( + # result.model_dump() + # == TestDataset.aqua_deployment_multi_model_config_summary_all_empty + # ) + + def test_verify_compatibility(self): + result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( + 8, TestDataset.model_gpu_dict + ) + + assert result[0] == True + assert len(result[1]) == 3 + + result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( + 8, model_gpu_dict=TestDataset.model_gpu_dict, primary_model_id="model_b" + ) + + assert result[0] == True + assert len(result[1]) == 3 + + for item in result[1]: + if item.ocid == "model_b": + # model_b gets the maximum gpu count + assert item.gpu_count == 4 + + result = MultiModelDeploymentConfigLoader(self.app)._verify_compatibility( + 0, TestDataset.incompatible_model_gpu_dict + ) + + assert result[0] == False + assert result[1] == [] @patch("ads.aqua.modeldeployment.deployment.get_container_config") @patch("ads.aqua.model.AquaModelApp.create") @@ -469,7 +1246,9 @@ def test_create_deployment_for_foundation_model( with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) freeform_tags = {"ftag1": "fvalue1", "ftag2": "fvalue2"} defined_tags = {"dtag1": "dvalue1", "dtag2": "dvalue2"} @@ -481,6 +1260,21 @@ def test_create_deployment_for_foundation_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + 
ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME aqua_deployment = os.path.join( self.curr_dir, "test_data/deployment/aqua_create_deployment.yaml" @@ -508,8 +1302,8 @@ def test_create_deployment_for_foundation_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, freeform_tags=freeform_tags, defined_tags=defined_tags, ) @@ -517,7 +1311,7 @@ def test_create_deployment_for_foundation_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = "CREATING" @@ -550,7 +1344,24 @@ def test_create_deployment_for_fine_tuned_model( with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) container_index_json = os.path.join( self.curr_dir, "test_data/ui/container_index.json" @@ -582,8 +1393,8 @@ def test_create_deployment_for_fine_tuned_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, 
freeform_tags=None, defined_tags=None, ) @@ -591,7 +1402,7 @@ def test_create_deployment_for_fine_tuned_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = "CREATING" @@ -622,7 +1433,9 @@ def test_create_deployment_for_gguf_model( with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) container_index_json = os.path.join( self.curr_dir, "test_data/ui/container_index.json" @@ -631,6 +1444,21 @@ def test_create_deployment_for_gguf_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME aqua_deployment = os.path.join( self.curr_dir, "test_data/deployment/aqua_create_gguf_deployment.yaml" @@ -658,8 +1486,8 @@ def test_create_deployment_for_gguf_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, freeform_tags=None, defined_tags=None, ) @@ -667,7 +1495,7 @@ def test_create_deployment_for_gguf_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = 
result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = "CREATING" @@ -701,7 +1529,9 @@ def test_create_deployment_for_tei_byoc_embedding_model( with open(config_json, "r") as _file: config = json.load(_file) - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) container_index_json = os.path.join( self.curr_dir, "test_data/ui/container_index.json" @@ -710,6 +1540,21 @@ def test_create_deployment_for_tei_byoc_embedding_model( container_index_config = json.load(_file) mock_get_container_config.return_value = container_index_config + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as _file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME aqua_deployment = os.path.join( self.curr_dir, "test_data/deployment/aqua_create_embedding_deployment.yaml" @@ -737,8 +1582,8 @@ def test_create_deployment_for_tei_byoc_embedding_model( mock_create.assert_called_with( model_id=TestDataset.MODEL_ID, - compartment_id=None, - project_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, freeform_tags=None, defined_tags=None, ) @@ -746,7 +1591,7 @@ def test_create_deployment_for_tei_byoc_embedding_model( mock_deploy.assert_called() expected_attributes = set(AquaDeployment.__annotations__.keys()) - actual_attributes = asdict(result) + actual_attributes = result.to_dict() assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" expected_result = copy.deepcopy(TestDataset.aqua_deployment_object) expected_result["state"] = 
"CREATING" @@ -759,29 +1604,162 @@ def test_create_deployment_for_tei_byoc_embedding_model( ) assert actual_attributes == expected_result + @patch("ads.aqua.modeldeployment.deployment.get_container_config") + @patch("ads.aqua.model.AquaModelApp.create_multi") + @patch("ads.aqua.modeldeployment.deployment.get_container_image") + @patch("ads.model.deployment.model_deployment.ModelDeployment.deploy") + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.get_deployment_config") + @patch( + "ads.aqua.modeldeployment.entities.CreateModelDeploymentDetails.validate_multimodel_deployment_feasibility" + ) + def test_create_deployment_for_multi_model( + self, + mock_validate_multimodel_deployment_feasibility, + mock_get_deployment_config, + mock_deploy, + mock_get_container_image, + mock_create_multi, + mock_get_container_config, + ): + """Test to create a deployment for multi models.""" + mock_validate_multimodel_deployment_feasibility.return_value = MagicMock() + self.app.get_multimodel_deployment_config = MagicMock( + return_value=AquaDeploymentConfig( + **TestDataset.aqua_deployment_multi_model_config_summary + ) + ) + aqua_multi_model = os.path.join( + self.curr_dir, "test_data/deployment/aqua_multi_model.yaml" + ) + mock_create_multi.return_value = DataScienceModel.from_yaml( + uri=aqua_multi_model + ) + config_json = os.path.join( + self.curr_dir, + "test_data/deployment/aqua_multi_model_deployment_config.json", + ) + with open(config_json, "r") as _file: + config = json.load(_file) + + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) + + container_index_json = os.path.join( + self.curr_dir, "test_data/ui/container_index.json" + ) + with open(container_index_json, "r") as _file: + container_index_config = json.load(_file) + mock_get_container_config.return_value = container_index_config + + shapes = [] + + with open( + os.path.join( + self.curr_dir, + "test_data/deployment/aqua_deployment_shapes.json", + ), + "r", + ) as 
_file: + shapes = [ + ComputeShapeSummary(**item) for item in json.load(_file)["shapes"] + ] + + self.app.list_shapes = MagicMock(return_value=shapes) + + deployment_config_json = os.path.join( + self.curr_dir, "test_data/deployment/deployment_gpu_config.json" + ) + mock_get_deployment_config.return_value = deployment_config_json + + mock_get_container_image.return_value = TestDataset.DEPLOYMENT_IMAGE_NAME + aqua_deployment = os.path.join( + self.curr_dir, "test_data/deployment/aqua_create_multi_deployment.yaml" + ) + model_deployment_obj = ModelDeployment.from_yaml(uri=aqua_deployment) + model_deployment_dsc_obj = copy.deepcopy( + TestDataset.multi_model_deployment_object + ) + model_deployment_dsc_obj["lifecycle_state"] = "CREATING" + model_deployment_obj.dsc_model_deployment = ( + oci.data_science.models.ModelDeploymentSummary(**model_deployment_dsc_obj) + ) + mock_deploy.return_value = model_deployment_obj + + model_info_1 = AquaMultiModelRef( + model_id="test_model_id_1", + model_name="test_model_1", + gpu_count=2, + artifact_location="test_location_1", + ) + + model_info_2 = AquaMultiModelRef( + model_id="test_model_id_2", + model_name="test_model_2", + gpu_count=2, + artifact_location="test_location_2", + ) + + model_info_3 = AquaMultiModelRef( + model_id="test_model_id_3", + model_name="test_model_3", + gpu_count=2, + artifact_location="test_location_3", + ) + + result = self.app.create( + models=[model_info_1, model_info_2, model_info_3], + instance_shape=TestDataset.DEPLOYMENT_SHAPE_NAME, + display_name="multi-model-deployment-name", + log_group_id="ocid1.loggroup.oc1..", + access_log_id="ocid1.log.oc1..", + predict_log_id="ocid1.log.oc1..", + ) + + mock_create_multi.assert_called_with( + models=[model_info_1, model_info_2, model_info_3], + compartment_id=TestDataset.USER_COMPARTMENT_ID, + project_id=TestDataset.USER_PROJECT_ID, + freeform_tags=None, + defined_tags=None, + ) + mock_get_container_image.assert_called() + mock_deploy.assert_called() + + 
expected_attributes = set(AquaDeployment.__annotations__.keys()) + actual_attributes = result.to_dict() + assert set(actual_attributes) == set(expected_attributes), "Attributes mismatch" + expected_result = copy.deepcopy(TestDataset.aqua_multi_deployment_object) + expected_result["state"] = "CREATING" + assert actual_attributes == expected_result + @parameterized.expand( [ ( "VLLM_PARAMS", "odsc-vllm-serving", + 2, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--trust-remote-code"], ), ( "VLLM_PARAMS", "odsc-vllm-serving", - [], - [], + None, + ["--max-model-len 4096"], + ["--max-model-len 4096"], ), ( "TGI_PARAMS", "odsc-tgi-serving", - ["--sharded true", "--trust-remote-code", "--max-stop-sequences"], - ["--max-stop-sequences"], + 1, + [], + [], ), ( "CUSTOM_PARAMS", "custom-container-key", + None, ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ["--max-model-len 4096", "--seed 42", "--trust-remote-code"], ), @@ -792,6 +1770,7 @@ def test_get_deployment_default_params( self, container_params_field, container_type_key, + gpu_count, params, allowed_params, mock_from_id, @@ -799,14 +1778,26 @@ def test_get_deployment_default_params( """Test for fetching config details for a given deployment.""" config_json = os.path.join( - self.curr_dir, "test_data/deployment/deployment_config.json" + self.curr_dir, "test_data/deployment/deployment_gpu_config.json" ) with open(config_json, "r") as _file: config = json.load(_file) # update config params for testing - config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ - container_params_field - ] = " ".join(params) + if gpu_count: + # build field for multi_model_deployment + config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME][ + "multi_model_deployment" + ] = [ + { + "gpu_count": gpu_count, + "parameters": {container_params_field: " ".join(params)}, + } + ] + else: + # build field for normal deployment + 
config["configuration"][TestDataset.DEPLOYMENT_SHAPE_NAME]["parameters"][ + container_params_field + ] = " ".join(params) mock_model = MagicMock() custom_metadata_list = ModelCustomMetadata() @@ -816,11 +1807,15 @@ def test_get_deployment_default_params( mock_model.custom_metadata_list = custom_metadata_list mock_from_id.return_value = mock_model - self.app.get_deployment_config = MagicMock(return_value=config) + self.app.get_deployment_config = MagicMock( + return_value=AquaDeploymentConfig(**config) + ) + result = self.app.get_deployment_default_params( - TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME + TestDataset.MODEL_ID, TestDataset.DEPLOYMENT_SHAPE_NAME, gpu_count ) - if container_params_field == "CUSTOM_PARAMS": + + if container_params_field in ("CUSTOM_PARAMS", "TGI_PARAMS"): assert result == [] else: assert result == allowed_params @@ -936,6 +1931,247 @@ def test_validate_deployment_params_for_unverified_models( container_family=container_type_key, ) + def validate_multimodel_deployment_feasibility_helper( + self, models, instance_shape, display_name, total_gpus, mock_path + ): + config_json = os.path.join(self.curr_dir, mock_path) + + with open(config_json, "r") as _file: + config = json.load(_file) + + if models: + aqua_models = [ + AquaMultiModelRef(model_id=x["ocid"], gpu_count=x["gpu_count"]) + for x in models + ] + + mock_create_deployment_details = CreateModelDeploymentDetails( + models=aqua_models, + instance_shape=instance_shape, + display_name=display_name, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: "true"}, + ) + else: + model_id = "model_a" + mock_create_deployment_details = CreateModelDeploymentDetails( + model_id=model_id, + instance_shape=instance_shape, + display_name=display_name, + freeform_tags={Tags.MULTIMODEL_TYPE_TAG: "true"}, + ) + + mock_models_config_summary = ModelDeploymentConfigSummary(**(config)) + + mock_create_deployment_details.validate_multimodel_deployment_feasibility( + 
models_config_summary=mock_models_config_summary + ) + + @parameterized.expand( + [ + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + ), + ( + [ + {"ocid": "model_a", "gpu_count": 1}, + {"ocid": "model_b", "gpu_count": 1}, + {"ocid": "model_c", "gpu_count": 2}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + ), + ], + ) + def test_validate_multimodel_deployment_feasibility_positive( + self, models, instance_shape, display_name, total_gpus + ): + self.validate_multimodel_deployment_feasibility_helper( + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model.json", + ) + + @parameterized.expand( + [ + ( + None, + "BM.GPU.H100.8", + "test_a", + 8, + "Multi-model deployment requires at least one model, but none were provided. Please add one or more models to the model group to proceed.", + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "invalid_shape", + "test_a", + 8, + "The model group is not compatible with the selected instance shape 'invalid_shape'. 
Select a different instance shape.", + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 2}, + {"ocid": "model_d", "gpu_count": 2}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + "One or more selected models are missing from the configuration, preventing validation for deployment on the given shape.", + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + { + "ocid": "model_b", + "gpu_count": 4, + }, # model_b lacks this entry in loaded config + {"ocid": "model_c", "gpu_count": 2}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + "Change the GPU count for one or more models in the model group. Adjust GPU allocations per model or choose a larger instance shape.", + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 2}, + ], # model c is lacks BM.GPU.A100-v2.8 + "BM.GPU.A100-v2.8", + "test_a", + 8, + "Select a different instance shape. One or more models in the group are incompatible with the selected instance shape.", + ), + ( + [ + {"ocid": "model_a", "gpu_count": 4}, + {"ocid": "model_b", "gpu_count": 2}, + {"ocid": "model_c", "gpu_count": 4}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + "Total requested GPU count exceeds the available GPU capacity for the selected instance shape. 
Adjust GPU allocations per model or choose a larger instance shape.", + ), + ], + ) + def test_validate_multimodel_deployment_feasibility_negative( + self, + models, + instance_shape, + display_name, + total_gpus, + value_error, + ): + with pytest.raises(ConfigValidationError): + self.validate_multimodel_deployment_feasibility_helper( + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model.json", + ) + + @parameterized.expand( + [ + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + ], + "invalid_shape", # unsupported gpu shape + "test_a", + 2, + "The model group is not compatible with the selected instance shape 'invalid_shape'. Supported shapes: ['BM.GPU.H100.8', 'BM.GPU.A100-v2.8'].", + ), + ( + [ + {"ocid": "model_a", "gpu_count": 3}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + "Model model_a allocated 3 GPUs, but for single model deployment a valid GPU count would be 8. Adjust the GPU allocation to 8.", + ), + ], + ) + def test_validate_multimodel_deployment_feasibility_negative_single( + self, + models, + instance_shape, + display_name, + total_gpus, + value_error, + ): + with pytest.raises(ConfigValidationError): + self.validate_multimodel_deployment_feasibility_helper( + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model_single.json", + ) + + @parameterized.expand( + [ + ( + [ + {"ocid": "model_a", "gpu_count": 8}, + ], + "BM.GPU.H100.8", + "test_a", + 8, + ), + ( + [ + {"ocid": "model_a", "gpu_count": 2}, + ], + "VM.GPU.A10.2", + "test_a", + 2, + ), + ], + ) + def test_validate_multimodel_deployment_feasibility_positive_single( + self, + models, + instance_shape, + display_name, + total_gpus, + ): + self.validate_multimodel_deployment_feasibility_helper( + models, + instance_shape, + display_name, + total_gpus, + "test_data/deployment/aqua_summary_multi_model_single.json", + ) + class TestMDInferenceResponse(unittest.TestCase): def setUp(self): diff --git 
a/tests/unitary/with_extras/aqua/test_deployment_handler.py b/tests/unitary/with_extras/aqua/test_deployment_handler.py index 95be6c351..9e9be2b34 100644 --- a/tests/unitary/with_extras/aqua/test_deployment_handler.py +++ b/tests/unitary/with_extras/aqua/test_deployment_handler.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import os @@ -31,6 +31,8 @@ class TestDataset: "display_name": "test-deployment-name", "freeform_tags": {"ftag1": "fvalue1", "ftag2": "fvalue2"}, "defined_tags": {"dtag1": "dvalue1", "dtag2": "dvalue2"}, + "project_id": USER_PROJECT_ID, + "compartment_id": USER_COMPARTMENT_ID, } inference_request = { "prompt": "What is 1+1?", @@ -87,6 +89,21 @@ def test_get_deployment_config_without_id(self, mock_error): mock_error.assert_called_once() assert result["status"] == 400 + @patch( + "ads.aqua.modeldeployment.AquaDeploymentApp.get_multimodel_deployment_config" + ) + def test_get_multimodel_deployment_config( + self, mock_get_multimodel_deployment_config + ): + """Test get method to return multi model deployment config""" + self.deployment_handler.request.path = "aqua/deployments/config" + self.deployment_handler.get(id="mock-model-id-one,mock-model-id-two") + mock_get_multimodel_deployment_config.assert_called_with( + model_ids=["mock-model-id-one", "mock-model-id-two"], + primary_model_id=None, + compartment_id=TestDataset.USER_COMPARTMENT_ID, + ) + @patch("ads.aqua.modeldeployment.AquaDeploymentApp.get") def test_get_deployment(self, mock_get): """Test get method to return deployment information.""" @@ -140,24 +157,7 @@ def test_post(self, mock_create): project_id=TestDataset.USER_PROJECT_ID, model_id=TestDataset.deployment_request["model_id"], display_name=TestDataset.deployment_request["display_name"], - 
description=None, - instance_count=None, instance_shape=TestDataset.deployment_request["instance_shape"], - log_group_id=None, - access_log_id=None, - predict_log_id=None, - bandwidth_mbps=None, - web_concurrency=None, - server_port=None, - health_check_port=None, - env_var=None, - container_family=None, - memory_in_gbs=None, - ocpus=None, - model_file=None, - private_endpoint_id=None, - container_image_uri=None, - cmd_var=None, freeform_tags=TestDataset.deployment_request["freeform_tags"], defined_tags=TestDataset.deployment_request["defined_tags"], ) @@ -189,7 +189,9 @@ def test_get_deployment_default_params( self.assertCountEqual(result["data"], self.default_params) mock_get_deployment_default_params.assert_called_with( - model_id="test_model_id", instance_shape=TestDataset.INSTANCE_SHAPE + model_id="test_model_id", + instance_shape=TestDataset.INSTANCE_SHAPE, + gpu_count=None, ) @parameterized.expand( diff --git a/tests/unitary/with_extras/aqua/test_evaluation.py b/tests/unitary/with_extras/aqua/test_evaluation.py index e05e4717f..b54fefd96 100644 --- a/tests/unitary/with_extras/aqua/test_evaluation.py +++ b/tests/unitary/with_extras/aqua/test_evaluation.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import base64 @@ -17,6 +17,7 @@ from ads.aqua.common import utils from ads.aqua.common.enums import Tags from ads.aqua.common.errors import ( + AquaError, AquaFileNotFoundError, AquaMissingKeyError, AquaRuntimeError, @@ -35,8 +36,10 @@ AquaEvalMetrics, AquaEvalReport, AquaEvaluationSummary, + CreateAquaEvaluationDetails, ) from ads.aqua.extension.base_handler import AquaAPIhandler +from ads.aqua.model.constants import ModelCustomMetadataFields from ads.jobs.ads_job import DataScienceJob, DataScienceJobRun, Job from ads.model import DataScienceModel from ads.model.deployment.model_deployment import ModelDeployment @@ -354,6 +357,31 @@ class TestDataset: COMPARTMENT_ID = "ocid1.compartment.oc1.." EVAL_ID = "ocid1.datasciencemodel.oc1.iad." INVALID_EVAL_ID = "ocid1.datasciencemodel.oc1.phx." + MODEL_DEPLOYMENT_ID = "ocid1.datasciencemodeldeployment.oc1.." + + multi_model_deployment_model_attributes = [ + { + "env_var": {"--test_key_one": "test_value_one"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_one", + "artifact_location": "artifact_location_one", + }, + { + "env_var": {"--test_key_two": "test_value_two"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_two", + "artifact_location": "artifact_location_two", + }, + { + "env_var": {"--test_key_three": "test_value_three"}, + "gpu_count": 1, + "model_id": "ocid1.compartment.oc1..", + "model_name": "model_three", + "artifact_location": "artifact_location_three", + }, + ] class TestAquaEvaluation(unittest.TestCase): @@ -534,6 +562,76 @@ def test_create_evaluation( "time_created": f"{oci_dsc_model.time_created}", } + @parameterized.expand( + [ + ( + {}, + "No model name was provided for evaluation. 
For multi-model deployment, a model must be specified in the model parameters.", + ), + ( + {"model": "wrong_model_name"}, + "Provided model name 'wrong_model_name' does not match any valid model names ['model_one', 'model_two', 'model_three'] for evaluation source ID 'ocid1.datasciencemodeldeployment.oc1..'. Please provide the correct model name.", + ), + ] + ) + @patch("ads.aqua.evaluation.evaluation.AquaEvaluationApp.create") + @patch( + "ads.model.datascience_model.OCIDataScienceModel.get_custom_metadata_artifact" + ) + def test_validate_model_name( + self, + mock_model_parameters, + expected_message, + mock_get_custom_metadata_artifact, + mock_model, + ): + curr_dir = os.path.dirname(__file__) + + eval_model_freeform_tags = {"ftag1": "fvalue1", "ftag2": "fvalue2"} + eval_model_defined_tags = {"dtag1": "dvalue1", "dtag2": "dvalue2"} + + eval_model_freeform_tags[Tags.MULTIMODEL_TYPE_TAG] = "true" + eval_model_freeform_tags[Tags.AQUA_TAG] = "active" + + create_aqua_evaluation_details = dict( # noqa: C408 + evaluation_source_id=TestDataset.MODEL_DEPLOYMENT_ID, + evaluation_name="test_evaluation_name", + dataset_path="oci://dataset_bucket@namespace/prefix/dataset.jsonl", + report_path="oci://report_bucket@namespace/prefix/", + model_parameters=mock_model_parameters, + shape_name="VM.Standard.E3.Flex", + block_storage_size=1, + experiment_name="test_experiment_name", + memory_in_gbs=1, + ocpus=1, + freeform_tags=eval_model_freeform_tags, + defined_tags=eval_model_defined_tags, + ) + + aqua_multi_model = os.path.join( + curr_dir, "test_data/deployment/aqua_multi_model.yaml" + ) + + mock_model = DataScienceModel.from_yaml(uri=aqua_multi_model) + + multi_model_deployment_model_attributes_str = json.dumps( + TestDataset.multi_model_deployment_model_attributes + ).encode("utf-8") + mock_get_custom_metadata_artifact.return_value = ( + multi_model_deployment_model_attributes_str + ) + + mock_create_aqua_evaluation_details = MagicMock( + **create_aqua_evaluation_details, 
spec=CreateAquaEvaluationDetails + ) + + try: + AquaEvaluationApp.validate_model_name( + mock_model, mock_create_aqua_evaluation_details + ) + except AquaError as e: + self.assertEqual(str(e), expected_message) + def test_get_service_model_name(self): # get service model name from fine tuned model deployment source = ModelDeployment().with_freeform_tags( diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py index a3ddc177b..f202228fd 100644 --- a/tests/unitary/with_extras/aqua/test_model.py +++ b/tests/unitary/with_extras/aqua/test_model.py @@ -20,6 +20,8 @@ import ads.common import ads.common.oci_client import ads.config + +from ads.aqua.common.entities import AquaMultiModelRef from ads.aqua.common.enums import ModelFormat from ads.aqua.common.errors import ( AquaFileNotFoundError, @@ -229,6 +231,7 @@ class TestDataset: SERVICE_COMPARTMENT_ID = "ocid1.compartment.oc1.." COMPARTMENT_ID = "ocid1.compartment.oc1.." + SERVICE_MODEL_ID = "ocid1.datasciencemodel.oc1.iad." 
@patch("ads.config.COMPARTMENT_OCID", "ocid1.compartment.oc1.") @@ -358,6 +361,107 @@ def test_create_model(self, mock_from_id, mock_validate, mock_create): ) assert model.provenance_metadata.training_id == "test_training_id" + @patch.object(DataScienceModel, "add_artifact") + @patch.object(DataScienceModel, "create_custom_metadata_artifact") + @patch.object(DataScienceModel, "create") + @patch("ads.model.datascience_model.validate") + @patch("ads.aqua.model.model.get_container_config") + @patch.object(DataScienceModel, "from_id") + def test_create_multimodel( + self, + mock_from_id, + mock_get_container_config, + mock_validate, + mock_create, + mock_create_custom_metadata_artifact, + mock_add_artifact, + ): + mock_get_container_config.return_value = get_container_config() + mock_model = MagicMock() + mock_model.model_file_description = {"test_key": "test_value"} + mock_model.display_name = "test_display_name" + mock_model.description = "test_description" + mock_model.freeform_tags = { + # "OCI_AQUA": "ACTIVE", + } + mock_model.id = "mock_model_id" + mock_model.artifact = "mock_artifact_path" + custom_metadata_list = ModelCustomMetadata() + custom_metadata_list.add( + **{"key": "deployment-container", "value": "odsc-tgi-serving"} + ) + + mock_model.custom_metadata_list = custom_metadata_list + mock_from_id.return_value = mock_model + + model_info_1 = AquaMultiModelRef( + model_id="test_model_id_1", + gpu_count=2, + env_var={"params": "--trust-remote-code --max-model-len 60000"}, + ) + + model_info_2 = AquaMultiModelRef( + model_id="test_model_id_2", + gpu_count=2, + env_var={"params": "--trust-remote-code --max-model-len 32000"}, + ) + + with pytest.raises(AquaValueError): + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + mock_model.freeform_tags["aqua_service_model"] = TestDataset.SERVICE_MODEL_ID + + with pytest.raises(AquaValueError): + model = 
self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + mock_model.freeform_tags["task"] = "text-generation" + + with pytest.raises(AquaValueError): + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + custom_metadata_list = ModelCustomMetadata() + custom_metadata_list.add( + **{"key": "deployment-container", "value": "odsc-vllm-serving"} + ) + + mock_model.custom_metadata_list = custom_metadata_list + mock_from_id.return_value = mock_model + + # will create a multi-model group + model = self.app.create_multi( + models=[model_info_1, model_info_2], + project_id="test_project_id", + compartment_id="test_compartment_id", + ) + + mock_add_artifact.assert_called() + mock_from_id.assert_called() + mock_validate.assert_not_called() + mock_create.assert_called_with(model_by_reference=True) + + mock_model.compartment_id = TestDataset.SERVICE_COMPARTMENT_ID + mock_from_id.return_value = mock_model + mock_create.return_value = mock_model + + assert model.freeform_tags == {"aqua_multimodel": "true"} + assert model.custom_metadata_list.get("model_group_count").value == "2" + assert ( + model.custom_metadata_list.get("deployment-container").value + == "odsc-vllm-serving" + ) + @pytest.mark.parametrize( "foundation_model_type", [ diff --git a/tests/unitary/with_extras/aqua/test_ui.py b/tests/unitary/with_extras/aqua/test_ui.py index 27f8f17a1..4e1620c39 100644 --- a/tests/unitary/with_extras/aqua/test_ui.py +++ b/tests/unitary/with_extras/aqua/test_ui.py @@ -595,7 +595,7 @@ def test_list_containers(self, mock_get_container_config): ], "server_port": "8080", }, - "usages": [], + "usages": ["multi_model"], }, ], "finetune": [ diff --git a/tests/unitary/with_extras/aqua/test_utils.py b/tests/unitary/with_extras/aqua/test_utils.py index a6fb5d0b0..5b4c66740 100644 --- 
a/tests/unitary/with_extras/aqua/test_utils.py +++ b/tests/unitary/with_extras/aqua/test_utils.py @@ -1,22 +1,20 @@ #!/usr/bin/env python # -*- coding: utf-8 -*-- -# Copyright (c) 2024 Oracle and/or its affiliates. +# Copyright (c) 2024, 2025 Oracle and/or its affiliates. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/ import unittest from unittest.mock import MagicMock, patch -import pytest -from ads.common.object_storage_details import ObjectStorageDetails from oci.object_storage.models import ListObjects, ObjectSummary from oci.resource_search.models.resource_summary import ResourceSummary from parameterized import parameterized from ads.aqua.common import utils from ads.aqua.common.errors import AquaRuntimeError +from ads.common.object_storage_details import ObjectStorageDetails from ads.common.oci_resource import SEARCH_TYPE, OCIResource from ads.config import TENANCY_OCID -from ads.common import auth as authutil class TestDataset: diff --git a/tests/unitary/with_extras/aqua/utils.py b/tests/unitary/with_extras/aqua/utils.py index 21e61bcc1..bdc72e3b5 100644 --- a/tests/unitary/with_extras/aqua/utils.py +++ b/tests/unitary/with_extras/aqua/utils.py @@ -29,7 +29,7 @@ class HandlerTestDataset: block_storage_size=1, experiment_name="test_experiment_name", memory_in_gbs=1, - ocpus=1, + ocpus=1.0, ) mock_invalid_input = dict(name="myvalue") mock_dataclass_obj = MockData(id="myid", name="myname")