diff --git a/train_and_deploy/configs/deployer_config.yaml b/train_and_deploy/configs/deployer_config.yaml
index c24231b2..cd575ec2 100644
--- a/train_and_deploy/configs/deployer_config.yaml
+++ b/train_and_deploy/configs/deployer_config.yaml
@@ -40,3 +40,6 @@ model:
 
 extra:
   notify_on_failure: True
+
+parameters:
+  target_env: staging
diff --git a/train_and_deploy/configs/inference_config.yaml b/train_and_deploy/configs/inference_config.yaml
index d6f1be75..1a46bd01 100644
--- a/train_and_deploy/configs/inference_config.yaml
+++ b/train_and_deploy/configs/inference_config.yaml
@@ -39,3 +39,5 @@ model:
 
 extra:
   notify_on_failure: True
+parameters:
+  target_env: staging
\ No newline at end of file
diff --git a/train_and_deploy/pipelines/batch_inference.py b/train_and_deploy/pipelines/batch_inference.py
index 3b7f5015..08d8d0fb 100644
--- a/train_and_deploy/pipelines/batch_inference.py
+++ b/train_and_deploy/pipelines/batch_inference.py
@@ -33,7 +33,9 @@
 
 
 @pipeline(on_failure=notify_on_failure)
-def gitguarden_batch_inference():
+def gitguarden_batch_inference(
+    target_env: str,
+):
     """
     Model batch inference pipeline.
 
@@ -66,6 +68,7 @@ def gitguarden_batch_inference():
     ########## Inference stage ##########
     inference_predict(
         dataset_inf=df_inference,
+        target_env=target_env,
         after=["drift_quality_gate"],
     )
 
diff --git a/train_and_deploy/pipelines/local_deployment.py b/train_and_deploy/pipelines/local_deployment.py
index 0f0fffa6..dd3a4f36 100644
--- a/train_and_deploy/pipelines/local_deployment.py
+++ b/train_and_deploy/pipelines/local_deployment.py
@@ -21,9 +21,10 @@
 
 
 @pipeline(on_failure=notify_on_failure, enable_cache=False)
-def gitguarden_local_deployment():
-    """
-    Model deployment pipeline.
+def gitguarden_local_deployment(
+    target_env: str,
+):
+    """Model deployment pipeline.
 
     This pipeline deploys a trained model for future inference.
     """
@@ -33,7 +34,7 @@ def gitguarden_local_deployment():
     ########## Deployment stage ##########
     # Get the production model artifact
     bento = bento_builder()
-    deployment_deploy(bento=bento)
+    deployment_deploy(bento=bento, target_env=target_env)
 
     notify_on_success(after=["deployment_deploy"])
     ### YOUR CODE ENDS HERE ###
diff --git a/train_and_deploy/service.py b/train_and_deploy/service.py
index b43c0d07..83170eab 100644
--- a/train_and_deploy/service.py
+++ b/train_and_deploy/service.py
@@ -11,4 +11,4 @@
 
 @svc.api(input=input_spec, output=NumpyNdarray())
 async def predict(input_arr):
-    return await gitguarden_runner.predict.async_run(input_arr)
+    return await gitguarden_runner.predict.async_run(input_arr)
\ No newline at end of file
diff --git a/train_and_deploy/steps/deployment/deployment_deploy.py b/train_and_deploy/steps/deployment/deployment_deploy.py
index 5fbe1144..3cd109c9 100644
--- a/train_and_deploy/steps/deployment/deployment_deploy.py
+++ b/train_and_deploy/steps/deployment/deployment_deploy.py
@@ -1,100 +1,60 @@
-# Apache Software License 2.0
+# Copyright (c) ZenML GmbH 2022. All Rights Reserved.
 #
-# Copyright (c) ZenML GmbH 2024. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
 #
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# https://www.apache.org/licenses/LICENSE-2.0
 #
-
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
 from typing import Optional
 
 from bentoml._internal.bento import bento
-from typing_extensions import Annotated
-from zenml import (
-    ArtifactConfig,
-    Model,
-    get_step_context,
-    log_artifact_metadata,
-    step,
-)
+from zenml import get_step_context, step
 from zenml.client import Client
-from zenml.integrations.bentoml.services.bentoml_container_deployment import (
-    BentoMLContainerDeploymentService,
+from zenml.integrations.bentoml.services.bentoml_local_deployment import (
+    BentoMLLocalDeploymentConfig,
+    BentoMLLocalDeploymentService,
 )
-from zenml.integrations.bentoml.services.deployment_type import (
-    BentoMLDeploymentType,
-)
-from zenml.integrations.bentoml.steps import bentoml_model_deployer_step
 from zenml.logger import get_logger
+from zenml.utils import source_utils
 
 logger = get_logger(__name__)
 
+
 @step
 def deployment_deploy(
     bento: bento.Bento,
-) -> (
-    Annotated[
-        Optional[BentoMLContainerDeploymentService],
-        ArtifactConfig(name="bentoml_deployment", is_deployment_artifact=True),
-    ]
-):
-    """Predictions step.
-
-    This is an example of a predictions step that takes the data in and returns
-    predicted values.
-
-    This step is parameterized, which allows you to configure the step
-    independently of the step code, before running it in a pipeline.
-    In this example, the step can be configured to use different input data.
-    See the documentation for more information:
-
-        https://docs.zenml.io/user-guide/advanced-guide/configure-steps-pipelines
-
-    Args:
-        dataset_inf: The inference dataset.
-
-    Returns:
-        The predictions as pandas series
-    """
-    ### ADD YOUR OWN CODE HERE - THIS IS JUST AN EXAMPLE ###
-    if Client().active_stack.orchestrator.flavor == "local":
-        model = get_step_context().model
-
-        # deploy predictor service
-        bentoml_deployment = bentoml_model_deployer_step.entrypoint(
-            model_name=model.name,  # Name of the model
-            port=3009,  # Port to be used by the http server
-            production=True,  # Deploy the model in production mode
-            timeout=1000,
-            bento=bento,
-            deployment_type=BentoMLDeploymentType.CONTAINER,
-        )
-
-        bentoml_service = Client().get_service(name_id_or_prefix=bentoml_deployment.uuid)
-
-        log_artifact_metadata(
-            metadata={
-                "service_type": "bentoml",
-                "status": bentoml_service.state,
-                "prediction_url": bentoml_service.prediction_url,
-                "health_check_url": bentoml_service.health_check_url,
-                "model_uri": model.get_artifact(name="model").uri,
-                "bento_tag" : bentoml_service.config.get("bento_tag"),
-                "bentoml_model_image": bentoml_service.config.get("image"),
-            }
-        )
-    else:
-        logger.warning("Skipping deployment as the orchestrator is not local.")
-        bentoml_deployment = None
-    ### YOUR CODE ENDS HERE ###
-    return bentoml_deployment
\ No newline at end of file
+    target_env: str,
+) -> Optional[BentoMLLocalDeploymentService]:
+    """Deploy the given bento as a local BentoML deployment service."""
+    zenml_client = Client()
+    step_context = get_step_context()
+    pipeline_name = step_context.pipeline.name
+    step_name = step_context.step_run.name
+    model_deployer = zenml_client.active_stack.model_deployer
+    bentoml_deployment_config = BentoMLLocalDeploymentConfig(
+        model_name=step_context.model.name,
+        model_version=target_env,
+        description="An example of deploying a model using the BentoML Model Deployer",
+        pipeline_name=pipeline_name,
+        pipeline_step_name=step_name,
+        model_uri=bento.info.labels.get("model_uri"),
+        bento_tag=str(bento.tag),
+        bento_uri=bento.info.labels.get("bento_uri"),
+        working_dir=source_utils.get_source_root(),
+        timeout=1500,
+    )
+    service = model_deployer.deploy_model(
+        config=bentoml_deployment_config,
+        service_type=BentoMLLocalDeploymentService.SERVICE_TYPE,
+    )
+    logger.info(
+        f"The deployed service info: {model_deployer.get_model_server_info(service)}"
+    )
+    return service
diff --git a/train_and_deploy/steps/inference/inference_predict.py b/train_and_deploy/steps/inference/inference_predict.py
index 99077df1..76bb017f 100644
--- a/train_and_deploy/steps/inference/inference_predict.py
+++ b/train_and_deploy/steps/inference/inference_predict.py
@@ -16,13 +16,13 @@
 #
 
-from typing import Optional
-
+from typing import Optional, cast
+from zenml.client import Client
 import pandas as pd
 from typing_extensions import Annotated
 from zenml import get_step_context, step
-from zenml.integrations.bentoml.services.bentoml_container_deployment import (
-    BentoMLContainerDeploymentService,
+from zenml.integrations.bentoml.services.bentoml_local_deployment import (
+    BentoMLLocalDeploymentService,
 )
 from zenml.logger import get_logger
 
@@ -32,6 +32,7 @@
 
 @step
 def inference_predict(
     dataset_inf: pd.DataFrame,
+    target_env: str,
 ) -> Annotated[pd.Series, "predictions"]:
     """Predictions step.
@@ -55,12 +56,18 @@ def inference_predict(
     model = get_step_context().model
 
     # get predictor
-    predictor_service: Optional[BentoMLContainerDeploymentService] = model.load_artifact(
-        "bentomldeployment"
+    zenml_client = Client()
+    model_deployer = zenml_client.active_stack.model_deployer
+
+    # fetch an existing deployment service for this model name and target environment
+    existing_services = model_deployer.find_model_server(
+        model_name=model.name,
+        model_version=target_env,
     )
+    predictor_service = cast(BentoMLLocalDeploymentService, existing_services[0]) if existing_services else None
     if predictor_service is not None:
         # run prediction from service
-        predictions = predictor_service.predict(api_endpoint="predict", data=dataset_inf)
+        predictions = predictor_service.predict(api_endpoint="predict", data=dataset_inf)
     else:
         logger.warning(
             "Predicting from loaded model instead of deployment service "
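
Usage note: the new `parameters:` block in the run configs is what feeds the `target_env` argument into the pipeline signatures above. A minimal launcher sketch, assuming ZenML's standard `with_options(config_path=...)` mechanism and the file layout in this diff; the `run_deployment.py` name itself is hypothetical and not part of this change:

    # run_deployment.py -- illustrative sketch only, not part of this diff.
    from pipelines.local_deployment import gitguarden_local_deployment

    if __name__ == "__main__":
        # with_options reads deployer_config.yaml; its `parameters:` block
        # supplies target_env=staging to the pipeline entrypoint, so the
        # environment can be switched per run without touching code.
        gitguarden_local_deployment.with_options(
            config_path="configs/deployer_config.yaml",
        )()

Overriding `target_env` (for example to `production`) is then a config-only change, which is the point of threading the parameter through `deployment_deploy` and `inference_predict` rather than hard-coding it in the steps.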