zenml-io · safoinme · Dec 5, 2024 · Sep 30, 2024 · Sep 30, 2024 · Oct 1, 2024
diff --git a/llm-vllm-deployer/LICENSE b/llm-vllm-deployer/LICENSE
@@ -0,0 +1,15 @@
+Apache Software License 2.0
+
+Copyright (c) ZenML GmbH 2024. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/llm-vllm-deployer/README.md b/llm-vllm-deployer/README.md
@@ -0,0 +1,46 @@
+# ☮️ Deploying open source LLMs using MLOps pipelines with vLLM
+
+Welcome to your newly generated "ZenML LLM vLLM deployment" project! This is
+a great way to get hands-on with ZenML using a production-like template.
+The project contains a collection of ZenML steps, pipelines, and other artifacts
+and useful resources that can serve as a solid starting point for deploying open-source LLMs using ZenML.
+
+Using these pipelines, we can run the model deployment with a single command while using YAML files for [configuration](https://docs.zenml.io/user-guide/production-guide/configure-pipeline) and letting ZenML take care of tracking our metadata and [containerizing our pipelines](https://docs.zenml.io/how-to/customize-docker-builds).
+
+<TODO: Add image from ZenML Cloud for pipeline here>
+
+## 🏃 How to run
+
+In this project, we will deploy the [gpt-2](https://huggingface.co/openai-community/gpt2) model using [vLLM](https://docs.vllm.ai/en/latest/). Before we're able to run any pipeline, we need to set up our environment as follows:
+
+```bash
+# Set up a Python virtual environment, if you haven't already
+python3 -m venv .venv
+source .venv/bin/activate
+
+# Install requirements
+pip install -r requirements.txt
+```
+
+Run the deployment pipeline:
+
+```bash
+python run.py
+```
+
+## 📜 Project Structure
+
+The project loosely follows [the recommended ZenML project structure](https://docs.zenml.io/how-to/setting-up-a-project-repository/best-practices):
+
+```
+.
+├── configs                                      # pipeline configuration files
+│   └── default_vllm_deploy.yaml                 # default local or remote orchestrator configuration
+├── pipelines                                    # `zenml.pipeline` implementations
+│   └── deploy_pipeline.py                       # vllm deployment pipeline
+├── steps                                        # logically grouped `zenml.steps` implementations
+│   └── vllm_deployer.py                         # deploy model using vllm
+├── README.md                                    # this file
+├── requirements.txt                             # extra Python dependencies 
+└── run.py                                       # CLI tool to run pipelines on ZenML Stack
+```
diff --git a/llm-vllm-deployer/configs/default_vllm_deploy.yaml b/llm-vllm-deployer/configs/default_vllm_deploy.yaml
@@ -0,0 +1,10 @@
+model:
+  name: openai-community/gpt2
+  description: "Deploy `openai-community/gpt2` using vllm."
+  tags:
+    - llm
+    - vllm
+    - openai-community/gpt2
+
+parameters:
+  model: openai-community/gpt2
diff --git a/llm-vllm-deployer/pipelines/__init__.py b/llm-vllm-deployer/pipelines/__init__.py
@@ -0,0 +1,16 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/llm-vllm-deployer/pipelines/deploy_pipeline.py b/llm-vllm-deployer/pipelines/deploy_pipeline.py
@@ -0,0 +1,33 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from zenml import pipeline
+from typing import Annotated
+from steps.vllm_deployer import vllm_model_deployer_step
+from zenml.integrations.vllm.services.vllm_deployment import VLLMDeploymentService
+
+
+@pipeline()
+def deploy_vllm_pipeline(
+    model: str,
+    timeout: int = 1200,
+) -> Annotated[VLLMDeploymentService, "GPT2"]:
+    service = vllm_model_deployer_step(
+        model=model,
+        timeout=timeout,
+    )
+    return service
diff --git a/llm-vllm-deployer/requirements.txt b/llm-vllm-deployer/requirements.txt
@@ -0,0 +1,3 @@
+zenml>=0.66.0
+vllm>= 0.6.0,<0.7.0
+openai>=1.0.0
diff --git a/llm-vllm-deployer/run.py b/llm-vllm-deployer/run.py
@@ -0,0 +1,79 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+from typing import Optional
+
+import click
+
+
+@click.command(
+    help="""
+ZenML LLM VLLM deployer project CLI.
+
+Run the ZenML LLM Finetuning project LLM PEFT finetuning pipelines.
+
+Examples:
+
+  \b
+  # Run the pipeline
+    python run.py
+
+  \b
+  # Run the pipeline with custom config
+    python run.py --config default_vllm_deploy.yaml
+"""
+)
+@click.option(
+    "--config",
+    type=str,
+    default="default_vllm_deploy.yaml",
+    help="Path to the YAML config file.",
+)
+@click.option(
+    "--no-cache",
+    is_flag=True,
+    default=False,
+    help="Disable caching for the pipeline run.",
+)
+def main(
+    config: Optional[str] = None,
+    no_cache: bool = False,
+):
+    """Main entry point for the pipeline execution.
+
+    Args:
+        config: Path to the YAML config file.
+        no_cache: If `True` cache will be disabled.
+    """
+    config_folder = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "configs",
+    )
+    pipeline_args = {"enable_cache": not no_cache}
+    if not config:
+        raise RuntimeError("Config file is required to run a pipeline.")
+
+    pipeline_args["config_path"] = os.path.join(config_folder, config)
+
+    from pipelines.deploy_pipeline import deploy_vllm_pipeline
+
+    deploy_vllm_pipeline.with_options(**pipeline_args)()
+
+
+# Invoke the CLI only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
diff --git a/llm-vllm-deployer/steps/__init__.py b/llm-vllm-deployer/steps/__init__.py
@@ -0,0 +1,16 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/llm-vllm-deployer/steps/vllm_deployer.py b/llm-vllm-deployer/steps/vllm_deployer.py
@@ -0,0 +1,114 @@
+#  Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at:
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+#  or implied. See the License for the specific language governing
+#  permissions and limitations under the License.
+"""Implementation of the vllm model deployer pipeline step."""
+
+from typing import Optional, cast
+
+from zenml import get_step_context, step
+from zenml.integrations.vllm.model_deployers.vllm_model_deployer import (
+    VLLMModelDeployer,
+)
+from zenml.integrations.vllm.services.vllm_deployment import (
+    VLLMDeploymentService,
+    VLLMServiceConfig,
+)
+from zenml.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+@step(enable_cache=False)
+def vllm_model_deployer_step(
+    model: str,
+    port: int = 8000,
+    tokenizer: Optional[str] = None,
+    timeout: int = 1200,
+    deploy_decision: bool = True,
+) -> VLLMDeploymentService:
+    """Model deployer pipeline step for vLLM.
+
+    This step deploys a given Bento to a local vLLM http prediction server.
+
+    Args:
+        model: Name or path to huggingface model
+        port: Port used by vllm server
+        tokenizer: Name or path of the huggingface tokenizer to use.
+            If unspecified, model name or path will be used.
+        timeout: the number of seconds to wait for the service to start/stop.
+        deploy_decision: whether to deploy the model or not
+
+    Returns:
+        vLLM deployment service
+    """
+    # get the current active model deployer
+    model_deployer = cast(
+        VLLMModelDeployer, VLLMModelDeployer.get_active_model_deployer()
+    )
+
+    # get pipeline name, step name and run id
+    step_context = get_step_context()
+    pipeline_name = step_context.pipeline.name
+    step_name = step_context.step_run.name
+
+    # create a config for the new model service
+    predictor_cfg = VLLMServiceConfig(
+        model=model,
+        port=port,
+        tokenizer=tokenizer,
+        model_name="default",  # Required for ServiceConfig
+    )
+
+    # update the step configuration with the real pipeline runtime information
+    predictor_cfg = predictor_cfg.model_copy()
+    predictor_cfg.pipeline_name = pipeline_name
+    predictor_cfg.pipeline_step_name = step_name
+
+    # fetch existing services with same pipeline name, step name and model name
+    existing_services = model_deployer.find_model_server(
+        config=predictor_cfg.model_dump(),
+        service_type=VLLMDeploymentService.SERVICE_TYPE,
+    )
+
+    # Creating a new service with inactive state and status by default
+    if existing_services:
+        service = cast(VLLMDeploymentService, existing_services[0])
+
+    if not deploy_decision and existing_services:
+        logger.info(
+            f"Skipping model deployment because the model quality does not "
+            f"meet the criteria. Reusing last model server deployed by step "
+            f"'{step_name}' and pipeline '{pipeline_name}' for model "
+            f"'{model}'..."
+        )
+        if not service.is_running:
+            service.start(timeout=timeout)
+        return service
+
+    # create a new model deployment and replace an old one if it exists
+    new_service = cast(
+        VLLMDeploymentService,
+        model_deployer.deploy_model(
+            replace=True,
+            config=predictor_cfg,
+            timeout=timeout,
+            service_type=VLLMDeploymentService.SERVICE_TYPE,
+        ),
+    )
+
+    logger.info(
+        f"VLLM deployment service started and reachable at:\n"
+        f"    {new_service.prediction_url}\n"
+    )
+
+    return new_service