diff --git a/llm-vllm-deployer/LICENSE b/llm-vllm-deployer/LICENSE
new file mode 100644
index 00000000..75d01fb4
--- /dev/null
+++ b/llm-vllm-deployer/LICENSE
@@ -0,0 +1,15 @@
+Apache Software License 2.0
+
+Copyright (c) ZenML GmbH 2024. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/llm-vllm-deployer/README.md b/llm-vllm-deployer/README.md
new file mode 100644
index 00000000..1b60cf59
--- /dev/null
+++ b/llm-vllm-deployer/README.md
@@ -0,0 +1,46 @@
+# ☮️ Deploying open-source LLMs using MLOps pipelines with vLLM
+
+Welcome to your newly generated "ZenML LLM vLLM deployment" project! This is
+a great way to get hands-on with ZenML using a production-like template.
+The project contains a collection of ZenML steps, pipelines, and other artifacts
+and useful resources that can serve as a solid starting point for deploying open-source LLMs using ZenML.
+
+Using these pipelines, we can deploy an open-source LLM with a single command while using YAML files for [configuration](https://docs.zenml.io/user-guide/production-guide/configure-pipeline) and letting ZenML take care of tracking our metadata and [containerizing our pipelines](https://docs.zenml.io/how-to/customize-docker-builds).
+
+
+
+## 🏃 How to run
+
+In this project, we will deploy the [gpt-2](https://huggingface.co/openai-community/gpt2) model using [vLLM](https://docs.vllm.ai/en/latest/). Before we're able to run any pipeline, we need to set up our environment as follows:
+
+```bash
+# Set up a Python virtual environment, if you haven't already
+python3 -m venv .venv
+source .venv/bin/activate
+
+# Install requirements
+pip install -r requirements.txt
+```
+
+Run the deployment pipeline:
+
+```bash
+python run.py
+```
+
+## 📜 Project Structure
+
+The project loosely follows [the recommended ZenML project structure](https://docs.zenml.io/how-to/setting-up-a-project-repository/best-practices):
+
+```
+.
+├── configs                        # pipeline configuration files
+│   └── default_vllm_deploy.yaml   # default local or remote orchestrator configuration
+├── pipelines                      # `zenml.pipeline` implementations
+│   └── deploy_pipeline.py         # vLLM deployment pipeline
+├── steps                          # logically grouped `zenml.steps` implementations
+│   └── vllm_deployer.py           # deploy model using vLLM
+├── README.md                      # this file
+├── requirements.txt               # extra Python dependencies
+└── run.py                         # CLI tool to run pipelines on a ZenML stack
+```
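Once the deployment pipeline has run, vLLM serves the model behind an OpenAI-compatible HTTP API, so the `openai` package pinned in `requirements.txt` can be used to smoke-test it. Below is a minimal sketch, assuming the server is reachable on the deployer step's default port 8000 and no API key was configured; the prompt and token budget are illustrative:

```python
# Minimal smoke test against the deployed vLLM server (assumes localhost:8000).
from openai import OpenAI

# vLLM exposes an OpenAI-compatible API under /v1; the API key is unused here.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

completion = client.completions.create(
    model="openai-community/gpt2",  # the model name passed to the pipeline
    prompt="ZenML pipelines are",
    max_tokens=32,
)
print(completion.choices[0].text)
```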
diff --git a/llm-vllm-deployer/configs/default_vllm_deploy.yaml b/llm-vllm-deployer/configs/default_vllm_deploy.yaml
new file mode 100644
index 00000000..cddae0c6
--- /dev/null
+++ b/llm-vllm-deployer/configs/default_vllm_deploy.yaml
@@ -0,0 +1,10 @@
+model:
+  name: openai-community/gpt2
+  description: "Deploy `openai-community/gpt2` using vLLM."
+  tags:
+    - llm
+    - vllm
+    - openai-community/gpt2
+
+parameters:
+  model: openai-community/gpt2
diff --git a/llm-vllm-deployer/pipelines/__init__.py b/llm-vllm-deployer/pipelines/__init__.py
new file mode 100644
index 00000000..e3836e26
--- /dev/null
+++ b/llm-vllm-deployer/pipelines/__init__.py
@@ -0,0 +1,16 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
\ No newline at end of file
diff --git a/llm-vllm-deployer/pipelines/deploy_pipeline.py b/llm-vllm-deployer/pipelines/deploy_pipeline.py
new file mode 100644
index 00000000..d5414be5
--- /dev/null
+++ b/llm-vllm-deployer/pipelines/deploy_pipeline.py
@@ -0,0 +1,33 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from typing import Annotated
+from zenml import pipeline
+from zenml.integrations.vllm.services.vllm_deployment import VLLMDeploymentService
+from steps.vllm_deployer import vllm_model_deployer_step
+
+
+@pipeline()
+def deploy_vllm_pipeline(
+    model: str,
+    timeout: int = 1200,
+) -> Annotated[VLLMDeploymentService, "GPT2"]:
+    service = vllm_model_deployer_step(
+        model=model,
+        timeout=timeout,
+    )
+    return service
diff --git a/llm-vllm-deployer/requirements.txt b/llm-vllm-deployer/requirements.txt
new file mode 100644
index 00000000..a53368a8
--- /dev/null
+++ b/llm-vllm-deployer/requirements.txt
@@ -0,0 +1,3 @@
+zenml>=0.66.0
+vllm>=0.6.0,<0.7.0
+openai>=1.0.0
\ No newline at end of file
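Since `deploy_vllm_pipeline` is an ordinary ZenML pipeline, it can also be invoked directly from Python instead of through the `run.py` CLI that follows. A minimal sketch, assuming the project root is on the Python path; the model name and timeout values are illustrative:

```python
# Hypothetical ad-hoc run without the run.py CLI: call the pipeline directly.
from pipelines.deploy_pipeline import deploy_vllm_pipeline

if __name__ == "__main__":
    # Disable caching so the deployer step always executes, then deploy
    # gpt2 with a 10-minute startup timeout (values are illustrative).
    deploy_vllm_pipeline.with_options(enable_cache=False)(
        model="openai-community/gpt2",
        timeout=600,
    )
```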
diff --git a/llm-vllm-deployer/run.py b/llm-vllm-deployer/run.py
new file mode 100644
index 00000000..651c8ece
--- /dev/null
+++ b/llm-vllm-deployer/run.py
@@ -0,0 +1,79 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import os
+from typing import Optional
+
+import click
+
+
+@click.command(
+    help="""
+ZenML vLLM deployer project CLI.
+
+Run the ZenML vLLM model deployment pipeline.
+
+Examples:
+
+  \b
+  # Run the pipeline
+  python run.py
+
+  \b
+  # Run the pipeline with a custom config
+  python run.py --config default_vllm_deploy.yaml
+"""
+)
+@click.option(
+    "--config",
+    type=str,
+    default="default_vllm_deploy.yaml",
+    help="Path to the YAML config file.",
+)
+@click.option(
+    "--no-cache",
+    is_flag=True,
+    default=False,
+    help="Disable caching for the pipeline run.",
+)
+def main(
+    config: Optional[str] = None,
+    no_cache: bool = False,
+):
+    """Main entry point for the pipeline execution.
+
+    Args:
+        config: Path to the YAML config file.
+        no_cache: If `True`, caching will be disabled.
+    """
+    config_folder = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "configs",
+    )
+    pipeline_args = {"enable_cache": not no_cache}
+    if not config:
+        raise RuntimeError("Config file is required to run a pipeline.")
+
+    pipeline_args["config_path"] = os.path.join(config_folder, config)
+
+    from pipelines.deploy_pipeline import deploy_vllm_pipeline
+
+    deploy_vllm_pipeline.with_options(**pipeline_args)()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/llm-vllm-deployer/steps/__init__.py b/llm-vllm-deployer/steps/__init__.py
new file mode 100644
index 00000000..e3836e26
--- /dev/null
+++ b/llm-vllm-deployer/steps/__init__.py
@@ -0,0 +1,16 @@
+# Apache Software License 2.0
+#
+# Copyright (c) ZenML GmbH 2024. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
\ No newline at end of file
diff --git a/llm-vllm-deployer/steps/vllm_deployer.py b/llm-vllm-deployer/steps/vllm_deployer.py
new file mode 100644
index 00000000..76770103
--- /dev/null
+++ b/llm-vllm-deployer/steps/vllm_deployer.py
@@ -0,0 +1,114 @@
+# Copyright (c) ZenML GmbH 2024. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing
+# permissions and limitations under the License.
+"""Implementation of the vLLM model deployer pipeline step."""
+
+from typing import Optional, cast
+
+from zenml import get_step_context, step
+from zenml.integrations.vllm.model_deployers.vllm_model_deployer import (
+    VLLMModelDeployer,
+)
+from zenml.integrations.vllm.services.vllm_deployment import (
+    VLLMDeploymentService,
+    VLLMServiceConfig,
+)
+from zenml.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+@step(enable_cache=False)
+def vllm_model_deployer_step(
+    model: str,
+    port: int = 8000,
+    tokenizer: Optional[str] = None,
+    timeout: int = 1200,
+    deploy_decision: bool = True,
+) -> VLLMDeploymentService:
+    """Model deployer pipeline step for vLLM.
+
+    This step deploys the given model to a local vLLM HTTP prediction server.
+
+    Args:
+        model: Name or path of the Hugging Face model to deploy.
+        port: Port used by the vLLM server.
+        tokenizer: Name or path of the Hugging Face tokenizer to use.
+            If unspecified, the model name or path will be used.
+        timeout: The number of seconds to wait for the service to start/stop.
+        deploy_decision: Whether to deploy the model or not.
+
+    Returns:
+        The vLLM deployment service.
+    """
+    # get the current active model deployer
+    model_deployer = cast(
+        VLLMModelDeployer, VLLMModelDeployer.get_active_model_deployer()
+    )
+
+    # get pipeline name, step name and run id
+    step_context = get_step_context()
+    pipeline_name = step_context.pipeline.name
+    step_name = step_context.step_run.name
+
+    # create a config for the new model service
+    predictor_cfg = VLLMServiceConfig(
+        model=model,
+        port=port,
+        tokenizer=tokenizer,
+        model_name="default",  # Required for ServiceConfig
+    )
+
+    # update the step configuration with the real pipeline runtime information
+    predictor_cfg = predictor_cfg.model_copy()
+    predictor_cfg.pipeline_name = pipeline_name
+    predictor_cfg.pipeline_step_name = step_name
+
+    # fetch existing services with the same pipeline name, step name and model name
+    existing_services = model_deployer.find_model_server(
+        config=predictor_cfg.model_dump(),
+        service_type=VLLMDeploymentService.SERVICE_TYPE,
+    )
+
+    # reuse an existing service for this pipeline/step/model combination, if any
+    if existing_services:
+        service = cast(VLLMDeploymentService, existing_services[0])
+
+    if not deploy_decision and existing_services:
+        logger.info(
+            f"Skipping model deployment because the model quality does not "
+            f"meet the criteria. Reusing the last model server deployed by step "
+            f"'{step_name}' and pipeline '{pipeline_name}' for model "
+            f"'{model}'..."
+        )
+        if not service.is_running:
+            service.start(timeout=timeout)
+        return service
+
+    # create a new model deployment and replace an old one if it exists
+    new_service = cast(
+        VLLMDeploymentService,
+        model_deployer.deploy_model(
+            replace=True,
+            config=predictor_cfg,
+            timeout=timeout,
+            service_type=VLLMDeploymentService.SERVICE_TYPE,
+        ),
+    )
+
+    logger.info(
+        f"The vLLM deployment service started and is reachable at:\n"
+        f"    {new_service.prediction_url}\n"
+    )
+
+    return new_service
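For completeness, here is a rough sketch of how the service started by this step might later be looked up and shut down from a standalone script, using the same deployer API the step itself relies on. The config filter keys and timeout are illustrative, and the exact arguments accepted by `find_model_server` can vary between ZenML versions:

```python
# Hypothetical cleanup script: find the vLLM server deployed by the pipeline
# and stop it. Mirrors the deployer API used in steps/vllm_deployer.py.
from zenml.integrations.vllm.model_deployers.vllm_model_deployer import (
    VLLMModelDeployer,
)
from zenml.integrations.vllm.services.vllm_deployment import VLLMDeploymentService

model_deployer = VLLMModelDeployer.get_active_model_deployer()
services = model_deployer.find_model_server(
    config={"model": "openai-community/gpt2"},
    service_type=VLLMDeploymentService.SERVICE_TYPE,
)
if services:
    # Stop the first matching service, waiting up to 5 minutes for shutdown.
    services[0].stop(timeout=300)
```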