Multi GPU with PEFT on LLM #102

Merged · 29 commits · Jun 27, 2024

Changes from 11 commits

Commits (29)
- `5d2a2ee` multi GPU with PEFT on LLM (avishniakov, Apr 17, 2024)
- `3e9776a` eof (avishniakov, Apr 17, 2024)
- `d77afdc` fixes for subprocess (avishniakov, Apr 17, 2024)
- `7cc1f01` callback patch (avishniakov, Apr 17, 2024)
- `377ec0f` tidy up (avishniakov, Apr 17, 2024)
- `8856e5c` new iteration (avishniakov, Apr 19, 2024)
- `0c85e8b` lint (avishniakov, Apr 19, 2024)
- `27e6795` fsspec fix (avishniakov, Apr 19, 2024)
- `f257630` pin datasets to lower version (avishniakov, Apr 19, 2024)
- `f68a469` relax datasets pin a bit (avishniakov, Apr 19, 2024)
- `65cdc7e` polish for step operators (avishniakov, May 3, 2024)
- `93398cd` push some functionality to the core (avishniakov, May 7, 2024)
- `2bb3ab9` format (avishniakov, May 7, 2024)
- `261e2ce` update README (avishniakov, May 7, 2024)
- `c22092b` update README (avishniakov, May 7, 2024)
- `b51a111` use `AccelerateScaler` (avishniakov, May 14, 2024)
- `f3943b2` pass bit config around (avishniakov, May 15, 2024)
- `555997e` functional way (avishniakov, Jun 4, 2024)
- `28faf6c` Merge branch 'main' into feature/OSSK-514-multi-gpu-with-peft (avishniakov, Jun 4, 2024)
- `65b5e11` remove configs (avishniakov, Jun 4, 2024)
- `d77f50f` restore configs (avishniakov, Jun 4, 2024)
- `fd3887d` restore reqs (avishniakov, Jun 4, 2024)
- `5264011` accelerate as a function from the core (avishniakov, Jun 5, 2024)
- `d9172c7` reduce README (avishniakov, Jun 5, 2024)
- `817a1b2` og metadata separately (avishniakov, Jun 7, 2024)
- `6d988eb` resume logging (avishniakov, Jun 13, 2024)
- `80a1084` add `trust_remote_code=True` (avishniakov, Jun 14, 2024)
- `bb9dc65` final touches (avishniakov, Jun 20, 2024)
- `329cf17` final touches (avishniakov, Jun 20, 2024)
1 change: 1 addition & 0 deletions llm-lora-finetuning/.dockerignore
@@ -3,3 +3,4 @@
 !/pipelines/**
 !/steps/**
 !/utils/**
+!/scripts/**
64 changes: 42 additions & 22 deletions llm-lora-finetuning/README.md
@@ -34,10 +34,10 @@ pip install -r requirements.txt

### 👷 Combined feature engineering and finetuning pipeline

-The easiest way to get started with just a single command is to run the finetuning pipeline with the `default_finetune.yaml` configuration file, which will do data preparation, model finetuning, evaluation with [Rouge](https://huggingface.co/spaces/evaluate-metric/rouge) and promotion:
+The easiest way to get started with just a single command is to run the finetuning pipeline with the `mistral_default_finetune.yaml` configuration file, which will do data preparation, model finetuning, evaluation with [Rouge](https://huggingface.co/spaces/evaluate-metric/rouge) and promotion:

```shell
-python run.py --config default_finetune.yaml
+python run.py --config mistral_default_finetune.yaml
```

When running the pipeline like this, the trained model will be stored in the ZenML artifact store.
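For a sense of what the evaluation and promotion criteria look like, here is a minimal Rouge sketch, assuming the Hugging Face `evaluate` package; the repo's actual logic lives in `steps/evaluate_model.py`:

```python
# Minimal sketch of a Rouge comparison, assuming the `evaluate` package;
# the project's real evaluation lives in `steps/evaluate_model.py`.
import evaluate

rouge = evaluate.load("rouge")
scores = rouge.compute(
    predictions=["inform(name[The Witcher 3], rating[excellent])"],
    references=["inform(name[The Witcher 3], rating[excellent])"],
)
print(scores["rouge2"])  # the promote step compares models on `rouge2`
```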
@@ -50,6 +50,19 @@
<br/>
</div>

### ⚡ Accelerate your finetuning

Do you want to benefit from multi-GPU training with Distributed Data Parallelism (DDP)? Then you can use the configuration files prepared for this task.
For example, `phi_accelerated_local_finetune.yaml` runs finetuning of [Microsoft Phi 2](https://huggingface.co/microsoft/phi-2), powered by [Hugging Face Accelerate](https://huggingface.co/docs/accelerate/en/index), on all GPUs available in the environment. To do so, just call:

```shell
python run.py --config phi_accelerated_local_finetune.yaml # if your architecture doesn't support BF16
# OR
python run.py --config phi_accelerated_local_bf16_finetune.yaml # if your architecture supports BF16
```
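Not sure whether your GPUs support BF16? A quick check, assuming PyTorch is already installed:

```python
import torch

# True means the *_bf16_* configs are safe to use on this machine
print(torch.cuda.is_bf16_supported())
```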

Under the hood, the finetuning step spins up the accelerated job via the CLI wrapper around the finetuning script (`scripts/finetune.py`), which runs on all available GPUs.
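A minimal sketch of what such a hand-off can look like (the wrapper call and its flags below are illustrative; the actual wiring lives in the finetuning step and `scripts/finetune.py`):

```python
# Sketch only: the real logic lives in the finetuning step and
# `scripts/finetune.py`; the CLI flags below are hypothetical.
import subprocess

def launch_accelerated_finetuning(base_model_id: str, max_steps: int) -> None:
    # `accelerate launch` starts one worker process per visible GPU (DDP)
    subprocess.run(
        [
            "accelerate", "launch",
            "--multi_gpu",                     # enable Distributed Data Parallelism
            "scripts/finetune.py",             # the CLI wrapper from this repo
            "--base-model-id", base_model_id,  # hypothetical flag
            "--max-steps", str(max_steps),     # hypothetical flag
        ],
        check=True,  # raise if any worker exits non-zero
    )
```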

## ☁️ Running with a remote stack

To finetune an LLM on remote infrastructure, you can either use a remote orchestrator or a remote step operator. Follow these steps to set up a complete remote stack:
@@ -71,26 +84,33 @@ The project loosely follows [the recommended ZenML project structure](https://do

```
 .
-├── configs                                      # pipeline configuration files
-│   ├── default_finetune.yaml                    # default local configuration (or remote orchestrator)
-│   └── remote_finetune.yaml                     # default step operator configuration
+├── configs                                      # pipeline configuration files
+│   ├── mistral_default_finetune.yaml            # mistral local configuration (or remote orchestrator)
+│   ├── mistral_remote_finetune.yaml             # mistral step operator configuration
+│   ├── phi_accelerated_local_bf16_finetune.yaml # phi accelerated local with bf16
+│   ├── phi_accelerated_local_finetune.yaml      # phi accelerated local without bf16
+│   ├── phi_accelerated_remote_finetune.yaml     # phi accelerated step operator without bf16
+│   ├── phi_local_bf16_finetune.yaml             # phi local with bf16
+│   └── phi_local_finetune.yaml                  # phi local without bf16
 ├── materializers
 │   └── directory_materializer.py                # custom materializer to push whole directories to the artifact store and back
 ├── pipelines                                    # `zenml.pipeline` implementations
 │   └── train.py                                 # Finetuning and evaluation pipeline
+├── scripts                                      # scripts used in the finetuning
+│   └── finetune.py                              # Finetuning function and a CLI wrapper for `accelerate run ...`
 ├── steps                                        # logically grouped `zenml.steps` implementations
 │   ├── evaluate_model.py                        # evaluate base and finetuned models using Rouge metrics
 │   ├── finetune.py                              # finetune the base model
 │   ├── prepare_datasets.py                      # load and tokenize dataset
 │   └── promote.py                               # promote good models to target environment
 ├── utils                                        # utility functions
 │   ├── callbacks.py                             # custom callbacks
 │   ├── cuda.py                                  # helpers for CUDA
 │   ├── loaders.py                               # loaders for models and data
 │   ├── logging.py                               # logging helpers
 │   └── tokenizer.py                             # load and tokenize
+├── .dockerignore
 ├── README.md                                    # this file
 ├── requirements.txt                             # extra Python dependencies
 └── run.py                                       # CLI tool to run pipelines on ZenML Stack
```
@@ -35,7 +35,8 @@ parameters:
     Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
     This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
     The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
+  load_in_8bit: True

 steps:
   prepare_data:
     parameters:
@@ -46,6 +47,7 @@ steps:
     parameters:
       max_steps: 300
       eval_steps: 100
+      bf16: False

   promote:
     parameters:
@@ -35,7 +35,7 @@ parameters:
     Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
     This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
     The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
+  load_in_8bit: True

 steps:
   prepare_data:
57 changes: 57 additions & 0 deletions llm-lora-finetuning/configs/phi_accelerated_local_bf16_finetune.yaml
@@ -0,0 +1,57 @@
# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2024. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

model:
  name: llm-peft-microsoft-phi-2
  description: "Fine-tune `microsoft/phi-2`."
  tags:
    - llm
    - peft
    - microsoft/phi-2
  version: 200_steps_accelerate

settings:
  docker:
    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
    requirements: requirements.txt

parameters:
  base_model_id: microsoft/phi-2
  system_prompt: |
    Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
    This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
    The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
  use_fast: False
  load_in_4bit: True

steps:
  prepare_data:
    parameters:
      dataset_name: gem/viggo

  finetune:
    parameters:
      max_steps: 200
      eval_steps: 50
      bf16: True
      use_accelerate: True

  promote:
    parameters:
      metric: rouge2
      target_stage: staging
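For reference, here is a minimal sketch of how a flag like `load_in_4bit: True` typically maps onto a quantized model load with `transformers`; this is not the repo's exact code, whose loaders live in `utils/loaders.py`:

```python
# Sketch under the assumption that `transformers` and `bitsandbytes`
# are installed; not the repo's actual loading logic.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # mirrors `load_in_4bit: True` above
    bnb_4bit_compute_dtype=torch.bfloat16,  # pair with `bf16: True` where supported
)
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    quantization_config=bnb_config,
    trust_remote_code=True,  # Phi-2 shipped custom modeling code at the time
)
```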
57 changes: 57 additions & 0 deletions llm-lora-finetuning/configs/phi_accelerated_local_finetune.yaml
@@ -0,0 +1,57 @@
# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2024. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

model:
  name: llm-peft-microsoft-phi-2
  description: "Fine-tune `microsoft/phi-2`."
  tags:
    - llm
    - peft
    - microsoft/phi-2
  version: 25_steps_accelerate

settings:
  docker:
    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
    requirements: requirements.txt

parameters:
  base_model_id: microsoft/phi-2
  system_prompt: |
    Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
    This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
    The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
  use_fast: False
  load_in_4bit: True

steps:
  prepare_data:
    parameters:
      dataset_name: gem/viggo

  finetune:
    parameters:
      max_steps: 25
      eval_steps: 25
      bf16: False
      use_accelerate: True

  promote:
    parameters:
      metric: rouge2
      target_stage: staging
67 changes: 67 additions & 0 deletions llm-lora-finetuning/configs/phi_accelerated_remote_finetune.yaml
@@ -0,0 +1,67 @@
# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2024. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

model:
  name: llm-peft-microsoft-phi-2
  description: "Fine-tune `microsoft/phi-2`."
  tags:
    - llm
    - peft
    - microsoft/phi-2
  version: 100_steps_accelerate

settings:
  docker:
    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
    requirements: requirements.txt
    environment:
      PJRT_DEVICE: CUDA
      USE_TORCH_XLA: "false"
      MKL_SERVICE_FORCE_INTEL: "1"

parameters:
  base_model_id: microsoft/phi-2
  system_prompt: |
    Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
    This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
    The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
  use_fast: False
  load_in_4bit: True

steps:
  prepare_data:
    parameters:
      dataset_name: gem/viggo

  finetune:
    step_operator: gcp_t4x2
    parameters:
      max_steps: 100
      eval_steps: 50
      bf16: False
      use_accelerate: True

  evaluate_finetuned:
    step_operator: gcp_t4x2

  evaluate_base:
    step_operator: gcp_t4x2

  promote:
    parameters:
      metric: rouge2
      target_stage: staging
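The `settings.docker` block above also has a code equivalent; a minimal sketch using the standard ZenML Python API (the repo itself keeps this in YAML, and the pipeline name below is illustrative):

```python
# Sketch only: the repo configures Docker settings via YAML, not code.
from zenml import pipeline
from zenml.config import DockerSettings

docker_settings = DockerSettings(
    parent_image="pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime",
    requirements="requirements.txt",
    environment={
        "PJRT_DEVICE": "CUDA",
        "USE_TORCH_XLA": "false",
        "MKL_SERVICE_FORCE_INTEL": "1",
    },
)

@pipeline(settings={"docker": docker_settings})
def finetuning_pipeline() -> None:
    # steps would be wired up here, e.g. prepare_data() -> finetune() -> promote()
    ...
```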
56 changes: 56 additions & 0 deletions llm-lora-finetuning/configs/phi_local_bf16_finetune.yaml
@@ -0,0 +1,56 @@
# Apache Software License 2.0
#
# Copyright (c) ZenML GmbH 2024. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

model:
  name: llm-peft-microsoft-phi-2
  description: "Fine-tune `microsoft/phi-2`."
  tags:
    - llm
    - peft
    - microsoft/phi-2
  version: 200_steps

settings:
  docker:
    parent_image: pytorch/pytorch:2.2.2-cuda11.8-cudnn8-runtime
    requirements: requirements.txt

parameters:
  base_model_id: microsoft/phi-2
  system_prompt: |
    Given a target sentence construct the underlying meaning representation of the input sentence as a single function with attributes and attribute values.
    This function should describe the target string accurately and the function must be one of the following ['inform', 'request', 'give_opinion', 'confirm', 'verify_attribute', 'suggest', 'request_explanation', 'recommend', 'request_attribute'].
    The attributes must be one of the following: ['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating', 'genres', 'player_perspective', 'has_multiplayer', 'platforms', 'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier']
  use_fast: False
  load_in_4bit: True

steps:
  prepare_data:
    parameters:
      dataset_name: gem/viggo

  finetune:
    parameters:
      max_steps: 200
      eval_steps: 50
      bf16: True

  promote:
    parameters:
      metric: rouge2
      target_stage: staging