Merge branch 'main' into stable_diffusion
Showing 31 changed files with 2,803 additions and 739 deletions.
@@ -0,0 +1,95 @@
---
trainer_config:
  # learning rates
  actor_lr: 0.00001
  critic_lr: 0.00001
  # PPO hyperparameters
  actor_eps_clip: 0.2
  critic_eps_clip: 0.2
  beta_s: 0.1
  # path to the examples to be sampled (training dataset); see rlhf_dataset.json
  examples_path: "./datasets/rlhf_training_data.json"
  # number of episodes, and the generations performed in each episode,
  # in the train() method
  num_episodes: 100
  max_timesteps: 32
  # number of timesteps after which the learn() method is called
  # (to update the weights)
  update_timesteps: 32
  # number of examples sampled at each timestep
  num_examples: 32
  # batch size and epochs for the training
  batch_size: 1
  epochs: 1
  # number of episodes after which the checkpoints are updated in RL training
  checkpoint_steps: 10
  # name of the actor_rl checkpoint from which to resume
  # during actor RL training. If null, the last one is loaded.
  checkpoint_name: null

actor_config:
  model: "facebook/opt-1.3b"
  model_folder: "./models"
  tokenizer_folder: "path-to-tokenizer"
  train_dataset_path: "./datasets/actor_training_data.json"
  validation_dataset_path: null
  # freeze the model embeddings during training
  froze_embeddings: True
  # use fairscale layers to build the model instead of vanilla PyTorch
  use_fairscale: False
  # max sequence length for the actor (i.e. prompt + completion);
  # it depends on the model used
  max_sequence_length: 2048
  # max tokens generated by the actor (completion only)
  max_tokens: 512
  # temperature for the actor
  temperature: 0.9
  batch_size: 1
  # number of iterations between prints
  iteration_per_print: 1
  lr: 0.0001
  epochs: 32
  # number of backpropagation steps between checkpoint saves
  checkpoint_steps: 3
  # name of the actor checkpoint from which to resume
  # during actor training. If null, the last one is loaded.
  checkpoint_name: null
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"

reward_config:
  # models to choose from are gpt2-large, bart-base, longformer-base-4096;
  # more can be added in the reward.py __init__()
  model: "gpt2-large"
  model_folder: "./models"
  # hidden size of the additional feed-forward head that produces the scores
  model_head_hidden_size: 2048
  train_dataset_path: "./datasets/reward_training_data.json"
  validation_dataset_path: null
  batch_size: 1
  epochs: 32
  iteration_per_print: 1
  # steps after which the checkpoints are saved
  checkpoint_steps: 10
  # name of the reward checkpoint from which to resume
  # during reward training. If null, the last one is loaded.
  checkpoint_name: null
  lr: 0.0001
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"

critic_config:
  # models to choose from are gpt2-large, bart-base, longformer-base-4096;
  # more can be added in the reward.py __init__()
  model: "gpt2-large"
  # hidden size of the additional feed-forward head that produces the scores
  model_head_hidden_size: 2048
  model_folder: "./models"
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"
  # name of the critic checkpoint from which to resume
  # during critic training. If null, the last one is loaded.
  checkpoint_name: null
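
These four sections (trainer, actor, reward, critic) are plain YAML, so they can be inspected with PyYAML before training. A minimal sketch, assuming the file above is saved as config.yaml (its actual path and the loading code are not shown in this diff):

import yaml

# Load the config and pull out the four sections as plain dicts.
# "config.yaml" is an assumed filename; the diff does not show where
# chatllama stores this file or how it reads it.
with open("config.yaml") as f:
    config = yaml.safe_load(f)

trainer_cfg = config["trainer_config"]
actor_cfg = config["actor_config"]

print(trainer_cfg["actor_eps_clip"])  # 0.2, the PPO clipping threshold
print(actor_cfg["max_tokens"])        # 512, completion-only token budget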
File renamed without changes.
apps/accelerate/chatllama/artifacts/datasets/actor_dataset.json (6 additions, 0 deletions)
@@ -0,0 +1,6 @@
[
    {
        "user_input": "here type the user input",
        "completion": "here type the model completion"
    }
]
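
Each actor record pairs a prompt with a target completion. As a rough sketch of how such records could feed supervised fine-tuning, one option is to concatenate the two fields; this framing is an assumption, not necessarily what chatllama's actor trainer does:

import json

# Read the actor records shown above.
with open("actor_dataset.json") as f:
    records = json.load(f)

# Assumption: join prompt and completion into one training text;
# the real collation and tokenization live in the actor trainer.
texts = [r["user_input"] + "\n" + r["completion"] for r in records]
print(texts[0])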
apps/accelerate/chatllama/artifacts/datasets/reward_dataset.json (12 additions, 0 deletions)
@@ -0,0 +1,12 @@
[
    {
        "user_input": "here type the user input",
        "completion": "here type the completion",
        "score": 4.0
    },
    {
        "user_input": "here type the user input",
        "completion": "if score is null, it can be evaluated by davinci using reward_trainer.distill()",
        "score": null
    }
]
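
Reward records carry an optional score, and the sample data notes that null scores can be filled in later via reward_trainer.distill(). A minimal sketch of separating the two cases before training, assuming the JSON is read directly:

import json

with open("reward_dataset.json") as f:
    records = json.load(f)

# Records with a numeric score can train the reward model as-is;
# records with score == null still need scoring (the data above points
# to reward_trainer.distill(), which delegates that to davinci).
scored = [r for r in records if r["score"] is not None]
unscored = [r for r in records if r["score"] is None]
print(f"{len(scored)} scored, {len(unscored)} awaiting distillation")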
apps/accelerate/chatllama/artifacts/datasets/rlhf_dataset.json (5 additions, 0 deletions)
@@ -0,0 +1,5 @@
[
    {
        "user_input": "here type an example of user input"
    }
]
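
RLHF records are prompts only; per trainer_config above, num_examples of them are sampled at each timestep. A rough sketch of that sampling outside the trainer (the trainer's own sampling logic is not shown in this diff, so this mirrors the config rather than the actual code):

import json

import numpy as np

with open("rlhf_dataset.json") as f:
    prompts = json.load(f)

# Sample up to num_examples prompts without replacement, matching the
# num_examples: 32 setting in trainer_config (an assumption about how
# the trainer consumes this file).
num_examples = 32
idx = np.random.choice(
    len(prompts), size=min(num_examples, len(prompts)), replace=False
)
batch = [prompts[i]["user_input"] for i in idx]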
apps/accelerate/chatllama/artifacts/download_dataset.py (235 additions, 0 deletions)
@@ -0,0 +1,235 @@
import argparse
import json
import os
import re

import numpy as np

from datasets import load_dataset


class StanfordNLPSHPDataset:
    def __init__(self) -> None:
        print("Download the dataset")
        self.dataset = load_dataset("stanfordnlp/SHP")
        print("Download Completed")

    def save_dataset(
        self,
        dataset_folder: str,
        number_of_samples: int,
    ) -> None:

        print("Generate datasets for RLHF")

        # TODO: the scores in the dataset are not used for now.
        # Use the train and test splits to create the fine-tuning datasets
        # for the actor and the reward model
        # (the latter is then scored by davinci).
        conversations = []
        for split in ("train", "test"):
            for data in self.dataset[split]:
                # keep the better-scored of the two reference answers
                if data["score_A"] > data["score_B"]:
                    response = data["human_ref_A"]
                else:
                    response = data["human_ref_B"]
                conv = {
                    "user_input": data["history"],
                    "completion": response,
                    "score": None,
                }
                conversations.append(conv)

        with open(f"{dataset_folder}/actor_training_data.json", "w") as f:
            json.dump(conversations, f)

        # sample number_of_samples indexes from 0 to len(conversations)
        indexes = np.random.choice(
            len(conversations), size=number_of_samples, replace=False
        )
        conversations = [conversations[i] for i in indexes]
        with open(f"{dataset_folder}/reward_training_data.json", "w") as f:
            json.dump(conversations, f)

        # use the validation split for the RLHF training
        conversations = [
            {"user_input": data["history"]}
            for data in self.dataset["validation"]
        ]

        with open(f"{dataset_folder}/rlhf_training_data.json", "w") as f:
            json.dump(conversations, f)

        print("Generation Completed")


class AnthropicRLHF:
    def __init__(self) -> None:
        print("Download the dataset")
        self.dataset = load_dataset("Anthropic/hh-rlhf")
        print("Download Completed")

    def save_dataset(
        self,
        dataset_folder: str,
        number_of_samples: int,
    ) -> None:

        print("Generate datasets for RLHF")

        # generate the actor and reward datasets
        conversations = []
        for data in self.dataset["train"]:
            current_conv = data["chosen"]

            # hh-rlhf transcripts alternate "Human:" and "Assistant:" turns;
            # sections[0] is the (empty) text before the first "Human:"
            sections = re.split("Assistant:|Human:", current_conv)
            if len(sections) == 3:
                user_input = sections[1]
                completion = sections[2]
            elif len(sections) == 5:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                )
                completion = sections[4]
            elif len(sections) == 7:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                    f"Assistant:{sections[4]}\n"
                    f"Human:{sections[5]}\n"
                )
                completion = sections[6]
            else:
                continue

            conv = {
                "user_input": user_input,
                "completion": completion,
                "score": None,
            }
            conversations.append(conv)

        with open(f"{dataset_folder}/actor_training_data.json", "w") as f:
            json.dump(conversations, f)

        # sample number_of_samples indexes from 0 to len(conversations)
        indexes = np.random.choice(
            len(conversations), size=number_of_samples, replace=False
        )
        conversations = [conversations[i] for i in indexes]
        with open(f"{dataset_folder}/reward_training_data.json", "w") as f:
            json.dump(conversations, f)

        # rlhf dataset: keep every prefix of the conversation as a prompt
        conversations = []
        for data in self.dataset["train"]:
            current_conv = data["chosen"]

            sections = re.split("Assistant:|Human:", current_conv)
            if len(sections) >= 3:
                conv = {
                    "user_input": sections[1],
                    "completion": sections[2],
                }
                conversations.append(conv)
            if len(sections) >= 5:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                )
                conv = {
                    "user_input": user_input,
                    "completion": sections[4],
                }
                conversations.append(conv)
            if len(sections) == 7:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                    f"Assistant:{sections[4]}\n"
                    f"Human:{sections[5]}\n"
                )
                conv = {
                    "user_input": user_input,
                    "completion": sections[6],
                }
                conversations.append(conv)

        with open(f"{dataset_folder}/rlhf_training_data.json", "w") as f:
            json.dump(conversations, f)

        print("Generation Completed")


if __name__ == "__main__":

    # Setup argument parser
    parser = argparse.ArgumentParser(
        prog="download_dataset.py",
        description="Download and prepare the datasets for RLHF training",
    )

    parser.add_argument(
        "dataset_name",
        help=(
            "dataset to download: SHP (stanfordnlp/SHP) "
            "or ARLHF (Anthropic/hh-rlhf)"
        ),
        choices=["SHP", "ARLHF"],
    )
    parser.add_argument(
        "-p",
        "--path",
        help="Specify the path for the dataset",
        default="./datasets",
    )
    parser.add_argument(
        "-n",
        "--number_of_samples",
        help="Specify the number of samples for the reward dataset",
        default=200,
    )

    args = parser.parse_args()
    if not os.path.exists(args.path):
        os.mkdir(args.path)

    try:
        n_samples = int(args.number_of_samples)
    except ValueError:
        raise ValueError("Number of samples should be an integer")

    if args.dataset_name == "SHP":
        dataset = StanfordNLPSHPDataset()
        dataset.save_dataset(args.path, n_samples)
    elif args.dataset_name == "ARLHF":
        dataset = AnthropicRLHF()
        dataset.save_dataset(args.path, n_samples)
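
Usage follows directly from the argparse setup above; for example:

python download_dataset.py SHP -p ./datasets -n 200
python download_dataset.py ARLHF -p ./datasets -n 200

Either invocation writes actor_training_data.json, reward_training_data.json, and rlhf_training_data.json into the given folder, matching the dataset paths referenced in the config file above.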