Commit
Merge branch 'main' into stable_diffusion
valeriosofi authored Mar 21, 2023
2 parents 85f960c + bd38730 commit c5ff3a4
Showing 31 changed files with 2,803 additions and 739 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
@@ -8,15 +8,15 @@ on:
      - ".github/**"
      - "*.md"
      - "docs/**"
-     - "nodebooks/**"
+     - "notebooks/**"
  pull_request:
    branches:
      - "main"
    paths-ignore:
      - ".github/**"
      - "*.md"
      - "docs/**"
-     - "nodebooks/**"
+     - "notebooks/**"

jobs:
  test_on_ubuntu_cpu:
4 changes: 2 additions & 2 deletions README.md
@@ -34,9 +34,9 @@ There are multiple modules we actually provide to boost the performances of your

✅ [Speedster](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/speedster): Automatically apply the best set of SOTA optimization techniques to achieve the maximum inference speed-up on your hardware.

-[Nos](https://github.com/nebuly-ai/nos): Automatically maximize the utilization of GPU resources in a Kubernetes cluster through real-time dynamic partitioning and elastic quotas - Effortless optimization at its finest!
+[Nos](https://github.com/nebuly-ai/nos): Automatically maximize the utilization of GPU resources in a Kubernetes cluster through real-time dynamic partitioning and elastic quotas.

-[ChatLLaMA](https://github.com/nebuly-ai/nebullvm/tree/main/apps/accelerate/chatllama): Build faster and cheaper ChatGPT-like training process based on LLaMA architectures.
+[ChatLLaMA](https://github.com/nebuly-ai/nebullvm/tree/main/apps/accelerate/chatllama): Create hyper-personalized ChatGPT-like assistants using your custom data and the least amount of compute possible.

✅ [OpenAlphaTensor](https://github.com/nebuly-ai/nebullvm/tree/main/apps/accelerate/open_alpha_tensor): Increase the computational performances of an AI model with custom-generated matrix multiplication algorithm fine-tuned for your specific hardware.
424 changes: 356 additions & 68 deletions apps/accelerate/chatllama/README.md

Large diffs are not rendered by default.

95 changes: 95 additions & 0 deletions apps/accelerate/chatllama/artifacts/config/config.yaml
@@ -0,0 +1,95 @@
---
trainer_config:
  # learning rates
  actor_lr: 0.00001
  critic_lr: 0.00001
  # PPO hyperparameters
  actor_eps_clip: 0.2
  critic_eps_clip: 0.2
  beta_s: 0.1
  # path to the examples to be sampled (training dataset); see rlhf_dataset.json
  examples_path: "./datasets/rlhf_training_data.json"
  # number of episodes, and generations performed for each episode,
  # in the train() method
  num_episodes: 100
  max_timesteps: 32
  # number of timesteps after which the learn() method is called
  # (to update the weights)
  update_timesteps: 32
  # number of examples sampled at each timestep
  num_examples: 32
  # batch size and number of epochs for the training
  batch_size: 1
  epochs: 1
  # number of episodes after which the checkpoints are updated in RL training
  checkpoint_steps: 10
  # name of the actor_rl checkpoint from which to resume during actor RL
  # training. If null, the last one is loaded.
  checkpoint_name: null

actor_config:
  model: "facebook/opt-1.3b"
  model_folder: "./models"
  tokenizer_folder: "path-to-tokenizer"
  train_dataset_path: "./datasets/actor_training_data.json"
  validation_dataset_path: null
  # freeze the model embeddings during training
  froze_embeddings: True
  # use fairscale layers to build the model instead of vanilla pytorch
  use_fairscale: False
  # max sequence length for the actor (i.e. prompt + completion); it depends on
  # the model used.
  max_sequence_length: 2048
  # max tokens generated by the actor (completion only)
  max_tokens: 512
  # temperature for the actor
  temperature: 0.9
  batch_size: 1
  # number of iterations after which progress is printed
  iteration_per_print: 1
  lr: 0.0001
  epochs: 32
  # number of backpropagation steps after which the checkpoints are saved
  checkpoint_steps: 3
  # name of the actor checkpoint from which to resume during actor training.
  # If null, the last one is loaded.
  checkpoint_name: null
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"

reward_config:
  # models to choose from: gpt2-large, bart-base, longformer-base-4096;
  # more can easily be added in the reward.py __init__()
  model: "gpt2-large"
  model_folder: "./models"
  # hidden size of the additional ffw head that produces the scores
  model_head_hidden_size: 2048
  train_dataset_path: "./datasets/reward_training_data.json"
  validation_dataset_path: null
  batch_size: 1
  epochs: 32
  iteration_per_print: 1
  # number of steps after which the checkpoints are saved
  checkpoint_steps: 10
  # name of the reward checkpoint from which to resume during reward training.
  # If null, the last one is loaded.
  checkpoint_name: null
  lr: 0.0001
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"

critic_config:
  # models to choose from: gpt2-large, bart-base, longformer-base-4096;
  # more can easily be added in the reward.py __init__()
  model: "gpt2-large"
  # hidden size of the additional ffw head that produces the scores
  model_head_hidden_size: 2048
  model_folder: "./models"
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"
  # name of the critic checkpoint from which to resume during critic training.
  # If null, the last one is loaded.
  checkpoint_name: null
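For orientation, the configuration above is plain YAML and can be read with any YAML parser. The following is a minimal sketch using PyYAML; the loading code and the relative path are illustrative assumptions and not part of this commit:

import yaml

# read the ChatLLaMA configuration file added in this commit
# (path assumed relative to the chatllama app directory)
with open("artifacts/config/config.yaml") as f:
    config = yaml.safe_load(f)

# each top-level section is a plain dict of hyperparameters
trainer = config["trainer_config"]
actor = config["actor_config"]
print(trainer["actor_lr"], trainer["num_episodes"])   # 1e-05 100
print(actor["model"], actor["max_sequence_length"])   # facebook/opt-1.3b 2048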
@@ -0,0 +1,6 @@
[
    {
        "user_input": "here the input of the user",
        "completion": "here the model completion"
    }
]
12 changes: 12 additions & 0 deletions apps/accelerate/chatllama/artifacts/datasets/reward_dataset.json
@@ -0,0 +1,12 @@
[
    {
        "user_input": "here type the user input",
        "completion": "here type the completion",
        "score": 4.0
    },
    {
        "user_input": "here type the user input",
        "completion": "if score is null, it can be evaluated by davinci using reward_trainer.distill()",
        "score": null
    }
]
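As a reading aid, the reward dataset is a flat JSON list, so it can be inspected with the standard library alone. A minimal sketch (the file path is an assumption, and this snippet is not part of the commit) that separates the entries still waiting for a score:

import json

with open("artifacts/datasets/reward_dataset.json") as f:
    examples = json.load(f)

# entries with a null score still need to be rated,
# e.g. via reward_trainer.distill() as noted in the template above
unscored = [e for e in examples if e["score"] is None]
scored = [e for e in examples if e["score"] is not None]
print(f"{len(scored)} scored, {len(unscored)} unscored examples")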
@@ -0,0 +1,5 @@
[
    {
        "user_input": "here the example of user input"
    }
]
235 changes: 235 additions & 0 deletions apps/accelerate/chatllama/artifacts/download_dataset.py
@@ -0,0 +1,235 @@
import argparse
import json
import os
import re

import numpy as np

from datasets import load_dataset


class StanfordNLPSHPDataset:
    def __init__(
        self,
    ) -> None:
        print("Download the dataset")
        self.dataset = load_dataset("stanfordnlp/SHP")
        print("Download Completed")

    def save_dataset(
        self,
        dataset_folder: str,
        number_of_samples: int,
    ) -> None:

        print("Generate datasets for RLHF")

        # TODO: the scores in the dataset are not used yet
        # use the train and test splits to create the fine-tuning datasets
        # for the actor and the reward model
        # (the latter is scored by davinci)
        conversations = []
        for i, data in enumerate(self.dataset["train"]):
            if data["score_A"] > data["score_B"]:
                response = data["human_ref_A"]
            else:
                response = data["human_ref_B"]
            conv = {
                "user_input": data["history"],
                "completion": response,
                "score": None,
            }
            conversations.append(conv)

        for i, data in enumerate(self.dataset["test"]):
            if data["score_A"] > data["score_B"]:
                response = data["human_ref_A"]
            else:
                response = data["human_ref_B"]
            conv = {
                "user_input": data["history"],
                "completion": response,
                "score": None,
            }
            conversations.append(conv)

        with open(f"{dataset_folder}/actor_training_data.json", "w") as f:
            json.dump(conversations, f)

        # sample number_of_samples indices from 0 to len(conversations)
        indexes = np.random.choice(
            len(conversations), size=number_of_samples, replace=False
        )
        conversations = [conversations[i] for i in indexes]
        with open(f"{dataset_folder}/reward_training_data.json", "w") as f:
            json.dump(conversations, f)

        # use the validation split for the RLHF training
        conversations = []
        for i, data in enumerate(self.dataset["validation"]):
            conv = {
                "user_input": data["history"],
            }
            conversations.append(conv)

        with open(f"{dataset_folder}/rlhf_training_data.json", "w") as f:
            json.dump(conversations, f)

        print("Generation Completed")


class AnthropicRLHF:
    def __init__(
        self,
    ) -> None:

        print("Download the dataset")
        self.dataset = load_dataset("Anthropic/hh-rlhf")
        print("Download Completed")

    def save_dataset(
        self,
        dataset_folder: str,
        number_of_samples: int,
    ) -> None:

        print("Generate datasets for RLHF")

        # generate the actor and reward datasets
        conversations = []
        for i, data in enumerate(self.dataset["train"]):
            current_conv = data["chosen"]

            sections = re.split("Assistant:|User:", current_conv)
            if len(sections) == 2:
                user_input = sections[0]
                completion = sections[1]
            elif len(sections) == 4:
                user_input = (
                    f"Human:{sections[0]}\n"
                    f"Assistant: {sections[1]}"
                    f"Human:{sections[2]}\n"
                )
                completion = sections[3]
            elif len(sections) == 6:
                user_input = (
                    f"Human:{sections[0]}\n"
                    f"Assistant: {sections[1]}"
                    f"Human:{sections[2]}\n"
                    f"Assistant: {sections[3]}\n"
                    f"Human:{sections[4]}\n"
                )
                completion = sections[5]
            else:
                continue

            conv = {
                "user_input": user_input,
                "completion": completion,
                "score": None,
            }
            conversations.append(conv)

        with open(f"{dataset_folder}/actor_training_data.json", "w") as f:
            json.dump(conversations, f)

        # sample number_of_samples indices from 0 to len(conversations)
        indexes = np.random.choice(
            len(conversations), size=number_of_samples, replace=False
        )
        conversations = [conversations[i] for i in indexes]
        with open(f"{dataset_folder}/reward_training_data.json", "w") as f:
            json.dump(conversations, f)

        # RLHF dataset
        conversations = []
        for i, data in enumerate(self.dataset["train"]):
            current_conv = data["chosen"]

            sections = re.split("Assistant:|User:", current_conv)
            if len(sections) >= 2:
                user_input = sections[0]
                completion = sections[1]
                conv = {
                    "user_input": user_input,
                    "completion": completion,
                }
                conversations.append(conv)
            if len(sections) >= 4:
                user_input = (
                    f"Human:{sections[0]}\n"
                    f"Assistant: {sections[1]}"
                    f"Human:{sections[2]}\n"
                )
                completion = sections[3]
                conv = {
                    "user_input": user_input,
                    "completion": completion,
                }
                conversations.append(conv)
            if len(sections) == 6:
                user_input = (
                    f"Human:{sections[0]}\n"
                    f"Assistant: {sections[1]}"
                    f"Human:{sections[2]}\n"
                    f"Assistant: {sections[3]}\n"
                    f"Human:{sections[4]}\n"
                )
                completion = sections[5]
                conv = {
                    "user_input": user_input,
                    "completion": completion,
                }
                conversations.append(conv)

        with open(f"{dataset_folder}/rlhf_training_data.json", "w") as f:
            json.dump(conversations, f)

        print("Generation Completed")


if __name__ == "__main__":

    # Set up the argument parser
    parser = argparse.ArgumentParser(
        prog="generate_rewards.py",
        description="Generate rewards using LangChain and LLMs",
    )

    parser.add_argument(
        "dataset_name",
        help="Dataset to download: SHP (stanfordnlp/SHP) or ARLHF (Anthropic/hh-rlhf)",
        choices=["SHP", "ARLHF"],
    )
    parser.add_argument(
        "-p",
        "--path",
        help="Specify the path for the dataset",
        default="./datasets",
    )
    parser.add_argument(
        "-n",
        "--number_of_samples",
        help="Specify the number of samples for the reward dataset",
        default=200,
    )

    args = parser.parse_args()
    if not os.path.exists(args.path):
        os.mkdir(args.path)

    try:
        n_samples = int(args.number_of_samples)
    except ValueError:
        raise ValueError("Number of samples should be an integer")

    if args.dataset_name == "SHP":
        dataset = StanfordNLPSHPDataset()
        dataset.save_dataset(args.path, n_samples)

    elif args.dataset_name == "ARLHF":
        dataset = AnthropicRLHF()
        dataset.save_dataset(
            args.path,
            n_samples,
        )
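For reference, an illustrative invocation based on the argparse definitions above (the working directory is an assumption; run it from wherever download_dataset.py lives): python download_dataset.py SHP --path ./datasets --number_of_samples 200 for the Stanford SHP dataset, or python download_dataset.py ARLHF -p ./datasets -n 200 for the Anthropic hh-rlhf dataset. Either run writes actor_training_data.json, reward_training_data.json, and rlhf_training_data.json into the chosen folder, matching the dataset paths expected by config.yaml.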
