Merge branch 'main' into stable_diffusion
Showing 31 changed files with 2,803 additions and 739 deletions.
@@ -0,0 +1,95 @@
---
trainer_config:
  # learning rates
  actor_lr: 0.00001
  critic_lr: 0.00001
  # PPO hyperparameters
  actor_eps_clip: 0.2
  critic_eps_clip: 0.2
  beta_s: 0.1
  # path to the examples to be sampled (training dataset); see rlhf_dataset.json
  examples_path: "./datasets/rlhf_training_data.json"
  # number of episodes, and the generations performed in each episode,
  # in the train() method
  num_episodes: 100
  max_timesteps: 32
  # number of timesteps after which the learn() method is called
  # (to update the weights)
  update_timesteps: 32
  # number of examples sampled at each timestep
  num_examples: 32
  # batch size and epochs for the training
  batch_size: 1
  epochs: 1
  # number of episodes after which the checkpoints are updated in RL training
  checkpoint_steps: 10
  # name of the actor_rl checkpoint from which to resume
  # during actor RL training. If null, the last one is loaded.
  checkpoint_name: null

actor_config:
  model: "facebook/opt-1.3b"
  model_folder: "./models"
  tokenizer_folder: "path-to-tokenizer"
  train_dataset_path: "./datasets/actor_training_data.json"
  validation_dataset_path: null
  # freeze the model embeddings during training
  froze_embeddings: True
  # use fairscale layers to build the model instead of vanilla PyTorch
  use_fairscale: False
  # max sequence length for the actor (i.e. prompt + completion);
  # it depends on the model used
  max_sequence_length: 2048
  # max tokens generated by the actor (completion only)
  max_tokens: 512
  # temperature for the actor
  temperature: 0.9
  batch_size: 1
  # number of iterations between prints
  iteration_per_print: 1
  lr: 0.0001
  epochs: 32
  # number of backpropagation steps between checkpoint saves
  checkpoint_steps: 3
  # name of the actor checkpoint from which to resume
  # during actor training. If null, the last one is loaded.
  checkpoint_name: null
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"

reward_config:
  # models to choose from are gpt2-large, bart-base, longformer-base-4096;
  # more can be added in the reward.py __init__()
  model: "gpt2-large"
  model_folder: "./models"
  # hidden size of the additional feed-forward head that produces the scores
  model_head_hidden_size: 2048
  train_dataset_path: "./datasets/reward_training_data.json"
  validation_dataset_path: null
  batch_size: 1
  epochs: 32
  iteration_per_print: 1
  # steps after which the checkpoints are saved
  checkpoint_steps: 10
  # name of the reward checkpoint from which to resume
  # during reward training. If null, the last one is loaded.
  checkpoint_name: null
  lr: 0.0001
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"

critic_config:
  # models to choose from are gpt2-large, bart-base, longformer-base-4096;
  # more can be added in the reward.py __init__()
  model: "gpt2-large"
  # hidden size of the additional feed-forward head that produces the scores
  model_head_hidden_size: 2048
  model_folder: "./models"
  # deepspeed settings
  deepspeed_enable: False
  deepspeed_config_path: "path-to-deepspeed-conf"
  # name of the critic checkpoint from which to resume
  # during critic training. If null, the last one is loaded.
  checkpoint_name: null
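
These four sections (trainer, actor, reward, critic) are plain YAML, so they can be inspected with PyYAML before training. A minimal sketch, assuming the file above is saved as config.yaml (its actual path and the loading code are not shown in this diff):

import yaml

# Load the config and pull out the four sections as plain dicts.
# "config.yaml" is an assumed filename; the diff does not show where
# chatllama stores this file or how it reads it.
with open("config.yaml") as f:
    config = yaml.safe_load(f)

trainer_cfg = config["trainer_config"]
actor_cfg = config["actor_config"]

print(trainer_cfg["actor_eps_clip"])  # 0.2, the PPO clipping threshold
print(actor_cfg["max_tokens"])        # 512, completion-only token budget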
File renamed without changes.
apps/accelerate/chatllama/artifacts/datasets/actor_dataset.json (6 additions, 0 deletions)
@@ -0,0 +1,6 @@
[
    {
        "user_input": "here type the user input",
        "completion": "here type the model completion"
    }
]
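
Each actor record pairs a prompt with a target completion. As a rough sketch of how such records could feed supervised fine-tuning, one option is to concatenate the two fields; this framing is an assumption, not necessarily what chatllama's actor trainer does:

import json

# Read the actor records shown above.
with open("actor_dataset.json") as f:
    records = json.load(f)

# Assumption: join prompt and completion into one training text;
# the real collation and tokenization live in the actor trainer.
texts = [r["user_input"] + "\n" + r["completion"] for r in records]
print(texts[0])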
apps/accelerate/chatllama/artifacts/datasets/reward_dataset.json (12 additions, 0 deletions)
@@ -0,0 +1,12 @@
[
    {
        "user_input": "here type the user input",
        "completion": "here type the completion",
        "score": 4.0
    },
    {
        "user_input": "here type the user input",
        "completion": "if score is null, it can be evaluated by davinci using reward_trainer.distill()",
        "score": null
    }
]
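
Reward records carry an optional score, and the sample data notes that null scores can be filled in later via reward_trainer.distill(). A minimal sketch of separating the two cases before training, assuming the JSON is read directly:

import json

with open("reward_dataset.json") as f:
    records = json.load(f)

# Records with a numeric score can train the reward model as-is;
# records with score == null still need scoring (the data above points
# to reward_trainer.distill(), which delegates that to davinci).
scored = [r for r in records if r["score"] is not None]
unscored = [r for r in records if r["score"] is None]
print(f"{len(scored)} scored, {len(unscored)} awaiting distillation")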
apps/accelerate/chatllama/artifacts/datasets/rlhf_dataset.json (5 additions, 0 deletions)
@@ -0,0 +1,5 @@
[
    {
        "user_input": "here type an example of user input"
    }
]
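
RLHF records are prompts only; per trainer_config above, num_examples of them are sampled at each timestep. A rough sketch of that sampling outside the trainer (the trainer's own sampling logic is not shown in this diff, so this mirrors the config rather than the actual code):

import json

import numpy as np

with open("rlhf_dataset.json") as f:
    prompts = json.load(f)

# Sample up to num_examples prompts without replacement, matching the
# num_examples: 32 setting in trainer_config (an assumption about how
# the trainer consumes this file).
num_examples = 32
idx = np.random.choice(
    len(prompts), size=min(num_examples, len(prompts)), replace=False
)
batch = [prompts[i]["user_input"] for i in idx]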
apps/accelerate/chatllama/artifacts/download_dataset.py (235 additions, 0 deletions)
@@ -0,0 +1,235 @@
import argparse
import json
import os
import re

import numpy as np

from datasets import load_dataset


class StanfordNLPSHPDataset:
    def __init__(self) -> None:
        print("Download the dataset")
        self.dataset = load_dataset("stanfordnlp/SHP")
        print("Download Completed")

    def save_dataset(
        self,
        dataset_folder: str,
        number_of_samples: int,
    ) -> None:

        print("Generate datasets for RLHF")

        # TODO: the scores in the dataset are not used for now.
        # Use the train and test splits to create the fine-tuning datasets
        # for the actor and the reward model
        # (the latter is then scored by davinci).
        conversations = []
        for split in ("train", "test"):
            for data in self.dataset[split]:
                # keep the better-scored of the two reference answers
                if data["score_A"] > data["score_B"]:
                    response = data["human_ref_A"]
                else:
                    response = data["human_ref_B"]
                conv = {
                    "user_input": data["history"],
                    "completion": response,
                    "score": None,
                }
                conversations.append(conv)

        with open(f"{dataset_folder}/actor_training_data.json", "w") as f:
            json.dump(conversations, f)

        # sample number_of_samples indexes from 0 to len(conversations)
        indexes = np.random.choice(
            len(conversations), size=number_of_samples, replace=False
        )
        conversations = [conversations[i] for i in indexes]
        with open(f"{dataset_folder}/reward_training_data.json", "w") as f:
            json.dump(conversations, f)

        # use the validation split for the RLHF training
        conversations = [
            {"user_input": data["history"]}
            for data in self.dataset["validation"]
        ]

        with open(f"{dataset_folder}/rlhf_training_data.json", "w") as f:
            json.dump(conversations, f)

        print("Generation Completed")


class AnthropicRLHF:
    def __init__(self) -> None:
        print("Download the dataset")
        self.dataset = load_dataset("Anthropic/hh-rlhf")
        print("Download Completed")

    def save_dataset(
        self,
        dataset_folder: str,
        number_of_samples: int,
    ) -> None:

        print("Generate datasets for RLHF")

        # generate the actor and reward datasets
        conversations = []
        for data in self.dataset["train"]:
            current_conv = data["chosen"]

            # hh-rlhf transcripts alternate "Human:" and "Assistant:" turns;
            # sections[0] is the (empty) text before the first "Human:"
            sections = re.split("Assistant:|Human:", current_conv)
            if len(sections) == 3:
                user_input = sections[1]
                completion = sections[2]
            elif len(sections) == 5:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                )
                completion = sections[4]
            elif len(sections) == 7:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                    f"Assistant:{sections[4]}\n"
                    f"Human:{sections[5]}\n"
                )
                completion = sections[6]
            else:
                continue

            conv = {
                "user_input": user_input,
                "completion": completion,
                "score": None,
            }
            conversations.append(conv)

        with open(f"{dataset_folder}/actor_training_data.json", "w") as f:
            json.dump(conversations, f)

        # sample number_of_samples indexes from 0 to len(conversations)
        indexes = np.random.choice(
            len(conversations), size=number_of_samples, replace=False
        )
        conversations = [conversations[i] for i in indexes]
        with open(f"{dataset_folder}/reward_training_data.json", "w") as f:
            json.dump(conversations, f)

        # rlhf dataset: keep every prefix of the conversation as a prompt
        conversations = []
        for data in self.dataset["train"]:
            current_conv = data["chosen"]

            sections = re.split("Assistant:|Human:", current_conv)
            if len(sections) >= 3:
                conv = {
                    "user_input": sections[1],
                    "completion": sections[2],
                }
                conversations.append(conv)
            if len(sections) >= 5:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                )
                conv = {
                    "user_input": user_input,
                    "completion": sections[4],
                }
                conversations.append(conv)
            if len(sections) == 7:
                user_input = (
                    f"Human:{sections[1]}\n"
                    f"Assistant:{sections[2]}\n"
                    f"Human:{sections[3]}\n"
                    f"Assistant:{sections[4]}\n"
                    f"Human:{sections[5]}\n"
                )
                conv = {
                    "user_input": user_input,
                    "completion": sections[6],
                }
                conversations.append(conv)

        with open(f"{dataset_folder}/rlhf_training_data.json", "w") as f:
            json.dump(conversations, f)

        print("Generation Completed")


if __name__ == "__main__":

    # Setup argument parser
    parser = argparse.ArgumentParser(
        prog="download_dataset.py",
        description="Download and prepare the datasets for RLHF training",
    )

    parser.add_argument(
        "dataset_name",
        help=(
            "dataset to download: SHP (stanfordnlp/SHP) "
            "or ARLHF (Anthropic/hh-rlhf)"
        ),
        choices=["SHP", "ARLHF"],
    )
    parser.add_argument(
        "-p",
        "--path",
        help="Specify the path for the dataset",
        default="./datasets",
    )
    parser.add_argument(
        "-n",
        "--number_of_samples",
        help="Specify the number of samples for the reward dataset",
        default=200,
    )

    args = parser.parse_args()
    if not os.path.exists(args.path):
        os.mkdir(args.path)

    try:
        n_samples = int(args.number_of_samples)
    except ValueError:
        raise ValueError("Number of samples should be an integer")

    if args.dataset_name == "SHP":
        dataset = StanfordNLPSHPDataset()
        dataset.save_dataset(args.path, n_samples)
    elif args.dataset_name == "ARLHF":
        dataset = AnthropicRLHF()
        dataset.save_dataset(args.path, n_samples)
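
Usage follows directly from the argparse setup above; for example:

python download_dataset.py SHP -p ./datasets -n 200
python download_dataset.py ARLHF -p ./datasets -n 200

Either invocation writes actor_training_data.json, reward_training_data.json, and rlhf_training_data.json into the given folder, matching the dataset paths referenced in the config file above.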