forked from securefederatedai/openfl
Commit 4d967a3 (1 parent: 76d4639), showing 15 changed files with 1,366 additions and 0 deletions.
@@ -0,0 +1,77 @@
#%%
from transformers.models.llama import LlamaForCausalLM, LlamaTokenizer
from transformers import GenerationConfig

# Load the base Llama-2-7B chat weights and tokenizer from a local path.
#path = '/home/oamontoy/workspace/weights-llama-2-7B'
path = '/home/oamontoy/workspace/weights-llama-2-7B-chat'
tokenizer = LlamaTokenizer.from_pretrained(path)
model = LlamaForCausalLM.from_pretrained(path)
# %%
# Attach a pretrained LoRA adapter on top of the base model.
from peft import PeftModel
model = PeftModel.from_pretrained(model, "dominguesm/alpaca-lora-ptbr-7b")
# %%

def generate_prompt(instruction, input=None):
    # Build an Alpaca-style prompt, with or without an input field.
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides more context. Write a response that appropriately completes the request.
### instruction:
{instruction}
### input:
{input}
### response:"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### instruction:
{instruction}
### response:"""

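# %%
# Quick illustrative check (optional): print the prompt built for a made-up
# example instruction.
#print(generate_prompt("Summarize what a LoRA adapter does in one sentence."))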
# %%
from pprint import pprint
# %%
# Decoding settings: low temperature, nucleus cap, and 4-beam search.
generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.75,
    num_beams=4,
)

def evaluate(instruction, input=None):
    # Build the prompt, generate, and print everything after the response marker.
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256,
    )
    for s in generation_output.sequences:
        output = tokenizer.decode(s)
        pprint("response: " + output.split("### response:")[1].strip())
# %%
#evaluate(input("instruction: "))
# %%
# Manual walk-through of the same steps as evaluate(), one cell at a time.
instruction = 'print a long paragraph of gibberish'
prompt = generate_prompt(instruction, None)
inputs = tokenizer(prompt, return_tensors="pt")
inputs
#%%
input_ids = inputs["input_ids"]
# %%
generation_output = model.generate(
    input_ids=input_ids,
    generation_config=generation_config,
    return_dict_in_generate=True,
    output_scores=True,
    max_new_tokens=256,
)
# %%
for s in generation_output.sequences:
    output = tokenizer.decode(s)
    pprint("response: " + output.split("### response:")[1].strip())
# %%
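# Optional sketch: merge the LoRA weights into the base model so it can be run
# or saved without the peft wrapper; merge_and_unload() is part of peft's LoRA API.
#merged_model = model.merge_and_unload()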
@@ -0,0 +1,192 @@
#%%
import numpy as np
from datasets import load_dataset, load_metric
from peft import LoraConfig, TaskType, get_peft_model

from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          DataCollatorWithPadding, Trainer,
                          TrainingArguments)

# %%
# Task and training hyperparameters for GLUE/MRPC.
task = "mrpc"
num_epochs = 20
lr = 1e-3
batch_size = 32
dataset = load_dataset("glue", task)
padding_side = "right"

metric = load_metric('glue', task)
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

#%%
model_checkpoint = "roberta-large"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, padding_side=padding_side)
if getattr(tokenizer, "pad_token_id") is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

def tokenize_function(examples):
    # max_length=None => use the model max length (it's actually the default)
    outputs = tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, max_length=None)
    return outputs

tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["idx", "sentence1", "sentence2"],
)
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch")
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="longest")

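# %%
# Illustrative sanity check (optional): collate a few tokenized training
# examples and look at the padded batch shapes before training.
#batch = data_collator([tokenized_datasets["train"][i] for i in range(4)])
#print({k: v.shape for k, v in batch.items()})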
# %%
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, return_dict=True)
print(len(list(model.named_parameters())))
orig_layers = [n for n, p in model.named_parameters()]

# LoRA configuration for sequence classification.
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias="all"
)

#%%

model = get_peft_model(model, peft_config)
print(len(list(model.named_parameters())))
withpeft_layers = [n.replace('base_model.model.', '') for n, p in model.named_parameters()]
model.print_trainable_parameters()
BATCH_SIZE = 32
NUM_EPOCHS = 5
LR = 1e-3
WEIGHT_DECAY = 0.01
#%%

training_args = TrainingArguments(
    output_dir="roberta-large-lora-seq1_tests",
    learning_rate=LR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_EPOCHS,
    weight_decay=WEIGHT_DECAY,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"].select(range(200)),
    eval_dataset=tokenized_datasets["test"].select(range(200)),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

trainer.train()
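#%%
# Optional sketch: a final evaluation pass over the eval split with the Trainer
# API; for MRPC the returned metric dict includes accuracy and F1.
#print(trainer.evaluate())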
#%%
#model.save_pretrained('saved_model')
# %%

# Second variant: the same LoRA fine-tuning written as a plain PyTorch training
# loop instead of the Trainer API.
from accelerate import Accelerator
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    PretrainedConfig,
    SchedulerType,
    default_data_collator,
    get_scheduler,
)
import torch
from tqdm import tqdm

#%%
# Re-create a fresh base model so the manual loop starts from scratch.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint, return_dict=True)
print(len(list(model.named_parameters())))
orig_layers = [n for n, p in model.named_parameters()]

peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS, inference_mode=False, r=16, lora_alpha=16, lora_dropout=0.1, bias="all"
)

#%%

model = get_peft_model(model, peft_config)
print(len(list(model.named_parameters())))
withpeft_layers = [n.replace('base_model.model.', '') for n, p in model.named_parameters()]
model.print_trainable_parameters()
BATCH_SIZE = 32
NUM_EPOCHS = 1
LR = 1e-3
WEIGHT_DECAY = 0.01
#%%

#accelerator = Accelerator()
#accelerator.wait_for_everyone()

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

# Exclude biases and LayerNorm weights from weight decay.
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
        "weight_decay": WEIGHT_DECAY,
    },
    {
        "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
        "weight_decay": 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=LR)

train_dataloader = DataLoader(tokenized_datasets["train"].select(range(200)), collate_fn=data_collator, batch_size=BATCH_SIZE)
valid_dataloader = DataLoader(tokenized_datasets["test"].select(range(200)), collate_fn=data_collator, batch_size=BATCH_SIZE)

lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=len(train_dataloader) * NUM_EPOCHS
)
def eval_metrics(model, dataloader, device='cpu'):
    # Accumulate predictions over the dataloader and return the GLUE metric dict.
    model.eval()
    for batch in dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}

        with torch.no_grad():
            outputs = model(**batch)

        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)
        metric.add_batch(predictions=predictions, references=batch["labels"])
    model.train()
    return metric.compute()

#model, optimizer, train_dataloader, valid_dataloader, lr_scheduler = accelerator.prepare(
#    model, optimizer, train_dataloader, valid_dataloader, lr_scheduler
#    )

for epoch in range(NUM_EPOCHS):
    model.train()
    losses = []
    for batch in tqdm(train_dataloader):
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        losses.append(loss.detach().item())

    metrics = eval_metrics(model, valid_dataloader)
    print(metrics, 'loss: ', np.mean(losses))
# %%
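# Optional sketch: persist only the trained LoRA adapter and reload it later on
# top of a fresh roberta-large base model. The directory name is just an example.
#from peft import PeftModel
#model.save_pretrained("roberta-large-lora-mrpc-adapter")
#base_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, return_dict=True)
#model = PeftModel.from_pretrained(base_model, "roberta-large-lora-mrpc-adapter")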