Add some fixes #4

Open
wants to merge 11 commits into main
6 changes: 5 additions & 1 deletion masked_language_modeling.py
@@ -97,10 +97,14 @@ def group_texts(examples):
 print("Training...")
 trainer.train()
 
+# Use the evaluate() method to evaluate the model and get its perplexity:
+eval_results = trainer.evaluate()
+print(f"Perplexity: {math.exp(eval_results['eval_loss']):.2f}")
+
 # Performing inference
 text = "The Milky Way is a <mask> galaxy."
 # We need to tokenize the inputs and turn them to PyTorch tensors
-encoded_input = tokenizer(text, return_tensors="pt").input_ids
+encoded_input = tokenizer(text, return_tensors="pt")
 
 # To move the batch to the right device automatically, use `PartialState().device`
 # which will always work no matter the environment
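Dropping `.input_ids` here keeps the whole `BatchEncoding`, so the batch can be moved to the right device as one object and unpacked straight into the model. A minimal sketch of how the inference might continue after this hunk, assuming the `tokenizer`/`model` pair trained above (the mask-lookup code and `mask_index` name are illustrative, not part of the PR):

    import torch
    from accelerate import PartialState

    # Move every tensor in the batch (input_ids, attention_mask, ...) at once
    encoded_input = tokenizer(text, return_tensors="pt").to(PartialState().device)
    with torch.no_grad():
        logits = model(**encoded_input).logits

    # Locate the <mask> position and decode the highest-scoring token
    mask_index = (encoded_input.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]
    print(tokenizer.decode(logits[0, mask_index].argmax(dim=-1)))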
2 changes: 1 addition & 1 deletion multiple_choice.py
@@ -19,7 +19,7 @@
 
 # Load dataset
 print(f"Downloading dataset ({dataset_name})")
-dataset = load_dataset(dataset_name, "regular", split="train[:8%]")
+dataset = load_dataset(dataset_name, "regular", split="train[:8%]", trust_remote_code=True)
 dataset = dataset.train_test_split(test_size=0.2)
 
 # Tokenize the dataset
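Recent `datasets` releases require an explicit opt-in before running a dataset's bundled loading script (older versions only warned), so this flag keeps the script-based download working in non-interactive runs. A sketch of the opt-in, where `"swag"` is an illustrative value standing in for the `dataset_name` defined earlier in the file:

    from datasets import load_dataset

    # trust_remote_code=True allows the dataset's own loading script to run;
    # leave it unset for sources you do not trust
    dataset = load_dataset("swag", "regular", split="train[:8%]",
                           trust_remote_code=True)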
9 changes: 9 additions & 0 deletions requirements.txt
@@ -0,0 +1,9 @@
+accelerate==0.26.1
+datasets==2.16.1
+evaluate==0.4.1
+numpy==1.23.5
+rouge-score==0.1.2
+sacrebleu==2.4.0
+seqeval==1.2.2
+torch==2.1.0
+transformers==4.35.2
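Pinning matters for these scripts: for example, transformers 4.35 still accepts the `evaluation_strategy` argument used in the training-arguments blocks below, which later releases renamed to `eval_strategy`. The whole set installs in one step with `pip install -r requirements.txt`.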
3 changes: 2 additions & 1 deletion sequence_classification.py
@@ -2,6 +2,7 @@
 # for sequence classification. Based on the Tasks documentation
 # originally from: https://hf.co/docs/transformers/tasks/sequence_classification
 import evaluate
+from accelerate import PartialState
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -96,7 +97,7 @@ def compute_metrics(evaluation_preds):
 # Performing inference
 text = "This was a masterpiece. Not completely faithful to the books, but enthralling from beginning to end. Might be my favorite of the three."
 # We need to tokenize the inputs and turn them to PyTorch tensors
-encoded_input = tokenizer(text, return_tensors="pt").to("cuda")
+encoded_input = tokenizer(text, return_tensors="pt").to(PartialState().device)
 
 # Then we can perform raw torch inference:
 print("Performing inference...")
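Replacing the hard-coded `"cuda"` with `PartialState().device` is what lets this inference block run unmodified on CPU-only machines, Apple silicon, and multi-process launches. A minimal sketch of what the call resolves to (the device order described here reflects accelerate's defaults and is stated as an assumption):

    from accelerate import PartialState

    # Lazily initializes the process state and returns the device this process
    # should use (e.g. cuda:N under a GPU launch, mps or cpu otherwise)
    device = PartialState().device
    encoded_input = tokenizer(text, return_tensors="pt").to(device)

The model weights already live on that device after `trainer.train()`, so inputs and parameters line up without a manual `model.to(...)`.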
7 changes: 4 additions & 3 deletions summarization.py
@@ -2,6 +2,7 @@
 # for sequence classification. Based on the Tasks documentation
 # originally from: https://hf.co/docs/transformers/tasks/sequence_classification
 import evaluate
+from accelerate import PartialState
 import numpy as np
 from datasets import load_dataset
 from transformers import (
@@ -68,8 +69,8 @@ def compute_metrics(eval_pred):
 training_args = Seq2SeqTrainingArguments(
     output_dir="results/summarization", # Where weights are stored
     learning_rate=2e-5, # The learning rate during training
-    per_device_train_batch_size=16, # Number of samples per batch during training
-    per_device_eval_batch_size=16, # Number of samples per batch during evaluation
+    per_device_train_batch_size=8, # Number of samples per batch during training
+    per_device_eval_batch_size=8, # Number of samples per batch during evaluation
     num_train_epochs=4, # How many iterations through the dataloaders should be done
     weight_decay=0.01, # Regularization penalization
     evaluation_strategy="epoch", # How often metrics on the evaluation dataset should be computed
@@ -97,7 +98,7 @@ def compute_metrics(eval_pred):
 # Performing inference
 text = "summarize: The Inflation Reduction Act lowers prescription drug costs, health care costs, and energy costs. It's the most aggressive action on tackling the climate crisis in American history, which will lift up American workers and create good-paying, union jobs across the country. It'll lower the deficit and ask the ultra-wealthy and corporations to pay their fair share. And no one making under $400,000 per year will pay a penny more in taxes."
 # We need to tokenize the inputs and turn them to PyTorch tensors
-encoded_input = tokenizer(text, return_tensors="pt").input_ids
+encoded_input = tokenizer(text, return_tensors="pt").input_ids.to(PartialState().device)
 
 # Then we can perform inference using `model.generate`:
 print("Performing inference...")
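Halving the per-device batch size from 16 to 8 trades some throughput for lower peak memory during training. For inference, only the `input_ids` tensor is kept because `model.generate` accepts it positionally; a sketch of the generation step that follows this hunk, where `max_new_tokens=100` is an illustrative value, not taken from the PR:

    # Generate a summary from the device-resident input_ids and decode it
    outputs = model.generate(encoded_input, max_new_tokens=100)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))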
5 changes: 3 additions & 2 deletions token_classification.py
@@ -2,6 +2,7 @@
 # for token classification. Based on the Tasks documentation
 # originally from: https://hf.co/docs/transformers/tasks/token_classification
 import evaluate
+from accelerate import PartialState
 import numpy as np
 import torch
 from datasets import load_dataset
@@ -143,7 +144,7 @@ def compute_metrics(evaluation_preds):
 # Performing inference
 text = "The Golden State Warriors are an American professional basketball team based in San Francisco."
 # We need to tokenize the inputs and turn them to PyTorch tensors
-encoded_input = tokenizer(text, return_tensors="pt")
+encoded_input = tokenizer(text, return_tensors="pt").to(PartialState().device)
 
 # Then we can perform raw torch inference:
 print("Performing inference...")
@@ -153,5 +154,5 @@ def compute_metrics(evaluation_preds):
 
 # Finally, decode our outputs
 predictions = logits.argmax(dim=2)
-print(f"Prediction: {[id2label[pred] for pred in predictions[0]]}")
+print(f"Prediction: {[id2label[pred.item()] for pred in predictions[0]]}")
 # Can also use `model.config.id2label` instead
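The `.item()` change fixes a real crash: iterating over `predictions[0]` yields 0-d tensors, and a dict keyed by plain ints will not match them, since tensors hash by identity rather than by value. A minimal reproduction sketch (the label map is illustrative):

    import torch

    id2label = {0: "O", 1: "B-PER"}    # illustrative label map
    pred = torch.tensor([1, 0])[0]     # a 0-d tensor, not an int
    # id2label[pred]                   # KeyError: tensor(1)
    print(id2label[pred.item()])       # "B-PER"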
3 changes: 2 additions & 1 deletion translation.py
@@ -3,6 +3,7 @@
 # originally from: https://hf.co/docs/transformers/tasks/translation
 import evaluate
 import numpy as np
+from accelerate import PartialState
 from datasets import load_dataset
 from transformers import (
     AutoModelForSeq2SeqLM,
@@ -113,7 +114,7 @@ def compute_metrics(eval_preds):
 # Performing inference
 text = "translate English to French: Legumes share resources with nitrogen-fixing bacteria."
 # We need to tokenize the inputs and turn them to PyTorch tensors
-encoded_input = tokenizer(text, return_tensors="pt").input_ids
+encoded_input = tokenizer(text, return_tensors="pt").input_ids.to(PartialState().device)
 
 # Then we can perform inference using `model.generate()`:
 print("Performing inference...")