Commit 2326aaf (1 parent: 469c271)
Showing 7 changed files with 316 additions and 0 deletions.
Empty file.
@@ -0,0 +1,43 @@
from transformers import AutoModelForCausalLM, AutoTokenizer
from shallowflow.trainer import LocalGPUTrainer, GTX1660Config
from datasets import load_dataset


def main():
    # Configure for GTX 1660
    config = GTX1660Config(
        batch_size=8,
        mixed_precision=True,
        gradient_checkpointing=True
    )

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained("gpt2")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    # Initialize trainer with wandb tracking
    trainer = LocalGPUTrainer(
        model=model,
        tokenizer=tokenizer,
        config=config,
        project_name="shallowflow-local",
        entity="your-wandb-username"  # Optional
    )

    # Load tiny shakespeare dataset
    dataset = load_dataset("tiny_shakespeare")
    train_dataset = dataset["train"]
    eval_dataset = dataset["validation"]

    try:
        # Train with monitoring
        trainer.train(
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            num_epochs=3
        )
    finally:
        # Ensure wandb tracking is properly closed
        trainer.finish()


if __name__ == "__main__":
    main()
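Note: GTX1660Config and LocalGPUTrainer are shallowflow's own API. As a rough sketch (not part of this commit) of what the mixed_precision and gradient_checkpointing flags presumably correspond to in plain transformers/PyTorch; the shallowflow internals may differ:

# Illustration only: the standard transformers/PyTorch features that the
# GTX1660Config flags likely map onto.
import torch
from transformers import AutoModelForCausalLM, TrainingArguments

model = AutoModelForCausalLM.from_pretrained("gpt2")

# Gradient checkpointing trades compute for memory: activations are recomputed
# during backward instead of being stored, which helps on a 6 GB card.
model.gradient_checkpointing_enable()

# fp16 mixed precision roughly halves activation and gradient memory.
args = TrainingArguments(
    output_dir="outputs",
    per_device_train_batch_size=8,
    fp16=torch.cuda.is_available()
)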
@@ -0,0 +1,64 @@
import os
import argparse
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments
)
from datasets import load_dataset


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str)
    parser.add_argument("--epochs", type=int, default=3)
    parser.add_argument("--learning_rate", type=float, default=3e-4)
    return parser.parse_args()


def main():
    args = parse_args()

    # SageMaker environment variables
    training_dir = os.environ["SM_CHANNEL_TRAINING"]
    model_dir = os.environ["SM_MODEL_DIR"]
    num_gpus = os.environ["SM_NUM_GPUS"]

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    if tokenizer.pad_token is None:
        # GPT-2 has no pad token; reuse EOS so batches can be padded
        tokenizer.pad_token = tokenizer.eos_token

    # Load Tiny Shakespeare dataset and split it into 512-token chunks
    dataset = load_dataset("tiny_shakespeare", split="train")
    dataset = dataset.map(
        lambda batch: tokenizer(
            batch["text"],
            truncation=True,
            max_length=512,
            return_overflowing_tokens=True
        ),
        batched=True,
        remove_columns=dataset.column_names
    )

    # Training arguments optimized for SageMaker Training Compiler
    training_args = TrainingArguments(
        output_dir=model_dir,
        num_train_epochs=args.epochs,
        learning_rate=args.learning_rate,
        per_device_train_batch_size=16,
        optim="adamw_torch_xla",  # Optimized for Training Compiler
        dataloader_num_workers=4
    )

    # Initialize trainer; the collator builds labels for causal LM training
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        tokenizer=tokenizer,
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
    )

    # Train
    trainer.train()

    # Save model
    trainer.save_model(model_dir)


# Required for distributed training (per-process entry point for Training Compiler)
def _mp_fn(index):
    main()


if __name__ == "__main__":
    main()
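The script above reads the SM_* environment variables that the SageMaker training container injects. A hypothetical local smoke test (the paths and the train_sagemaker.py file name are assumptions, not part of this commit) would set those variables by hand before invoking it:

# Hypothetical smoke test: fake the SageMaker environment so the entry script
# can be exercised outside AWS. Paths and the script name are assumptions.
import os

os.environ.setdefault("SM_CHANNEL_TRAINING", "/tmp/sm/training")
os.environ.setdefault("SM_MODEL_DIR", "/tmp/sm/model")
os.environ.setdefault("SM_NUM_GPUS", "1")

# Then run, for example:
#   python train_sagemaker.py --model_name gpt2 --epochs 1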
@@ -0,0 +1,51 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from shallowflow import LLMTrainer, TrainingConfig
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Train GPT-2 with ShallowFlow')
    parser.add_argument('--model_name', default='gpt2', help='Model name or path')
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--learning_rate', type=float, default=3e-4)
    parser.add_argument('--num_epochs', type=int, default=3)
    parser.add_argument('--output_dir', default='outputs')
    return parser.parse_args()


def main():
    args = parse_args()

    # Initialize config
    config = TrainingConfig(
        model_name=args.model_name,
        batch_size=args.batch_size,
        learning_rate=args.learning_rate,
        num_epochs=args.num_epochs
    )

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    # Load dataset
    dataset = load_dataset("wikitext", "wikitext-2-raw-v1")

    # Initialize trainer
    trainer = LLMTrainer(
        model=model,
        tokenizer=tokenizer,
        config=config
    )

    # Train
    trainer.train(
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"]
    )

    # Save model
    trainer.save_model(args.output_dir)


if __name__ == "__main__":
    main()
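A quick sanity check after training, assuming LLMTrainer.save_model writes a standard Hugging Face checkpoint directory (that assumption is not confirmed by this commit):

# Hedged sanity check: reload the saved weights and generate a short sample.
# Assumes args.output_dir ("outputs") is a regular Hugging Face checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("outputs")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

inputs = tokenizer("The meaning of life is", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=40, do_sample=True, top_p=0.9)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))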
@@ -0,0 +1,57 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from shallowflow import LLMTrainer, TrainingConfig
from shallowflow.optimizations import LoRAConfig
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Train GPT-2 with LoRA')
    parser.add_argument('--model_name', default='gpt2')
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--lora_rank', type=int, default=8)
    parser.add_argument('--lora_alpha', type=int, default=16)
    parser.add_argument('--output_dir', default='outputs_lora')
    return parser.parse_args()


def main():
    args = parse_args()

    # Initialize configs
    training_config = TrainingConfig(
        model_name=args.model_name,
        batch_size=args.batch_size,
        use_lora=True
    )

    lora_config = LoRAConfig(
        rank=args.lora_rank,
        alpha=args.lora_alpha
    )

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    # Initialize trainer with LoRA
    trainer = LLMTrainer(
        model=model,
        tokenizer=tokenizer,
        config=training_config,
        lora_config=lora_config
    )

    # Load and process dataset
    dataset = load_dataset("wikitext", "wikitext-2-raw-v1")

    # Train
    trainer.train(
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"]
    )

    # Save LoRA weights
    trainer.save_lora_weights(args.output_dir)


if __name__ == "__main__":
    main()
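For reference, the rank and alpha arguments follow the usual LoRA parameterization: the pretrained weight W stays frozen and a trainable low-rank update scaled by alpha/rank is added on top. A minimal torch sketch of that idea, independent of shallowflow's own LoRAConfig implementation:

# LoRA in one picture: effective weight = W + (alpha / r) * B @ A, training only A and B.
import torch

d_out, d_in, r, alpha = 768, 768, 8, 16
W = torch.randn(d_out, d_in)        # frozen pretrained weight
A = torch.randn(r, d_in) * 0.01     # trainable down-projection
B = torch.zeros(d_out, r)           # trainable up-projection, starts at zero so the update starts at zero

x = torch.randn(4, d_in)
h = x @ (W + (alpha / r) * (B @ A)).T
print(h.shape)                      # torch.Size([4, 768])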
@@ -0,0 +1,56 @@
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
from shallowflow import LLMTrainer, TrainingConfig
from shallowflow.optimizations import QuantizationConfig
import argparse


def parse_args():
    parser = argparse.ArgumentParser(description='Train GPT-2 with Quantization')
    parser.add_argument('--model_name', default='gpt2')
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--bits', type=int, default=8)
    parser.add_argument('--output_dir', default='outputs_quantized')
    return parser.parse_args()


def main():
    args = parse_args()

    # Initialize configs
    training_config = TrainingConfig(
        model_name=args.model_name,
        batch_size=args.batch_size,
        use_quantization=True
    )

    quant_config = QuantizationConfig(
        bits=args.bits,
        symmetric=True
    )

    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(args.model_name)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    # Initialize trainer with quantization
    trainer = LLMTrainer(
        model=model,
        tokenizer=tokenizer,
        config=training_config,
        quantization_config=quant_config
    )

    # Load dataset
    dataset = load_dataset("wikitext", "wikitext-2-raw-v1")

    # Train
    trainer.train(
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"]
    )

    # Save quantized model
    trainer.save_quantized_model(args.output_dir)


if __name__ == "__main__":
    main()
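Similarly, the bits and symmetric options describe standard symmetric linear quantization: each tensor is mapped to signed integers with a single scale, so an 8-bit symmetric scheme uses the range [-127, 127]. A small sketch of that mapping, again independent of shallowflow's QuantizationConfig internals:

# Symmetric per-tensor quantization: q = clamp(round(x / scale), -qmax, qmax), scale = max|x| / qmax.
import torch

def quantize_symmetric(x: torch.Tensor, bits: int = 8):
    qmax = 2 ** (bits - 1) - 1                # 127 for 8-bit
    scale = x.abs().max() / qmax              # one scale for the whole tensor
    q = torch.clamp(torch.round(x / scale), -qmax, qmax).to(torch.int8)  # int8 storage assumes bits <= 8
    return q, scale

w = torch.randn(4, 4)
q, scale = quantize_symmetric(w, bits=8)
w_hat = q.float() * scale                     # dequantized approximation
print((w - w_hat).abs().max())                # worst-case rounding error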
@@ -0,0 +1,45 @@
from dataclasses import dataclass
from typing import Optional
import sagemaker
from sagemaker.huggingface import HuggingFace, TrainingCompilerConfig


@dataclass
class SageMakerConfig:
    instance_type: str = "ml.g4dn.xlarge"
    instance_count: int = 1
    use_compiler: bool = True
    max_epochs: int = 3
    learning_rate: float = 3e-4


class SageMakerManager:
    def __init__(self, config: SageMakerConfig):
        self.config = config
        self.session = sagemaker.Session()

    def setup_compiler_training(
        self,
        model_name: str,
        script_path: str
    ):
        # Configure Training Compiler
        compiler_config = TrainingCompilerConfig(enabled=True)

        # Create HuggingFace Estimator
        estimator = HuggingFace(
            entry_point=script_path,
            instance_type=self.config.instance_type,
            instance_count=self.config.instance_count,
            compiler_config=compiler_config,
            transformers_version="4.26.0",
            pytorch_version="1.13.1",
            py_version="py39",
            role=sagemaker.get_execution_role(),
            hyperparameters={
                "epochs": self.config.max_epochs,
                "learning_rate": self.config.learning_rate,
                "model_name": model_name
            }
        )

        return estimator
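SageMakerManager builds the estimator but never launches it. A hypothetical usage sketch, continuing from the class above (the entry-point file name and S3 path are assumptions, not part of this commit):

# Hypothetical launch sequence; estimator.fit() starts the SageMaker training job
# and the "training" channel becomes SM_CHANNEL_TRAINING inside the container.
config = SageMakerConfig(instance_type="ml.g4dn.xlarge", max_epochs=3)
manager = SageMakerManager(config)

estimator = manager.setup_compiler_training(
    model_name="gpt2",
    script_path="train_sagemaker.py"          # assumed entry-point file name
)
estimator.fit({"training": "s3://your-bucket/tiny-shakespeare"})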