Skip to content

Commit

Permalink
Add epochs argument to pipeline.yaml and finetune.py
Browse files: browse the repository at this point in the history
  • Loading branch information
matsuobasho committed Nov 20, 2023
1 parent 2a0329e commit ee0efb9
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
7 changes: 5 additions & 2 deletions codegen_model_comparison/cloud/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ inputs:
path: azureml:functions:1
batch_size: 20
seq_length: 100
epochs: 5
#checkpoint1: "stanford-crfm/alias-gpt2-small-x21"
checkpoint1: "Salesforce/codegen-350M-mono"
checkpoint2: "Deci/DeciCoder-1b"
Expand All @@ -32,6 +33,7 @@ jobs:
data: ${{parent.inputs.data}}
batch_size: ${{parent.inputs.batch_size}}
seq_length: ${{parent.inputs.seq_length}}
epochs: ${{parent.inputs.epochs}}
outputs:
model:
${{parent.outputs.model1}}
Expand All @@ -41,7 +43,7 @@ jobs:
environment: azureml:codegen_env@latest
compute: azureml:cpu-cheap
command: >-
python finetune.py --checkpoint ${{inputs.checkpoint}} --data_path ${{inputs.data}} --batch_size ${{inputs.batch_size}} --seq_length ${{inputs.seq_length}} --model_dir ${{outputs.model}}
python finetune.py --checkpoint ${{inputs.checkpoint}} --data_path ${{inputs.data}} --batch_size ${{inputs.batch_size}} --seq_length ${{inputs.seq_length}} --epochs ${{inputs.epochs}} --model_dir ${{outputs.model}}
predict_codegen:
type: command
Expand All @@ -65,6 +67,7 @@ jobs:
data: ${{parent.inputs.data}}
batch_size: ${{parent.inputs.batch_size}}
seq_length: ${{parent.inputs.seq_length}}
epochs: ${{parent.inputs.epochs}}
outputs:
model:
${{parent.outputs.model2}}
Expand All @@ -74,7 +77,7 @@ jobs:
environment: azureml:codegen_env@latest
compute: azureml:cpu-cheap
command: >-
python finetune.py --checkpoint ${{inputs.checkpoint}} --data_path ${{inputs.data}} --batch_size ${{inputs.batch_size}} --seq_length ${{inputs.seq_length}} --model_dir ${{outputs.model}}
python finetune.py --checkpoint ${{inputs.checkpoint}} --data_path ${{inputs.data}} --batch_size ${{inputs.batch_size}} --seq_length ${{inputs.seq_length}} --epochs ${{inputs.epochs}} --model_dir ${{outputs.model}}
predict_deci:
Expand Down
8 changes: 3 additions & 5 deletions codegen_model_comparison/src/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def main(args):
data_path = args.data_path
batch_size = args.batch_size
seq_length = args.seq_length
epochs = args.epochs
model_dir = args.model_dir

handler = logging.StreamHandler()
Expand Down Expand Up @@ -83,7 +84,7 @@ def main(args):
training_args = TrainingArguments(output_dir=model_dir,
gradient_checkpointing=True,
evaluation_strategy="epoch",
num_train_epochs=1)
num_train_epochs=epochs)

bleu = evaluate.load("bleu")

Expand All @@ -103,17 +104,14 @@ def main(args):

trainer.save_model()

# model_files = os.listdir(trainer.args.output_dir)
# print("Output directory:", trainer.args.output_dir)
# print("Model files in output directory:", model_files)


def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint", type=str)
parser.add_argument("--data_path", type=str)
parser.add_argument("--batch_size", type=int)
parser.add_argument("--seq_length", type=int)
parser.add_argument("--epochs", type=int)
parser.add_argument("--model_dir", type=str)
args = parser.parse_args()

Expand Down

0 comments on commit ee0efb9

Please sign in to comment.