diff --git a/README.md b/README.md
index 57dded9..89c91de 100644
--- a/README.md
+++ b/README.md
@@ -75,8 +75,8 @@ curl --location 'https://fed-ledger-prod.flock.io/api/v1/tasks/submit-result' \
 --data '{
     "task_id": 29,
     "data":{
-        "hg_repo_id": "Qwen/Qwen1.5-1.8B-Chat",
-        "base_model": "qwen1.5",
+        "hg_repo_id": "Qwen/Qwen2.5-1.5B",
+        "base_model": "qwen2.5",
         "gpu_type": "",
         "revision": ""
     }
diff --git a/training_args.yaml b/training_args.yaml
index 5ec6ff1..5359d3f 100644
--- a/training_args.yaml
+++ b/training_args.yaml
@@ -1,4 +1,4 @@
-Qwen/Qwen1.5-0.5B:
+Qwen/Qwen2.5-0.5B:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -6,7 +6,7 @@ Qwen/Qwen1.5-0.5B:
   lora_alpha: 16
   lora_dropout: 0.1
 
-Qwen/Qwen1.5-1.8B:
+Qwen/Qwen2.5-1.5B:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -14,7 +14,7 @@ Qwen/Qwen1.5-1.8B:
   lora_alpha: 8
   lora_dropout: 0.1
 
-Qwen/Qwen1.5-7B:
+Qwen/Qwen2.5-7B:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -22,7 +22,7 @@ Qwen/Qwen1.5-7B:
   lora_alpha: 8
   lora_dropout: 0.1
 
-google/gemma-2b:
+google/gemma-2-2b:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
@@ -30,7 +30,7 @@ google/gemma-2b:
   lora_alpha: 8
   lora_dropout: 0.1
 
-google/gemma-7b:
+google/gemma-2-9b:
   per_device_train_batch_size: 1
   gradient_accumulation_steps: 8
   num_train_epochs: 1
diff --git a/utils/constants.py b/utils/constants.py
index 42b9aa4..d4317e6 100644
--- a/utils/constants.py
+++ b/utils/constants.py
@@ -13,25 +13,25 @@
 }
 
 model2template = {
-    "Qwen/Qwen1.5-0.5B": qwen_template,
-    "Qwen/Qwen1.5-1.8B": qwen_template,
-    "Qwen/Qwen1.5-7B": qwen_template,
-    "google/gemma-2b": gemma_template,
-    "google/gemma-7b": gemma_template,
+    "Qwen/Qwen2.5-0.5B": qwen_template,
+    "Qwen/Qwen2.5-1.5B": qwen_template,
+    "Qwen/Qwen2.5-7B": qwen_template,
+    "google/gemma-2-2b": gemma_template,
+    "google/gemma-2-9b": gemma_template,
 }
 
 model2size = {
-    "Qwen/Qwen1.5-0.5B": 620_000_000,
-    "Qwen/Qwen1.5-1.8B": 1_840_000_000,
-    "Qwen/Qwen1.5-7B": 7_720_000_000,
-    "google/gemma-2b": 2_510_000_000,
-    "google/gemma-7b": 8_540_000_000,
+    "Qwen/Qwen2.5-0.5B": 494_000_000,
+    "Qwen/Qwen2.5-1.5B": 1_540_000_000,
+    "Qwen/Qwen2.5-7B": 7_620_000_000,
+    "google/gemma-2-2b": 2_610_000_000,
+    "google/gemma-2-9b": 9_240_000_000,
 }
 
 model2base_model = {
-    "Qwen/Qwen1.5-0.5B": "qwen1.5",
-    "Qwen/Qwen1.5-1.8B": "qwen1.5",
-    "Qwen/Qwen1.5-7B": "qwen1.5",
-    "google/gemma-2b": "gemma",
-    "google/gemma-7b": "gemma",
+    "Qwen/Qwen2.5-0.5B": "qwen2.5",
+    "Qwen/Qwen2.5-1.5B": "qwen2.5",
+    "Qwen/Qwen2.5-7B": "qwen2.5",
+    "google/gemma-2-2b": "gemma2",
+    "google/gemma-2-9b": "gemma2",
 }
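
Note: a model rename like this has to land in training_args.yaml and in all three lookup tables in utils/constants.py at once, or one file is left pointing at a stale key. A minimal sanity-check sketch (check_constants.py is a hypothetical helper, not part of this repo):

# check_constants.py -- hypothetical helper, not part of this repo.
# Confirms that every model ID appears in all three lookup tables in
# utils/constants.py, so a rename such as Qwen1.5 -> Qwen2.5 cannot
# leave one table out of sync with the others.
from utils.constants import model2base_model, model2size, model2template

keys = set(model2template)
assert keys == set(model2size), "model2size is out of sync"
assert keys == set(model2base_model), "model2base_model is out of sync"
print(f"OK: {len(keys)} models consistent across all three tables")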