[lora] Add tests in multi LoRA integration test
xyang16 committed Nov 15, 2024
1 parent c97d8a2 commit 9fc3a7e
Showing 2 changed files with 27 additions and 14 deletions.
33 changes: 23 additions & 10 deletions tests/integration/llm/prepare.py
@@ -428,6 +428,9 @@
"option.dtype": "fp16",
"option.adapters": "adapters",
"option.enable_lora": "true",
"option.max_loras": 2,
"option.max_lora_rank": 16,
"option.fully_sharded_loras": "true",
"adapter_ids": ["tloen/alpaca-lora-7b", "22h/cabrita-lora-v0-1"],
"adapter_names": ["english-alpaca", "portugese-alpaca"],
"option.gpu_memory_utilization": "0.8",
@@ -439,6 +442,7 @@
"option.dtype": "fp16",
"option.adapters": "adapters",
"option.enable_lora": "true",
"option.max_loras": 6,
"option.max_cpu_loras": 8,
"adapter_ids": ["tloen/alpaca-lora-7b"] * 20,
"adapter_names": [f"english-alpaca-{i}" for i in range(20)],
@@ -457,14 +461,14 @@
"max",
"option.task":
"text-generation",
"option.dtype":
"fp16",
"option.adapters":
"adapters",
"option.quantize":
"awq",
"option.enable_lora":
"true",
"option.max_lora_rank":
64,
"option.adapters":
"adapters",
"adapter_ids": [
"UnderstandLing/llama-2-13b-chat-fr",
"UnderstandLing/llama-2-13b-chat-es"
@@ -482,14 +486,14 @@
"text-generation",
"option.dtype":
"fp16",
"option.adapters":
"adapters",
"option.enable_lora":
"true",
"option.max_lora_rank":
64,
"option.max_loras":
2,
"option.adapters":
"adapters",
"adapter_ids": [
"UnderstandLing/Mistral-7B-Instruct-v0.2-es",
"UnderstandLing/Mistral-7B-Instruct-v0.2-de"
@@ -505,16 +509,18 @@
"max",
"option.task":
"text-generation",
"option.dtype":
"fp16",
"option.adapters":
"adapters",
"option.quantize":
"awq",
"option.enable_lora":
"true",
"option.max_lora_rank":
64,
"option.max_loras":
2,
"option.lora_dtype":
"float16",
"option.adapters":
"adapters",
"adapter_ids": [
"UnderstandLing/Mistral-7B-Instruct-v0.2-es",
"UnderstandLing/Mistral-7B-Instruct-v0.2-de"
@@ -538,6 +544,8 @@
"true",
"option.max_lora_rank":
64,
"option.long_lora_scaling_factors":
"4.0",
"adapter_ids": [
"UnderstandLing/Llama-3-8B-Instruct-fr",
"UnderstandLing/Llama-3-8B-Instruct-es",
@@ -712,6 +720,7 @@
"option.dtype": "fp16",
"option.adapters": "adapters",
"option.enable_lora": "true",
"option.max_loras": 2,
"adapter_ids": ["tloen/alpaca-lora-7b", "22h/cabrita-lora-v0-1"],
"adapter_names": ["english-alpaca", "portugese-alpaca"],
"option.gpu_memory_utilization": "0.8",
@@ -781,6 +790,8 @@
64,
"option.max_loras":
2,
"option.lora_dtype":
"float16",
"adapter_ids": [
"UnderstandLing/Mistral-7B-Instruct-v0.2-es",
"UnderstandLing/Mistral-7B-Instruct-v0.2-de"
@@ -816,6 +827,8 @@
"true",
"option.max_lora_rank":
64,
"option.long_lora_scaling_factors":
"4.0",
"adapter_ids": [
"UnderstandLing/Llama-3-8B-Instruct-fr",
"UnderstandLing/Llama-3-8B-Instruct-es",
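For context, the option.* keys added above map onto vLLM's multi-LoRA engine arguments: max_loras bounds how many adapters can be active in a batch on GPU, max_cpu_loras sizes the CPU-side adapter cache, max_lora_rank caps the rank an adapter may declare, fully_sharded_loras shards the LoRA computation across tensor-parallel ranks, lora_dtype overrides the adapter compute dtype, and long_lora_scaling_factors admits long-context (RoPE-scaled) adapters. A minimal sketch of a config entry combining them, following the dict conventions in prepare.py (the entry name and model_id here are illustrative, not part of this commit):

    "llama-7b-multi-lora-example": {
        "option.model_id": "huggyllama/llama-7b",  # illustrative; not from this commit
        "option.task": "text-generation",
        "option.dtype": "fp16",
        "option.adapters": "adapters",
        "option.enable_lora": "true",
        "option.max_loras": 2,                 # at most 2 adapters live per batch
        "option.max_cpu_loras": 8,             # up to 8 adapters cached on CPU
        "option.max_lora_rank": 16,            # reject adapters with rank > 16
        "option.fully_sharded_loras": "true",  # shard LoRA matmuls across TP ranks
        "adapter_ids": ["tloen/alpaca-lora-7b", "22h/cabrita-lora-v0-1"],
        "adapter_names": ["english-alpaca", "portugese-alpaca"],
    }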
8 changes: 4 additions & 4 deletions tests/integration/tests.py
@@ -634,13 +634,13 @@ def test_llama_68m_speculative_eagle(self):
@pytest.mark.gpu_4
class TestVllmLora:

def test_lora_unmerged(self):
def test_lora_llama2_7b(self):
with Runner('lmi', 'llama-7b-unmerged-lora') as r:
prepare.build_vllm_model("llama-7b-unmerged-lora")
r.launch()
client.run("vllm_adapters llama-7b-unmerged-lora".split())

def test_lora_unmerged_overflow(self):
def test_lora_llama2_7b_overflow(self):
with Runner('lmi', 'llama-7b-unmerged-lora-overflow') as r:
prepare.build_vllm_model("llama-7b-unmerged-lora-overflow")
r.launch()
@@ -676,13 +676,13 @@ def test_lora_llama3_8b(self):
@pytest.mark.gpu_4
class TestLmiDistLora:

def test_lora_unmerged(self):
def test_lora_llama2_7b(self):
with Runner('lmi', 'llama-7b-unmerged-lora') as r:
prepare.build_lmi_dist_model("llama-7b-unmerged-lora")
r.launch()
client.run("lmi_dist_adapters llama-7b-unmerged-lora".split())

def test_lora_unmerged_overflow(self):
def test_lora_llama2_7b_overflow(self):
with Runner('lmi', 'llama-7b-unmerged-lora-overflow') as r:
prepare.build_lmi_dist_model("llama-7b-unmerged-lora-overflow")
r.launch()
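The renames above make the model under test explicit in the test name; both classes drive the same flow and differ only in the build helper and the client handler. An annotated sketch of that shared flow (comments are mine; names as in the diff):

    def test_lora_llama2_7b(self):
        # Runner manages the container lifecycle for the given image and model name.
        with Runner('lmi', 'llama-7b-unmerged-lora') as r:
            # Materialize the "llama-7b-unmerged-lora" entry from prepare.py
            # (TestLmiDistLora calls prepare.build_lmi_dist_model instead).
            prepare.build_vllm_model("llama-7b-unmerged-lora")
            r.launch()
            # Exercise adapter-routed requests against the running endpoint
            # (TestLmiDistLora passes "lmi_dist_adapters ..." instead).
            client.run("vllm_adapters llama-7b-unmerged-lora".split())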
