[lora] Add tests in multi LoRA integration test
xyang16 committed Nov 15, 2024
1 parent c97d8a2 commit 9fc3a7e
Showing 2 changed files with 27 additions and 14 deletions.
33 changes: 23 additions & 10 deletions tests/integration/llm/prepare.py
@@ -428,6 +428,9 @@
"option.dtype": "fp16",
"option.adapters": "adapters",
"option.enable_lora": "true",
"option.max_loras": 2,
"option.max_lora_rank": 16,
"option.fully_sharded_loras": "true",
"adapter_ids": ["tloen/alpaca-lora-7b", "22h/cabrita-lora-v0-1"],
"adapter_names": ["english-alpaca", "portugese-alpaca"],
"option.gpu_memory_utilization": "0.8",
@@ -439,6 +442,7 @@
"option.dtype": "fp16",
"option.adapters": "adapters",
"option.enable_lora": "true",
"option.max_loras": 6,
"option.max_cpu_loras": 8,
"adapter_ids": ["tloen/alpaca-lora-7b"] * 20,
"adapter_names": [f"english-alpaca-{i}" for i in range(20)],
@@ -457,14 +461,14 @@
"max",
"option.task":
"text-generation",
"option.dtype":
"fp16",
"option.adapters":
"adapters",
"option.quantize":
"awq",
"option.enable_lora":
"true",
"option.max_lora_rank":
64,
"option.adapters":
"adapters",
"adapter_ids": [
"UnderstandLing/llama-2-13b-chat-fr",
"UnderstandLing/llama-2-13b-chat-es"
@@ -482,14 +486,14 @@
"text-generation",
"option.dtype":
"fp16",
"option.adapters":
"adapters",
"option.enable_lora":
"true",
"option.max_lora_rank":
64,
"option.max_loras":
2,
"option.adapters":
"adapters",
"adapter_ids": [
"UnderstandLing/Mistral-7B-Instruct-v0.2-es",
"UnderstandLing/Mistral-7B-Instruct-v0.2-de"
@@ -505,16 +509,18 @@
"max",
"option.task":
"text-generation",
"option.dtype":
"fp16",
"option.adapters":
"adapters",
"option.quantize":
"awq",
"option.enable_lora":
"true",
"option.max_lora_rank":
64,
"option.max_loras":
2,
"option.lora_dtype":
"float16",
"option.adapters":
"adapters",
"adapter_ids": [
"UnderstandLing/Mistral-7B-Instruct-v0.2-es",
"UnderstandLing/Mistral-7B-Instruct-v0.2-de"
@@ -538,6 +544,8 @@
"true",
"option.max_lora_rank":
64,
"option.long_lora_scaling_factors":
"4.0",
"adapter_ids": [
"UnderstandLing/Llama-3-8B-Instruct-fr",
"UnderstandLing/Llama-3-8B-Instruct-es",
@@ -712,6 +720,7 @@
"option.dtype": "fp16",
"option.adapters": "adapters",
"option.enable_lora": "true",
"option.max_loras": 2,
"adapter_ids": ["tloen/alpaca-lora-7b", "22h/cabrita-lora-v0-1"],
"adapter_names": ["english-alpaca", "portugese-alpaca"],
"option.gpu_memory_utilization": "0.8",
@@ -781,6 +790,8 @@
64,
"option.max_loras":
2,
"option.lora_dtype":
"float16",
"adapter_ids": [
"UnderstandLing/Mistral-7B-Instruct-v0.2-es",
"UnderstandLing/Mistral-7B-Instruct-v0.2-de"
@@ -816,6 +827,8 @@
"true",
"option.max_lora_rank":
64,
"option.long_lora_scaling_factors":
"4.0",
"adapter_ids": [
"UnderstandLing/Llama-3-8B-Instruct-fr",
"UnderstandLing/Llama-3-8B-Instruct-es",
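For context, the option.* keys added above map onto vLLM's multi-LoRA engine arguments: max_loras bounds how many adapters can be active in a batch on GPU, max_cpu_loras sizes the CPU-side adapter cache, max_lora_rank caps the rank an adapter may declare, fully_sharded_loras shards the LoRA computation across tensor-parallel ranks, lora_dtype overrides the adapter compute dtype, and long_lora_scaling_factors admits long-context (RoPE-scaled) adapters. A minimal sketch of a config entry combining them, following the dict conventions in prepare.py (the entry name and model_id here are illustrative, not part of this commit):

    "llama-7b-multi-lora-example": {
        "option.model_id": "huggyllama/llama-7b",  # illustrative; not from this commit
        "option.task": "text-generation",
        "option.dtype": "fp16",
        "option.adapters": "adapters",
        "option.enable_lora": "true",
        "option.max_loras": 2,                 # at most 2 adapters live per batch
        "option.max_cpu_loras": 8,             # up to 8 adapters cached on CPU
        "option.max_lora_rank": 16,            # reject adapters with rank > 16
        "option.fully_sharded_loras": "true",  # shard LoRA matmuls across TP ranks
        "adapter_ids": ["tloen/alpaca-lora-7b", "22h/cabrita-lora-v0-1"],
        "adapter_names": ["english-alpaca", "portugese-alpaca"],
    }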
8 changes: 4 additions & 4 deletions tests/integration/tests.py
@@ -634,13 +634,13 @@ def test_llama_68m_speculative_eagle(self):
@pytest.mark.gpu_4
class TestVllmLora:

def test_lora_unmerged(self):
def test_lora_llama2_7b(self):
with Runner('lmi', 'llama-7b-unmerged-lora') as r:
prepare.build_vllm_model("llama-7b-unmerged-lora")
r.launch()
client.run("vllm_adapters llama-7b-unmerged-lora".split())

def test_lora_unmerged_overflow(self):
def test_lora_llama2_7b_overflow(self):
with Runner('lmi', 'llama-7b-unmerged-lora-overflow') as r:
prepare.build_vllm_model("llama-7b-unmerged-lora-overflow")
r.launch()
@@ -676,13 +676,13 @@ def test_lora_llama3_8b(self):
@pytest.mark.gpu_4
class TestLmiDistLora:

def test_lora_unmerged(self):
def test_lora_llama2_7b(self):
with Runner('lmi', 'llama-7b-unmerged-lora') as r:
prepare.build_lmi_dist_model("llama-7b-unmerged-lora")
r.launch()
client.run("lmi_dist_adapters llama-7b-unmerged-lora".split())

def test_lora_unmerged_overflow(self):
def test_lora_llama2_7b_overflow(self):
with Runner('lmi', 'llama-7b-unmerged-lora-overflow') as r:
prepare.build_lmi_dist_model("llama-7b-unmerged-lora-overflow")
r.launch()
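The renames above make the model under test explicit in the test name; both classes drive the same flow and differ only in the build helper and the client handler. An annotated sketch of that shared flow (comments are mine; names as in the diff):

    def test_lora_llama2_7b(self):
        # Runner manages the container lifecycle for the given image and model name.
        with Runner('lmi', 'llama-7b-unmerged-lora') as r:
            # Materialize the "llama-7b-unmerged-lora" entry from prepare.py
            # (TestLmiDistLora calls prepare.build_lmi_dist_model instead).
            prepare.build_vllm_model("llama-7b-unmerged-lora")
            r.launch()
            # Exercise adapter-routed requests against the running endpoint
            # (TestLmiDistLora passes "lmi_dist_adapters ..." instead).
            client.run("vllm_adapters llama-7b-unmerged-lora".split())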
