From 0b6dfa5ba813a49950333f4d9b2ff7d573980bf5 Mon Sep 17 00:00:00 2001
From: Um Changyong
Date: Wed, 30 Oct 2024 08:18:10 +0900
Subject: [PATCH 1/6] feat: update vllm module

---
 libs/community/langchain_community/llms/vllm.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/libs/community/langchain_community/llms/vllm.py b/libs/community/langchain_community/llms/vllm.py
index dc8a7a76d24ed..b9b1d6a2b26c9 100644
--- a/libs/community/langchain_community/llms/vllm.py
+++ b/libs/community/langchain_community/llms/vllm.py
@@ -124,6 +124,12 @@ def _generate(
     ) -> LLMResult:
         """Run the LLM on the given prompt and input."""
         from vllm import SamplingParams
+        from vllm.lora.request import LoRARequest
+
+        lora_request = kwargs.pop("lora_request", None)
+
+        if lora_request is not None and not isinstance(lora_request, LoRARequest):
+            raise TypeError("lora_request must be an instance of LoRARequest")
 
         # build sampling parameters
         params = {**self._default_params, **kwargs, "stop": stop}
@@ -133,9 +139,13 @@ def _generate(
         sample_params = SamplingParams(
             **{k: v for k, v in params.items() if k in known_keys}
         )
-
+
+        # add lora_request when calling the model
         # call the model
-        outputs = self.client.generate(prompts, sample_params)
+        if lora_request:
+            outputs = self.client.generate(prompts, sample_params, lora_request=lora_request)
+        else:
+            outputs = self.client.generate(prompts, sample_params)
 
         generations = []
         for output in outputs:

From 0cb15d97c97fbb7f33e8b9fae203ed1639d7f3a4 Mon Sep 17 00:00:00 2001
From: Um Changyong
Date: Wed, 30 Oct 2024 10:38:41 +0900
Subject: [PATCH 2/6] update: remove unused sentence and reformat codes

---
 libs/community/langchain_community/llms/vllm.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libs/community/langchain_community/llms/vllm.py b/libs/community/langchain_community/llms/vllm.py
index b9b1d6a2b26c9..a008c89071def 100644
--- a/libs/community/langchain_community/llms/vllm.py
+++ b/libs/community/langchain_community/llms/vllm.py
@@ -127,7 +127,7 @@ def _generate(
         from vllm.lora.request import LoRARequest
 
         lora_request = kwargs.pop("lora_request", None)
-
+
         if lora_request is not None and not isinstance(lora_request, LoRARequest):
             raise TypeError("lora_request must be an instance of LoRARequest")
 
@@ -139,11 +139,12 @@ def _generate(
         sample_params = SamplingParams(
             **{k: v for k, v in params.items() if k in known_keys}
         )
-
-        # add lora_request when calling the model
+
         # call the model
         if lora_request:
-            outputs = self.client.generate(prompts, sample_params, lora_request=lora_request)
+            outputs = self.client.generate(
+                prompts, sample_params, lora_request=lora_request
+            )
         else:
             outputs = self.client.generate(prompts, sample_params)
 

From 14e1993c3321ed5d1cc6e0c562fc4bf1033bd699 Mon Sep 17 00:00:00 2001
From: Um Changyong
Date: Wed, 30 Oct 2024 14:13:09 +0900
Subject: [PATCH 3/6] update: validation in vllm framework

---
 libs/community/langchain_community/llms/vllm.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/libs/community/langchain_community/llms/vllm.py b/libs/community/langchain_community/llms/vllm.py
index a008c89071def..66a0f17756b99 100644
--- a/libs/community/langchain_community/llms/vllm.py
+++ b/libs/community/langchain_community/llms/vllm.py
@@ -124,13 +124,9 @@ def _generate(
     ) -> LLMResult:
         """Run the LLM on the given prompt and input."""
         from vllm import SamplingParams
-        from vllm.lora.request import LoRARequest
 
         lora_request = kwargs.pop("lora_request", None)
 
-        if lora_request is not None and not isinstance(lora_request, LoRARequest):
-            raise TypeError("lora_request must be an instance of LoRARequest")
-
         # build sampling parameters
         params = {**self._default_params, **kwargs, "stop": stop}

From a4a25c85cafba723756c71bd5038ce819eb2ecd0 Mon Sep 17 00:00:00 2001
From: Um Changyong
Date: Thu, 31 Oct 2024 20:41:33 +0900
Subject: [PATCH 4/6] docs: Add content for Lora adapter in the VLLM page

---
 docs/docs/integrations/llms/vllm.ipynb | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/docs/docs/integrations/llms/vllm.ipynb b/docs/docs/integrations/llms/vllm.ipynb
index 6d45b102dc1ac..a4637bf13f138 100644
--- a/docs/docs/integrations/llms/vllm.ipynb
+++ b/docs/docs/integrations/llms/vllm.ipynb
@@ -246,6 +246,33 @@
     ")\n",
     "print(llm.invoke(\"Rome is\"))"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd3f0f51",
+   "metadata": {},
+   "source": [
+    "## LoRA adapter\n",
+    "LoRA adapters can be used with any vLLM model that implements `SupportsLoRA`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2682ca6c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_community.llms import VLLM\n",
+    "from vllm.lora.request import LoRARequest\n",
+    "\n",
+    "llm = VLLM(model=\"meta-llama/Llama-2-7b-hf\", enable_lora=True)\n",
+    "\n",
+    "LoRA_ADAPTER_PATH = \"path/to/adapter\"\n",
+    "lora_adapter = LoRARequest(\"lora_adapter\", 1, LoRA_ADAPTER_PATH)\n",
+    "\n",
+    "print(llm.invoke(\"What are some popular Korean street foods?\", lora_request=lora_adapter))\n"
+   ]
   }
  ],
 "metadata": {

From e0724d5e4014cc8031cb6d50cfcfee8c504d030f Mon Sep 17 00:00:00 2001
From: Um Changyong
Date: Thu, 31 Oct 2024 21:00:03 +0900
Subject: [PATCH 5/6] fix: modify update code to comply with linting

---
 docs/docs/integrations/llms/vllm.ipynb | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/docs/integrations/llms/vllm.ipynb b/docs/docs/integrations/llms/vllm.ipynb
index a4637bf13f138..c7127e04f9fe4 100644
--- a/docs/docs/integrations/llms/vllm.ipynb
+++ b/docs/docs/integrations/llms/vllm.ipynb
@@ -271,7 +271,9 @@
    "LoRA_ADAPTER_PATH = \"path/to/adapter\"\n",
    "lora_adapter = LoRARequest(\"lora_adapter\", 1, LoRA_ADAPTER_PATH)\n",
    "\n",
-   "print(llm.invoke(\"What are some popular Korean street foods?\", lora_request=lora_adapter))\n"
+   "print(\n",
+   "    llm.invoke(\"What are some popular Korean street foods?\", lora_request=lora_adapter)\n",
+   ")\n"
    ]
   }
  ],

From 30eb2cd074ab40544ec36cdea145429724cfde54 Mon Sep 17 00:00:00 2001
From: Um Changyong
Date: Thu, 31 Oct 2024 12:39:17 +0000
Subject: [PATCH 6/6] fix: remove blank line

---
 docs/docs/integrations/llms/vllm.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/docs/integrations/llms/vllm.ipynb b/docs/docs/integrations/llms/vllm.ipynb
index c7127e04f9fe4..1e1baff963161 100644
--- a/docs/docs/integrations/llms/vllm.ipynb
+++ b/docs/docs/integrations/llms/vllm.ipynb
@@ -273,7 +273,7 @@
    "\n",
    "print(\n",
    "    llm.invoke(\"What are some popular Korean street foods?\", lora_request=lora_adapter)\n",
-   ")\n"
+   ")"
    ]
  }
 ],
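
Usage note: taken together, the series threads an optional lora_request kwarg from
VLLM.invoke through _generate into vllm's LLM.generate, and leaves validation of the
request object to vLLM itself (the explicit isinstance check was dropped in PATCH 3/6).
Below is a minimal end-to-end sketch of the intended call pattern, mirroring the
notebook cell added in PATCH 4/6; the base model name and adapter path are placeholders,
and enable_lora=True is assumed to be required so the engine can load adapter weights:

    from langchain_community.llms import VLLM
    from vllm.lora.request import LoRARequest

    # enable_lora=True tells the underlying vLLM engine to reserve LoRA slots;
    # without it, vLLM rejects lora_request at generation time.
    llm = VLLM(model="meta-llama/Llama-2-7b-hf", enable_lora=True)

    # LoRARequest(adapter name, unique integer id, local adapter path)
    lora_adapter = LoRARequest("lora_adapter", 1, "path/to/adapter")

    # lora_request is popped from kwargs in _generate and forwarded to
    # vllm's LLM.generate; omitting it falls back to the base model.
    print(llm.invoke("What are some popular Korean street foods?", lora_request=lora_adapter))
    print(llm.invoke("What are some popular Korean street foods?"))  # base model, no adapter

Because _generate pops lora_request before building SamplingParams, all remaining
kwargs still flow into the sampling parameters unchanged.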