From 6d2a76ac05e1003ce4cd1406956853065db36eeb Mon Sep 17 00:00:00 2001 From: Kwan Kin Chan <55791584+kenchanLOL@users.noreply.github.com> Date: Tue, 22 Oct 2024 20:41:47 -0500 Subject: [PATCH] langchain_huggingface: Fix multiple GPU usage bug in from_model_id function (#23628) - [ ] **Description:** - pass the device_map into model_kwargs - removing the unused device_map variable in the hf_pipeline function call - [ ] **Issue:** issue #13128 When using the from_model_id function to load a Hugging Face model for text generation across multiple GPUs, the model defaults to loading on the CPU despite multiple GPUs being available using the expected format ``` python llm = HuggingFacePipeline.from_model_id( model_id="model-id", task="text-generation", device_map="auto", ) ``` Currently, to enable multiple GPUs, we have to pass the variables in this format instead ``` python llm = HuggingFacePipeline.from_model_id( model_id="model-id", task="text-generation", device=None, model_kwargs={ "device_map": "auto", } ) ``` This issue arises due to improper handling of the device and device_map parameters. - [ ] **Explanation:** 1. In from_model_id, the model is created using model_kwargs and passed as the model variable of the pipeline function. So at this moment, to load the model with multiple GPUs, "device_map" needs to be set to "auto" within model_kwargs. Otherwise, the model defaults to loading on the CPU. 2. The device_map variable in from_model_id is not utilized correctly. In the source code of the pipeline function in transformers: - The device_map variable is stored in the model_kwargs dictionary (lines 867-878 of transformers/src/transformers/pipelines/\__init__.py). ```python if device_map is not None: ...... model_kwargs["device_map"] = device_map ``` - The model is constructed with model_kwargs containing the device_map value ONLY IF the `model` argument is a string (lines 893-903 of transformers/src/transformers/pipelines/\__init__.py). 
```python if isinstance(model, str) or framework is None: model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]} framework, model = infer_framework_load_model( ... , **model_kwargs, ) ``` - Consequently, since a model object is already passed to the pipeline function, the device_map variable from from_model_id is never used. 3. The device_map variable in from_model_id not only appears unused but also causes errors. Without explicitly setting device=None, attempting to load the model on multiple GPUs may result in the following error: ``` Device has 2 GPUs available. Provide device={deviceId} to `from_model_id` to use available GPUs for execution. deviceId is -1 (default) for CPU and can be a positive integer associated with CUDA device id. Traceback (most recent call last): File "foo.py", line 15, in llm = HuggingFacePipeline.from_model_id( File "foo\site-packages\langchain_huggingface\llms\huggingface_pipeline.py", line 217, in from_model_id pipeline = hf_pipeline( File "foo\lib\site-packages\transformers\pipelines\__init__.py", line 1108, in pipeline return pipeline_class(model=model, framework=framework, task=task, **kwargs) File "foo\lib\site-packages\transformers\pipelines\text_generation.py", line 96, in __init__ super().__init__(*args, **kwargs) File "foo\lib\site-packages\transformers\pipelines\base.py", line 835, in __init__ raise ValueError( ValueError: The model has been loaded with `accelerate` and therefore cannot be moved to a specific device. Please discard the `device` argument when creating your pipeline object. ``` This error occurs because, in from_model_id, the default values for device and device_map are -1 and None, respectively. The call would pass the statement (`device_map is not None and device < 0`) and keep the device as -1, so the pipeline function later raises an error when trying to move a GPU-loaded model back to the CPU. 
https://github.com/langchain-ai/langchain/blob/19eb82e68bbba12f8c7cde01aba1ee92dfae2ec2/libs/community/langchain_community/llms/huggingface_pipeline.py#L204-L213 If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. --------- Co-authored-by: William FH <13333726+hinthornw@users.noreply.github.com> Co-authored-by: Erick Friis Co-authored-by: vbarda --- .../llms/huggingface_pipeline.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py b/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py index 42754d4698ad1..9c8d5f7145a21 100644 --- a/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py +++ b/libs/partners/huggingface/langchain_huggingface/llms/huggingface_pipeline.py @@ -74,7 +74,7 @@ def from_model_id( model_id: str, task: str, backend: str = "default", - device: Optional[int] = -1, + device: Optional[int] = None, device_map: Optional[str] = None, model_kwargs: Optional[dict] = None, pipeline_kwargs: Optional[dict] = None, @@ -96,7 +96,21 @@ def from_model_id( "Please install it with `pip install transformers`." ) - _model_kwargs = model_kwargs or {} + _model_kwargs = model_kwargs.copy() if model_kwargs else {} + if device_map is not None: + if device is not None: + raise ValueError( + "Both `device` and `device_map` are specified. " + "`device` will override `device_map`. " + "You will most likely encounter unexpected behavior." + "Please remove `device` and keep " + "`device_map`." 
+ ) + + if "device_map" in _model_kwargs: + raise ValueError("`device_map` is already specified in `model_kwargs`.") + + _model_kwargs["device_map"] = device_map tokenizer = AutoTokenizer.from_pretrained(model_id, **_model_kwargs) try: @@ -218,7 +232,6 @@ def from_model_id( model=model, tokenizer=tokenizer, device=device, - device_map=device_map, batch_size=batch_size, model_kwargs=_model_kwargs, **_pipeline_kwargs,