Commit d8d54e7

Merge branch 'development' of https://github.com/vtuber-plan/langport into development
jstzwj committed Jun 29, 2024
2 parents 69f6e12 + 101efc3 commit d8d54e7
Showing 2 changed files with 12 additions and 8 deletions.
2 changes: 1 addition & 1 deletion langport/model/adapters/qwen.py
@@ -5,7 +5,7 @@
 
 
 class QwenAdapter(BaseAdapter):
-    """The model adapter for Robin"""
+    """The model adapter for Qwen"""
 
     def match(self, model_path: str):
         if model_path.endswith('/'):
18 changes: 11 additions & 7 deletions langport/model/executor/huggingface.py
@@ -280,13 +280,17 @@ def load_model(
             )
         else:
             if "8" in quantization:
-                model, tokenizer = load_compress_model(
-                    model_path=model_path, device=device, compression_config=default_compression_config, **kwargs
-                )
+                # model, tokenizer = load_compress_model(
+                #     model_path=model_path, device=device, compression_config=default_compression_config, **kwargs
+                # )
+                kwargs["load_in_8bit"] = True
+                model, tokenizer = self._load_hf_model(adapter, model_path, kwargs)
             elif "4" in quantization:
-                model, tokenizer = load_compress_model(
-                    model_path=model_path, device=device, compression_config=bit4_compression_config, **kwargs
-                )
+                # model, tokenizer = load_compress_model(
+                #     model_path=model_path, device=device, compression_config=bit4_compression_config, **kwargs
+                # )
+                kwargs["load_in_4bit"] = True
+                model, tokenizer = self._load_hf_model(adapter, model_path, kwargs)
             else:
                 model, tokenizer = load_compress_model(
                     model_path=model_path, device=device, compression_config=default_compression_config, **kwargs
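
The hunk above drops langport's custom load_compress_model compression for 8-bit and 4-bit loading and instead passes the load_in_8bit/load_in_4bit kwargs, which transformers forwards to bitsandbytes. A minimal sketch of the resulting load path, assuming transformers with bitsandbytes installed (the helper name load_quantized is illustrative, not part of the patch):

    # Sketch only: langport's _load_hf_model forwards kwargs such as
    # load_in_8bit to from_pretrained in roughly this way.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_quantized(model_path: str, quantization: str, **kwargs):
        if "8" in quantization:
            kwargs["load_in_8bit"] = True
        elif "4" in quantization:
            kwargs["load_in_4bit"] = True
        # bitsandbytes quantizes and places the weights during loading,
        # so device_map handles placement instead of a later model.to().
        model = AutoModelForCausalLM.from_pretrained(
            model_path, device_map="auto", **kwargs
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        return model, tokenizer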
@@ -318,7 +322,7 @@ def load_model(
             ds_engine = deepspeed.init_inference(model=model, config=config)
             model = ds_engine.module
         else:
-            if (device == "cuda" and num_gpus == 1 and not cpu_offloading) or device == "mps":
+            if (device == "cuda" and num_gpus == 1 and not cpu_offloading and quantization is None) or device == "mps":
                 model.to(device)
 
         if debug:
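
The second hunk adds quantization is None to the device-placement guard. With load_in_8bit/load_in_4bit, from_pretrained already dispatches the quantized weights to the GPU, and transformers rejects a later .to() call on such models, so the explicit move must be skipped whenever quantization was requested. An illustrative sketch of the failure the guard avoids (error wording from recent transformers releases, assumed representative):

    # Sketch only: moving a bitsandbytes-quantized model raises.
    model = AutoModelForCausalLM.from_pretrained(
        "some/model", load_in_8bit=True, device_map="auto"  # hypothetical path
    )
    model.to("cuda")  # ValueError: `.to` is not supported for `4-bit`
                      # or `8-bit` bitsandbytes models...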
