diff --git a/libs/community/langchain_community/chat_models/__init__.py b/libs/community/langchain_community/chat_models/__init__.py
index 9c83bdecbfc88..ea38ac2f25aa2 100644
--- a/libs/community/langchain_community/chat_models/__init__.py
+++ b/libs/community/langchain_community/chat_models/__init__.py
@@ -122,6 +122,9 @@
     from langchain_community.chat_models.mlx import (
         ChatMLX,
     )
+    from langchain_community.chat_models.modelscope_endpoint import (
+        ModelscopeChatEndpoint,
+    )
     from langchain_community.chat_models.moonshot import (
         MoonshotChat,
     )
@@ -251,6 +254,7 @@
     "JinaChat",
     "LlamaEdgeChatService",
     "MiniMaxChat",
+    "ModelscopeChatEndpoint",
     "MoonshotChat",
     "PaiEasChatEndpoint",
     "PromptLayerChatOpenAI",
@@ -316,6 +320,7 @@
     "JinaChat": "langchain_community.chat_models.jinachat",
     "LlamaEdgeChatService": "langchain_community.chat_models.llama_edge",
     "MiniMaxChat": "langchain_community.chat_models.minimax",
+    "ModelscopeChatEndpoint": "langchain_community.chat_models.modelscope_endpoint",
     "MoonshotChat": "langchain_community.chat_models.moonshot",
     "PaiEasChatEndpoint": "langchain_community.chat_models.pai_eas_endpoint",
     "PromptLayerChatOpenAI": "langchain_community.chat_models.promptlayer_openai",
diff --git a/libs/community/langchain_community/chat_models/modelscope_endpoint.py b/libs/community/langchain_community/chat_models/modelscope_endpoint.py
new file mode 100644
index 0000000000000..3eff0c7120058
--- /dev/null
+++ b/libs/community/langchain_community/chat_models/modelscope_endpoint.py
@@ -0,0 +1,128 @@
+"""Wrapper around modelscope chat endpoint models."""
+
+from typing import Dict
+
+from langchain_core.utils import (
+    convert_to_secret_str,
+    get_from_dict_or_env,
+    pre_init,
+)
+
+from langchain_community.chat_models import ChatOpenAI
+from langchain_community.llms.modelscope_endpoint import (
+    MODELSCOPE_SERVICE_URL_BASE,
+    ModelscopeCommon,
+)
+
+
+class ModelscopeChatEndpoint(ModelscopeCommon, ChatOpenAI):  # type: ignore[misc, override]
+    """Modelscope chat model inference API integration. To use, you must have a Modelscope account and a Modelscope SDK token.
+    Refer to https://modelscope.cn/docs/model-service/API-Inference/intro for more details.
+
+    Setup:
+        Install ``openai`` and set the environment variable ``MODELSCOPE_SDK_TOKEN``.
+
+        .. code-block:: bash
+
+            pip install openai
+            export MODELSCOPE_SDK_TOKEN="your-modelscope-sdk-token"
+
+    Key init args — completion params:
+        model: str
+            Name of the Modelscope model to use. Refer to https://modelscope.cn/docs/model-service/API-Inference/intro for available models.
+        temperature: Optional[float]
+            Sampling temperature, defaults to 0.3.
+        max_tokens: Optional[int]
+            Max number of tokens to generate, defaults to 1024.
+
+    Key init args — client params:
+        modelscope_sdk_token: Optional[str]
+            Modelscope SDK token. If not passed in, it will be read from the env var MODELSCOPE_SDK_TOKEN.
+        api_base: Optional[str]
+            Base URL for API requests.
+
+    See full list of supported init args and their descriptions in the params section.
+
+    Instantiate:
+        .. code-block:: python
+
+            from langchain_community.chat_models import ModelscopeChatEndpoint
+
+            chat = ModelscopeChatEndpoint(
+                modelscope_sdk_token="your-modelscope-sdk-token",
+                model="Qwen/Qwen2.5-Coder-32B-Instruct",
+                temperature=0.5,
+                # api_base="...",
+                # other params...
+            )
+
+    Invoke:
+        .. code-block:: python
+
+            messages = [
+                ("system", "You are good at programming."),
+                ("human", "Write a quick sort in Python."),
+            ]
+            chat.invoke(messages)
+
+        .. code-block:: python
+
+            AIMessage(
+                content='def quick_sort(arr): ...',
+                additional_kwargs={},
+                response_metadata={
+                    'token_usage': {
+                        'completion_tokens': 312,
+                        'prompt_tokens': 27,
+                        'total_tokens': 339
+                    },
+                    'model_name': 'Qwen/Qwen2.5-Coder-32B-Instruct',
+                    'system_fingerprint': None,
+                    'finish_reason': 'stop',
+                    'logprobs': None
+                },
+                id='run-71c03f4e-6628-41d5-beb6-d2559ae68266-0'
+            )
+
+    Stream:
+        .. code-block:: python
+
+            for chunk in chat.stream(messages):
+                print(chunk)
+
+    """  # noqa: E501
+
+    @pre_init
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the environment is set up correctly."""
+        values["modelscope_sdk_token"] = convert_to_secret_str(
+            get_from_dict_or_env(
+                values,
+                ["modelscope_sdk_token", "api_key"],
+                "MODELSCOPE_SDK_TOKEN",
+            )
+        )
+
+        try:
+            import openai
+
+        except ImportError:
+            raise ImportError(
+                "Could not import openai python package. "
+                "Please install it with `pip install openai`."
+            )
+
+        client_params = {
+            "api_key": values["modelscope_sdk_token"].get_secret_value(),
+            "base_url": values["base_url"]
+            if "base_url" in values
+            else MODELSCOPE_SERVICE_URL_BASE,
+        }
+
+        if not values.get("client"):
+            values["client"] = openai.OpenAI(**client_params).chat.completions
+        if not values.get("async_client"):
+            values["async_client"] = openai.AsyncOpenAI(
+                **client_params
+            ).chat.completions
+
+        return values
diff --git a/libs/community/langchain_community/llms/__init__.py b/libs/community/langchain_community/llms/__init__.py
index 45e00524292a8..0d7d91930880b 100644
--- a/libs/community/langchain_community/llms/__init__.py
+++ b/libs/community/langchain_community/llms/__init__.py
@@ -368,6 +368,12 @@ def _import_modal() -> Type[BaseLLM]:
     return Modal
 
 
+def _import_modelscope_endpoint() -> Type[BaseLLM]:
+    from langchain_community.llms.modelscope_endpoint import ModelscopeEndpoint
+
+    return ModelscopeEndpoint
+
+
 def _import_mosaicml() -> Type[BaseLLM]:
     from langchain_community.llms.mosaicml import MosaicML
 
@@ -785,6 +791,8 @@ def __getattr__(name: str) -> Any:
         return _import_mlx_pipeline()
     elif name == "Modal":
         return _import_modal()
+    elif name == "ModelscopeEndpoint":
+        return _import_modelscope_endpoint()
     elif name == "MosaicML":
         return _import_mosaicml()
     elif name == "NLPCloud":
@@ -947,6 +955,7 @@ def __getattr__(name: str) -> Any:
     "MlflowAIGateway",
     "MLXPipeline",
     "Modal",
+    "ModelscopeEndpoint",
     "MosaicML",
     "NIBittensorLLM",
     "NLPCloud",
@@ -1052,6 +1061,7 @@ def get_type_to_cls_dict() -> Dict[str, Callable[[], Type[BaseLLM]]]:
         "mlflow-ai-gateway": _import_mlflow_ai_gateway,
         "mlx_pipeline": _import_mlx_pipeline,
         "modal": _import_modal,
+        "modelscope_endpoint": _import_modelscope_endpoint,
         "mosaic": _import_mosaicml,
         "nebula": _import_symblai_nebula,
         "nibittensor": _import_bittensor,
diff --git a/libs/community/langchain_community/llms/modelscope_endpoint.py b/libs/community/langchain_community/llms/modelscope_endpoint.py
new file mode 100644
index 0000000000000..b8beb0451a49e
--- /dev/null
+++ b/libs/community/langchain_community/llms/modelscope_endpoint.py
@@ -0,0 +1,273 @@
+import json
+from collections.abc import Mapping
+from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
+
+import httpx
+import requests
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
+from langchain_core.language_models import LLM
+from langchain_core.outputs.generation import GenerationChunk
+from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    SecretStr,
+    model_validator,
+)
+
+from langchain_community.llms.utils import enforce_stop_tokens
+
+MODELSCOPE_SERVICE_URL_BASE = "https://api-inference.modelscope.cn/v1"
+
+
+def _convert_chunk_to_str(chunk: str) -> str:
+    if chunk == "":
+        return ""
+    chunk = chunk.lstrip("data: ")
+    if chunk == "[DONE]":
+        return ""
+    data = json.loads(chunk)
+    text = data["choices"][0]["delta"]["content"]
+    return text
+
+
+class ModelscopeClient(BaseModel):
+    """An API client that talks to the Modelscope api inference server."""
+
+    api_key: SecretStr
+    """The API key to use for authentication."""
+    base_url: str = MODELSCOPE_SERVICE_URL_BASE
+    timeout: int = 60
+
+    def completion(self, request: Any) -> str:
+        headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+        response = requests.post(
+            f"{self.base_url}/chat/completions",
+            headers=headers,
+            json=request,
+            timeout=self.timeout,
+        )
+        if not response.ok:
+            raise ValueError(f"HTTP {response.status_code} error: {response.text}")
+        return response.json()["choices"][0]["message"]["content"]
+
+    async def acompletion(self, request: Any) -> str:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+            response = await client.post(
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=request,
+            )
+            if not response.status_code == 200:
+                raise ValueError(f"HTTP {response.status_code} error: {response.text}")
+            return response.json()["choices"][0]["message"]["content"]
+
+    def stream(self, request: Any) -> Iterator[str]:
+        headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+        with requests.post(
+            f"{self.base_url}/chat/completions",
+            headers=headers,
+            json=request,
+            timeout=self.timeout,
+            stream=True,
+        ) as response:
+            if not response.ok:
+                raise ValueError(f"HTTP {response.status_code} error: {response.text}")
+            for line in response.iter_lines(decode_unicode=True):
+                text = _convert_chunk_to_str(line)
+                if text:
+                    yield text
+
+    async def astream(self, request: Any) -> AsyncIterator[str]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+            async with client.stream(
+                "POST",
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=request,
+            ) as response:
+                if not response.status_code == 200:
+                    raise ValueError(
+                        f"HTTP {response.status_code} error: {response.text}"
+                    )
+                async for line in response.aiter_lines():
+                    text = _convert_chunk_to_str(line)
+                    if text:
+                        yield text
+
+
+class ModelscopeCommon(BaseModel):
+    """Common parameters for Modelscope LLMs."""
+
+    client: Any
+    base_url: str = MODELSCOPE_SERVICE_URL_BASE
+    modelscope_sdk_token: Optional[SecretStr] = Field(default=None, alias="api_key")
+    model_name: str = Field(default="Qwen/Qwen2.5-Coder-32B-Instruct", alias="model")
+    """Model name. Available models are listed here: https://modelscope.cn/docs/model-service/API-Inference/intro """
+    max_tokens: int = 1024
+    """Maximum number of tokens to generate."""
+    temperature: float = 0.3
+    """Temperature parameter (higher values make the model more creative)."""
+    timeout: int = 60
+    """Timeout for the request."""
+
+    model_config = ConfigDict(populate_by_name=True, protected_namespaces=())
+
+    @property
+    def lc_secrets(self) -> dict:
+        """A map of constructor argument names to secret ids.
+
+        For example,
+            {"modelscope_sdk_token": "MODELSCOPE_SDK_TOKEN"}
+        """
+        return {"modelscope_sdk_token": "MODELSCOPE_SDK_TOKEN"}
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling the OpenAI-compatible API."""
+        return {
+            "model": self.model_name,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+        }
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        return {**self._default_params}
+
+    @model_validator(mode="before")
+    @classmethod
+    def build_extra(cls, values: Dict[str, Any]) -> Any:
+        """Build extra parameters.
+        Override the superclass method to prevent the model parameter from being
+        overridden.
+        """
+        return values
+
+    @pre_init
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the api key and python package exist in the environment."""
+        values["modelscope_sdk_token"] = convert_to_secret_str(
+            get_from_dict_or_env(
+                values, ["modelscope_sdk_token", "api_key"], "MODELSCOPE_SDK_TOKEN"
+            )
+        )
+
+        values["client"] = ModelscopeClient(
+            api_key=values["modelscope_sdk_token"],
+            base_url=values["base_url"],
+            timeout=values["timeout"],
+        )
+        return values
+
+
+class ModelscopeEndpoint(ModelscopeCommon, LLM):
+    """Modelscope model inference API endpoint.
+
+    To use, you should have a Modelscope account and the environment variable ``MODELSCOPE_SDK_TOKEN`` set with your
+    API key. Refer to https://modelscope.cn/docs/model-service/API-Inference/intro for more details.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.llms.modelscope_endpoint import ModelscopeEndpoint
+
+            llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+
+            # invoke
+            llm.invoke("write a quick sort in python")
+            # stream
+            for chunk in llm.stream("write a quick sort in python"):
+                print(chunk, end='', flush=True)
+            # ainvoke
+            asyncio.run(llm.ainvoke("write a quick sort in python"))
+            # astream
+            async for chunk in llm.astream("write a quick sort in python"):
+                print(chunk, end='', flush=True)
+
+    """
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+    )
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "modelscope_endpoint"
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        return {
+            "base_url": self.base_url,
+            "model_name": self.model_name,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+        }
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        request = self._invocation_params
+        request["messages"] = [{"role": "user", "content": prompt}]
+        request.update(kwargs)
+        text = self.client.completion(request)
+        if stop is not None:
+            # This is required since the stop tokens
+            # are not enforced by the model parameters
+            text = enforce_stop_tokens(text, stop)
+
+        return text
+
+    async def _acall(
+        self,
+        prompt: str,
+        stop: Optional[list[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        request = self._invocation_params
+        request["messages"] = [{"role": "user", "content": prompt}]
+        request.update(kwargs)
+        text = await self.client.acompletion(request)
+        if stop is not None:
+            text = enforce_stop_tokens(text, stop)
+        return text
request["messages"] = [{"role": "user", "content": prompt}] + request.update(kwargs) + request["stream"] = True + for text in self.client.stream(request): + yield GenerationChunk(text=text) + + async def _astream( + self, + prompt: str, + stop: Optional[list[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + request = self._invocation_params + request["messages"] = [{"role": "user", "content": prompt}] + request.update(kwargs) + request["stream"] = True + async for text in self.client.astream(request): + yield GenerationChunk(text=text) diff --git a/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py b/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py new file mode 100644 index 0000000000000..fc93573745bd1 --- /dev/null +++ b/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py @@ -0,0 +1,44 @@ +"""Test Modelscope Chat Model API.""" + +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage + +from langchain_community.chat_models.modelscope_endpoint import ModelscopeChatEndpoint + + +def test_modelscope_chat_call() -> None: + chat = ModelscopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + response = chat.invoke([HumanMessage(content="Say foo:")]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_modelscope_chat_multiple_history() -> None: + """Tests multiple history works.""" + chat = ModelscopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + + response = chat.invoke( + [ + HumanMessage(content="Hello."), + AIMessage(content="Hello!"), + HumanMessage(content="How are you doing?"), + ] + ) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_modelscope_chat_stream() -> None: + """Test that stream works.""" + chat = ModelscopeChatEndpoint( + model="Qwen/Qwen2.5-Coder-32B-Instruct", + streaming=True, + ) + response = chat.stream( + [ + HumanMessage(content="Hello."), + AIMessage(content="Hello!"), + HumanMessage(content="Who are you?"), + ] + ) + for chunk in response: + assert isinstance(chunk.content, str) diff --git a/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py b/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py new file mode 100644 index 0000000000000..dc09209b2cc10 --- /dev/null +++ b/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py @@ -0,0 +1,43 @@ +"""Test ModelscopeEndpoint API wrapper.""" + +from typing import AsyncIterator, Iterator + +from langchain_community.llms.modelscope_endpoint import ModelscopeEndpoint + + +def test_modelscope_call() -> None: + """Test valid call to Modelscope.""" + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + output = llm.invoke("Say foo:") + assert isinstance(output, str) + + +def test_modelscope_streaming() -> None: + """Test streaming call to Modelscope.""" + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + generator = llm.stream("write a quick sort in python") + stream_results_string = "" + assert isinstance(generator, Iterator) + + for chunk in generator: + assert isinstance(chunk, str) + stream_results_string = chunk + assert len(stream_results_string.strip()) > 1 + + +async def test_modelscope_call_async() -> None: + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + output = await llm.ainvoke("write a quick sort in 
python") + assert isinstance(output, str) + + +async def test_modelscope_streaming_async() -> None: + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + generator = llm.astream("write a quick sort in python") + stream_results_string = "" + assert isinstance(generator, AsyncIterator) + + async for chunk in generator: + assert isinstance(chunk, str) + stream_results_string = chunk + assert len(stream_results_string.strip()) > 1