diff --git a/libs/community/langchain_community/chat_models/__init__.py b/libs/community/langchain_community/chat_models/__init__.py
index 9c83bdecbfc88..ea38ac2f25aa2 100644
--- a/libs/community/langchain_community/chat_models/__init__.py
+++ b/libs/community/langchain_community/chat_models/__init__.py
@@ -122,6 +122,9 @@
     from langchain_community.chat_models.mlx import (
         ChatMLX,
     )
+    from langchain_community.chat_models.modelscope_endpoint import (
+        ModelscopeChatEndpoint,
+    )
     from langchain_community.chat_models.moonshot import (
         MoonshotChat,
     )
@@ -251,6 +254,7 @@
     "JinaChat",
     "LlamaEdgeChatService",
     "MiniMaxChat",
+    "ModelscopeChatEndpoint",
     "MoonshotChat",
     "PaiEasChatEndpoint",
     "PromptLayerChatOpenAI",
@@ -316,6 +320,7 @@
     "JinaChat": "langchain_community.chat_models.jinachat",
     "LlamaEdgeChatService": "langchain_community.chat_models.llama_edge",
     "MiniMaxChat": "langchain_community.chat_models.minimax",
+    "ModelscopeChatEndpoint": "langchain_community.chat_models.modelscope_endpoint",
     "MoonshotChat": "langchain_community.chat_models.moonshot",
     "PaiEasChatEndpoint": "langchain_community.chat_models.pai_eas_endpoint",
     "PromptLayerChatOpenAI": "langchain_community.chat_models.promptlayer_openai",
diff --git a/libs/community/langchain_community/chat_models/modelscope_endpoint.py b/libs/community/langchain_community/chat_models/modelscope_endpoint.py
new file mode 100644
index 0000000000000..3eff0c7120058
--- /dev/null
+++ b/libs/community/langchain_community/chat_models/modelscope_endpoint.py
@@ -0,0 +1,128 @@
+"""Wrapper around modelscope chat endpoint models."""
+
+from typing import Dict
+
+from langchain_core.utils import (
+    convert_to_secret_str,
+    get_from_dict_or_env,
+    pre_init,
+)
+
+from langchain_community.chat_models import ChatOpenAI
+from langchain_community.llms.modelscope_endpoint import (
+    MODELSCOPE_SERVICE_URL_BASE,
+    ModelscopeCommon,
+)
+
+
+class ModelscopeChatEndpoint(ModelscopeCommon, ChatOpenAI):  # type: ignore[misc, override]
+    """Modelscope chat model inference API integration. To use, you must have a Modelscope account and a Modelscope SDK token.
+    Refer to https://modelscope.cn/docs/model-service/API-Inference/intro for more details.
+
+    Setup:
+        Install ``openai`` and set the environment variable ``MODELSCOPE_SDK_TOKEN``.
+
+        .. code-block:: bash
+
+            pip install openai
+            export MODELSCOPE_SDK_TOKEN="your-modelscope-sdk-token"
+
+    Key init args — completion params:
+        model: str
+            Name of the Modelscope model to use. Refer to https://modelscope.cn/docs/model-service/API-Inference/intro for available models.
+        temperature: Optional[float]
+            Sampling temperature, defaults to 0.3.
+        max_tokens: Optional[int]
+            Max number of tokens to generate, defaults to 1024.
+
+    Key init args — client params:
+        modelscope_sdk_token: Optional[str]
+            Modelscope SDK token. If not passed in, it will be read from the env var MODELSCOPE_SDK_TOKEN.
+        api_base: Optional[str]
+            Base URL for API requests.
+
+    See full list of supported init args and their descriptions in the params section.
+
+    Instantiate:
+        .. code-block:: python
+
+            from langchain_community.chat_models import ModelscopeChatEndpoint
+
+            chat = ModelscopeChatEndpoint(
+                modelscope_sdk_token="your-modelscope-sdk-token",
+                model="Qwen/Qwen2.5-Coder-32B-Instruct",
+                temperature=0.5,
+                # api_base="...",
+                # other params...
+            )
+
+    Invoke:
+        .. code-block:: python
+
+            messages = [
+                ("system", "You are good at programming."),
+                ("human", "Write a quick sort in Python."),
+            ]
+            chat.invoke(messages)
+
+        .. code-block:: python
+
+            AIMessage(
+                content='def quick_sort(arr): ...',
+                additional_kwargs={},
+                response_metadata={
+                    'token_usage': {
+                        'completion_tokens': 312,
+                        'prompt_tokens': 27,
+                        'total_tokens': 339
+                    },
+                    'model_name': 'Qwen/Qwen2.5-Coder-32B-Instruct',
+                    'system_fingerprint': None,
+                    'finish_reason': 'stop',
+                    'logprobs': None
+                },
+                id='run-71c03f4e-6628-41d5-beb6-d2559ae68266-0'
+            )
+
+    Stream:
+        .. code-block:: python
+
+            for chunk in chat.stream(messages):
+                print(chunk)
+
+    """  # noqa: E501
+
+    @pre_init
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the environment is set up correctly."""
+        values["modelscope_sdk_token"] = convert_to_secret_str(
+            get_from_dict_or_env(
+                values,
+                ["modelscope_sdk_token", "api_key"],
+                "MODELSCOPE_SDK_TOKEN",
+            )
+        )
+
+        try:
+            import openai
+
+        except ImportError:
+            raise ImportError(
+                "Could not import openai python package. "
+                "Please install it with `pip install openai`."
+            )
+
+        client_params = {
+            "api_key": values["modelscope_sdk_token"].get_secret_value(),
+            "base_url": values["base_url"]
+            if "base_url" in values
+            else MODELSCOPE_SERVICE_URL_BASE,
+        }
+
+        if not values.get("client"):
+            values["client"] = openai.OpenAI(**client_params).chat.completions
+        if not values.get("async_client"):
+            values["async_client"] = openai.AsyncOpenAI(
+                **client_params
+            ).chat.completions
+
+        return values
diff --git a/libs/community/langchain_community/llms/__init__.py b/libs/community/langchain_community/llms/__init__.py
index 45e00524292a8..0d7d91930880b 100644
--- a/libs/community/langchain_community/llms/__init__.py
+++ b/libs/community/langchain_community/llms/__init__.py
@@ -368,6 +368,12 @@ def _import_modal() -> Type[BaseLLM]:
     return Modal
 
 
+def _import_modelscope_endpoint() -> Type[BaseLLM]:
+    from langchain_community.llms.modelscope_endpoint import ModelscopeEndpoint
+
+    return ModelscopeEndpoint
+
+
 def _import_mosaicml() -> Type[BaseLLM]:
     from langchain_community.llms.mosaicml import MosaicML
 
@@ -785,6 +791,8 @@ def __getattr__(name: str) -> Any:
         return _import_mlx_pipeline()
     elif name == "Modal":
         return _import_modal()
+    elif name == "ModelscopeEndpoint":
+        return _import_modelscope_endpoint()
     elif name == "MosaicML":
         return _import_mosaicml()
     elif name == "NLPCloud":
@@ -947,6 +955,7 @@ def __getattr__(name: str) -> Any:
     "MlflowAIGateway",
     "MLXPipeline",
     "Modal",
+    "ModelscopeEndpoint",
     "MosaicML",
     "NIBittensorLLM",
     "NLPCloud",
@@ -1052,6 +1061,7 @@ def get_type_to_cls_dict() -> Dict[str, Callable[[], Type[BaseLLM]]]:
         "mlflow-ai-gateway": _import_mlflow_ai_gateway,
         "mlx_pipeline": _import_mlx_pipeline,
         "modal": _import_modal,
+        "modelscope_endpoint": _import_modelscope_endpoint,
         "mosaic": _import_mosaicml,
         "nebula": _import_symblai_nebula,
         "nibittensor": _import_bittensor,
diff --git a/libs/community/langchain_community/llms/modelscope_endpoint.py b/libs/community/langchain_community/llms/modelscope_endpoint.py
new file mode 100644
index 0000000000000..b8beb0451a49e
--- /dev/null
+++ b/libs/community/langchain_community/llms/modelscope_endpoint.py
@@ -0,0 +1,273 @@
+import json
+from collections.abc import Mapping
+from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
+
+import httpx
+import requests
+from langchain_core.callbacks import (
+    AsyncCallbackManagerForLLMRun,
+    CallbackManagerForLLMRun,
+)
+from langchain_core.language_models import LLM
+from langchain_core.outputs.generation import GenerationChunk
+from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env, pre_init
+from pydantic import (
+    BaseModel,
+    ConfigDict,
+    Field,
+    SecretStr,
+    model_validator,
+)
+
+from langchain_community.llms.utils import enforce_stop_tokens
+
+MODELSCOPE_SERVICE_URL_BASE = "https://api-inference.modelscope.cn/v1"
+
+
+def _convert_chunk_to_str(chunk: str) -> str:
+    if chunk == "":
+        return ""
+    chunk = chunk.lstrip("data: ")
+    if chunk == "[DONE]":
+        return ""
+    data = json.loads(chunk)
+    text = data["choices"][0]["delta"]["content"]
+    return text
+
+
+class ModelscopeClient(BaseModel):
+    """An API client that talks to the Modelscope api inference server."""
+
+    api_key: SecretStr
+    """The API key to use for authentication."""
+    base_url: str = MODELSCOPE_SERVICE_URL_BASE
+    timeout: int = 60
+
+    def completion(self, request: Any) -> str:
+        headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+        response = requests.post(
+            f"{self.base_url}/chat/completions",
+            headers=headers,
+            json=request,
+            timeout=self.timeout,
+        )
+        if not response.ok:
+            raise ValueError(f"HTTP {response.status_code} error: {response.text}")
+        return response.json()["choices"][0]["message"]["content"]
+
+    async def acompletion(self, request: Any) -> str:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+            response = await client.post(
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=request,
+            )
+            if not response.status_code == 200:
+                raise ValueError(f"HTTP {response.status_code} error: {response.text}")
+            return response.json()["choices"][0]["message"]["content"]
+
+    def stream(self, request: Any) -> Iterator[str]:
+        headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+        with requests.post(
+            f"{self.base_url}/chat/completions",
+            headers=headers,
+            json=request,
+            timeout=self.timeout,
+            stream=True,
+        ) as response:
+            if not response.ok:
+                raise ValueError(f"HTTP {response.status_code} error: {response.text}")
+            for line in response.iter_lines(decode_unicode=True):
+                text = _convert_chunk_to_str(line)
+                if text:
+                    yield text
+
+    async def astream(self, request: Any) -> AsyncIterator[str]:
+        async with httpx.AsyncClient(timeout=self.timeout) as client:
+            headers = {"Authorization": f"Bearer {self.api_key.get_secret_value()}"}
+            async with client.stream(
+                "POST",
+                f"{self.base_url}/chat/completions",
+                headers=headers,
+                json=request,
+            ) as response:
+                if not response.status_code == 200:
+                    raise ValueError(
+                        f"HTTP {response.status_code} error: {response.text}"
+                    )
+                async for line in response.aiter_lines():
+                    text = _convert_chunk_to_str(line)
+                    if text:
+                        yield text
+
+
+class ModelscopeCommon(BaseModel):
+    """Common parameters for Modelscope LLMs."""
+
+    client: Any
+    base_url: str = MODELSCOPE_SERVICE_URL_BASE
+    modelscope_sdk_token: Optional[SecretStr] = Field(default=None, alias="api_key")
+    model_name: str = Field(default="Qwen/Qwen2.5-Coder-32B-Instruct", alias="model")
+    """Model name. Available models are listed here: https://modelscope.cn/docs/model-service/API-Inference/intro """
+    max_tokens: int = 1024
+    """Maximum number of tokens to generate."""
+    temperature: float = 0.3
+    """Temperature parameter (higher values make the model more creative)."""
+    timeout: int = 60
+    """Timeout for the request."""
+
+    model_config = ConfigDict(populate_by_name=True, protected_namespaces=())
+
+    @property
+    def lc_secrets(self) -> dict:
+        """A map of constructor argument names to secret ids.
+
+        For example,
+            {"modelscope_sdk_token": "MODELSCOPE_SDK_TOKEN"}
+        """
+        return {"modelscope_sdk_token": "MODELSCOPE_SDK_TOKEN"}
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters for calling the OpenAI-compatible API."""
+        return {
+            "model": self.model_name,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+        }
+
+    @property
+    def _invocation_params(self) -> Dict[str, Any]:
+        return {**self._default_params}
+
+    @model_validator(mode="before")
+    @classmethod
+    def build_extra(cls, values: Dict[str, Any]) -> Any:
+        """Build extra parameters.
+        Override the superclass method to prevent the model parameter from being
+        overridden.
+        """
+        return values
+
+    @pre_init
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the api key and python package exist in the environment."""
+        values["modelscope_sdk_token"] = convert_to_secret_str(
+            get_from_dict_or_env(
+                values, ["modelscope_sdk_token", "api_key"], "MODELSCOPE_SDK_TOKEN"
+            )
+        )
+
+        values["client"] = ModelscopeClient(
+            api_key=values["modelscope_sdk_token"],
+            base_url=values["base_url"],
+            timeout=values["timeout"],
+        )
+        return values
+
+
+class ModelscopeEndpoint(ModelscopeCommon, LLM):
+    """Modelscope model inference API endpoint.
+
+    To use, you should have a Modelscope account and the environment variable ``MODELSCOPE_SDK_TOKEN`` set with your
+    API key. Refer to https://modelscope.cn/docs/model-service/API-Inference/intro for more details.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.llms.modelscope_endpoint import ModelscopeEndpoint
+
+            llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+
+            # invoke
+            llm.invoke("write a quick sort in python")
+            # stream
+            for chunk in llm.stream("write a quick sort in python"):
+                print(chunk, end='', flush=True)
+            # ainvoke
+            asyncio.run(llm.ainvoke("write a quick sort in python"))
+            # astream
+            async for chunk in llm.astream("write a quick sort in python"):
+                print(chunk, end='', flush=True)
+
+    """
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+    )
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "modelscope_endpoint"
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        return {
+            "base_url": self.base_url,
+            "model_name": self.model_name,
+            "max_tokens": self.max_tokens,
+            "temperature": self.temperature,
+        }
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        request = self._invocation_params
+        request["messages"] = [{"role": "user", "content": prompt}]
+        request.update(kwargs)
+        text = self.client.completion(request)
+        if stop is not None:
+            # This is required since the stop tokens
+            # are not enforced by the model parameters
+            text = enforce_stop_tokens(text, stop)
+
+        return text
+
+    async def _acall(
+        self,
+        prompt: str,
+        stop: Optional[list[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        request = self._invocation_params
+        request["messages"] = [{"role": "user", "content": prompt}]
+        request.update(kwargs)
+        text = await self.client.acompletion(request)
+        if stop is not None:
+            text = enforce_stop_tokens(text, stop)
+        return text
request["messages"] = [{"role": "user", "content": prompt}] + request.update(kwargs) + request["stream"] = True + for text in self.client.stream(request): + yield GenerationChunk(text=text) + + async def _astream( + self, + prompt: str, + stop: Optional[list[str]] = None, + run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> AsyncIterator[GenerationChunk]: + request = self._invocation_params + request["messages"] = [{"role": "user", "content": prompt}] + request.update(kwargs) + request["stream"] = True + async for text in self.client.astream(request): + yield GenerationChunk(text=text) diff --git a/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py b/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py new file mode 100644 index 0000000000000..fc93573745bd1 --- /dev/null +++ b/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py @@ -0,0 +1,44 @@ +"""Test Modelscope Chat Model API.""" + +from langchain_core.messages import AIMessage, BaseMessage, HumanMessage + +from langchain_community.chat_models.modelscope_endpoint import ModelscopeChatEndpoint + + +def test_modelscope_chat_call() -> None: + chat = ModelscopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + response = chat.invoke([HumanMessage(content="Say foo:")]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_modelscope_chat_multiple_history() -> None: + """Tests multiple history works.""" + chat = ModelscopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + + response = chat.invoke( + [ + HumanMessage(content="Hello."), + AIMessage(content="Hello!"), + HumanMessage(content="How are you doing?"), + ] + ) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_modelscope_chat_stream() -> None: + """Test that stream works.""" + chat = ModelscopeChatEndpoint( + model="Qwen/Qwen2.5-Coder-32B-Instruct", + streaming=True, + ) + response = chat.stream( + [ + HumanMessage(content="Hello."), + AIMessage(content="Hello!"), + HumanMessage(content="Who are you?"), + ] + ) + for chunk in response: + assert isinstance(chunk.content, str) diff --git a/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py b/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py new file mode 100644 index 0000000000000..dc09209b2cc10 --- /dev/null +++ b/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py @@ -0,0 +1,43 @@ +"""Test ModelscopeEndpoint API wrapper.""" + +from typing import AsyncIterator, Iterator + +from langchain_community.llms.modelscope_endpoint import ModelscopeEndpoint + + +def test_modelscope_call() -> None: + """Test valid call to Modelscope.""" + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + output = llm.invoke("Say foo:") + assert isinstance(output, str) + + +def test_modelscope_streaming() -> None: + """Test streaming call to Modelscope.""" + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + generator = llm.stream("write a quick sort in python") + stream_results_string = "" + assert isinstance(generator, Iterator) + + for chunk in generator: + assert isinstance(chunk, str) + stream_results_string = chunk + assert len(stream_results_string.strip()) > 1 + + +async def test_modelscope_call_async() -> None: + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + output = await llm.ainvoke("write a quick sort in 
python") + assert isinstance(output, str) + + +async def test_modelscope_streaming_async() -> None: + llm = ModelscopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct") + generator = llm.astream("write a quick sort in python") + stream_results_string = "" + assert isinstance(generator, AsyncIterator) + + async for chunk in generator: + assert isinstance(chunk, str) + stream_results_string = chunk + assert len(stream_results_string.strip()) > 1