diff --git a/libs/community/langchain_community/llms/modelscope_endpoint.py b/libs/community/langchain_community/llms/modelscope_endpoint.py
index d93793a02dbe9..3070607313229 100644
--- a/libs/community/langchain_community/llms/modelscope_endpoint.py
+++ b/libs/community/langchain_community/llms/modelscope_endpoint.py
@@ -105,7 +105,7 @@ async def astream(self, request: Any) -> AsyncIterator[str]:
 class ModelScopeCommon(BaseModel):
     """Common parameters for Modelscope LLMs."""
 
-    client: Any
+    client: Any = Field(default=None)
     base_url: str = MODELSCOPE_SERVICE_URL_BASE
     modelscope_sdk_token: Optional[SecretStr] = Field(default=None, alias="api_key")
     model_name: str = Field(default="Qwen/Qwen2.5-Coder-32B-Instruct", alias="model")
diff --git a/libs/community/langchain_community/llms/modelscope_pipeline.py b/libs/community/langchain_community/llms/modelscope_pipeline.py
deleted file mode 100644
index b2e992036a3e1..0000000000000
--- a/libs/community/langchain_community/llms/modelscope_pipeline.py
+++ /dev/null
@@ -1,151 +0,0 @@
-from __future__ import annotations
-
-import logging
-from typing import Any, Dict, Iterator, List, Mapping, Optional
-
-from langchain_core.callbacks import CallbackManagerForLLMRun
-from langchain_core.language_models.llms import BaseLLM
-from langchain_core.outputs import Generation, GenerationChunk, LLMResult
-
-DEFAULT_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
-DEFAULT_TASK = "chat"
-VALID_TASKS = (
-    "chat",
-    "text-generation",
-)
-DEFAULT_BATCH_SIZE = 4
-
-logger = logging.getLogger(__name__)
-
-
-class ModelScopePipeline(BaseLLM):
-    """ModelScope Pipeline API.
-
-    To use, you should have the ``modelscope[framework]`` and ``ms-swift[llm]`` python package installed,
-    you can install with ``pip install 'ms-swift[llm]' 'modelscope[framework]' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html``.
-
-    Only supports `chat` task for now.
-
-    Example using from_model_id:
-        .. code-block:: python
-
-            from langchain_community.llms.modelscope_pipeline import ModelScopePipeline
-            llm = ModelScopePipeline.from_model_id(
-                model_id="Qwen/Qwen2.5-0.5B-Instruct",
-                task="chat",
-                generate_kwargs={'do_sample': True, 'max_new_tokens': 128},
-            )
-            llm.invoke("Hello, how are you?")
-    """  # noqa: E501
-
-    pipeline: Any  #: :meta private:
-    task: str = DEFAULT_TASK
-    model_id: str = DEFAULT_MODEL_ID
-    model_revision: Optional[str] = None
-    generate_kwargs: Optional[Dict[Any, Any]] = None
-    """Keyword arguments passed to the pipeline."""
-    batch_size: int = DEFAULT_BATCH_SIZE
-    """Batch size to use when passing multiple documents to generate."""
-
-    @classmethod
-    def from_model_id(
-        cls,
-        model_id: str = DEFAULT_MODEL_ID,
-        model_revision: Optional[str] = None,
-        task: str = DEFAULT_TASK,
-        device_map: Optional[str] = None,
-        generate_kwargs: Optional[Dict[Any, Any]] = None,
-        batch_size: int = DEFAULT_BATCH_SIZE,
-        **kwargs: Any,
-    ) -> ModelScopePipeline:
-        """Construct the pipeline object from model_id and task."""
-        try:
-            from modelscope import pipeline  # type: ignore[import]
-        except ImportError:
-            raise ValueError(
-                "Could not import modelscope python package. "
-                "Please install it with `pip install 'ms-swift[llm]' 'modelscope[framework]' -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html`."  # noqa: E501
-            )
-        modelscope_pipeline = pipeline(
-            task=task,
-            model=model_id,
-            model_revision=model_revision,
-            device_map="auto" if device_map is None else device_map,
-            llm_framework="swift",
-            **kwargs,
-        )
-        return cls(
-            pipeline=modelscope_pipeline,
-            task=task,
-            model_id=model_id,
-            model_revision=model_revision,
-            generate_kwargs=generate_kwargs,
-            batch_size=batch_size,
-            **kwargs,
-        )
-
-    @property
-    def _identifying_params(self) -> Mapping[str, Any]:
-        """Get the identifying parameters."""
-        return {
-            "model_id": self.model_id,
-            "generate_kwargs": self.generate_kwargs,
-        }
-
-    @property
-    def _llm_type(self) -> str:
-        return "modelscope_pipeline"
-
-    def _stream(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> Iterator[GenerationChunk]:
-        if self.generate_kwargs is not None:
-            gen_cfg = {**self.generate_kwargs, **kwargs}
-        else:
-            gen_cfg = {**kwargs}
-
-        for stream_output in self.pipeline.stream_generate(prompt, **gen_cfg):
-            text = stream_output["text"]
-            chunk = GenerationChunk(text=text)
-            if run_manager:
-                run_manager.on_llm_new_token(chunk.text, chunk=chunk)
-            yield chunk
-
-    def _generate(
-        self,
-        prompts: List[str],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> LLMResult:
-        # List to hold all results
-        text_generations: List[str] = []
-        if self.generate_kwargs is not None:
-            gen_cfg = {**self.generate_kwargs, **kwargs}
-        else:
-            gen_cfg = {**kwargs}
-
-        for i in range(0, len(prompts), self.batch_size):
-            batch_prompts = prompts[i : i + self.batch_size]
-
-            # Process batch of prompts
-            responses = self.pipeline(
-                batch_prompts,
-                **gen_cfg,
-            )
-            # Process each response in the batch
-            for j, response in enumerate(responses):
-                if isinstance(response, list):
-                    # if model returns multiple generations, pick the top one
-                    response = response[0]
-                text = response["text"]
-                # Append the processed text to results
-                text_generations.append(text)
-
-        return LLMResult(
-            generations=[[Generation(text=text)] for text in text_generations]
-        )
diff --git a/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py b/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py
index 537677ebc4787..f8cc89ad0ebd7 100644
--- a/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py
+++ b/libs/community/tests/integration_tests/chat_models/test_modelscope_chat_endpoint.py
@@ -6,7 +6,7 @@
 
 
 def test_modelscope_chat_call() -> None:
-    chat = ModelScopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+    chat = ModelScopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")  # type: ignore
     response = chat.invoke([HumanMessage(content="Say foo:")])
     assert isinstance(response, BaseMessage)
     assert isinstance(response.content, str)
@@ -14,7 +14,7 @@
 
 def test_modelscope_chat_multiple_history() -> None:
     """Tests multiple history works."""
-    chat = ModelScopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+    chat = ModelScopeChatEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")  # type: ignore
 
     response = chat.invoke(
         [
@@ -29,7 +29,7 @@ def test_modelscope_chat_multiple_history() -> None:
 
 def test_modelscope_chat_stream() -> None:
     """Test that stream works."""
-    chat = ModelScopeChatEndpoint(
+    chat = ModelScopeChatEndpoint(  # type: ignore
         model="Qwen/Qwen2.5-Coder-32B-Instruct",
         streaming=True,
     )
diff --git a/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py b/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py
index ad08eab3cd02b..e7b9c67e5297b 100644
--- a/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py
+++ b/libs/community/tests/integration_tests/llms/test_modelscope_endpoint.py
@@ -7,14 +7,14 @@
 
 def test_modelscope_call() -> None:
     """Test valid call to Modelscope."""
-    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")  # type: ignore
     output = llm.invoke("Say foo:")
     assert isinstance(output, str)
 
 
 def test_modelscope_streaming() -> None:
     """Test streaming call to Modelscope."""
-    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")  # type: ignore
     generator = llm.stream("write a quick sort in python")
     stream_results_string = ""
     assert isinstance(generator, Iterator)
@@ -26,13 +26,13 @@ def test_modelscope_streaming() -> None:
 
 
 async def test_modelscope_call_async() -> None:
-    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")  # type: ignore
     output = await llm.ainvoke("write a quick sort in python")
     assert isinstance(output, str)
 
 
 async def test_modelscope_streaming_async() -> None:
-    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")
+    llm = ModelScopeEndpoint(model="Qwen/Qwen2.5-Coder-32B-Instruct")  # type: ignore
     generator = llm.astream("write a quick sort in python")
     stream_results_string = ""
     assert isinstance(generator, AsyncIterator)
diff --git a/libs/community/tests/integration_tests/llms/test_modelscope_pipeline.py b/libs/community/tests/integration_tests/llms/test_modelscope_pipeline.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
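For context, a minimal usage sketch of the endpoint touched by the `client: Any = Field(default=None)` change. It mirrors the integration tests above; the token value is a placeholder, and `api_key` is assumed to be accepted at construction via the alias declared on `ModelScopeCommon` in this diff.

```python
from langchain_community.llms.modelscope_endpoint import ModelScopeEndpoint

# With the new default, `client` does not have to be supplied when the model
# is constructed; only the model name and credentials are passed here.
# "your-sdk-token" is a placeholder, not a real credential.
llm = ModelScopeEndpoint(  # type: ignore
    model="Qwen/Qwen2.5-Coder-32B-Instruct",
    api_key="your-sdk-token",
)
print(llm.invoke("Say foo:"))
```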