Merge branch 'langchain-ai:master' into test_branch

langchain-ai · Nov 27, 2024 · 0943ed3 · 0943ed3
2 parents f55743a + a8b21af
commit 0943ed3
Show file tree

Hide file tree

Showing 41 changed files with 3,254 additions and 2,059 deletions.
diff --git a/.github/scripts/check_diff.py b/.github/scripts/check_diff.py
@@ -32,18 +32,8 @@
     "huggingface",
 ]
 
-# Cap python version at 3.12 for some packages with dependencies that are not yet
-# compatible with python 3.13 (mostly hf tokenizers).
 PY_312_MAX_PACKAGES = [
-    f"libs/partners/{integration}"
-    for integration in [
-        "chroma",
-        "couchbase",
-        "huggingface",
-        "mistralai",
-        "nomic",
-        "qdrant",
-    ]
+    "libs/partners/huggingface",  # https://github.com/pytorch/pytorch/issues/130249
 ]
 
 
@@ -138,9 +128,7 @@ def _get_configs_for_single_dir(job: str, dir_: str) -> List[Dict[str, str]]:
         py_versions = ["3.9", "3.13"]
 
     elif dir_ == "libs/community" and job == "extended-tests":
-        # community extended test resolution in 3.12 is slow
-        # even in uv
-        py_versions = ["3.9", "3.11"]
+        py_versions = ["3.9", "3.12"]
 
     elif dir_ == "libs/community" and job == "compile-integration-tests":
         # community integration deps are slow in 3.12

diff --git a/cookbook/README.md b/cookbook/README.md
@@ -63,4 +63,5 @@ Notebook | Description
 [oracleai_demo.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/oracleai_demo.ipynb) | This guide outlines how to utilize Oracle AI Vector Search alongside Langchain for an end-to-end RAG pipeline, providing step-by-step examples. The process includes loading documents from various sources using OracleDocLoader, summarizing them either within or outside the database with OracleSummary, and generating embeddings similarly through OracleEmbeddings. It also covers chunking documents according to specific requirements using Advanced Oracle Capabilities from OracleTextSplitter, and finally, storing and indexing these documents in a Vector Store for querying with OracleVS.
 [rag-locally-on-intel-cpu.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/rag-locally-on-intel-cpu.ipynb) | Perform Retrieval-Augmented-Generation (RAG) on locally downloaded open-source models using langchain and open source tools and execute it on Intel Xeon CPU. We showed an example of how to apply RAG on Llama 2 model and enable it to answer the queries related to Intel Q1 2024 earnings release.
 [visual_RAG_vdms.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/visual_RAG_vdms.ipynb) | Performs Visual Retrieval-Augmented-Generation (RAG) using videos and scene descriptions generated by open source models.
-[contextual_rag.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/contextual_rag.ipynb) | Performs contextual retrieval-augmented generation (RAG) prepending chunk-specific explanatory context to each chunk before embedding.
+[contextual_rag.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/contextual_rag.ipynb) | Performs contextual retrieval-augmented generation (RAG) prepending chunk-specific explanatory context to each chunk before embedding.
+[local_rag_agents_intel_cpu.ipynb](https://github.com/langchain-ai/langchain/tree/master/cookbook/local_rag_agents_intel_cpu.ipynb) | Build a RAG agent locally with open source models that routes questions through one of two paths to find answers. The agent generates answers based on documents retrieved from either the vector database or web search. If the vector database lacks relevant information, the agent opts for web search. Open-source models for LLM and embeddings are used locally on an Intel Xeon CPU to execute this pipeline.
diff --git a/cookbook/local_rag_agents_intel_cpu.ipynb b/cookbook/local_rag_agents_intel_cpu.ipynb
diff --git a/docs/docs/concepts/embedding_models.mdx b/docs/docs/concepts/embedding_models.mdx
@@ -25,7 +25,7 @@ Embeddings allow search system to find relevant documents not just based on keyw
 
 (1) **Embed text as a vector**: Embeddings transform text into a numerical vector representation.
 
-(2) **Measure similarity**: Embedding vectors can be comparing using simple mathematical operations.
+(2) **Measure similarity**: Embedding vectors can be compared using simple mathematical operations.
 
 ## Embedding 
 

diff --git a/docs/src/theme/VectorStoreTabs.js b/docs/src/theme/VectorStoreTabs.js
@@ -12,7 +12,7 @@ export default function VectorStoreTabs(props) {
         {
             value: "In-memory",
             label: "In-memory",
-            text: `from langchain_core.vector_stores import InMemoryVectorStore\n\n${vectorStoreVarName} = InMemoryVectorStore(embeddings)`,
+            text: `from langchain_core.vectorstores import InMemoryVectorStore\n\n${vectorStoreVarName} = InMemoryVectorStore(embeddings)`,
             packageName: "langchain-core",
             default: true,
         },

diff --git a/libs/community/extended_testing_deps.txt b/libs/community/extended_testing_deps.txt
@@ -55,7 +55,6 @@ openai<2
 openapi-pydantic>=0.3.2,<0.4
 oracle-ads>=2.9.1,<3
 oracledb>=2.2.0,<3
-outlines[test]>=0.1.0,<0.2
 pandas>=2.0.1,<3
 pdfminer-six>=20221105,<20240706
 pgvector>=0.1.6,<0.2

diff --git a/libs/community/langchain_community/agents/openai_assistant/base.py b/libs/community/langchain_community/agents/openai_assistant/base.py
@@ -543,11 +543,16 @@ def _create_run(self, input: dict) -> Any:
         Returns:
             Any: The created run object.
         """
-        params = {
-            k: v
-            for k, v in input.items()
-            if k in ("instructions", "model", "tools", "tool_resources", "run_metadata")
-        }
+        allowed_assistant_params = (
+            "instructions",
+            "model",
+            "tools",
+            "tool_resources",
+            "run_metadata",
+            "truncation_strategy",
+            "max_prompt_tokens",
+        )
+        params = {k: v for k, v in input.items() if k in allowed_assistant_params}
         return self.client.beta.threads.runs.create(
             input["thread_id"],
             assistant_id=self.assistant_id,

diff --git a/libs/community/langchain_community/vectorstores/oraclevs.py b/libs/community/langchain_community/vectorstores/oraclevs.py
@@ -762,7 +762,7 @@ def similarity_search_by_vector_returning_embeddings(
         k: int,
         filter: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
-    ) -> List[Tuple[Document, float, np.ndarray[np.float32, Any]]]:
+    ) -> List[Tuple[Document, float, np.ndarray]]:
         embedding_arr: Any
         if self.insert_mode == "clob":
             embedding_arr = json.dumps(embedding)

diff --git a/libs/community/langchain_community/vectorstores/supabase.py b/libs/community/langchain_community/vectorstores/supabase.py
@@ -270,7 +270,7 @@ def similarity_search_by_vector_returning_embeddings(
         k: int,
         filter: Optional[Dict[str, Any]] = None,
         postgrest_filter: Optional[str] = None,
-    ) -> List[Tuple[Document, float, np.ndarray[np.float32, Any]]]:
+    ) -> List[Tuple[Document, float, np.ndarray]]:
         match_documents_params = self.match_args(query, filter)
         query_builder = self._client.rpc(self.query_name, match_documents_params)