
Merge branch 'master' into cc/tool_calls_no_args
ccurme committed Jul 5, 2024
2 parents 791cc16 + 902b57d commit efb9578
Showing 62 changed files with 3,917 additions and 470 deletions.
4 changes: 2 additions & 2 deletions docs/docs/integrations/chat/ibm_watsonx.ipynb
@@ -454,7 +454,7 @@
 "\n",
 "Please note that `ChatWatsonx.bind_tools` is on beta state, so right now we only support `mistralai/mixtral-8x7b-instruct-v01` model.\n",
 "\n",
-"You should also redefine `max_new_tokens` parameter to get the entire model response. By default `max_new_tokens` is set ot 20."
+"You should also redefine `max_new_tokens` parameter to get the entire model response. By default `max_new_tokens` is set to 20."
 ]
 },
 {
@@ -577,7 +577,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.13"
+"version": "3.1.undefined"
 }
 },
 "nbformat": 4,
186 changes: 113 additions & 73 deletions docs/docs/integrations/document_loaders/arxiv.ipynb

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions docs/docs/integrations/toolkits/jira.ipynb
@@ -14,7 +14,8 @@
 "To use this tool, you must first set as environment variables:\n",
 "    JIRA_API_TOKEN\n",
 "    JIRA_USERNAME\n",
-"    JIRA_INSTANCE_URL"
+"    JIRA_INSTANCE_URL\n",
+"    JIRA_CLOUD"
 ]
 },
 {
@@ -88,7 +89,8 @@
 "os.environ[\"JIRA_API_TOKEN\"] = \"abc\"\n",
 "os.environ[\"JIRA_USERNAME\"] = \"123\"\n",
 "os.environ[\"JIRA_INSTANCE_URL\"] = \"https://jira.atlassian.com\"\n",
-"os.environ[\"OPENAI_API_KEY\"] = \"xyz\""
+"os.environ[\"OPENAI_API_KEY\"] = \"xyz\"\n",
+"os.environ[\"JIRA_CLOUD\"] = \"True\""
 ]
 },
 {
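The diff above adds `JIRA_CLOUD` to the environment variables the Jira toolkit reads. A minimal sketch of exporting all four before building the toolkit — the placeholder values are the hypothetical ones from the notebook, not working credentials:

```python
import os

# Placeholder values as in the notebook; real credentials come from your
# Atlassian account. JIRA_CLOUD is the variable this commit introduces.
os.environ["JIRA_API_TOKEN"] = "abc"
os.environ["JIRA_USERNAME"] = "123"
os.environ["JIRA_INSTANCE_URL"] = "https://jira.atlassian.com"
os.environ["JIRA_CLOUD"] = "True"

# Sanity-check that every variable the toolkit expects is present.
required = ["JIRA_API_TOKEN", "JIRA_USERNAME", "JIRA_INSTANCE_URL", "JIRA_CLOUD"]
missing = [name for name in required if name not in os.environ]
print(missing)  # → []
```

Checking for missing variables up front gives a clearer error than letting the toolkit fail later with a KeyError.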
2 changes: 1 addition & 1 deletion docs/docs/integrations/tools/google_drive.ipynb
@@ -99,7 +99,7 @@
 },
 "outputs": [],
 "source": [
-"from langchain_googldrive.tools.google_drive.tool import GoogleDriveSearchTool\n",
+"from langchain_googledrive.tools.google_drive.tool import GoogleDriveSearchTool\n",
 "from langchain_googledrive.utilities.google_drive import GoogleDriveAPIWrapper\n",
 "\n",
 "# By default, search only in the filename.\n",
2 changes: 1 addition & 1 deletion docs/docs/tutorials/llm_chain.ipynb
@@ -325,7 +325,7 @@
 "id": "fedf6f13",
 "metadata": {},
 "source": [
-"Next, we can create the PromptTemplate. This will be a combination of the `system_template` as well as a simpler template for where the put the text"
+"Next, we can create the PromptTemplate. This will be a combination of the `system_template` as well as a simpler template for where to put the text to be translated"
 ]
 },
 {
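The combination described in that cell — a `system_template` plus a slot for the text to translate — can be sketched with plain string formatting. This mimics only the shape of a chat prompt template; it is not langchain's `ChatPromptTemplate`, and the template strings are illustrative:

```python
# Plain-Python sketch of a two-part chat prompt: a system template plus a
# user slot for the text to be translated. Not langchain's ChatPromptTemplate.
system_template = "Translate the following into {language}:"
message_templates = [
    ("system", system_template),
    ("user", "{text}"),
]

def format_prompt(**kwargs):
    # str.format ignores unused keyword arguments, so one kwargs dict
    # can fill both templates.
    return [(role, tmpl.format(**kwargs)) for role, tmpl in message_templates]

print(format_prompt(language="Italian", text="hi"))
# → [('system', 'Translate the following into Italian:'), ('user', 'hi')]
```

Keeping the system instructions and the user text in separate messages is what lets the same template be reused for any input text.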
11 changes: 5 additions & 6 deletions docs/docs/tutorials/summarization.ipynb
@@ -640,7 +640,7 @@
 "metadata": {},
 "source": [
 "## Splitting and summarizing in a single chain\n",
-"For convenience, we can wrap both the text splitting of our long document and summarizing in a single `AnalyzeDocumentsChain`."
+"For convenience, we can wrap both the text splitting of our long document and summarizing in a single [chain](/docs/how_to/sequence):"
 ]
 },
 {
@@ -650,12 +650,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from langchain.chains import AnalyzeDocumentChain\n",
+"def split_text(text: str):\n",
+"    return text_splitter.create_documents([text])\n",
 "\n",
-"summarize_document_chain = AnalyzeDocumentChain(\n",
-"    combine_docs_chain=chain, text_splitter=text_splitter\n",
-")\n",
-"summarize_document_chain.invoke(docs[0].page_content)"
+"\n",
+"summarize_document_chain = split_text | chain"
 ]
 },
 {
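The replacement line `summarize_document_chain = split_text | chain` works because piping a plain function into a runnable coerces the function into a runnable step. A minimal pure-Python sketch of that coercion — illustrative only, not the langchain implementation, with toy `split_text` and `chain` stand-ins:

```python
# Toy model of how `plain_function | runnable` composes: functions have no
# __or__, so Python falls back to the runnable's __ror__, which wraps the
# function and returns a two-step sequence. Not the langchain implementation.

class Runnable:
    def __init__(self, func):
        self.func = func

    def invoke(self, value):
        return self.func(value)

    def __ror__(self, other):
        # `plain_function | runnable` lands here: wrap the function first.
        return RunnableSequence(Runnable(other), self)

class RunnableSequence(Runnable):
    def __init__(self, first, second):
        self.first, self.second = first, second

    def invoke(self, value):
        # Feed the first step's output into the second step.
        return self.second.invoke(self.first.invoke(value))

# Toy stand-ins for the notebook's text splitter and summarization chain.
def split_text(text):
    return text.split(". ")

chain = Runnable(lambda docs: f"{len(docs)} chunks summarized")

summarize_document_chain = split_text | chain
print(summarize_document_chain.invoke("One. Two. Three"))  # → 3 chunks summarized
```

This is why the notebook no longer needs `AnalyzeDocumentChain`: function-to-runnable coercion lets the splitting step and the summarization chain compose with a single `|`.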
131 changes: 127 additions & 4 deletions libs/community/langchain_community/document_loaders/arxiv.py
@@ -8,23 +8,146 @@

class ArxivLoader(BaseLoader):
    """Load a query result from `Arxiv`.

    The loader converts the original PDF format into the text.
    Supports all arguments of `ArxivAPIWrapper`.

    Setup:
        Install ``arxiv`` and ``PyMuPDF`` packages.
        ``PyMuPDF`` transforms PDF files downloaded from the arxiv.org site
        into the text format.

        .. code-block:: bash

            pip install -U arxiv pymupdf

    Instantiate:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning",
                # load_max_docs=2,
                # load_all_available_meta=False
            )

    Load:
        .. code-block:: python

            docs = loader.load()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                          Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                            Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                            without explicit fine-tuning...'
            }

    Lazy load:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            # async variant:
            # docs_lazy = await loader.alazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                          Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                            Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                            without explicit fine-tuning...'
            }

    Async load:
        .. code-block:: python

            docs = await loader.aload()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Understanding the Reasoning Ability of Language Models
            From the Perspective of Reasoning Paths Aggre
            {
                'Published': '2024-02-29',
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                          Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                            Wenhu Chen, William Yang Wang',
                'Summary': 'Pre-trained language models (LMs) are able to perform complex reasoning
                            without explicit fine-tuning...'
            }

    Use summaries of articles as docs:
        .. code-block:: python

            from langchain_community.document_loaders import ArxivLoader

            loader = ArxivLoader(
                query="reasoning"
            )

            docs = loader.get_summaries_as_docs()
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Pre-trained language models (LMs) are able to perform complex reasoning
            without explicit fine-tuning
            {
                'Entry ID': 'http://arxiv.org/abs/2402.03268v2',
                'Published': datetime.date(2024, 2, 29),
                'Title': 'Understanding the Reasoning Ability of Language Models From the
                          Perspective of Reasoning Paths Aggregation',
                'Authors': 'Xinyi Wang, Alfonso Amayuelas, Kexun Zhang, Liangming Pan,
                            Wenhu Chen, William Yang Wang'
            }
    """  # noqa: E501

    def __init__(
        self, query: str, doc_content_chars_max: Optional[int] = None, **kwargs: Any
    ):
        """Initialize with a search query to find documents in Arxiv.
        Supports all arguments of `ArxivAPIWrapper`.

        Args:
            query: free text which is used to find documents in Arxiv
            doc_content_chars_max: cut-off limit for the length of a document's content
        """  # noqa: E501
        self.query = query
        self.client = ArxivAPIWrapper(
            doc_content_chars_max=doc_content_chars_max, **kwargs
        )

    def lazy_load(self) -> Iterator[Document]:
        """Lazily load Arxiv documents."""
        yield from self.client.lazy_load(self.query)

    def get_summaries_as_docs(self) -> List[Document]:
        """Use papers' summaries as documents rather than the full Arxiv papers."""
        return self.client.get_summaries_as_docs(self.query)
@@ -0,0 +1,3 @@
+from langchain_community.graph_vectorstores.cassandra import CassandraGraphVectorStore
+
+__all__ = ["CassandraGraphVectorStore"]

0 comments on commit efb9578
