Merge branch 'master' into master

langchain-ai · Sep 6, 2023 · e017577 · e017577
2 parents 493c2e6 + c732d8f
commit e017577
Show file tree

Hide file tree

Showing 13 changed files with 55 additions and 26 deletions.
diff --git a/.github/actions/poetry_setup/action.yml b/.github/actions/poetry_setup/action.yml
@@ -39,10 +39,35 @@ runs:
       with:
         path: |
           /opt/pipx/venvs/poetry
-          /opt/pipx_bin/poetry
         # This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
         key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
 
+    - name: Refresh shell hashtable and fixup softlinks
+      if: steps.cache-bin-poetry.outputs.cache-hit == 'true'
+      shell: bash
+      env:
+        POETRY_VERSION: ${{ inputs.poetry-version }}
+        PYTHON_VERSION: ${{ inputs.python-version }}
+      run: |
+        set -eux
+
+        # Refresh the shell hashtable, to ensure correct `which` output.
+        hash -r
+
+        # `actions/cache@v3` doesn't always seem able to correctly unpack softlinks.
+        # Delete and recreate the softlinks pipx expects to have.
+        rm /opt/pipx/venvs/poetry/bin/python
+        cd /opt/pipx/venvs/poetry/bin
+        ln -s "$(which "python$PYTHON_VERSION")" python
+        chmod +x python
+        cd /opt/pipx_bin/
+        ln -s /opt/pipx/venvs/poetry/bin/poetry poetry
+        chmod +x poetry
+
+        # Ensure everything got set up correctly.
+        /opt/pipx/venvs/poetry/bin/python --version
+        /opt/pipx_bin/poetry --version
+
     - name: Install poetry
       if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
       shell: bash

diff --git a/.github/workflows/langchain_ci.yml b/.github/workflows/langchain_ci.yml
@@ -6,6 +6,8 @@ on:
     branches: [ master ]
   pull_request:
     paths:
+      - '.github/actions/poetry_setup/action.yml'
+      - '.github/tools/**'
       - '.github/workflows/_lint.yml'
       - '.github/workflows/_test.yml'
       - '.github/workflows/_pydantic_compatibility.yml'

diff --git a/.github/workflows/langchain_experimental_ci.yml b/.github/workflows/langchain_experimental_ci.yml
@@ -6,6 +6,8 @@ on:
     branches: [ master ]
   pull_request:
     paths:
+      - '.github/actions/poetry_setup/action.yml'
+      - '.github/tools/**'
       - '.github/workflows/_lint.yml'
       - '.github/workflows/_test.yml'
       - '.github/workflows/langchain_experimental_ci.yml'

diff --git a/docs/docs_skeleton/docs/use_cases/question_answering/_category_.yml b/docs/docs_skeleton/docs/use_cases/question_answering/_category_.yml
@@ -0,0 +1 @@
+position: 0
diff --git a/docs/docs_skeleton/docs/use_cases/web_scraping/index.mdx b/docs/docs_skeleton/docs/use_cases/web_scraping/index.mdx
@@ -1,7 +1,3 @@
----
-sidebar_position: 3
----
-
 # Web Scraping
 
 Web scraping has historically been a challenging endeavor due to the ever-changing nature of website structures, making it tedious for developers to maintain their scraping scripts. Traditional methods often rely on specific HTML tags and patterns which, when altered, can disrupt data extraction processes.

diff --git a/docs/extras/use_cases/more/_category_.yml b/docs/extras/use_cases/more/_category_.yml
@@ -1,2 +1 @@
 label: 'More'
-position: 1
diff --git a/docs/extras/use_cases/more/code_writing/index.mdx b/docs/extras/use_cases/more/code_writing/index.mdx
@@ -1,7 +1,3 @@
----
-sidebar_position: 0
----
-
 # Code writing
 
 :::warning

diff --git a/docs/extras/use_cases/more/self_check/index.mdx b/docs/extras/use_cases/more/self_check/index.mdx
@@ -1,7 +1,3 @@
----
-sidebar_position: 0
----
-
 # Self-checking
 
 One of the main issues with using LLMs is that they can often hallucinate and make false claims. One of the surprisingly effective ways to remediate this is to use the LLM itself to check its own answers.

diff --git a/libs/langchain/langchain/chains/graph_qa/sparql.py b/libs/langchain/langchain/chains/graph_qa/sparql.py
@@ -84,17 +84,17 @@ def _call(
         _intent = self.sparql_intent_chain.run({"prompt": prompt}, callbacks=callbacks)
         intent = _intent.strip()
 
-        if "SELECT" not in intent and "UPDATE" not in intent:
-            raise ValueError(
-                "I am sorry, but this prompt seems to fit none of the currently "
-                "supported SPARQL query types, i.e., SELECT and UPDATE."
-            )
-        elif intent.find("SELECT") < intent.find("UPDATE"):
+        if "SELECT" in intent and "UPDATE" not in intent:
             sparql_generation_chain = self.sparql_generation_select_chain
             intent = "SELECT"
-        else:
+        elif "UPDATE" in intent and "SELECT" not in intent:
             sparql_generation_chain = self.sparql_generation_update_chain
             intent = "UPDATE"
+        else:
+            raise ValueError(
+                "I am sorry, but this prompt seems to fit none of the currently "
+                "supported SPARQL query types, i.e., SELECT and UPDATE."
+            )
 
         _run_manager.on_text("Identified intent:", end="\n", verbose=self.verbose)
         _run_manager.on_text(intent, color="green", end="\n", verbose=self.verbose)

diff --git a/libs/langchain/langchain/document_loaders/async_html.py b/libs/langchain/langchain/document_loaders/async_html.py
@@ -1,7 +1,8 @@
 import asyncio
 import logging
 import warnings
-from typing import Any, Dict, Iterator, List, Optional, Union
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, Iterator, List, Optional, Union, cast
 
 import aiohttp
 import requests
@@ -129,9 +130,18 @@ def lazy_load(self) -> Iterator[Document]:
     def load(self) -> List[Document]:
         """Load text from the url(s) in web_path."""
 
-        results = asyncio.run(self.fetch_all(self.web_paths))
+        try:
+            # Raises RuntimeError if there is no running event loop.
+            asyncio.get_running_loop()
+            # An event loop is already running in this thread, so run the async
+            # code in its own loop on a separate thread.
+            with ThreadPoolExecutor(max_workers=1) as executor:
+                future = executor.submit(asyncio.run, self.fetch_all(self.web_paths))
+                results = future.result()
+        except RuntimeError:
+            results = asyncio.run(self.fetch_all(self.web_paths))
         docs = []
-        for i, text in enumerate(results):
+        for i, text in enumerate(cast(List[str], results)):
             metadata = {"source": self.web_paths[i]}
             docs.append(Document(page_content=text, metadata=metadata))
 

diff --git a/libs/langchain/langchain/llms/huggingface_text_gen_inference.py b/libs/langchain/langchain/llms/huggingface_text_gen_inference.py
@@ -65,7 +65,7 @@ class HuggingFaceTextGenInference(LLM):
     typical_p: Optional[float] = 0.95
     """Typical Decoding mass. See [Typical Decoding for Natural Language
     Generation](https://arxiv.org/abs/2202.00666) for more information."""
-    temperature: float = 0.8
+    temperature: Optional[float] = 0.8
     """The value used to modulate the logits distribution."""
     repetition_penalty: Optional[float] = None
     """The parameter for repetition penalty. 1.0 means no penalty.

diff --git a/libs/langchain/langchain/output_parsers/__init__.py b/libs/langchain/langchain/output_parsers/__init__.py
@@ -20,6 +20,7 @@
 from langchain.output_parsers.list import (
     CommaSeparatedListOutputParser,
     ListOutputParser,
+    NumberedListOutputParser,
 )
 from langchain.output_parsers.pydantic import PydanticOutputParser
 from langchain.output_parsers.rail_parser import GuardrailsOutputParser
@@ -36,6 +37,7 @@
     "EnumOutputParser",
     "GuardrailsOutputParser",
     "ListOutputParser",
+    "NumberedListOutputParser",
     "OutputFixingParser",
     "PydanticOutputParser",
     "RegexDictParser",

diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain"
-version = "0.0.281"
+version = "0.0.282"
 description = "Building applications with LLMs through composability"
 authors = []
 license = "MIT"