Skip to content

Commit

Permalink
[Python]: Fix concurrency issues in caching for urllib3<2.0 (#1183)
Browse files Browse the repository at this point in the history
  • Loading branch information
hinthornw authored Nov 6, 2024
1 parent 5b03590 commit 9fb8288
Show file tree
Hide file tree
Showing 6 changed files with 710 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .github/actions/python-integration-tests/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ runs:
LANGCHAIN_ENDPOINT: https://beta.api.smith.langchain.com
LANGCHAIN_API_KEY: ${{ inputs.langchain-api-key-beta }}
OPENAI_API_KEY: ${{ inputs.openai-api-key }}
LANGSMITH_TEST_CACHE: tests/cassettes
run: make integration_tests_fast
shell: bash
working-directory: python
Expand All @@ -57,6 +58,7 @@ runs:
LANGCHAIN_API_KEY: ${{ inputs.langchain-api-key-prod }}
OPENAI_API_KEY: ${{ inputs.openai-api-key }}
ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }}
LANGSMITH_TEST_CACHE: tests/cassettes
run: make doctest
shell: bash
working-directory: python
Expand All @@ -69,6 +71,7 @@ runs:
LANGCHAIN_API_KEY: ${{ inputs.langchain-api-key-beta }}
OPENAI_API_KEY: ${{ inputs.openai-api-key }}
ANTHROPIC_API_KEY: ${{ inputs.anthropic-api-key }}
LANGSMITH_TEST_CACHE: tests/cassettes
run: make evals
shell: bash
working-directory: python
Expand Down
86 changes: 86 additions & 0 deletions python/langsmith/_internal/_patch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# mypy: disable-error-code="import-untyped"
import functools

import six # noqa

This comment has been minimized.

Copy link
@nachonavarro

nachonavarro Nov 8, 2024

This line breaks langsmith at the moment, add it to pyproject.toml?

from urllib3 import __version__ as urllib3version # noqa
from urllib3 import connection # noqa


# Copied from https://github.com/urllib3/urllib3/blob/1c994dfc8c5d5ecaee8ed3eb585d4785f5febf6e/src/urllib3/connection.py#L231
def request(self, method, url, body=None, headers=None):
"""Make the request.
This function is based on the urllib3 request method, with modifications
to handle potential issues when using vcrpy in concurrent workloads.
Args:
self: The HTTPConnection instance.
method (str): The HTTP method (e.g., 'GET', 'POST').
url (str): The URL for the request.
body (Optional[Any]): The body of the request.
headers (Optional[dict]): Headers to send with the request.
Returns:
The result of calling the parent request method.
"""
# Update the inner socket's timeout value to send the request.
# This only triggers if the connection is re-used.
if getattr(self, "sock", None) is not None:
self.sock.settimeout(self.timeout)

if headers is None:
headers = {}
else:
# Avoid modifying the headers passed into .request()
headers = headers.copy()
if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
headers["User-Agent"] = connection._get_default_user_agent()
# The above is all the same ^^^
# The following is different:
return self._parent_request(method, url, body=body, headers=headers)


_PATCHED = False


def patch_urllib3():
"""Patch the request method of urllib3 to avoid type errors when using vcrpy.
In concurrent workloads (such as the tracing background queue), the
connection pool can get in a state where an HTTPConnection is created
before vcrpy patches the HTTPConnection class. In urllib3 >= 2.0 this isn't
a problem since they use the proper super().request(...) syntax, but in older
versions, super(HTTPConnection, self).request is used, resulting in a TypeError
since self is no longer a subclass of "HTTPConnection" (which at this point
is vcr.stubs.VCRConnection).
This method patches the class to fix the super() syntax to avoid mixed inheritance.
In the case of the LangSmith tracing logic, it doesn't really matter since we always
exclude cache checks for calls to LangSmith.
The patch is only applied for urllib3 versions older than 2.0.
"""
global _PATCHED
if _PATCHED:
return
from packaging import version

if version.parse(urllib3version) >= version.parse("2.0"):
_PATCHED = True
return

# Lookup the parent class and its request method
parent_class = connection.HTTPConnection.__bases__[0]
parent_request = parent_class.request

def new_request(self, *args, **kwargs):
"""Handle parent request.
This method binds the parent's request method to self and then
calls our modified request function.
"""
self._parent_request = functools.partial(parent_request, self)
return request(self, *args, **kwargs)

connection.HTTPConnection.request = new_request
_PATCHED = True
4 changes: 2 additions & 2 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@
multipart as rqtb_multipart,
)
from typing_extensions import TypeGuard
from urllib3.poolmanager import PoolKey # type: ignore[attr-defined]
from urllib3.util import Retry
from urllib3.poolmanager import PoolKey # type: ignore[attr-defined, import-untyped]
from urllib3.util import Retry # type: ignore[import-untyped]

import langsmith
from langsmith import env as ls_env
Expand Down
5 changes: 4 additions & 1 deletion python/langsmith/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@
import httpx
import requests
from typing_extensions import ParamSpec
from urllib3.util import Retry
from urllib3.util import Retry # type: ignore[import-untyped]

from langsmith import schemas as ls_schemas
from langsmith._internal import _patch as patch_urllib3

_LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -492,6 +493,8 @@ def with_cache(
"vcrpy is required to use caching. Install with:"
'pip install -U "langsmith[vcr]"'
)
# Fix concurrency issue in vcrpy's patching
patch_urllib3.patch_urllib3()

def _filter_request_headers(request: Any) -> Any:
if ignore_hosts and any(request.url.startswith(host) for host in ignore_hosts):
Expand Down
Loading

0 comments on commit 9fb8288

Please sign in to comment.