From c0f374540968dad08cf3ae3ce9622cfbc7d305cd Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 14:16:10 -0400 Subject: [PATCH 01/18] Add crush dumping if content size limit exceeded --- python/langsmith/client.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 6fe4d33c0..07da52a89 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -731,7 +731,7 @@ def _get_settings(self) -> ls_schemas.LangSmithSettings: return self._settings - def _content_above_size(self, content_length: Optional[int]) -> Optional[str]: + def _content_above_size(self, content_length: Optional[int], request: Optional[requests.Request | requests.PreparedRequest] ) -> Optional[str]: if content_length is None or self._info is None: return None info = cast(ls_schemas.LangSmithInfo, self._info) @@ -742,6 +742,11 @@ def _content_above_size(self, content_length: Optional[int]) -> Optional[str]: if size_limit is None: return None if content_length > size_limit: + should_debug_crash_dump = os.getenv("LANGSMITH_DEBUG_CRASH_DUMP") in ["1", "true"] + if should_debug_crash_dump: + with open("content_size_limit_crash_dump.jsonl", "a") as f: + json.dump(request.body, f) + f.write("\n") return ( f"The content length of {content_length} bytes exceeds the " f"maximum size limit of {size_limit} bytes." @@ -918,7 +923,7 @@ def request_with_retries( if e.request else "" ) - size_rec = self._content_above_size(content_length) + size_rec = self._content_above_size(content_length, e.request) if size_rec: recommendation = size_rec except ValueError: From e6cbc10f148e343eb4a76a66d4a4810db7613851 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 14:24:20 -0400 Subject: [PATCH 02/18] Check that request is not None --- python/langsmith/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 07da52a89..90f2c36fd 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -743,9 +743,9 @@ def _content_above_size(self, content_length: Optional[int], request: Optional[r return None if content_length > size_limit: should_debug_crash_dump = os.getenv("LANGSMITH_DEBUG_CRASH_DUMP") in ["1", "true"] - if should_debug_crash_dump: + if should_debug_crash_dump and request is not None: with open("content_size_limit_crash_dump.jsonl", "a") as f: - json.dump(request.body, f) + json.dump(request, f) f.write("\n") return ( f"The content length of {content_length} bytes exceeds the " From ea9682bbedc678de0d179b8c9f3b0209f491284c Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 15:11:02 -0400 Subject: [PATCH 03/18] Reformat with poetry --- python/langsmith/client.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 90f2c36fd..760ab26d9 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -731,7 +731,11 @@ def _get_settings(self) -> ls_schemas.LangSmithSettings: return self._settings - def _content_above_size(self, content_length: Optional[int], request: Optional[requests.Request | requests.PreparedRequest] ) -> Optional[str]: + def _content_above_size( + self, + content_length: Optional[int], + request: Optional[requests.Request | requests.PreparedRequest], + ) -> Optional[str]: if content_length is None or self._info is None: return None info = cast(ls_schemas.LangSmithInfo, self._info) @@ -742,7 +746,10 @@ def _content_above_size(self, content_length: Optional[int], request: Optional[r if size_limit is None: return None if content_length > size_limit: - should_debug_crash_dump = os.getenv("LANGSMITH_DEBUG_CRASH_DUMP") in ["1", "true"] + should_debug_crash_dump = os.getenv("LANGSMITH_DEBUG_CRASH_DUMP") in [ + "1", + "true", + ] if should_debug_crash_dump and request is not None: with open("content_size_limit_crash_dump.jsonl", "a") as f: json.dump(request, f) From 1e12160be59f71a646c667f6cc1b1cdca751c905 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 15:26:33 -0400 Subject: [PATCH 04/18] Kick off file write on separate thread --- python/langsmith/client.py | 7 ++++--- python/langsmith/utils.py | 9 +++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 760ab26d9..79d23b637 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -751,9 +751,10 @@ def _content_above_size( "true", ] if should_debug_crash_dump and request is not None: - with open("content_size_limit_crash_dump.jsonl", "a") as f: - json.dump(request, f) - f.write("\n") + threading.Thread( + target=ls_utils.write_to_crash_dump, + args=(request, "content_size_limit_crash_dump.jsonl"), + ).start() return ( f"The content length of {content_length} bytes exceeds the " f"maximum size limit of {size_limit} bytes." diff --git a/python/langsmith/utils.py b/python/langsmith/utils.py index 4be0ce8fd..a29a7a332 100644 --- a/python/langsmith/utils.py +++ b/python/langsmith/utils.py @@ -15,6 +15,7 @@ import sys import threading import traceback +import json from concurrent.futures import Future, ThreadPoolExecutor from typing import ( Any, @@ -790,3 +791,11 @@ def _get_function_name(fn: Callable, depth: int = 0) -> str: return _get_function_name(fn.__call__, depth + 1) return str(fn) + + +def write_to_crash_dump( + request: requests.Request | requests.PreparedRequest, file_name: str +): + with open(file_name, "a") as f: + json.dump(request, f) + f.write("\n") From 1830cf0b76d52b61c2555a489c33b59a8f3fe6ff Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 15:28:59 -0400 Subject: [PATCH 05/18] Make function private --- python/langsmith/client.py | 2 +- python/langsmith/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 79d23b637..fffa704f6 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -752,7 +752,7 @@ def _content_above_size( ] if should_debug_crash_dump and request is not None: threading.Thread( - target=ls_utils.write_to_crash_dump, + target=ls_utils._write_to_crash_dump, args=(request, "content_size_limit_crash_dump.jsonl"), ).start() return ( diff --git a/python/langsmith/utils.py b/python/langsmith/utils.py index a29a7a332..6d672a00b 100644 --- a/python/langsmith/utils.py +++ b/python/langsmith/utils.py @@ -793,7 +793,7 @@ def _get_function_name(fn: Callable, depth: int = 0) -> str: return str(fn) -def write_to_crash_dump( +def _write_to_crash_dump( request: requests.Request | requests.PreparedRequest, file_name: str ): with open(file_name, "a") as f: From 69f02cca3d5c16c0b3948c52cd92ff0e4f6aa5ee Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 15:34:13 -0400 Subject: [PATCH 06/18] Sort imports --- python/langsmith/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/utils.py b/python/langsmith/utils.py index 6d672a00b..0eb7f12da 100644 --- a/python/langsmith/utils.py +++ b/python/langsmith/utils.py @@ -7,6 +7,7 @@ import copy import enum import functools +import json import logging import os import pathlib @@ -15,7 +16,6 @@ import sys import threading import traceback -import json from concurrent.futures import Future, ThreadPoolExecutor from typing import ( Any, From 4903f7a8b97175baf99ea8678d9c131a5d85b0df Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 15:55:00 -0400 Subject: [PATCH 07/18] Remove threading, write should be inexpensive --- python/langsmith/client.py | 7 +++---- python/langsmith/utils.py | 8 -------- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index fffa704f6..760ab26d9 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -751,10 +751,9 @@ def _content_above_size( "true", ] if should_debug_crash_dump and request is not None: - threading.Thread( - target=ls_utils._write_to_crash_dump, - args=(request, "content_size_limit_crash_dump.jsonl"), - ).start() + with open("content_size_limit_crash_dump.jsonl", "a") as f: + json.dump(request, f) + f.write("\n") return ( f"The content length of {content_length} bytes exceeds the " f"maximum size limit of {size_limit} bytes." diff --git a/python/langsmith/utils.py b/python/langsmith/utils.py index 0eb7f12da..64ec79a6a 100644 --- a/python/langsmith/utils.py +++ b/python/langsmith/utils.py @@ -7,7 +7,6 @@ import copy import enum import functools -import json import logging import os import pathlib @@ -792,10 +791,3 @@ def _get_function_name(fn: Callable, depth: int = 0) -> str: return str(fn) - -def _write_to_crash_dump( - request: requests.Request | requests.PreparedRequest, file_name: str -): - with open(file_name, "a") as f: - json.dump(request, f) - f.write("\n") From 9e39b7acb1a606d38cd68e88f52cff8857a00b6d Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 15:56:26 -0400 Subject: [PATCH 08/18] Ran poetry --- python/langsmith/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/langsmith/utils.py b/python/langsmith/utils.py index 64ec79a6a..4be0ce8fd 100644 --- a/python/langsmith/utils.py +++ b/python/langsmith/utils.py @@ -790,4 +790,3 @@ def _get_function_name(fn: Callable, depth: int = 0) -> str: return _get_function_name(fn.__call__, depth + 1) return str(fn) - From 47a16dcb1dbec144bf2dc54805bbd894b875f83e Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 16:45:10 -0400 Subject: [PATCH 09/18] Test if write is the issue --- python/langsmith/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 760ab26d9..8a9069168 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -752,7 +752,7 @@ def _content_above_size( ] if should_debug_crash_dump and request is not None: with open("content_size_limit_crash_dump.jsonl", "a") as f: - json.dump(request, f) + # json.dump(request, f) f.write("\n") return ( f"The content length of {content_length} bytes exceeds the " From e5d1415cf80f87c5cb7490b19bde751e78118ba7 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Tue, 8 Oct 2024 16:57:07 -0400 Subject: [PATCH 10/18] Add dump --- python/langsmith/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 8a9069168..760ab26d9 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -752,7 +752,7 @@ def _content_above_size( ] if should_debug_crash_dump and request is not None: with open("content_size_limit_crash_dump.jsonl", "a") as f: - # json.dump(request, f) + json.dump(request, f) f.write("\n") return ( f"The content length of {content_length} bytes exceeds the " From d946180e1d9a7753c2370c1bcaf3ba771ee20d2a Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Wed, 9 Oct 2024 11:37:59 -0400 Subject: [PATCH 11/18] Address comments + mask api key --- python/langsmith/client.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 760ab26d9..56a9c9177 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -17,9 +17,11 @@ import collections import concurrent.futures as cf import contextlib +import copy import datetime import decimal import functools +import gzip import importlib import importlib.metadata import io @@ -731,11 +733,7 @@ def _get_settings(self) -> ls_schemas.LangSmithSettings: return self._settings - def _content_above_size( - self, - content_length: Optional[int], - request: Optional[requests.Request | requests.PreparedRequest], - ) -> Optional[str]: + def _content_above_size(self, content_length: Optional[int]) -> Optional[str]: if content_length is None or self._info is None: return None info = cast(ls_schemas.LangSmithInfo, self._info) @@ -746,14 +744,6 @@ def _content_above_size( if size_limit is None: return None if content_length > size_limit: - should_debug_crash_dump = os.getenv("LANGSMITH_DEBUG_CRASH_DUMP") in [ - "1", - "true", - ] - if should_debug_crash_dump and request is not None: - with open("content_size_limit_crash_dump.jsonl", "a") as f: - json.dump(request, f) - f.write("\n") return ( f"The content length of {content_length} bytes exceeds the " f"maximum size limit of {size_limit} bytes." @@ -930,7 +920,7 @@ def request_with_retries( if e.request else "" ) - size_rec = self._content_above_size(content_length, e.request) + size_rec = self._content_above_size(content_length) if size_rec: recommendation = size_rec except ValueError: @@ -943,6 +933,17 @@ def request_with_retries( filler = "*" * (max(0, len(api_key) - 7)) masked_api_key = f"{prefix}{filler}{suffix}" + debug_crash_dump_file = ls_utils.get_env_var( + "LANGSMITH_DEBUG_CRASH_DUMP" + ) + if debug_crash_dump_file is not None: + request_data = copy.deepcopy(e.request) if e.request else {} + if "x-api-key" in request_data.get("headers", {}): + request_data["headers"]["x-api-key"] = masked_api_key + with gzip.open(debug_crash_dump_file, "ab") as f: + json_data = json.dumps(request_data).encode("utf-8") + f.write(json_data + b"\n") + raise ls_utils.LangSmithConnectionError( f"Connection error caused failure to {method} {pathname}" f" in LangSmith API. {recommendation}" From f3d6aa281fa5a26ed8ef52f4065d0dae1d20afc8 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Wed, 9 Oct 2024 11:54:18 -0400 Subject: [PATCH 12/18] Add type hint for copied request_data --- python/langsmith/client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 56a9c9177..5c9c183bb 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -937,7 +937,9 @@ def request_with_retries( "LANGSMITH_DEBUG_CRASH_DUMP" ) if debug_crash_dump_file is not None: - request_data = copy.deepcopy(e.request) if e.request else {} + request_data: ( + requests.Request | requests.PreparedRequest | dict + ) = (copy.deepcopy(e.request) if e.request else {}) if "x-api-key" in request_data.get("headers", {}): request_data["headers"]["x-api-key"] = masked_api_key with gzip.open(debug_crash_dump_file, "ab") as f: From b6fd11b837a4754027cc62fb51b4146a9ba1575a Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Wed, 9 Oct 2024 12:00:52 -0400 Subject: [PATCH 13/18] Fix type hints --- python/langsmith/client.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 5c9c183bb..4590f7ccb 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -936,12 +936,12 @@ def request_with_retries( debug_crash_dump_file = ls_utils.get_env_var( "LANGSMITH_DEBUG_CRASH_DUMP" ) - if debug_crash_dump_file is not None: - request_data: ( - requests.Request | requests.PreparedRequest | dict - ) = (copy.deepcopy(e.request) if e.request else {}) - if "x-api-key" in request_data.get("headers", {}): - request_data["headers"]["x-api-key"] = masked_api_key + if debug_crash_dump_file is not None and e.request is not None: + request_data: requests.Request | requests.PreparedRequest = ( + copy.deepcopy(e.request) + ) + if "x-api-key" in request_data.headers: + request_data.headers["x-api-key"] = masked_api_key with gzip.open(debug_crash_dump_file, "ab") as f: json_data = json.dumps(request_data).encode("utf-8") f.write(json_data + b"\n") From 5c6888b91da7445553b075136ec6e8113f8dd984 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Wed, 9 Oct 2024 13:49:28 -0400 Subject: [PATCH 14/18] Add try except around crash dump writing --- python/langsmith/client.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 4590f7ccb..190c787a8 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -937,14 +937,19 @@ def request_with_retries( "LANGSMITH_DEBUG_CRASH_DUMP" ) if debug_crash_dump_file is not None and e.request is not None: - request_data: requests.Request | requests.PreparedRequest = ( - copy.deepcopy(e.request) - ) - if "x-api-key" in request_data.headers: - request_data.headers["x-api-key"] = masked_api_key - with gzip.open(debug_crash_dump_file, "ab") as f: - json_data = json.dumps(request_data).encode("utf-8") - f.write(json_data + b"\n") + try: + request_data: requests.Request | requests.PreparedRequest = ( + copy.deepcopy(e.request) + ) + if "x-api-key" in request_data.headers: + request_data.headers["x-api-key"] = masked_api_key + with gzip.open(debug_crash_dump_file, "ab") as f: + json_data = json.dumps(request_data).encode("utf-8") + f.write(json_data + b"\n") + recommendation += f" More info can be found in: {debug_crash_dump_file}." + except Exception as write_error: + recommendation += f" Encountered this error while trying to write to {debug_crash_dump_file}: {repr(write_error)}" + continue raise ls_utils.LangSmithConnectionError( f"Connection error caused failure to {method} {pathname}" From 9dd556cca3068d43cdfe1d4a74c6b02ca4a4c5f1 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Wed, 9 Oct 2024 13:51:15 -0400 Subject: [PATCH 15/18] Reformat for style --- python/langsmith/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 190c787a8..19dceb76c 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -938,9 +938,9 @@ def request_with_retries( ) if debug_crash_dump_file is not None and e.request is not None: try: - request_data: requests.Request | requests.PreparedRequest = ( - copy.deepcopy(e.request) - ) + request_data: ( + requests.Request | requests.PreparedRequest + ) = copy.deepcopy(e.request) if "x-api-key" in request_data.headers: request_data.headers["x-api-key"] = masked_api_key with gzip.open(debug_crash_dump_file, "ab") as f: From e41f3a162565fc2f9ab64491d48493204edf0746 Mon Sep 17 00:00:00 2001 From: nhuang-lc Date: Wed, 9 Oct 2024 17:24:53 -0700 Subject: [PATCH 16/18] Update python/langsmith/client.py Co-authored-by: William FH <13333726+hinthornw@users.noreply.github.com> --- python/langsmith/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 19dceb76c..73fe6be04 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -943,7 +943,7 @@ def request_with_retries( ) = copy.deepcopy(e.request) if "x-api-key" in request_data.headers: request_data.headers["x-api-key"] = masked_api_key - with gzip.open(debug_crash_dump_file, "ab") as f: + with gzip.open(debug_crash_dump_file, "ab", encoding="utf-8") as f: json_data = json.dumps(request_data).encode("utf-8") f.write(json_data + b"\n") recommendation += f" More info can be found in: {debug_crash_dump_file}." From feb769cb6b78fbf88d442a0b12dcc2e7619f35bf Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Wed, 9 Oct 2024 22:03:20 -0400 Subject: [PATCH 17/18] Manually build request to log to prevent other sensitive fields --- python/langsmith/client.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 73fe6be04..c907ee952 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -938,14 +938,33 @@ def request_with_retries( ) if debug_crash_dump_file is not None and e.request is not None: try: - request_data: ( - requests.Request | requests.PreparedRequest - ) = copy.deepcopy(e.request) - if "x-api-key" in request_data.headers: - request_data.headers["x-api-key"] = masked_api_key - with gzip.open(debug_crash_dump_file, "ab", encoding="utf-8") as f: - json_data = json.dumps(request_data).encode("utf-8") - f.write(json_data + b"\n") + headers = e.request.headers.copy() + headers.pop("x-api-key", None) + request_to_log = { + "method": e.request.method, + "url": e.request.url, + "headers": headers, + "body": ( + e.request.body + if isinstance(e.request, requests.PreparedRequest) + else None + ), + "data": ( + e.request.data + if isinstance(e.request, requests.Request) + else None + ), + "json": ( + e.request.json + if isinstance(e.request, requests.Request) + else None + ), + } + with gzip.open( + debug_crash_dump_file, "at", encoding="utf-8" + ) as f: + json_data = json.dumps(request_to_log) + f.write(json_data + "\n") recommendation += f" More info can be found in: {debug_crash_dump_file}." except Exception as write_error: recommendation += f" Encountered this error while trying to write to {debug_crash_dump_file}: {repr(write_error)}" From 5af0b29edabbeb1a9e70ae24f5f00fd844d94fd3 Mon Sep 17 00:00:00 2001 From: Nick Huang Date: Wed, 9 Oct 2024 22:07:00 -0400 Subject: [PATCH 18/18] Remove unused imports --- python/langsmith/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index c907ee952..c593603d5 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -17,7 +17,6 @@ import collections import concurrent.futures as cf import contextlib -import copy import datetime import decimal import functools