From 4752a6bb098e934946a615f9a3e01a39bbb517d7 Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 1 Feb 2024 13:25:49 -0800 Subject: [PATCH 1/7] Update serialization test --- python/tests/unit_tests/test_client.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index f2ceeee78..8b35eeac9 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -4,6 +4,7 @@ import gc import json import os +import threading import time import uuid import weakref @@ -497,10 +498,11 @@ class AttrDict: current_time = datetime.now() class NestedClass: - __slots__ = ["person"] + __slots__ = ["person", "lock"] def __init__(self) -> None: self.person = Person(name="foo") + self.lock = threading.Lock() class MyNamedTuple(NamedTuple): foo: str @@ -540,11 +542,19 @@ class MyNamedTuple(NamedTuple): "a_str": "foo", "an_int": 1, "a_float": 1.1, - "nested_class": {"person": {"name": "foo"}}, + "nested_class": ( + lambda val: val["person"] == {"name": "foo"} + and "_thread.lock object" in val.get("lock") + ), "attr_dict": {"foo": "foo", "bar": 1}, "named_tuple": ["foo", 1], } - assert res == expected + assert set(expected) == set(res) + for k, v in expected.items(): + if callable(v): + assert v(res[k]) + else: + assert res[k] == v def test_host_url() -> None: From 7e3c93a05ef436c21b1e2b109994008f9cf57b2c Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 1 Feb 2024 13:55:49 -0800 Subject: [PATCH 2/7] UPdate serialization --- python/langsmith/client.py | 25 ++++++++++++++++++++----- python/tests/unit_tests/test_client.py | 26 ++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 4b7659d2c..547e51ef6 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -151,11 +151,19 @@ def _default_retry_config() -> Retry: return Retry(**retry_params) # type: ignore -def _serialize_json(obj: Any) -> Any: +_PRIMITIVE_TYPES = (str, int, float, bool) +_MAX_DEPTH = 3 + + +def _serialize_json(obj: Any, depth: int = 0) -> Any: if isinstance(obj, datetime.datetime): return obj.isoformat() if isinstance(obj, uuid.UUID): return str(obj) + if obj is None or isinstance(obj, _PRIMITIVE_TYPES): + return obj + if depth >= _MAX_DEPTH: + return repr(obj) try: serialization_methods = [ ("model_dump_json", True), # Pydantic V2 @@ -180,13 +188,20 @@ def _serialize_json(obj: Any) -> Any: return dataclasses.asdict(obj) try: if hasattr(obj, "__slots__"): - return {slot: getattr(obj, slot) for slot in obj.__slots__} + all_attrs = {slot: getattr(obj, slot, None) for slot in obj.__slots__} + elif hasattr(obj, "__dict__"): + all_attrs = vars(obj) else: - return vars(obj) - except Exception as e: + return repr(obj) + return { + # Avoid crashes on cycles + k: _serialize_json(v, depth + 1) if v is not obj else repr(v) + for k, v in all_attrs.items() + } + except BaseException as e: logger.debug(f"Failed to serialize {type(obj)} to JSON using vars: {e}") return repr(obj) - except Exception as e: + except BaseException as e: logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}") return repr(obj) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 8b35eeac9..68d5e7a78 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -504,6 +504,27 @@ def __init__(self) -> None: self.person = Person(name="foo") self.lock = threading.Lock() + class CyclicClass: + def __init__(self) -> None: + self.cyclic = self + + def __repr__(self) -> str: + return "SoCyclic" + from playwright.sync_api import sync_playwright + browser = sync_playwright().start().chromium.launch(headless=True, args=None) + class CyclicClass2: + def __init__(self) -> None: + self.cyclic = None + self.other = None + self.page = browser.new_page() + + def __repr__(self) -> str: + return "SoCyclic2" + + cycle_2 = CyclicClass2() + cycle_2.cyclic = CyclicClass2() + cycle_2.cyclic.other = cycle_2 + class MyNamedTuple(NamedTuple): foo: str bar: int @@ -525,6 +546,8 @@ class MyNamedTuple(NamedTuple): "nested_class": NestedClass(), "attr_dict": AttrDict(foo="foo", bar=1), "named_tuple": MyNamedTuple(foo="foo", bar=1), + "cyclic": CyclicClass(), + # "cyclic2": cycle_2, } res = json.loads(json.dumps(to_serialize, default=_serialize_json)) @@ -548,6 +571,9 @@ class MyNamedTuple(NamedTuple): ), "attr_dict": {"foo": "foo", "bar": 1}, "named_tuple": ["foo", 1], + "cyclic": {"cyclic": "SoCyclic"}, + # We don't really care about this case just want to not err + # "cyclic2": lambda _: True, } assert set(expected) == set(res) for k, v in expected.items(): From 0d29ecd36ce527d74e45e10f81fb6b3d84ff0906 Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:00:52 -0800 Subject: [PATCH 3/7] fmt --- python/tests/unit_tests/test_client.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 68d5e7a78..212d6cf65 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -11,7 +11,7 @@ from datetime import datetime from enum import Enum from io import BytesIO -from typing import NamedTuple, Optional +from typing import Any, NamedTuple, Optional from unittest import mock from unittest.mock import MagicMock, patch @@ -510,12 +510,15 @@ def __init__(self) -> None: def __repr__(self) -> str: return "SoCyclic" + from playwright.sync_api import sync_playwright + browser = sync_playwright().start().chromium.launch(headless=True, args=None) + class CyclicClass2: def __init__(self) -> None: - self.cyclic = None - self.other = None + self.cyclic: Any = None + self.other: Any = None self.page = browser.new_page() def __repr__(self) -> str: From 23cfe8b6495631ad32139a862fe44f7e19e7850b Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:31:32 -0800 Subject: [PATCH 4/7] unused --- python/tests/unit_tests/test_client.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 212d6cf65..4d3f9ff12 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -511,15 +511,10 @@ def __init__(self) -> None: def __repr__(self) -> str: return "SoCyclic" - from playwright.sync_api import sync_playwright - - browser = sync_playwright().start().chromium.launch(headless=True, args=None) - class CyclicClass2: def __init__(self) -> None: self.cyclic: Any = None self.other: Any = None - self.page = browser.new_page() def __repr__(self) -> str: return "SoCyclic2" @@ -550,7 +545,7 @@ class MyNamedTuple(NamedTuple): "attr_dict": AttrDict(foo="foo", bar=1), "named_tuple": MyNamedTuple(foo="foo", bar=1), "cyclic": CyclicClass(), - # "cyclic2": cycle_2, + "cyclic2": cycle_2, } res = json.loads(json.dumps(to_serialize, default=_serialize_json)) @@ -576,7 +571,7 @@ class MyNamedTuple(NamedTuple): "named_tuple": ["foo", 1], "cyclic": {"cyclic": "SoCyclic"}, # We don't really care about this case just want to not err - # "cyclic2": lambda _: True, + "cyclic2": lambda _: True, } assert set(expected) == set(res) for k, v in expected.items(): From d3d579c6f61448307811fdb3d4a5450da6ec6fac Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:19:15 -0800 Subject: [PATCH 5/7] support sets, lists, etc. --- python/langsmith/client.py | 45 +++++++++++++------------- python/tests/unit_tests/test_client.py | 15 ++++++++- 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 547e51ef6..08face41b 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -151,20 +151,24 @@ def _default_retry_config() -> Retry: return Retry(**retry_params) # type: ignore -_PRIMITIVE_TYPES = (str, int, float, bool) +_PRIMITIVE_TYPES = (str, int, float, bool, tuple, list, dict) _MAX_DEPTH = 3 def _serialize_json(obj: Any, depth: int = 0) -> Any: - if isinstance(obj, datetime.datetime): - return obj.isoformat() - if isinstance(obj, uuid.UUID): - return str(obj) - if obj is None or isinstance(obj, _PRIMITIVE_TYPES): - return obj - if depth >= _MAX_DEPTH: - return repr(obj) try: + if isinstance(obj, datetime.datetime): + return obj.isoformat() + if isinstance(obj, uuid.UUID): + return str(obj) + if obj is None or isinstance(obj, _PRIMITIVE_TYPES): + return obj + if isinstance(obj, set): + return list(obj) + if isinstance(obj, bytes): + return obj.decode("utf-8") + if depth >= _MAX_DEPTH: + return repr(obj) serialization_methods = [ ("model_dump_json", True), # Pydantic V2 ("json", True), # Pydantic V1 @@ -186,21 +190,16 @@ def _serialize_json(obj: Any, depth: int = 0) -> Any: if dataclasses.is_dataclass(obj): # Regular dataclass return dataclasses.asdict(obj) - try: - if hasattr(obj, "__slots__"): - all_attrs = {slot: getattr(obj, slot, None) for slot in obj.__slots__} - elif hasattr(obj, "__dict__"): - all_attrs = vars(obj) - else: - return repr(obj) - return { - # Avoid crashes on cycles - k: _serialize_json(v, depth + 1) if v is not obj else repr(v) - for k, v in all_attrs.items() - } - except BaseException as e: - logger.debug(f"Failed to serialize {type(obj)} to JSON using vars: {e}") + if hasattr(obj, "__slots__"): + all_attrs = {slot: getattr(obj, slot, None) for slot in obj.__slots__} + elif hasattr(obj, "__dict__"): + all_attrs = vars(obj) + else: return repr(obj) + return { + k: _serialize_json(v, depth=depth + 1) if v is not obj else repr(v) + for k, v in all_attrs.items() + } except BaseException as e: logger.debug(f"Failed to serialize {type(obj)} to JSON: {e}") return repr(obj) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 4d3f9ff12..edfa29dcb 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -460,6 +460,11 @@ class MyClass: def __init__(self, x: int) -> None: self.x = x self.y = "y" + self.a_list = [1, 2, 3] + self.a_tuple = (1, 2, 3) + self.a_set = {1, 2, 3} + self.a_dict = {"foo": "bar"} + self.my_bytes = b"foo" class MyClassWithSlots: __slots__ = ["x", "y"] @@ -552,7 +557,15 @@ class MyNamedTuple(NamedTuple): expected = { "uid": str(uid), "time": current_time.isoformat(), - "my_class": {"x": 1, "y": "y"}, + "my_class": { + "x": 1, + "y": "y", + "a_list": [1, 2, 3], + "a_tuple": [1, 2, 3], + "a_set": [1, 2, 3], + "a_dict": {"foo": "bar"}, + "my_bytes": "foo", + }, "my_slotted_class": {"x": 1, "y": "y"}, "my_dataclass": {"foo": "foo", "bar": 1}, "my_enum": "foo", From 8f332471d3cefa9a92339f3d615bc8491e95a385 Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:23:51 -0800 Subject: [PATCH 6/7] Update to handle lists/tees/etc --- python/tests/unit_tests/test_client.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index edfa29dcb..2d4570e7a 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -2,6 +2,7 @@ import asyncio import dataclasses import gc +import itertools import json import os import threading @@ -466,6 +467,12 @@ def __init__(self, x: int) -> None: self.a_dict = {"foo": "bar"} self.my_bytes = b"foo" + class ClassWithTee: + def __init__(self) -> None: + tee_a, tee_b = itertools.tee(range(10)) + self.tee_a = tee_a + self.tee_b = tee_b + class MyClassWithSlots: __slots__ = ["x", "y"] @@ -536,6 +543,7 @@ class MyNamedTuple(NamedTuple): "uid": uid, "time": current_time, "my_class": MyClass(1), + "class_with_tee": ClassWithTee(), "my_slotted_class": MyClassWithSlots(1), "my_dataclass": MyDataclass("foo", 1), "my_enum": MyEnum.FOO, @@ -566,6 +574,9 @@ class MyNamedTuple(NamedTuple): "a_dict": {"foo": "bar"}, "my_bytes": "foo", }, + "class_with_tee": lambda val: all( + ["_tee object" in val[key] for key in ["tee_a", "tee_b"]] + ), "my_slotted_class": {"x": 1, "y": "y"}, "my_dataclass": {"foo": "foo", "bar": 1}, "my_enum": "foo", From 56a6d63282649686ac814806be61a991f10ae3ef Mon Sep 17 00:00:00 2001 From: William Fu-Hinthorn <13333726+hinthornw@users.noreply.github.com> Date: Thu, 1 Feb 2024 15:25:42 -0800 Subject: [PATCH 7/7] agani --- python/tests/unit_tests/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index 2d4570e7a..0e5302183 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -514,7 +514,7 @@ class NestedClass: def __init__(self) -> None: self.person = Person(name="foo") - self.lock = threading.Lock() + self.lock = [threading.Lock()] class CyclicClass: def __init__(self) -> None: @@ -589,7 +589,7 @@ class MyNamedTuple(NamedTuple): "a_float": 1.1, "nested_class": ( lambda val: val["person"] == {"name": "foo"} - and "_thread.lock object" in val.get("lock") + and "_thread.lock object" in str(val.get("lock")) ), "attr_dict": {"foo": "foo", "bar": 1}, "named_tuple": ["foo", 1],