add path as an option for uploading attachments #1331

Draft: wants to merge 19 commits into main
36 changes: 26 additions & 10 deletions python/langsmith/_internal/_operations.py
@@ -1,7 +1,8 @@
from __future__ import annotations

Check notice on line 1 in python/langsmith/_internal/_operations.py (GitHub Actions / benchmark)

Benchmark results:

create_5_000_run_trees:                        703 ms +- 83 ms    (unstable: std dev is 12% of mean)
create_10_000_run_trees:                       1.43 sec +- 0.14 sec (unstable: std dev is 10% of mean)
create_20_000_run_trees:                       1.43 sec +- 0.18 sec (unstable: std dev is 12% of mean)
dumps_class_nested_py_branch_and_leaf_200x400: 691 us +- 7 us
dumps_class_nested_py_leaf_50x100:             25.1 ms +- 0.2 ms
dumps_class_nested_py_leaf_100x200:            104 ms +- 2 ms
dumps_dataclass_nested_50x100:                 25.5 ms +- 1.2 ms
dumps_pydantic_nested_50x100:                  73.8 ms +- 17.2 ms (unstable: std dev is 23% of mean)
dumps_pydanticv1_nested_50x100:                200 ms +- 2 ms

For the unstable results, pyperf suggests rerunning with more runs, values and/or loops, and running 'python -m pyperf system tune' to reduce system jitter.
Check notice on line 1 in python/langsmith/_internal/_operations.py (GitHub Actions / benchmark)

Comparison against main:

+-----------------------------------------------+----------+------------------------+
| Benchmark                                     | main     | changes                |
+===============================================+==========+========================+
| dumps_pydanticv1_nested_50x100                | 218 ms   | 200 ms: 1.09x faster   |
+-----------------------------------------------+----------+------------------------+
| create_5_000_run_trees                        | 724 ms   | 703 ms: 1.03x faster   |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_branch_and_leaf_200x400 | 695 us   | 691 us: 1.01x faster   |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_50x100             | 24.9 ms  | 25.1 ms: 1.01x slower  |
+-----------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_100x200            | 103 ms   | 104 ms: 1.01x slower   |
+-----------------------------------------------+----------+------------------------+
| dumps_dataclass_nested_50x100                 | 25.1 ms  | 25.5 ms: 1.01x slower  |
+-----------------------------------------------+----------+------------------------+
| create_20_000_run_trees                       | 1.39 sec | 1.43 sec: 1.03x slower |
+-----------------------------------------------+----------+------------------------+
| create_10_000_run_trees                       | 1.39 sec | 1.43 sec: 1.03x slower |
+-----------------------------------------------+----------+------------------------+
| dumps_pydantic_nested_50x100                  | 64.6 ms  | 73.8 ms: 1.14x slower  |
+-----------------------------------------------+----------+------------------------+
| Geometric mean                                | (ref)    | 1.01x slower           |
+-----------------------------------------------+----------+------------------------+

 import itertools
 import logging
+import os
 import uuid
 from typing import Literal, Optional, Union, cast

@@ -246,7 +247,7 @@
                 ),
             )
         if op.attachments:
-            for n, (content_type, valb) in op.attachments.items():
+            for n, (content_type, data) in op.attachments.items():
                 if "." in n:
                     logger.warning(
                         f"Skipping logging of attachment '{n}' "
@@ -256,17 +257,32 @@
                     )
                     continue
 
-                acc_parts.append(
-                    (
-                        f"attachment.{op.id}.{n}",
-                        (
-                            None,
-                            valb,
-                            content_type,
-                            {"Content-Length": str(len(valb))},
-                        ),
-                    )
-                )
+                if isinstance(data, bytes):
+                    acc_parts.append(
+                        (
+                            f"attachment.{op.id}.{n}",
+                            (
+                                None,
+                                data,
+                                content_type,
+                                {"Content-Length": str(len(data))},
+                            ),
+                        )
+                    )
+                else:
+                    file_path = data
+                    file_size = os.path.getsize(file_path)
+                    acc_parts.append(
+                        (
+                            f"attachment.{op.id}.{n}",
+                            (
+                                None,
+                                open(file_path, "rb"),  # type: ignore[arg-type]
+                                f"{content_type}; length={file_size}",
+                                {},
+                            ),
+                        )
+                    )
         return MultipartPartsAndContext(
             acc_parts,
             f"trace={op.trace_id},id={op.id}",
Contributor: do we not need to read this?

Contributor Author: I don't think so; I believe the multipart endpoint allows a buffer to be passed (I also added a test that should cover this).

Member: This is pretty CVE-worthy, so we should be careful about cases where this is allowed/encouraged.

Member: My suggestion would be to require either an environment variable of LANGSMITH_DANGEROUSLY_ALLOW_FILESYSTEM or some client init variable saying this. Otherwise throw an error.

Contributor: We need to explicitly close the files that are opened here, using some try/finally pattern. I would structure the code such that the multipart form is assembled and sent within the same method where we are opening the file handles:

try:
    # Prepare the files for the multipart request
    # Make the POST request
finally:
    # Explicitly close the files
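Putting the two suggestions above together, here is a minimal sketch of a gated, handle-safe upload path. The LANGSMITH_DANGEROUSLY_ALLOW_FILESYSTEM name comes from the review comment; the helper, its signature, and the multipart assembly are hypothetical, not the PR's actual implementation:

import os
from pathlib import Path
from typing import IO, Dict, List, Tuple


def _upload_path_attachments(attachments: Dict[str, Tuple[str, Path]]) -> None:
    """Hypothetical sketch combining both review suggestions."""
    if os.getenv("LANGSMITH_DANGEROUSLY_ALLOW_FILESYSTEM") != "true":
        # Fail loudly unless the caller has explicitly opted in to
        # filesystem access (the reviewers' CVE concern).
        raise ValueError(
            "Reading attachments from disk is disabled; set "
            "LANGSMITH_DANGEROUSLY_ALLOW_FILESYSTEM=true to allow it."
        )
    open_files: List[IO[bytes]] = []
    try:
        # Prepare the files for the multipart request.
        for name, (content_type, path) in attachments.items():
            f = open(path, "rb")
            open_files.append(f)
            # ...append (name, (None, f, content_type, {})) to the form...
        # ...make the POST request while the handles are still open...
    finally:
        # Explicitly close the files, even if the request fails.
        for f in open_files:
            f.close()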
56 changes: 33 additions & 23 deletions python/langsmith/client.py
@@ -1835,7 +1835,12 @@ def read_run(
         response = self.request_with_retries(
             "GET", f"/runs/{_as_uuid(run_id, 'run_id')}"
         )
-        run = ls_schemas.Run(**response.json(), _host_url=self._host_url)
+        attachments = _convert_stored_attachments_to_attachments_dict(
+            response.json(), "s3_urls"
+        )
+        run = ls_schemas.Run(
+            attachments=attachments, **response.json(), _host_url=self._host_url
+        )
         if load_child_runs and run.child_run_ids:
             run = self._load_child_runs(run)
         return run
@@ -2031,7 +2036,13 @@ def list_runs(
         for i, run in enumerate(
             self._get_cursor_paginated_list("/runs/query", body=body_query)
         ):
-            yield ls_schemas.Run(**run, _host_url=self._host_url)
+            # Should this be behind a flag?
+            attachments = _convert_stored_attachments_to_attachments_dict(
+                run, "s3_urls"
+            )
+            yield ls_schemas.Run(
+                attachments=attachments, **run, _host_url=self._host_url
+            )
             if limit is not None and i + 1 >= limit:
                 break
@@ -3894,16 +3905,9 @@ def read_example(
         )
 
         example = response.json()
-        attachments = {}
-        if example["attachment_urls"]:
-            for key, value in example["attachment_urls"].items():
-                response = requests.get(value["presigned_url"], stream=True)
-                response.raise_for_status()
-                reader = io.BytesIO(response.content)
-                attachments[key.removeprefix("attachment.")] = {
-                    "presigned_url": value["presigned_url"],
-                    "reader": reader,
-                }
+        attachments = _convert_stored_attachments_to_attachments_dict(
+            example, "attachment_urls"
+        )
 
         return ls_schemas.Example(
             **{k: v for k, v in example.items() if k != "attachment_urls"},
@@ -3980,17 +3984,9 @@ def list_examples(
         for i, example in enumerate(
             self._get_paginated_list("/examples", params=params)
         ):
-            attachments = {}
-            if example["attachment_urls"]:
-                for key, value in example["attachment_urls"].items():
-                    response = requests.get(value["presigned_url"], stream=True)
-                    response.raise_for_status()
-                    reader = io.BytesIO(response.content)
-                    attachments[key.removeprefix("attachment.")] = {
-                        "presigned_url": value["presigned_url"],
-                        "reader": reader,
-                    }
-
+            attachments = _convert_stored_attachments_to_attachments_dict(
+                example, "attachment_urls"
+            )
             yield ls_schemas.Example(
                 **{k: v for k, v in example.items() if k != "attachment_urls"},
                 attachments=attachments,
@@ -6657,3 +6653,17 @@ def convert_prompt_to_anthropic_format(
         return anthropic._get_request_payload(messages, stop=stop)
     except Exception as e:
         raise ls_utils.LangSmithError(f"Error converting to Anthropic format: {e}")
+
+
+def _convert_stored_attachments_to_attachments_dict(data, attachments_key):
+    attachments_dict = {}
+    if attachments_key in data and data[attachments_key]:
+        for key, value in data[attachments_key].items():
+            response = requests.get(value["presigned_url"], stream=True)
+            response.raise_for_status()
+            reader = io.BytesIO(response.content)
+            attachments_dict[key.removeprefix("attachment.")] = {
+                "presigned_url": value["presigned_url"],
+                "reader": reader,
+            }
+    return attachments_dict
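For illustration, a sketch of how this new helper behaves. The payload below is a hypothetical API response; the "attachment_urls" key and the "attachment." prefix convention come from the diff above, and the URL is made up:

# Hypothetical stored-attachment payload, shaped like read_example's response.
data = {
    "attachment_urls": {
        "attachment.screenshot": {
            "presigned_url": "https://example.com/presigned/screenshot"
        }
    }
}
attachments = _convert_stored_attachments_to_attachments_dict(
    data, "attachment_urls"
)
# attachments == {"screenshot": {"presigned_url": "https://...", "reader": <io.BytesIO>}}

Note that each presigned URL is downloaded eagerly (one GET per attachment), so listing many runs or examples with large attachments pays that cost up front.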
4 changes: 3 additions & 1 deletion python/langsmith/run_trees.py
@@ -318,7 +318,9 @@ def patch(self) -> None:
         """Patch the run tree to the API in a background thread."""
         if not self.end_time:
             self.end()
-        attachments = self.attachments
+        attachments = {
+            a: v for a, v in self.attachments.items() if isinstance(v, tuple)
+        }
         try:
             # Avoid loading the same attachment twice
             if attachments:
Comment on lines +321 to +323

Contributor Author: This was done strictly for typing.

Contributor: is v ever not a tuple?

Member: If so, it might be worth doing in a for loop and raising an error if not; I think that would be unexpected. But mypy probably knows better than me, so there might be a case we're not handling here.

Contributor Author: v is never not a tuple here; this is just because of the union types, I believe. mypy thinks that something could be passed here that is never actually passed.

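A standalone illustration of the narrowing being discussed, using a simplified stand-in for the real union (the alias and function names here are hypothetical):

from typing import Dict, Tuple, Union

# Simplified stand-in for Union[Attachments, Dict[str, AttachmentInfo]] values.
AttachmentValue = Union[Tuple[str, bytes], Dict[str, object]]


def tuples_only(
    attachments: Dict[str, AttachmentValue],
) -> Dict[str, Tuple[str, bytes]]:
    # isinstance() narrows the union, so mypy accepts the result as a dict
    # of (mime_type, bytes) tuples, even though in practice no entry is
    # ever dropped at runtime.
    return {k: v for k, v in attachments.items() if isinstance(v, tuple)}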
13 changes: 11 additions & 2 deletions python/langsmith/schemas.py
@@ -39,6 +39,8 @@
     StrictInt,
 )
 
+from pathlib import Path
+
 from typing_extensions import Literal
 
 SCORE_TYPE = Union[StrictBool, StrictInt, StrictFloat, None]
@@ -63,7 +65,7 @@ def my_function(bar: int, my_val: Attachment):
     data: bytes
 
 
-Attachments = Dict[str, Union[Tuple[str, bytes], Attachment]]
+Attachments = Dict[str, Union[Tuple[str, bytes], Attachment, Tuple[str, Path]]]
 """Attachments associated with the run.
 Each entry is a tuple of (mime_type, bytes), or (mime_type, file_path)"""
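As a quick illustration of the widened alias, both entry forms would now type-check (the attachment names and file path below are made up):

from pathlib import Path

from langsmith.schemas import Attachments

attachments: Attachments = {
    "inline": ("text/plain", b"raw bytes"),  # (mime_type, bytes)
    "report": ("application/pdf", Path("out/report.pdf")),  # (mime_type, file_path)
}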

@@ -370,7 +372,9 @@ class RunBase(BaseModel):
     tags: Optional[List[str]] = None
     """Tags for categorizing or annotating the run."""
 
-    attachments: Attachments = Field(default_factory=dict)
+    attachments: Union[Attachments, Dict[str, AttachmentInfo]] = Field(
+        default_factory=dict
+    )
     """Attachments associated with the run.
     Each entry is a tuple of (mime_type, bytes)."""
Contributor: is this related to this PR?

Contributor Author: yeah, need this for the changes in read_run

@@ -390,6 +394,11 @@ def __repr__(self):
         """Return a string representation of the RunBase object."""
         return f"{self.__class__}(id={self.id}, name='{self.name}', run_type='{self.run_type}')"
 
+    class Config:
+        """Configuration class for the schema."""
+
+        arbitrary_types_allowed = True
+
 
 class Run(RunBase):
     """Run schema when loading from the DB."""
Comment on lines +393 to +397

Contributor Author: Needed to allow the AttachmentInfo.
