Skip to content

Commit

Permalink
refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
isahers1 committed Dec 9, 2024
1 parent 66228e8 commit a5ee599
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 61 deletions.
29 changes: 13 additions & 16 deletions python/langsmith/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3580,6 +3580,7 @@ def _prepate_multipart_data(
def upload_examples_multipart(
self,
*,
dataset_id: ID_TYPE,
uploads: List[ls_schemas.ExampleUploadWithAttachments] = None,
) -> ls_schemas.UpsertExamplesResponse:
"""Upload examples."""
Expand All @@ -3592,10 +3593,6 @@ def upload_examples_multipart(
if uploads is None:
uploads = []
encoder, data = self._prepate_multipart_data(uploads, include_dataset_id=False)
dataset_ids = set([example.dataset_id for example in uploads])
if len(dataset_ids) > 1:
raise ValueError("All examples must be in the same dataset.")
dataset_id = list(dataset_ids)[0]

response = self.request_with_retries(
"POST",
Expand Down Expand Up @@ -3823,21 +3820,21 @@ def read_example(
)

example = response.json()
attachment_urls = {}
attachments_info = {}
if example["attachment_urls"]:
for key, value in example["attachment_urls"].items():
response = requests.get(value["presigned_url"], stream=True)
response.raise_for_status()
reader = io.BytesIO(response.content)
attachment_urls[key.split(".")[1]] = (
value["presigned_url"],
reader,
)
attachments_info[key.split(".")[1]] = {
"presigned_url": value["presigned_url"],
"reader": reader,
}
del example["attachment_urls"]

return ls_schemas.Example(
**example,
attachment_urls=attachment_urls,
attachments_info=attachments_info,
_host_url=self._host_url,
_tenant_id=self._get_optional_tenant_id(),
)
Expand Down Expand Up @@ -3910,21 +3907,21 @@ def list_examples(
for i, example in enumerate(
self._get_paginated_list("/examples", params=params)
):
attachment_urls = {}
attachments_info = {}
if example["attachment_urls"]:
for key, value in example["attachment_urls"].items():
response = requests.get(value["presigned_url"], stream=True)
response.raise_for_status()
reader = io.BytesIO(response.content)
attachment_urls[key.split(".")[1]] = (
value["presigned_url"],
reader,
)
attachments_info[key.split(".")[1]] = {
"presigned_url": value["presigned_url"],
"reader": reader,
}
del example["attachment_urls"]

yield ls_schemas.Example(
**example,
attachment_urls=attachment_urls,
attachments_info=attachments_info,
_host_url=self._host_url,
_tenant_id=self._get_optional_tenant_id(),
)
Expand Down
6 changes: 5 additions & 1 deletion python/langsmith/evaluation/_arunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ def _get_run(r: run_trees.RunTree) -> None:
with rh.tracing_context(enabled=True):
try:
args = (
(example.inputs, example.attachment_urls)
(example.inputs, example.attachments_info)
if include_attachments
else (example.inputs,)
)
Expand All @@ -1044,6 +1044,10 @@ def _get_run(r: run_trees.RunTree) -> None:
client=client,
),
)
if include_attachments and example.attachments_info is not None:
for attachment in example.attachments_info:
reader = example.attachments_info[attachment]["reader"]
reader.seek(0)
except Exception as e:
logger.error(
f"Error running target function: {e}", exc_info=True, stacklevel=1
Expand Down
8 changes: 4 additions & 4 deletions python/langsmith/evaluation/_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1834,17 +1834,17 @@ def _get_run(r: rt.RunTree) -> None:
)
try:
args = (
(example.inputs, example.attachment_urls)
(example.inputs, example.attachments_info)
if include_attachments
else (example.inputs,)
)
fn(
*args,
langsmith_extra=langsmith_extra,
)
if include_attachments and example.attachment_urls is not None:
for attachment in example.attachment_urls:
_, reader = example.attachment_urls[attachment]
if include_attachments and example.attachments_info is not None:
for attachment in example.attachments_info:
reader = example.attachments_info[attachment]["reader"]
reader.seek(0)
except Exception as e:
logger.error(
Expand Down
30 changes: 22 additions & 8 deletions python/langsmith/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,6 @@ class ExampleBase(BaseModel):
inputs: Dict[str, Any] = Field(default_factory=dict)
outputs: Optional[Dict[str, Any]] = Field(default=None)
metadata: Optional[Dict[str, Any]] = Field(default=None)
attachment_urls: Optional[Dict[str, Tuple[str, BinaryIOLike]]] = Field(default=None)
"""Dictionary with attachment names as keys and a tuple of the S3 url
and a reader of the data for the file."""

class Config:
"""Configuration class for the schema."""
Expand All @@ -112,16 +109,30 @@ class ExampleCreate(ExampleBase):
split: Optional[Union[str, List[str]]] = None


class ExampleUpsertWithAttachments(ExampleCreate):
"""Example create with attachments."""
class ExampleUploadWithAttachments(BaseModel):
"""Example upload with attachments."""

id: Optional[UUID]
created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
inputs: Dict[str, Any] = Field(default_factory=dict)
outputs: Optional[Dict[str, Any]] = Field(default=None)
metadata: Optional[Dict[str, Any]] = Field(default=None)
split: Optional[Union[str, List[str]]] = None
attachments: Optional[Attachments] = None


class ExampleUploadWithAttachments(ExampleUpsertWithAttachments):
"""Example upload with attachments."""
class ExampleUpsertWithAttachments(ExampleUploadWithAttachments):
"""Example create with attachments."""

dataset_id: UUID

pass

class AttachmentInfo(TypedDict):
"""Info for an attachment."""

presigned_url: str
reader: BinaryIOLike
# TODO: add mime type


class Example(ExampleBase):
Expand All @@ -135,6 +146,9 @@ class Example(ExampleBase):
modified_at: Optional[datetime] = Field(default=None)
runs: List[Run] = Field(default_factory=list)
source_run_id: Optional[UUID] = None
attachments_info: Optional[Dict[str, AttachmentInfo]] = Field(default=None)
"""Dictionary with attachment names as keys and a tuple of the S3 url
and a reader of the data for the file."""
_host_url: Optional[str] = PrivateAttr(default=None)
_tenant_id: Optional[UUID] = PrivateAttr(default=None)

Expand Down
44 changes: 12 additions & 32 deletions python/tests/integration_tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,6 @@ def test_upload_examples_multipart(langchain_client: Client):
example_id = uuid4()
example_1 = ExampleUploadWithAttachments(
id=example_id,
dataset_id=dataset.id,
inputs={"text": "hello world"},
attachments={
"test_file": ("text/plain", b"test content"),
Expand All @@ -402,13 +401,11 @@ def test_upload_examples_multipart(langchain_client: Client):

# Test example with minimum required fields
example_2 = ExampleUploadWithAttachments(
dataset_id=dataset.id,
inputs={"text": "minimal example"},
)

# Test example with outputs and multiple attachments
example_3 = ExampleUploadWithAttachments(
dataset_id=dataset.id,
inputs={"text": "example with outputs"},
outputs={"response": "test response"},
attachments={
Expand All @@ -419,7 +416,7 @@ def test_upload_examples_multipart(langchain_client: Client):

# Test uploading multiple examples at once
created_examples = langchain_client.upload_examples_multipart(
uploads=[example_1, example_2, example_3]
dataset_id=dataset.id, uploads=[example_1, example_2, example_3]
)
assert created_examples["count"] == 3

Expand All @@ -439,43 +436,28 @@ def test_upload_examples_multipart(langchain_client: Client):
# Verify example with ID was created with correct ID
example_with_id = [ex for ex in examples if ex.id == example_id][0]
assert example_with_id.inputs["text"] == "hello world"
assert "test_file" in example_with_id.attachment_urls
assert "test_file" in example_with_id.attachments_info

# Verify example with outputs and multiple attachments
example_with_outputs = next(
ex
for ex in examples
if ex.outputs and ex.outputs.get("response") == "test response"
)
assert len(example_with_outputs.attachment_urls) == 2
assert "file1" in example_with_outputs.attachment_urls
assert "file2" in example_with_outputs.attachment_urls
assert len(example_with_outputs.attachments_info) == 2
assert "file1" in example_with_outputs.attachments_info
assert "file2" in example_with_outputs.attachments_info

# Test uploading to non-existent dataset fails
fake_id = uuid4()
with pytest.raises(LangSmithNotFoundError):
langchain_client.upload_examples_multipart(
dataset_id=fake_id,
uploads=[
ExampleUploadWithAttachments(
dataset_id=fake_id,
inputs={"text": "should fail"},
)
]
)

# Test uploading examples to different datasets fails
with pytest.raises(ValueError, match="All examples must be in the same dataset"):
langchain_client.upload_examples_multipart(
uploads=[
ExampleUploadWithAttachments(
dataset_id=dataset.id,
inputs={"text": "example 1"},
),
ExampleUploadWithAttachments(
dataset_id=uuid4(),
inputs={"text": "example 2"},
),
]
],
)

# Clean up
Expand Down Expand Up @@ -1283,22 +1265,21 @@ def test_evaluate_with_attachments(langchain_client: Client) -> None:
)

# 2. Create example with attachments
example = ExampleUpsertWithAttachments(
dataset_id=dataset.id,
example = ExampleUploadWithAttachments(
inputs={"question": "What is shown in the image?"},
outputs={"answer": "test image"},
attachments={
"image": ("image/png", b"fake image data for testing"),
},
)

langchain_client.upsert_examples_multipart(upserts=[example])
langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])

# 3. Define target function that uses attachments
def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
# Verify we receive the attachment data
assert "image" in attachments
image_url, image_data = attachments["image"]
image_data = attachments["image"]["reader"]
assert image_data.read() == b"fake image data for testing"
return {"answer": "test image"}

Expand Down Expand Up @@ -1345,12 +1326,11 @@ def test_evaluate_with_no_attachments(langchain_client: Client) -> None:
)

# Verify we can create example the new way without attachments
example = ExampleUpsertWithAttachments(
dataset_id=dataset.id,
example = ExampleUploadWithAttachments(
inputs={"question": "What is 3+1?"},
outputs={"answer": "4"},
)
langchain_client.upsert_examples_multipart(upserts=[example])
langchain_client.upload_examples_multipart(dataset_id=dataset.id, uploads=[example])

def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
# Verify we receive an empty attachments dict
Expand Down

0 comments on commit a5ee599

Please sign in to comment.