[DO NOT MERGE] add mime type to attachment info #1341

Draft · wants to merge 2 commits into base: main
2 changes: 2 additions & 0 deletions python/langsmith/client.py
@@ -1,4 +1,4 @@
"""Client for interacting with the LangSmith API.

Check notice on line 1 in python/langsmith/client.py (GitHub Actions / benchmark): Benchmark results

create_5_000_run_trees: Mean +- std dev: 699 ms +- 102 ms (warning: result may be unstable; the standard deviation (102 ms) is 15% of the mean)
create_10_000_run_trees: Mean +- std dev: 1.35 sec +- 0.16 sec (warning: result may be unstable; the standard deviation (157 ms) is 12% of the mean)
create_20_000_run_trees: Mean +- std dev: 1.39 sec +- 0.18 sec (warning: result may be unstable; the standard deviation (175 ms) is 13% of the mean)
dumps_class_nested_py_branch_and_leaf_200x400: Mean +- std dev: 692 us +- 10 us
dumps_class_nested_py_leaf_50x100: Mean +- std dev: 24.8 ms +- 0.2 ms
dumps_class_nested_py_leaf_100x200: Mean +- std dev: 102 ms +- 2 ms
dumps_dataclass_nested_50x100: Mean +- std dev: 25.0 ms +- 0.2 ms
dumps_pydantic_nested_50x100: Mean +- std dev: 68.6 ms +- 15.0 ms (warning: result may be unstable; the standard deviation (15.0 ms) is 22% of the mean)
dumps_pydanticv1_nested_50x100: Mean +- std dev: 194 ms +- 2 ms

For the unstable results, pyperf suggests rerunning the benchmark with more runs, values and/or loops, running 'python -m pyperf system tune' to reduce system jitter, and using pyperf stats, pyperf dump and pyperf hist to analyze results (the --quiet option hides these warnings).

Check notice on line 1 in python/langsmith/client.py (GitHub Actions / benchmark): Comparison against main

+------------------------------------------------+----------+------------------------+
| Benchmark                                      | main     | changes                |
+================================================+==========+========================+
| dumps_pydanticv1_nested_50x100                 | 218 ms   | 194 ms: 1.13x faster   |
+------------------------------------------------+----------+------------------------+
| create_5_000_run_trees                         | 724 ms   | 699 ms: 1.04x faster   |
+------------------------------------------------+----------+------------------------+
| create_10_000_run_trees                        | 1.39 sec | 1.35 sec: 1.03x faster |
+------------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_100x200             | 103 ms   | 102 ms: 1.01x faster   |
+------------------------------------------------+----------+------------------------+
| dumps_class_nested_py_leaf_50x100              | 24.9 ms  | 24.8 ms: 1.00x faster  |
+------------------------------------------------+----------+------------------------+
| dumps_class_nested_py_branch_and_leaf_200x400  | 695 us   | 692 us: 1.00x faster   |
+------------------------------------------------+----------+------------------------+
| dumps_dataclass_nested_50x100                  | 25.1 ms  | 25.0 ms: 1.00x faster  |
+------------------------------------------------+----------+------------------------+
| create_20_000_run_trees                        | 1.39 sec | 1.39 sec: 1.00x faster |
+------------------------------------------------+----------+------------------------+
| dumps_pydantic_nested_50x100                   | 64.6 ms  | 68.6 ms: 1.06x slower  |
+------------------------------------------------+----------+------------------------+
| Geometric mean                                 | (ref)    | 1.02x faster           |
+------------------------------------------------+----------+------------------------+

Use the client to customize API keys / workspace connections, SSL certs,
etc. for tracing.
@@ -3903,6 +3903,7 @@
attachments[key.removeprefix("attachment.")] = {
    "presigned_url": value["presigned_url"],
    "reader": reader,
    "mime_type": value["mime_type"],
}

return ls_schemas.Example(
@@ -3989,6 +3990,7 @@
attachments[key.removeprefix("attachment.")] = {
    "presigned_url": value["presigned_url"],
    "reader": reader,
    "mime_type": value["mime_type"],
}

yield ls_schemas.Example(
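A minimal sketch of how a caller might consume the new field after this change; the dataset name below is a placeholder, and it assumes the dataset's examples were uploaded with attachments:

from langsmith import Client

client = Client()

# Hypothetical dataset name; any dataset whose examples carry attachments works.
example = next(client.list_examples(dataset_name="my-attachments-dataset"))

for name, info in example.attachments.items():
    # Each attachment entry now exposes its MIME type alongside the
    # presigned URL and the binary reader (see the diff above).
    data = info["reader"].read()
    print(f"{name}: {info['mime_type']}, {len(data)} bytes")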
2 changes: 1 addition & 1 deletion python/langsmith/schemas.py
@@ -131,7 +131,7 @@ class AttachmentInfo(TypedDict):

    presigned_url: str
    reader: BinaryIOLike
    # TODO: add mime type
    mime_type: str


class Example(ExampleBase):
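With mime_type declared on AttachmentInfo, downstream code can rely on the key being present and typed. A small illustrative sketch; the helper below is hypothetical and not part of the SDK:

from langsmith.schemas import AttachmentInfo


def describe_attachment(name: str, info: AttachmentInfo) -> str:
    # mime_type is a first-class key on AttachmentInfo after this change, so a
    # type checker can validate this lookup instead of treating the attachment
    # info as an arbitrary dict.
    return f"{name} ({info['mime_type']}): {info['presigned_url']}"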
108 changes: 108 additions & 0 deletions python/tests/integration_tests/test_client.py
@@ -1253,6 +1253,114 @@ def test_list_examples_attachments_keys(langchain_client: Client) -> None:

    langchain_client.delete_dataset(dataset_id=dataset.id)

@pytest.mark.skip(reason="Need to land https://github.com/langchain-ai/langchainplus/pull/7415 first")
def test_mime_type_is_propagated(langchain_client: Client) -> None:
    """Test that the mime type is propagated correctly."""
    dataset_name = "__test_mime_type_is_propagated" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(dataset_name=dataset_name)

    langchain_client.upload_examples_multipart(
        dataset_id=dataset.id,
        uploads=[
            ExampleUploadWithAttachments(
                inputs={"text": "hello world"},
                outputs={"response": "hi there"},
                attachments={
                    "test_file": ("text/plain", b"test content"),
                },
            )
        ],
    )

    example = next(langchain_client.list_examples(dataset_id=dataset.id))
    assert example.attachments["test_file"]["mime_type"] == "text/plain"

    example = langchain_client.read_example(example_id=example.id)
    assert example.attachments["test_file"]["mime_type"] == "text/plain"

    langchain_client.delete_dataset(dataset_id=dataset.id)

@pytest.mark.skip(reason="Need to land https://github.com/langchain-ai/langchainplus/pull/7415 first")
def test_evaluate_mime_type_is_propagated(langchain_client: Client) -> None:
    """Test that the mime type is propagated correctly when evaluating."""
    dataset_name = "__test_evaluate_mime_type_is_propagated" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(dataset_name=dataset_name)

    langchain_client.upload_examples_multipart(
        dataset_id=dataset.id,
        uploads=[
            ExampleUploadWithAttachments(
                inputs={"text": "hello world"},
                outputs={"response": "hi there"},
                attachments={
                    "test_file": ("text/plain", b"test content"),
                },
            )
        ],
    )

    def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {"answer": "hi there"}

    def evaluator(outputs: dict, reference_outputs: dict, attachments: dict) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {
            "score": float(
                reference_outputs.get("response") == outputs.get("answer")  # type: ignore
            )
        }

    langchain_client.evaluate(
        target,
        data=dataset_name,
        evaluators=[evaluator],
    )

    langchain_client.delete_dataset(dataset_name=dataset_name)

@pytest.mark.skip(reason="Need to land https://github.com/langchain-ai/langchainplus/pull/7415 first")
async def test_aevaluate_mime_type_is_propagated(langchain_client: Client) -> None:
    """Test that the mime type is propagated correctly when evaluating (async)."""
    dataset_name = "__test_aevaluate_mime_type_is_propagated" + uuid4().hex[:4]
    dataset = langchain_client.create_dataset(dataset_name=dataset_name)

    langchain_client.upload_examples_multipart(
        dataset_id=dataset.id,
        uploads=[
            ExampleUploadWithAttachments(
                inputs={"text": "hello world"},
                outputs={"response": "hi there"},
                attachments={
                    "test_file": ("text/plain", b"test content"),
                },
            )
        ],
    )

    async def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {"answer": "hi there"}

    async def evaluator(outputs: dict, reference_outputs: dict, attachments: dict) -> Dict[str, Any]:
        # Verify we receive the attachment data
        assert attachments["test_file"]["mime_type"] == "text/plain"
        return {
            "score": float(
                reference_outputs.get("response") == outputs.get("answer")  # type: ignore
            )
        }

    await langchain_client.aevaluate(
        target,
        data=dataset_name,
        evaluators=[evaluator],
    )

    langchain_client.delete_dataset(dataset_name=dataset_name)

def test_evaluate_with_attachments_multiple_evaluators(
    langchain_client: Client,
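Beyond asserting the value, a target or evaluator could branch on the MIME type to decode each attachment appropriately. A minimal sketch under that assumption; the decoding logic below is illustrative and not part of this PR:

import json
from typing import Any, Dict


def target(inputs: Dict[str, Any], attachments: Dict[str, Any]) -> Dict[str, Any]:
    decoded = {}
    for name, info in attachments.items():
        raw = info["reader"].read()
        if info["mime_type"] == "application/json":
            decoded[name] = json.loads(raw)
        elif info["mime_type"].startswith("text/"):
            decoded[name] = raw.decode("utf-8")
        else:
            # Leave binary payloads (images, audio, etc.) untouched.
            decoded[name] = raw
    return {"answer": f"processed {len(decoded)} attachment(s)"}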