Bagatur/rfc pred eval inline #1236

Draft: wants to merge 42 commits into main from isaac/multipartstuff
Commits (42)

e9e2131  wip (isahers1, Nov 13, 2024)
ff30541  unit test (isahers1, Nov 13, 2024)
152ec59  integration test skeleton (isahers1, Nov 13, 2024)
27b1546  integration test passing (isahers1, Nov 13, 2024)
53a0f14  wip (isahers1, Nov 14, 2024)
025aa6d  wip (isahers1, Nov 14, 2024)
4208b6e  Update python/langsmith/client.py (isahers1, Nov 14, 2024)
fd16baa  more edits (isahers1, Nov 14, 2024)
28a4677  nit (isahers1, Nov 14, 2024)
816302d  nit (isahers1, Nov 14, 2024)
aa947a6  remove dev endpoint in test (isahers1, Nov 18, 2024)
a82063b  typo (isahers1, Nov 18, 2024)
b18df6b  Merge branch 'main' into isaac/multipartstuff (isahers1, Nov 18, 2024)
ad19daf  fmt (isahers1, Nov 18, 2024)
390ac66  yml changes (isahers1, Nov 18, 2024)
523e5d1  fmt (isahers1, Nov 18, 2024)
ed3aa1c  example search restoration (isahers1, Nov 18, 2024)
ce73afc  fmt (isahers1, Nov 18, 2024)
460b16b  list -> List (isahers1, Nov 18, 2024)
4e9edf4  dict -> Dict (isahers1, Nov 18, 2024)
b6b9d79  fmt (isahers1, Nov 18, 2024)
bc9ec6f  undo yml changes (isahers1, Nov 18, 2024)
15708dc  unit test fix (isahers1, Nov 18, 2024)
527174a  unit test fix (isahers1, Nov 18, 2024)
81f5249  unit test fix (isahers1, Nov 18, 2024)
0b476e8  Merge branch 'main' into isaac/multipartstuff (isahers1, Nov 19, 2024)
f36a0cb  make evaluate function compatible with attachments (#1218) (isahers1, Nov 19, 2024)
ddbe2f5  file path update (isahers1, Nov 19, 2024)
c1ba615  add benchmarks (jakerachleff, Nov 19, 2024)
3544171  better error message (jakerachleff, Nov 19, 2024)
3cc32c5  aevaluate (isahers1, Nov 19, 2024)
161e0d1  Merge branch 'isaac/multipartstuff' of https://github.com/langchain-a… (isahers1, Nov 19, 2024)
08a6f34  unit test for _include_attachments (isahers1, Nov 20, 2024)
8e2e704  test that adding examples without attachments still lets you run evals (isahers1, Nov 20, 2024)
cfa0e4c  fmt (isahers1, Nov 20, 2024)
de38a37  fmt (isahers1, Nov 20, 2024)
2e74735  fmt (isahers1, Nov 20, 2024)
f26c996  attempt fix (isahers1, Nov 20, 2024)
095aae9  fix test (isahers1, Nov 20, 2024)
a99da23  add unit test (isahers1, Nov 20, 2024)
d00ef43  rfc: dont store results (baskaryan, Nov 20, 2024)
f6cf2ba  fmt (baskaryan, Nov 20, 2024)
111 changes: 111 additions & 0 deletions python/bench/upload_example_with_large_file_attachment.py
@@ -0,0 +1,111 @@
import os
import statistics
import time
from pathlib import Path
from typing import Dict

from langsmith import Client
from langsmith.schemas import ExampleUpsertWithAttachments

WRITE_BATCH = 10000


def create_large_file(size: int, dir: str) -> str:
"""Create a large file for benchmarking purposes."""
filename = f"large_file_{size}.txt"
filepath = os.path.join(dir, filename)

# delete the file if it exists
print("Deleting existing file...")
if os.path.exists(filepath):
os.remove(filepath)

print("Creating big file...")
    with open(filepath, "w") as f:
        curr_size = 0
        # Each pass writes all bytes still missing, so the loop normally
        # completes in a single iteration.
        while curr_size < size:
            f.write("a" * (size - curr_size))
            curr_size += size - curr_size

print("Done creating big file...")
return filepath


DATASET_NAME = "upsert_big_file_to_dataset"


def benchmark_big_file_upload(
size_bytes: int, num_examples: int, samples: int = 1
) -> Dict:
"""
Benchmark run creation with specified parameters.
Returns timing statistics.
"""
multipart_timings = []

for _ in range(samples):
client = Client()

if client.has_dataset(dataset_name=DATASET_NAME):
client.delete_dataset(dataset_name=DATASET_NAME)

dataset = client.create_dataset(
DATASET_NAME,
description="Test dataset for big file upload",
)
large_file = create_large_file(size_bytes, "/tmp")
examples = [
ExampleUpsertWithAttachments(
dataset_id=dataset.id,
inputs={"a": 1},
outputs={"b": 2},
attachments={
"bigfile": ("text/plain", Path(large_file)),
},
)
for _ in range(num_examples)
]

multipart_start = time.perf_counter()
client.upsert_examples_multipart(upserts=examples)
multipart_elapsed = time.perf_counter() - multipart_start

multipart_timings.append(multipart_elapsed)

return {
"mean": statistics.mean(multipart_timings),
"median": statistics.median(multipart_timings),
"stdev": (
statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0
),
"min": min(multipart_timings),
"max": max(multipart_timings),
}


size_bytes = 50000000
num_examples = 10


def main(size_bytes: int, num_examples: int = 1):
"""
Run benchmarks with different combinations of parameters and report results.
"""
results = benchmark_big_file_upload(size_bytes, num_examples)

print(f"\nBenchmark Results for size {size_bytes} and {num_examples} examples:")
print("-" * 30)
print(f"{'Metric':<15} {'Result':>20}")
print("-" * 30)

    metrics = ["mean", "median", "stdev", "min", "max"]
    for metric in metrics:
        # Print the metric name so each row lines up with the header columns.
        print(f"{metric:<15} {results[metric]:>20.4f}")

print("-" * 30)
print(f"{'Throughput':<15} {num_examples / results['mean']:>20.2f} ")
print("(examples/second)")


if __name__ == "__main__":
main(size_bytes, num_examples)
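For reference, a minimal usage sketch of the multipart path this benchmark exercises. The dataset name and file path here are illustrative; the calls mirror the benchmark above:

from pathlib import Path

from langsmith import Client
from langsmith.schemas import ExampleUpsertWithAttachments

client = Client()
dataset = client.create_dataset("attachment-demo")  # illustrative name
client.upsert_examples_multipart(
    upserts=[
        ExampleUpsertWithAttachments(
            dataset_id=dataset.id,
            inputs={"a": 1},
            outputs={"b": 2},
            # attachments map a name to a (mime type, file path) tuple
            attachments={"bigfile": ("text/plain", Path("/tmp/large_file_50000000.txt"))},
        )
    ]
)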
143 changes: 143 additions & 0 deletions python/bench/upload_examples_bench.py
@@ -0,0 +1,143 @@
import statistics
import time
from typing import Dict
from uuid import uuid4

from langsmith import Client
from langsmith.schemas import DataType, ExampleUpsertWithAttachments


def create_large_json(length: int) -> Dict:
"""Create a large JSON object for benchmarking purposes."""
large_array = [
{
"index": i,
"data": f"This is element number {i}",
"nested": {"id": i, "value": f"Nested value for element {i}"},
}
for i in range(length)
]

return {
"name": "Huge JSON" + str(uuid4()),
"description": "This is a very large JSON object for benchmarking purposes.",
"array": large_array,
"metadata": {
"created_at": "2024-10-22T19:00:00Z",
"author": "Python Program",
"version": 1.0,
},
}


def create_example_data(dataset_id: str, json_size: int) -> ExampleUpsertWithAttachments:
    """Create a single example upsert payload with large JSON inputs and outputs."""
return ExampleUpsertWithAttachments(
**{
"dataset_id": dataset_id,
"inputs": create_large_json(json_size),
"outputs": create_large_json(json_size),
}
)


DATASET_NAME = "upsert_llm_evaluator_benchmark_dataset"


def benchmark_example_uploading(
num_examples: int, json_size: int, samples: int = 1
) -> Dict:
"""
Benchmark run creation with specified parameters.
Returns timing statistics.
"""
multipart_timings, old_timings = [], []

for _ in range(samples):
client = Client()

if client.has_dataset(dataset_name=DATASET_NAME):
client.delete_dataset(dataset_name=DATASET_NAME)

dataset = client.create_dataset(
DATASET_NAME,
description="Test dataset for multipart example upload",
data_type=DataType.kv,
)
        examples = [
            create_example_data(dataset.id, json_size) for _ in range(num_examples)
        ]

        # Old method, left commented out (see the restored sketch after this
        # file): the create_examples endpoint fails above 20 MB, so it would
        # crash with json_size > ~100.
        old_start = time.perf_counter()
        # inputs = [e.inputs for e in examples]
        # outputs = [e.outputs for e in examples]
        # client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
        old_elapsed = time.perf_counter() - old_start

# New method
multipart_start = time.perf_counter()
client.upsert_examples_multipart(upserts=examples)
multipart_elapsed = time.perf_counter() - multipart_start

multipart_timings.append(multipart_elapsed)
old_timings.append(old_elapsed)

return {
"old": {
"mean": statistics.mean(old_timings),
"median": statistics.median(old_timings),
"stdev": statistics.stdev(old_timings) if len(old_timings) > 1 else 0,
"min": min(old_timings),
"max": max(old_timings),
},
"new": {
"mean": statistics.mean(multipart_timings),
"median": statistics.median(multipart_timings),
"stdev": (
statistics.stdev(multipart_timings) if len(multipart_timings) > 1 else 0
),
"min": min(multipart_timings),
"max": max(multipart_timings),
},
}


json_size = 1000
num_examples = 1000


def main(json_size: int, num_examples: int):
"""
Run benchmarks with different combinations of parameters and report results.
"""
results = benchmark_example_uploading(
num_examples=num_examples, json_size=json_size
)

print(
f"\nBenchmark Results for {num_examples} examples with JSON size {json_size}:"
)
print("-" * 60)
print(f"{'Metric':<15} {'Old Method':>20} {'New Method':>20}")
print("-" * 60)

metrics = ["mean", "median", "stdev", "min", "max"]
for metric in metrics:
print(
f"{metric:<15} {results['old'][metric]:>20.4f} "
f"{results['new'][metric]:>20.4f}"
)

print("-" * 60)
print(
f"{'Throughput':<15} {num_examples / results['old']['mean']:>20.2f} "
f"{num_examples / results['new']['mean']:>20.2f}"
)
print("(examples/second)")


if __name__ == "__main__":
main(json_size, num_examples)
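The old-method timing above is effectively zero because the create_examples call is commented out. A minimal sketch for restoring the comparison at small json_size, mirroring the commented-out block (per the comment there, the endpoint fails above ~20 MB, so roughly json_size > 100 would crash):

# Restored old-method timing; only safe for small payloads.
old_start = time.perf_counter()
inputs = [e.inputs for e in examples]
outputs = [e.outputs for e in examples]
client.create_examples(inputs=inputs, outputs=outputs, dataset_id=dataset.id)
old_elapsed = time.perf_counter() - old_start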
26 changes: 16 additions & 10 deletions python/langsmith/_internal/_operations.py
@@ -3,6 +3,7 @@
import itertools
import logging
import uuid
from pathlib import Path
from typing import Literal, Optional, Union, cast

import orjson
@@ -214,7 +215,7 @@ def serialized_run_operation_to_multipart_parts_and_context(
op: SerializedRunOperation,
) -> MultipartPartsAndContext:
acc_parts: list[MultipartPart] = []

valb: Union[bytes, Path]
# this is main object, minus inputs/outputs/events/attachments
acc_parts.append(
(
@@ -257,17 +258,22 @@
)
continue

        if isinstance(valb, Path):
            # TODO: actually deal with this case
            # This is just for speed of getting something out
            continue
        else:
            acc_parts.append(
                (
                    f"attachment.{op.id}.{n}",
                    (
                        None,
                        valb,
                        content_type,
                        {"Content-Length": str(len(valb))},
                    ),
                )
            )
return MultipartPartsAndContext(
acc_parts,
f"trace={op.trace_id},id={op.id}",
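The Path branch above is skipped with a TODO. A minimal sketch of one way to fill it in, mirroring the bytes branch (an editor's assumption, not the PR's implementation; the eager read_bytes() is shown for simplicity, and a streaming body would be preferable for large attachments):

        if isinstance(valb, Path):
            # Sketch for the TODO: emit the file as a multipart part, reusing
            # the same part layout as the bytes branch. stat() supplies the
            # Content-Length without loading the file first.
            file_size = valb.stat().st_size
            acc_parts.append(
                (
                    f"attachment.{op.id}.{n}",
                    (
                        None,
                        valb.read_bytes(),  # eager read; streaming scales better
                        content_type,
                        {"Content-Length": str(file_size)},
                    ),
                )
            )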