diff --git a/python/docs/create_api_rst.py b/python/docs/create_api_rst.py
index 4db4c3282..0e9482856 100644
--- a/python/docs/create_api_rst.py
+++ b/python/docs/create_api_rst.py
@@ -111,7 +111,9 @@ def _load_module_members(module_path: str, namespace: str) -> ModuleMembers:
                 else (
                     "enum"
                     if issubclass(type_, Enum)
-                    else "Pydantic" if issubclass(type_, BaseModel) else "Regular"
+                    else "Pydantic"
+                    if issubclass(type_, BaseModel)
+                    else "Regular"
                 )
             )
             classes_.append(
diff --git a/python/tests/evaluation/test_evaluation.py b/python/tests/evaluation/test_evaluation.py
index 849318556..000eb3007 100644
--- a/python/tests/evaluation/test_evaluation.py
+++ b/python/tests/evaluation/test_evaluation.py
@@ -242,18 +242,10 @@ async def apredict(inputs: dict) -> dict:
     if _has_pandas():
         df = results.to_pandas()
         assert len(df) == 10
-    examples = client.list_examples(dataset_name=dataset.name)
+    all_examples = list(client.list_examples(dataset_name=dataset.name))
     all_results = [r async for r in results]
-    all_examples = []
-    for example in examples:
-        count = 0
-        for r in all_results:
-            if r["run"].reference_example_id == example.id:
-                count += 1
-        assert count == 2
-        all_examples.append(example)
-
-    # Wait for there to be 2x runs vs. examples
+
+    # Wait for there to be same num runs vs. examples
     def check_run_count():
         current_runs = list(
             client.list_runs(project_name=results.experiment_name, is_root=True)
@@ -261,13 +253,13 @@ def check_run_count():
         for r in current_runs:
             assert "accuracy" in r.feedback_stats
             assert "slow_accuracy" in r.feedback_stats
-        return current_runs, len(current_runs) == 2 * len(all_examples)
+        return current_runs, len(current_runs) == len(all_examples)
 
     final_runs = wait_for(check_run_count, max_sleep_time=60, sleep_time=2)
 
-    assert len(final_runs) == 2 * len(
+    assert len(final_runs) == len(
         all_examples
-    ), f"Expected {2 * len(all_examples)} runs, but got {len(final_runs)}"
+    ), f"Expected {len(all_examples)} runs, but got {len(final_runs)}"
 
     # Run it again with the existing project
     results2 = await aevaluate(