From a18e8e53ef0982153649a1605f9d98b95ee30d48 Mon Sep 17 00:00:00 2001 From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:03:12 -0500 Subject: [PATCH 1/8] displaying exp names in ray dashboard (#123) * displaying exp names in ray dashboard * fixing tests --- src/agentlab/experiments/exp_utils.py | 15 ++++++++------- src/agentlab/experiments/graph_execution_ray.py | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/agentlab/experiments/exp_utils.py b/src/agentlab/experiments/exp_utils.py index 97ce527d..95c7a71c 100644 --- a/src/agentlab/experiments/exp_utils.py +++ b/src/agentlab/experiments/exp_utils.py @@ -1,13 +1,13 @@ -import os -from pathlib import Path -from browsergym.experiments.loop import _move_old_exp, yield_all_exp_results -from tqdm import tqdm import logging -from browsergym.experiments.loop import ExpArgs -from contextlib import contextmanager +import os import signal import sys -from time import time, sleep +from contextlib import contextmanager +from pathlib import Path +from time import sleep, time + +from browsergym.experiments.loop import ExpArgs, _move_old_exp, yield_all_exp_results +from tqdm import tqdm logger = logging.getLogger(__name__) # Get logger based on module name @@ -130,6 +130,7 @@ def add_dependencies(exp_args_list: list[ExpArgs], task_dependencies: dict[str, class MockedExpArgs: def __init__(self, exp_id, depends_on=None): self.exp_id = exp_id + self.exp_name = f"exp_{exp_id}" self.depends_on = depends_on if depends_on else [] self.start_time = None self.end_time = None diff --git a/src/agentlab/experiments/graph_execution_ray.py b/src/agentlab/experiments/graph_execution_ray.py index 5dd18d4a..231a130c 100644 --- a/src/agentlab/experiments/graph_execution_ray.py +++ b/src/agentlab/experiments/graph_execution_ray.py @@ -28,7 +28,7 @@ def get_task(exp_arg: bgym.ExpArgs): dependency_tasks = [get_task(exp_args_map[dep_key]) for dep_key in exp_arg.depends_on] # Create new task that depends on the dependency results - task_map[exp_arg.exp_id] = run_exp.remote( + task_map[exp_arg.exp_id] = run_exp.options(name=f"{exp_arg.exp_name}").remote( exp_arg, *dependency_tasks, avg_step_timeout=avg_step_timeout ) return task_map[exp_arg.exp_id] From a7d6467ed2c49110e384b8f965f14e5a08f5c1c2 Mon Sep 17 00:00:00 2001 From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com> Date: Fri, 15 Nov 2024 11:43:30 -0500 Subject: [PATCH 2/8] enabling chat o_0 (#124) --- src/agentlab/ui_assistant.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/agentlab/ui_assistant.py b/src/agentlab/ui_assistant.py index 2bebaa41..96bbb0f9 100644 --- a/src/agentlab/ui_assistant.py +++ b/src/agentlab/ui_assistant.py @@ -3,6 +3,7 @@ from browsergym.experiments.loop import EnvArgs, ExpArgs from agentlab.agents.agent_args import AgentArgs +from agentlab.agents.generic_agent.generic_agent import GenericAgentArgs from agentlab.experiments.exp_utils import RESULTS_DIR from agentlab.experiments.launch_exp import import_object @@ -14,6 +15,9 @@ def make_exp_args(agent_args: AgentArgs, start_url="https://www.google.com"): except AttributeError: pass + if isinstance(agent_args, GenericAgentArgs): + agent_args.flags.enable_chat = True + exp_args = ExpArgs( agent_args=agent_args, env_args=EnvArgs( From cd61c80c89d4966b2b9e8db27efa1ad6900bd7b1 Mon Sep 17 00:00:00 2001 From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com> Date: Fri, 15 Nov 2024 14:09:12 -0500 Subject: [PATCH 3/8] Fixing discussion object when adding images w/o detail (#128) * switching to goal_object in xray * adding excpetion for when summary isnt available * fixing discussion w/ images --- src/agentlab/analyze/agent_xray.py | 36 +++++++++++++++++------------- src/agentlab/llm/llm_utils.py | 5 ++--- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/agentlab/analyze/agent_xray.py b/src/agentlab/analyze/agent_xray.py index da7e98d3..c4946850 100644 --- a/src/agentlab/analyze/agent_xray.py +++ b/src/agentlab/analyze/agent_xray.py @@ -21,6 +21,7 @@ from agentlab.experiments.exp_utils import RESULTS_DIR from agentlab.experiments.study import get_most_recent_study from agentlab.llm.chat_api import make_system_message, make_user_message +from agentlab.llm.llm_utils import BaseMessage as AgentLabBaseMessage from agentlab.llm.llm_utils import Discussion select_dir_instructions = "Select Experiment Directory" @@ -740,7 +741,7 @@ def get_episode_info(info: Info): steps_info = info.exp_result.steps_info step_info = steps_info[info.step] try: - goal = step_info.obs["goal"] + goal = step_info.obs["goal_object"] except KeyError: goal = None try: @@ -757,7 +758,7 @@ def get_episode_info(info: Info): **Goal:** -{code(goal)} +{code(str(AgentLabBaseMessage('', goal)))} **Task info:** @@ -992,20 +993,23 @@ def get_directory_contents(results_dir: Path): continue exp_description = dir.name - # get summary*.csv files and find the most recent - summary_files = list(dir.glob("summary*.csv")) - if len(summary_files) != 0: - most_recent_summary = max(summary_files, key=os.path.getctime) - summary_df = pd.read_csv(most_recent_summary) - - # get row with max avg_reward - max_reward_row = summary_df.loc[summary_df["avg_reward"].idxmax()] - reward = max_reward_row["avg_reward"] * 100 - completed = max_reward_row["n_completed"] - n_err = max_reward_row["n_err"] - exp_description += ( - f" - avg-reward: {reward:.1f}% - completed: {completed} - errors: {n_err}" - ) + try: + # get summary*.csv files and find the most recent + summary_files = list(dir.glob("summary*.csv")) + if len(summary_files) != 0: + most_recent_summary = max(summary_files, key=os.path.getctime) + summary_df = pd.read_csv(most_recent_summary) + + # get row with max avg_reward + max_reward_row = summary_df.loc[summary_df["avg_reward"].idxmax()] + reward = max_reward_row["avg_reward"] * 100 + completed = max_reward_row["n_completed"] + n_err = max_reward_row["n_err"] + exp_description += ( + f" - avg-reward: {reward:.1f}% - completed: {completed} - errors: {n_err}" + ) + except Exception as e: + print(f"Error while reading summary file: {e}") exp_descriptions.append(exp_description) diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py index eaa2a5e0..ddff3556 100644 --- a/src/agentlab/llm/llm_utils.py +++ b/src/agentlab/llm/llm_utils.py @@ -8,8 +8,7 @@ import time from copy import deepcopy from functools import cache -from typing import TYPE_CHECKING -from typing import Any, Union +from typing import TYPE_CHECKING, Any, Union from warnings import warn import numpy as np @@ -356,7 +355,7 @@ def add_image(self, image: np.ndarray | Image.Image | str, detail: str = None): if detail: self.add_content("image_url", {"url": image_url, "detail": detail}) else: - self.add_content("image_url", image_url) + self.add_content("image_url", {"url": image_url}) def to_markdown(self): if isinstance(self["content"], str): From a4ba2f5da200c7e6ed9dc3f35d8e45b5f42c54dc Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Fri, 15 Nov 2024 16:42:31 -0500 Subject: [PATCH 4/8] adding screenshot descriptions --- src/agentlab/agents/dynamic_prompting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/agentlab/agents/dynamic_prompting.py b/src/agentlab/agents/dynamic_prompting.py index 73688f0f..a284fe37 100644 --- a/src/agentlab/agents/dynamic_prompting.py +++ b/src/agentlab/agents/dynamic_prompting.py @@ -446,8 +446,12 @@ def add_screenshot(self, prompt: BaseMessage) -> BaseMessage: if self.flags.use_screenshot: if self.flags.use_som: screenshot = self.obs["screenshot_som"] + prompt.add_text( + "\n## Screenshot:\nHere is a screenshot of the page, it is annotated with bounding boxes and corresponding bids:" + ) else: screenshot = self.obs["screenshot"] + prompt.add_text("\n## Screenshot:\nHere is a screenshot of the page:") img_url = image_to_jpg_base64_url(screenshot) prompt.add_image(img_url, detail=self.flags.openai_vision_detail) return prompt From 2b9b527fa568f3ebd20531e265dbc5f9916cf7c9 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Fri, 15 Nov 2024 16:42:44 -0500 Subject: [PATCH 5/8] typo --- src/agentlab/experiments/study.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py index b42f0bb5..d9f24429 100644 --- a/src/agentlab/experiments/study.py +++ b/src/agentlab/experiments/study.py @@ -3,6 +3,7 @@ import pickle import re import uuid +from abc import ABC, abstractmethod from dataclasses import dataclass from datetime import datetime from pathlib import Path @@ -16,11 +17,7 @@ from agentlab.experiments import args from agentlab.experiments import reproducibility_util as repro from agentlab.experiments.exp_utils import RESULTS_DIR, add_dependencies -from agentlab.experiments.launch_exp import ( - find_incomplete, - non_dummy_count, - run_experiments, -) +from agentlab.experiments.launch_exp import find_incomplete, non_dummy_count, run_experiments logger = logging.getLogger(__name__) @@ -353,7 +350,7 @@ def _agents_on_benchmark( The logging level for individual jobs. Returns: - study: Study + list[ExpArgs]: The list of experiments to run. """ if not isinstance(agents, (list, tuple)): From 32865050045c8c71df35c34ff30a6b420a4e258c Mon Sep 17 00:00:00 2001 From: Alexandre Lacoste Date: Fri, 15 Nov 2024 19:49:11 -0500 Subject: [PATCH 6/8] Study to multi eval (#126) * yet another way to kill timedout jobs * Improve timeout handling in task polling logic * Add method to override max_steps in Study class * add support for tab visibility in observation flags and update related components * fix tests * black * Improve timeout handling in task polling logic * yet another way to kill timedout jobs (#108) * Add method to override max_steps in Study class * add support for tab visibility in observation flags and update related components * fix tests * black * black * Fix sorting bug. improve directory content retrieval with summary statistics * fix test * black * tmp * add error report, add cum cost to summary and ray backend by default * sequential studies --------- Co-authored-by: Maxime Gasse --- src/agentlab/experiments/study.py | 199 ++++++++++++++++++++++-------- 1 file changed, 149 insertions(+), 50 deletions(-) diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py index d9f24429..0f390279 100644 --- a/src/agentlab/experiments/study.py +++ b/src/agentlab/experiments/study.py @@ -1,7 +1,7 @@ +from abc import ABC, abstractmethod import gzip import logging import pickle -import re import uuid from abc import ABC, abstractmethod from dataclasses import dataclass @@ -14,16 +14,100 @@ from agentlab.agents.agent_args import AgentArgs from agentlab.analyze import inspect_results -from agentlab.experiments import args from agentlab.experiments import reproducibility_util as repro from agentlab.experiments.exp_utils import RESULTS_DIR, add_dependencies from agentlab.experiments.launch_exp import find_incomplete, non_dummy_count, run_experiments + logger = logging.getLogger(__name__) +def make_study( + agent_args: list[AgentArgs], + benchmark: bgym.Benchmark, + logging_level_stdout=logging.WARNING, + suffix="", + comment=None, + ignore_dependencies=False, +): + + if isinstance(benchmark, str): + benchmark = bgym.DEFAULT_BENCHMARKS[benchmark]() + + """Make a study from a list of agents and a benchmark.""" + if "webarena" in benchmark.name and len(agent_args) > 1: + logger.warning( + "*WebArena* requires manual reset after each evaluation. Running through SequentialStudies." + ) + studies = [] + for agent in agent_args: + studies.append( + Study( + [agent], + benchmark, + logging_level=logging_level_stdout, + suffix=suffix, + comment=comment, + ignore_dependencies=ignore_dependencies, + ) + ) + + return SequentialStudies(studies) + else: + return Study( + agent_args, + benchmark, + logging_level=logging_level_stdout, + suffix=suffix, + comment=comment, + ignore_dependencies=ignore_dependencies, + ) + + +class AbstractStudy(ABC): + dir: Path = None + suffix: str = "" + + @abstractmethod + def find_incomplete(self, include_errors=True): + """Search for missing""" + + @abstractmethod + def run(self, n_jobs=1, parallel_backend="ray", strict_reproducibility=False, n_relaunch=3): + """Run the study""" + + def make_dir(self, exp_root=RESULTS_DIR): + if self.dir is None: + dir_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}_{self.name}" + + self.dir = Path(exp_root) / dir_name + self.dir.mkdir(parents=True, exist_ok=True) + + def save(self, exp_root=RESULTS_DIR): + """Pickle the study to the directory""" + # TODO perhaps remove exp_args_list before pickling and when loading bring them from the individual directories + + self.make_dir(exp_root=exp_root) + with gzip.open(self.dir / "study.pkl.gz", "wb") as f: + pickle.dump(self, f) + + def get_results(self, suffix="", also_save=True): + """Recursively load all results from the study directory and summarize them.""" + result_df = inspect_results.load_result_df(self.dir) + error_report = inspect_results.error_report(result_df, max_stack_trace=3, use_log=True) + summary_df = inspect_results.summarize_study(result_df) + + if also_save: + suffix = f"_{suffix}" if suffix else "" + result_df.to_csv(self.dir / f"result_df{suffix}.csv") + summary_df.to_csv(self.dir / f"summary_df{suffix}.csv") + (self.dir / f"error_report{suffix}.md").write_text(error_report) + + return result_df, summary_df, error_report + + @dataclass -class Study: +class Study(AbstractStudy): """A study coresponds to one or multiple agents evaluated on a benchmark. This is part of the high level API to help keep experiments organized and reproducible. @@ -139,7 +223,7 @@ def run( self._run(n_jobs, parallel_backend, strict_reproducibility) suffix = f"trial_{i + 1}_of_{n_relaunch}" - _, summary_df, error_report = self.get_results(suffix=suffix) + _, summary_df, _ = self.get_results(suffix=suffix) logger.info("\n" + str(summary_df)) n_incomplete, n_error = self.find_incomplete(include_errors=relaunch_errors) @@ -197,60 +281,17 @@ def append_to_journal(self, strict_reproducibility=True): ValueError: If the reproducibility information is not compatible with the report. """ + _, summary_df, _ = self.get_results() repro.append_to_journal( self.reproducibility_info, - self.get_report(), + summary_df, strict_reproducibility=strict_reproducibility, ) - def get_results(self, suffix="", also_save=True): - result_df = inspect_results.load_result_df(self.dir) - error_report = inspect_results.error_report(result_df, max_stack_trace=3, use_log=True) - summary_df = inspect_results.summarize_study(result_df) - - if also_save: - suffix = f"_{suffix}" if suffix else "" - result_df.to_csv(self.dir / f"result_df{suffix}.csv") - summary_df.to_csv(self.dir / f"summary_df{suffix}.csv") - (self.dir / f"error_report{suffix}.md").write_text(error_report) - - return result_df, summary_df, error_report - @property def name(self): agent_names = [a.agent_name for a in self.agent_args] - if len(agent_names) == 1: - study_name = f"{agent_names[0]}_on_{self.benchmark.name}" - else: - study_name = f"{len(agent_names)}_agents_on_{self.benchmark.name}" - - study_name = slugify(study_name, max_length=100, allow_unicode=True) - - if self.suffix: - study_name += f"_{self.suffix}" - return study_name - - def make_dir(self, exp_root=RESULTS_DIR): - if self.dir is None: - dir_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}_{self.name}" - - self.dir = Path(exp_root) / dir_name - self.dir.mkdir(parents=True, exist_ok=True) - - def save(self): - """Pickle the study to the directory""" - - # TODO perhaps remove exp_args_list before pickling and when loading bring them from the individual directories - - self.make_dir() - - with gzip.open(self.dir / "study.pkl.gz", "wb") as f: - pickle.dump(self, f) - - def get_report(self, ignore_cache=False, ignore_stale=False): - return inspect_results.get_study_summary( - self.dir, ignore_cache=ignore_cache, ignore_stale=ignore_stale - ) + return _make_study_name(agent_names, [self.benchmark.name], self.suffix) def override_max_steps(self, max_steps): for exp_args in self.exp_args_list: @@ -285,6 +326,64 @@ def load_most_recent(root_dir: Path = None, contains=None) -> "Study": return Study.load(get_most_recent_study(root_dir, contains=contains)) +def _make_study_name(agent_names, benchmark_names, suffix=None): + """Make a study name from the agent and benchmark names.""" + if len(agent_names) == 1: + agent_name = agent_names[0] + else: + agent_name = f"{len(agent_names)}_agents" + + if len(benchmark_names) == 1: + benchmark_name = benchmark_names[0] + else: + benchmark_name = f"{len(benchmark_names)}_benchmarks" + + study_name = f"{agent_name}_on_{benchmark_name}_{suffix if suffix else ''}" + + return slugify(study_name, max_length=200, allow_unicode=True) + + +@dataclass +class SequentialStudies(AbstractStudy): + """ + Sequential execution of multiple studies. + + This is required for e.g. WebArena, where a server reset is required between evaluations of each agent. + """ + + studies: list[Study] + + @property + def name(self): + """The name of the study.""" + agent_names = [a.agent_name for study in self.studies for a in study.agent_args] + benchmark_names = [study.benchmark.name for study in self.studies] + return _make_study_name(agent_names, benchmark_names, self.suffix) + + def find_incomplete(self, include_errors=True): + for study in self.studies: + study.find_incomplete(include_errors=include_errors) + + def run(self, n_jobs=1, parallel_backend="ray", strict_reproducibility=False, n_relaunch=3): + + self.save() + + for study in self.studies: + study.make_dir(exp_root=self.dir) + study.run(n_jobs, parallel_backend, strict_reproducibility, n_relaunch) + _, summary_df, _ = self.get_results() + logger.info("\n" + str(summary_df)) + logger.info(f"SequentialStudies {self.name} finished.") + + def override_max_steps(self, max_steps): + for study in self.studies: + study.override_max_steps(max_steps) + + def append_to_journal(self, strict_reproducibility=True): + for study in self.studies: + study.append_to_journal(strict_reproducibility=strict_reproducibility) + + def get_most_recent_study( root_dir: Path = None, date_format: str = "%Y-%m-%d_%H-%M-%S", contains=None ): From 9fe2a1a2ced1f4caa247262f7a24a999ea431517 Mon Sep 17 00:00:00 2001 From: ThibaultLSDC Date: Thu, 28 Nov 2024 19:40:22 +0000 Subject: [PATCH 7/8] AB results --- reproducibility_journal.csv | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv index ad2bfaa8..7fb1d401 100644 --- a/reproducibility_journal.csv +++ b/reproducibility_journal.csv @@ -46,3 +46,10 @@ ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,weblinx_test,0.0.1.de ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.089,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a, ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.125,0.006,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a, ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.079,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a, +ThibaultLSDC,GenericAgent-gpt-4o,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.005,0.003,2,213/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None, +ThibaultLSDC,GenericAgent-gpt-4o-mini,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.002,0.002,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None, +ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.008,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None, +ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.005,8,206/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None, +ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-8b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.001,0.001,15,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None, +ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None, +ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.009,0.005,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c, M: src/agentlab/experiments/study.py,0.13.1,None, From 77e0484d42525ce1ac4e9e98ae2ecf756d25bb88 Mon Sep 17 00:00:00 2001 From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com> Date: Thu, 28 Nov 2024 20:44:22 +0100 Subject: [PATCH 8/8] wth github ? --- src/agentlab/experiments/study.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py index 7f166edc..c091d117 100644 --- a/src/agentlab/experiments/study.py +++ b/src/agentlab/experiments/study.py @@ -109,7 +109,7 @@ class AbstractStudy(ABC): def find_incomplete(self, include_errors=True): """Prepare the study for relaunching by finding incomplete experiments""" - @abstractmethod + @abstractmethod def run(self, n_jobs=1, parallel_backend="ray", strict_reproducibility=False, n_relaunch=3): """Run the study"""