From a18e8e53ef0982153649a1605f9d98b95ee30d48 Mon Sep 17 00:00:00 2001
From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com>
Date: Thu, 14 Nov 2024 16:03:12 -0500
Subject: [PATCH 1/8] displaying exp names in ray dashboard (#123)

* displaying exp names in ray dashboard

* fixing tests
---
 src/agentlab/experiments/exp_utils.py           | 15 ++++++++-------
 src/agentlab/experiments/graph_execution_ray.py |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/agentlab/experiments/exp_utils.py b/src/agentlab/experiments/exp_utils.py
index 97ce527d..95c7a71c 100644
--- a/src/agentlab/experiments/exp_utils.py
+++ b/src/agentlab/experiments/exp_utils.py
@@ -1,13 +1,13 @@
-import os
-from pathlib import Path
-from browsergym.experiments.loop import _move_old_exp, yield_all_exp_results
-from tqdm import tqdm
 import logging
-from browsergym.experiments.loop import ExpArgs
-from contextlib import contextmanager
+import os
 import signal
 import sys
-from time import time, sleep
+from contextlib import contextmanager
+from pathlib import Path
+from time import sleep, time
+
+from browsergym.experiments.loop import ExpArgs, _move_old_exp, yield_all_exp_results
+from tqdm import tqdm
 
 logger = logging.getLogger(__name__)  # Get logger based on module name
 
@@ -130,6 +130,7 @@ def add_dependencies(exp_args_list: list[ExpArgs], task_dependencies: dict[str,
 class MockedExpArgs:
     def __init__(self, exp_id, depends_on=None):
         self.exp_id = exp_id
+        self.exp_name = f"exp_{exp_id}"
         self.depends_on = depends_on if depends_on else []
         self.start_time = None
         self.end_time = None
diff --git a/src/agentlab/experiments/graph_execution_ray.py b/src/agentlab/experiments/graph_execution_ray.py
index 5dd18d4a..231a130c 100644
--- a/src/agentlab/experiments/graph_execution_ray.py
+++ b/src/agentlab/experiments/graph_execution_ray.py
@@ -28,7 +28,7 @@ def get_task(exp_arg: bgym.ExpArgs):
             dependency_tasks = [get_task(exp_args_map[dep_key]) for dep_key in exp_arg.depends_on]
 
             # Create new task that depends on the dependency results
-            task_map[exp_arg.exp_id] = run_exp.remote(
+            task_map[exp_arg.exp_id] = run_exp.options(name=f"{exp_arg.exp_name}").remote(
                 exp_arg, *dependency_tasks, avg_step_timeout=avg_step_timeout
             )
         return task_map[exp_arg.exp_id]

From a7d6467ed2c49110e384b8f965f14e5a08f5c1c2 Mon Sep 17 00:00:00 2001
From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com>
Date: Fri, 15 Nov 2024 11:43:30 -0500
Subject: [PATCH 2/8] enabling chat o_0 (#124)

---
 src/agentlab/ui_assistant.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/agentlab/ui_assistant.py b/src/agentlab/ui_assistant.py
index 2bebaa41..96bbb0f9 100644
--- a/src/agentlab/ui_assistant.py
+++ b/src/agentlab/ui_assistant.py
@@ -3,6 +3,7 @@
 from browsergym.experiments.loop import EnvArgs, ExpArgs
 
 from agentlab.agents.agent_args import AgentArgs
+from agentlab.agents.generic_agent.generic_agent import GenericAgentArgs
 from agentlab.experiments.exp_utils import RESULTS_DIR
 from agentlab.experiments.launch_exp import import_object
 
@@ -14,6 +15,9 @@ def make_exp_args(agent_args: AgentArgs, start_url="https://www.google.com"):
     except AttributeError:
         pass
 
+    if isinstance(agent_args, GenericAgentArgs):
+        agent_args.flags.enable_chat = True
+
     exp_args = ExpArgs(
         agent_args=agent_args,
         env_args=EnvArgs(

From cd61c80c89d4966b2b9e8db27efa1ad6900bd7b1 Mon Sep 17 00:00:00 2001
From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com>
Date: Fri, 15 Nov 2024 14:09:12 -0500
Subject: [PATCH 3/8] Fixing discussion object when adding images w/o detail
 (#128)

* switching to goal_object in xray

* adding exception for when summary isn't available

* fixing discussion w/ images
---
 src/agentlab/analyze/agent_xray.py | 36 +++++++++++++++++-------------
 src/agentlab/llm/llm_utils.py      |  5 ++---
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/src/agentlab/analyze/agent_xray.py b/src/agentlab/analyze/agent_xray.py
index da7e98d3..c4946850 100644
--- a/src/agentlab/analyze/agent_xray.py
+++ b/src/agentlab/analyze/agent_xray.py
@@ -21,6 +21,7 @@
 from agentlab.experiments.exp_utils import RESULTS_DIR
 from agentlab.experiments.study import get_most_recent_study
 from agentlab.llm.chat_api import make_system_message, make_user_message
+from agentlab.llm.llm_utils import BaseMessage as AgentLabBaseMessage
 from agentlab.llm.llm_utils import Discussion
 
 select_dir_instructions = "Select Experiment Directory"
@@ -740,7 +741,7 @@ def get_episode_info(info: Info):
         steps_info = info.exp_result.steps_info
         step_info = steps_info[info.step]
         try:
-            goal = step_info.obs["goal"]
+            goal = step_info.obs["goal_object"]
         except KeyError:
             goal = None
         try:
@@ -757,7 +758,7 @@ def get_episode_info(info: Info):
 
 **Goal:**
 
-{code(goal)}
+{code(str(AgentLabBaseMessage('', goal)))}
 
 **Task info:**
 
@@ -992,20 +993,23 @@ def get_directory_contents(results_dir: Path):
             continue
 
         exp_description = dir.name
-        # get summary*.csv files and find the most recent
-        summary_files = list(dir.glob("summary*.csv"))
-        if len(summary_files) != 0:
-            most_recent_summary = max(summary_files, key=os.path.getctime)
-            summary_df = pd.read_csv(most_recent_summary)
-
-            # get row with max avg_reward
-            max_reward_row = summary_df.loc[summary_df["avg_reward"].idxmax()]
-            reward = max_reward_row["avg_reward"] * 100
-            completed = max_reward_row["n_completed"]
-            n_err = max_reward_row["n_err"]
-            exp_description += (
-                f" - avg-reward: {reward:.1f}% - completed: {completed} - errors: {n_err}"
-            )
+        try:
+            # get summary*.csv files and find the most recent
+            summary_files = list(dir.glob("summary*.csv"))
+            if len(summary_files) != 0:
+                most_recent_summary = max(summary_files, key=os.path.getctime)
+                summary_df = pd.read_csv(most_recent_summary)
+
+                # get row with max avg_reward
+                max_reward_row = summary_df.loc[summary_df["avg_reward"].idxmax()]
+                reward = max_reward_row["avg_reward"] * 100
+                completed = max_reward_row["n_completed"]
+                n_err = max_reward_row["n_err"]
+                exp_description += (
+                    f" - avg-reward: {reward:.1f}% - completed: {completed} - errors: {n_err}"
+                )
+        except Exception as e:
+            print(f"Error while reading summary file: {e}")
 
         exp_descriptions.append(exp_description)
 
diff --git a/src/agentlab/llm/llm_utils.py b/src/agentlab/llm/llm_utils.py
index eaa2a5e0..ddff3556 100644
--- a/src/agentlab/llm/llm_utils.py
+++ b/src/agentlab/llm/llm_utils.py
@@ -8,8 +8,7 @@
 import time
 from copy import deepcopy
 from functools import cache
-from typing import TYPE_CHECKING
-from typing import Any, Union
+from typing import TYPE_CHECKING, Any, Union
 from warnings import warn
 
 import numpy as np
@@ -356,7 +355,7 @@ def add_image(self, image: np.ndarray | Image.Image | str, detail: str = None):
         if detail:
             self.add_content("image_url", {"url": image_url, "detail": detail})
         else:
-            self.add_content("image_url", image_url)
+            self.add_content("image_url", {"url": image_url})
 
     def to_markdown(self):
         if isinstance(self["content"], str):

From a4ba2f5da200c7e6ed9dc3f35d8e45b5f42c54dc Mon Sep 17 00:00:00 2001
From: ThibaultLSDC <thibault.de.chezelles@gmail.com>
Date: Fri, 15 Nov 2024 16:42:31 -0500
Subject: [PATCH 4/8] adding screenshot descriptions

---
 src/agentlab/agents/dynamic_prompting.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/agentlab/agents/dynamic_prompting.py b/src/agentlab/agents/dynamic_prompting.py
index 73688f0f..a284fe37 100644
--- a/src/agentlab/agents/dynamic_prompting.py
+++ b/src/agentlab/agents/dynamic_prompting.py
@@ -446,8 +446,12 @@ def add_screenshot(self, prompt: BaseMessage) -> BaseMessage:
         if self.flags.use_screenshot:
             if self.flags.use_som:
                 screenshot = self.obs["screenshot_som"]
+                prompt.add_text(
+                    "\n## Screenshot:\nHere is a screenshot of the page, it is annotated with bounding boxes and corresponding bids:"
+                )
             else:
                 screenshot = self.obs["screenshot"]
+                prompt.add_text("\n## Screenshot:\nHere is a screenshot of the page:")
             img_url = image_to_jpg_base64_url(screenshot)
             prompt.add_image(img_url, detail=self.flags.openai_vision_detail)
         return prompt

From 2b9b527fa568f3ebd20531e265dbc5f9916cf7c9 Mon Sep 17 00:00:00 2001
From: ThibaultLSDC <thibault.de.chezelles@gmail.com>
Date: Fri, 15 Nov 2024 16:42:44 -0500
Subject: [PATCH 5/8] typo

---
 src/agentlab/experiments/study.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py
index b42f0bb5..d9f24429 100644
--- a/src/agentlab/experiments/study.py
+++ b/src/agentlab/experiments/study.py
@@ -3,6 +3,7 @@
 import pickle
 import re
 import uuid
+from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
@@ -16,11 +17,7 @@
 from agentlab.experiments import args
 from agentlab.experiments import reproducibility_util as repro
 from agentlab.experiments.exp_utils import RESULTS_DIR, add_dependencies
-from agentlab.experiments.launch_exp import (
-    find_incomplete,
-    non_dummy_count,
-    run_experiments,
-)
+from agentlab.experiments.launch_exp import find_incomplete, non_dummy_count, run_experiments
 
 logger = logging.getLogger(__name__)
 
@@ -353,7 +350,7 @@ def _agents_on_benchmark(
             The logging level for individual jobs.
 
     Returns:
-        study: Study
+        list[ExpArgs]: The list of experiments to run.
     """
 
     if not isinstance(agents, (list, tuple)):

From 32865050045c8c71df35c34ff30a6b420a4e258c Mon Sep 17 00:00:00 2001
From: Alexandre Lacoste <alex.lacoste.shmu@gmail.com>
Date: Fri, 15 Nov 2024 19:49:11 -0500
Subject: [PATCH 6/8] Study to multi eval (#126)

* yet another way to kill timedout jobs

* Improve timeout handling in task polling logic

* Add method to override max_steps in Study class

* add support for tab visibility in observation flags and update related components

* fix tests

* black

* Improve timeout handling in task polling logic

* yet another way to kill timedout jobs (#108)

* Add method to override max_steps in Study class

* add support for tab visibility in observation flags and update related components

* fix tests

* black

* black

* Fix sorting bug.
 improve directory content retrieval with summary statistics

* fix test

* black

* tmp

* add error report, add cum cost to summary and ray backend by default

* sequential studies

---------

Co-authored-by: Maxime Gasse <maxime.gasse@gmail.com>
---
 src/agentlab/experiments/study.py | 199 ++++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 50 deletions(-)

diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py
index d9f24429..0f390279 100644
--- a/src/agentlab/experiments/study.py
+++ b/src/agentlab/experiments/study.py
@@ -1,7 +1,7 @@
+from abc import ABC, abstractmethod
 import gzip
 import logging
 import pickle
-import re
 import uuid
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
@@ -14,16 +14,100 @@
 
 from agentlab.agents.agent_args import AgentArgs
 from agentlab.analyze import inspect_results
-from agentlab.experiments import args
 from agentlab.experiments import reproducibility_util as repro
 from agentlab.experiments.exp_utils import RESULTS_DIR, add_dependencies
 from agentlab.experiments.launch_exp import find_incomplete, non_dummy_count, run_experiments
 
+
 logger = logging.getLogger(__name__)
 
 
+def make_study(
+    agent_args: list[AgentArgs],
+    benchmark: bgym.Benchmark,
+    logging_level_stdout=logging.WARNING,
+    suffix="",
+    comment=None,
+    ignore_dependencies=False,
+):
+    """Make a study from a list of agents and a benchmark (or the name of a default benchmark)."""
+
+    if isinstance(benchmark, str):
+        benchmark = bgym.DEFAULT_BENCHMARKS[benchmark]()  # look up by name, then instantiate
+
+    if "webarena" in benchmark.name and len(agent_args) > 1:
+        logger.warning(
+            "*WebArena* requires manual reset after each evaluation. Running through SequentialStudies."
+        )
+        studies = []
+        for agent in agent_args:
+            studies.append(
+                Study(
+                    [agent],
+                    benchmark,
+                    logging_level=logging_level_stdout,
+                    suffix=suffix,
+                    comment=comment,
+                    ignore_dependencies=ignore_dependencies,
+                )
+            )
+
+        return SequentialStudies(studies)
+    else:
+        return Study(
+            agent_args,
+            benchmark,
+            logging_level=logging_level_stdout,
+            suffix=suffix,
+            comment=comment,
+            ignore_dependencies=ignore_dependencies,
+        )
+
+
+class AbstractStudy(ABC):  # base for Study and SequentialStudies; subclasses must provide `name`
+    dir: Path = None  # study directory; assigned by make_dir() when still None
+    suffix: str = ""  # passed to _make_study_name() by the subclasses' `name` property
+
+    @abstractmethod
+    def find_incomplete(self, include_errors=True):
+        """Search for missing experiments (implemented by each concrete study)."""
+
+    @abstractmethod
+    def run(self, n_jobs=1, parallel_backend="ray", strict_reproducibility=False, n_relaunch=3):
+        """Run the study (implemented by each concrete study)."""
+
+    def make_dir(self, exp_root=RESULTS_DIR):  # create self.dir, naming it on first use
+        if self.dir is None:
+            dir_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}_{self.name}"
+
+            self.dir = Path(exp_root) / dir_name
+        self.dir.mkdir(parents=True, exist_ok=True)  # no error if the directory already exists
+
+    def save(self, exp_root=RESULTS_DIR):
+        """Pickle the study to `study.pkl.gz` in its directory (created if needed)."""
+        # TODO perhaps remove exp_args_list before pickling and when loading bring them from the individual directories
+
+        self.make_dir(exp_root=exp_root)
+        with gzip.open(self.dir / "study.pkl.gz", "wb") as f:
+            pickle.dump(self, f)
+
+    def get_results(self, suffix="", also_save=True):
+        """Load results from self.dir and return (result_df, summary_df, error_report)."""
+        result_df = inspect_results.load_result_df(self.dir)
+        error_report = inspect_results.error_report(result_df, max_stack_trace=3, use_log=True)
+        summary_df = inspect_results.summarize_study(result_df)
+
+        if also_save:
+            suffix = f"_{suffix}" if suffix else ""  # no dangling "_" when suffix is empty
+            result_df.to_csv(self.dir / f"result_df{suffix}.csv")
+            summary_df.to_csv(self.dir / f"summary_df{suffix}.csv")
+            (self.dir / f"error_report{suffix}.md").write_text(error_report)
+
+        return result_df, summary_df, error_report
+
+
 @dataclass
-class Study:
+class Study(AbstractStudy):
     """A study coresponds to one or multiple agents evaluated on a benchmark.
 
     This is part of the high level API to help keep experiments organized and reproducible.
@@ -139,7 +223,7 @@ def run(
             self._run(n_jobs, parallel_backend, strict_reproducibility)
 
             suffix = f"trial_{i + 1}_of_{n_relaunch}"
-            _, summary_df, error_report = self.get_results(suffix=suffix)
+            _, summary_df, _ = self.get_results(suffix=suffix)
             logger.info("\n" + str(summary_df))
 
             n_incomplete, n_error = self.find_incomplete(include_errors=relaunch_errors)
@@ -197,60 +281,17 @@ def append_to_journal(self, strict_reproducibility=True):
             ValueError: If the reproducibility information is not compatible
                 with the report.
         """
+        _, summary_df, _ = self.get_results()
         repro.append_to_journal(
             self.reproducibility_info,
-            self.get_report(),
+            summary_df,
             strict_reproducibility=strict_reproducibility,
         )
 
-    def get_results(self, suffix="", also_save=True):
-        result_df = inspect_results.load_result_df(self.dir)
-        error_report = inspect_results.error_report(result_df, max_stack_trace=3, use_log=True)
-        summary_df = inspect_results.summarize_study(result_df)
-
-        if also_save:
-            suffix = f"_{suffix}" if suffix else ""
-            result_df.to_csv(self.dir / f"result_df{suffix}.csv")
-            summary_df.to_csv(self.dir / f"summary_df{suffix}.csv")
-            (self.dir / f"error_report{suffix}.md").write_text(error_report)
-
-        return result_df, summary_df, error_report
-
     @property
     def name(self):
         agent_names = [a.agent_name for a in self.agent_args]
-        if len(agent_names) == 1:
-            study_name = f"{agent_names[0]}_on_{self.benchmark.name}"
-        else:
-            study_name = f"{len(agent_names)}_agents_on_{self.benchmark.name}"
-
-        study_name = slugify(study_name, max_length=100, allow_unicode=True)
-
-        if self.suffix:
-            study_name += f"_{self.suffix}"
-        return study_name
-
-    def make_dir(self, exp_root=RESULTS_DIR):
-        if self.dir is None:
-            dir_name = f"{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}_{self.name}"
-
-            self.dir = Path(exp_root) / dir_name
-        self.dir.mkdir(parents=True, exist_ok=True)
-
-    def save(self):
-        """Pickle the study to the directory"""
-
-        # TODO perhaps remove exp_args_list before pickling and when loading bring them from the individual directories
-
-        self.make_dir()
-
-        with gzip.open(self.dir / "study.pkl.gz", "wb") as f:
-            pickle.dump(self, f)
-
-    def get_report(self, ignore_cache=False, ignore_stale=False):
-        return inspect_results.get_study_summary(
-            self.dir, ignore_cache=ignore_cache, ignore_stale=ignore_stale
-        )
+        return _make_study_name(agent_names, [self.benchmark.name], self.suffix)
 
     def override_max_steps(self, max_steps):
         for exp_args in self.exp_args_list:
@@ -285,6 +326,64 @@ def load_most_recent(root_dir: Path = None, contains=None) -> "Study":
         return Study.load(get_most_recent_study(root_dir, contains=contains))
 
 
+def _make_study_name(agent_names, benchmark_names, suffix=None):
+    """Build a slugified study name from the agent names, benchmark names and suffix."""
+
+    def _label(names, plural):
+        # A lone name is used verbatim; several names are summarized by their count.
+        return names[0] if len(names) == 1 else f"{len(names)}_{plural}"
+
+    agent_part = _label(agent_names, "agents")
+    benchmark_part = _label(benchmark_names, "benchmarks")
+
+    # The "_" before the suffix is always part of the raw name, even when the suffix
+    # is empty or None.
+    raw_name = f"{agent_part}_on_{benchmark_part}_{suffix or ''}"
+
+    return slugify(raw_name, max_length=200, allow_unicode=True)
+
+
+@dataclass
+class SequentialStudies(AbstractStudy):
+    """
+    Sequential execution of multiple studies, each run inside this study's own directory.
+
+    This is required for e.g. WebArena, where a server reset is required between evaluations of each agent.
+    """
+
+    studies: list[Study]
+
+    @property
+    def name(self):
+        """The name of the study, built from every sub-study's agent and benchmark names."""
+        agent_names = [a.agent_name for study in self.studies for a in study.agent_args]
+        benchmark_names = [study.benchmark.name for study in self.studies]
+        return _make_study_name(agent_names, benchmark_names, self.suffix)
+
+    def find_incomplete(self, include_errors=True):
+        for study in self.studies:  # NOTE(review): per-study return values are discarded
+            study.find_incomplete(include_errors=include_errors)
+
+    def run(self, n_jobs=1, parallel_backend="ray", strict_reproducibility=False, n_relaunch=3):
+
+        self.save()  # creates self.dir (via make_dir) and pickles this study before any run
+
+        for study in self.studies:
+            study.make_dir(exp_root=self.dir)  # nest each sub-study directory under this one
+            study.run(n_jobs, parallel_backend, strict_reproducibility, n_relaunch)
+        _, summary_df, _ = self.get_results()  # loads results from self.dir
+        logger.info("\n" + str(summary_df))
+        logger.info(f"SequentialStudies {self.name} finished.")
+
+    def override_max_steps(self, max_steps):
+        for study in self.studies:  # forward the override to every sub-study
+            study.override_max_steps(max_steps)
+
+    def append_to_journal(self, strict_reproducibility=True):
+        for study in self.studies:  # each sub-study appends to the journal itself
+            study.append_to_journal(strict_reproducibility=strict_reproducibility)
+
+
 def get_most_recent_study(
     root_dir: Path = None, date_format: str = "%Y-%m-%d_%H-%M-%S", contains=None
 ):

From 9fe2a1a2ced1f4caa247262f7a24a999ea431517 Mon Sep 17 00:00:00 2001
From: ThibaultLSDC <thibault.de.chezelles@gmail.com>
Date: Thu, 28 Nov 2024 19:40:22 +0000
Subject: [PATCH 7/8] AB results

---
 reproducibility_journal.csv | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/reproducibility_journal.csv b/reproducibility_journal.csv
index ad2bfaa8..7fb1d401 100644
--- a/reproducibility_journal.csv
+++ b/reproducibility_journal.csv
@@ -46,3 +46,10 @@ ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,weblinx_test,0.0.1.de
 ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.089,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
 ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.125,0.006,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
 ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,weblinx_test,0.0.1.dev13,2024-11-07_21-42-30,b9451759-4f0e-492c-a3c8-fa5109d2d9b1,0.079,0.005,0,2650/2650,None,Linux (#66-Ubuntu SMP Fri Aug 30 13:56:20 UTC 2024),3.12.7,1.39.0,0.2.3,7a5b91e62056fa8fb26efdd2f64f5b25a92b817c,,0.12.0,8633c30c31e6a5a1d5122835c035aa56d18f3f0a,
+ThibaultLSDC,GenericAgent-gpt-4o,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.005,0.003,2,213/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c,  M: src/agentlab/experiments/study.py,0.13.1,None,
+ThibaultLSDC,GenericAgent-gpt-4o-mini,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.002,0.002,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c,  M: src/agentlab/experiments/study.py,0.13.1,None,
+ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-405b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.008,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c,  M: src/agentlab/experiments/study.py,0.13.1,None,
+ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-70b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.005,8,206/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c,  M: src/agentlab/experiments/study.py,0.13.1,None,
+ThibaultLSDC,GenericAgent-meta-llama_llama-3.1-8b-instruct,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.001,0.001,15,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c,  M: src/agentlab/experiments/study.py,0.13.1,None,
+ThibaultLSDC,GenericAgent-anthropic_claude-3.5-sonnet:beta,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.007,0.003,1,212/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c,  M: src/agentlab/experiments/study.py,0.13.1,None,
+ThibaultLSDC,GenericAgent-openai_o1-mini-2024-09-12,assistantbench,0.13.1,2024-11-28_19-34-58,d93a2398-2b70-41ce-b989-364fed988d73,0.009,0.005,1,214/214,None,Linux (#68-Ubuntu SMP Mon Oct 7 14:34:20 UTC 2024),3.12.7,1.39.0,0.3.0,32865050045c8c71df35c34ff30a6b420a4e258c,  M: src/agentlab/experiments/study.py,0.13.1,None,

From 77e0484d42525ce1ac4e9e98ae2ecf756d25bb88 Mon Sep 17 00:00:00 2001
From: Thibault LSDC <78021491+ThibaultLSDC@users.noreply.github.com>
Date: Thu, 28 Nov 2024 20:44:22 +0100
Subject: [PATCH 8/8] wth github ?

---
 src/agentlab/experiments/study.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/agentlab/experiments/study.py b/src/agentlab/experiments/study.py
index 7f166edc..c091d117 100644
--- a/src/agentlab/experiments/study.py
+++ b/src/agentlab/experiments/study.py
@@ -109,7 +109,7 @@ class AbstractStudy(ABC):
     def find_incomplete(self, include_errors=True):
         """Prepare the study for relaunching by finding incomplete experiments"""
 
-        @abstractmethod
+    @abstractmethod
     def run(self, n_jobs=1, parallel_backend="ray", strict_reproducibility=False, n_relaunch=3):
         """Run the study"""