update

tsinghua-fib-lab · Jan 7, 2025 · cf5cc0f · cf5cc0f
1 parent 188608c
commit cf5cc0f
Show file tree

Hide file tree

Showing 5 changed files with 44 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -178,7 +178,7 @@ evaluation_results = simulator.evaluate()
   - [Recommendation Track](https://tsinghua-fib-lab.github.io/AgentSocietyChallenge/pages/recommendation-track.html)
   - Please register your team first.
   - When you submit your agent, please carefully **SELECT the TRACK you want to submit to.**
-- **The content of your submission should be a zip file containing your agent (Only one `{your_agent}.py` file without evaluation code).**
+- **Your submission should be a single .py file containing your agent (only one `{your_team}.py` file, without evaluation code).**
 - Example submissions:
   - For Track 1: [submission_1](example/trackOneSubmission_example.zip)
   - For Track 2: [submission_2](example/trackTwoSubmission_example.zip)

diff --git a/docs/assets/data/behavior_leaderboard.csv b/docs/assets/data/behavior_leaderboard.csv
@@ -1,3 +1,4 @@
 Team name,Submission Time,Preference Estimation,Review Generation,Overall Quality
 baseline,2024-12-31, 0.6879, 0.8098, 0.7489
 RankMe, 2025-01-03, 0.6823, 0.8087, 0.7455
+伸腿瞪眼丸, 2025-01-06, 0.7966, 0.8700, 0.8333
diff --git a/docs/assets/data/recommendation_leaderboard.csv b/docs/assets/data/recommendation_leaderboard.csv
@@ -1,3 +1,5 @@
 Team name,Submission Time,Top-1 hr,Top-3 hr,Top-5 hr,Overall hr
 baseline, 2024-12-31, 0.0766, 0.2033, 0.3116, 0.1972
 RankMe, 2025-01-04, 0.0683, 0.2050, 0.2833, 0.1855
+谭湘文, 2025-01-06, 0.4316, 0.6516, 0.7516, 0.6116
+KON, 2025-01-06, 0.3383, 0.5900, 0.6850, 0.5377
diff --git a/docs/pages/submission-guidelines.html b/docs/pages/submission-guidelines.html
@@ -288,7 +288,7 @@ <h2>Submit Your Agent</h2>
                 <ul>
                     <li>Please register your team first. <a href="https://forms.gle/wisLykLK8eGB2X1PA" target="_blank">Registration</a></li>
                     <li>Please carefully <strong>SELECT the TRACK</strong> you want to submit to.</li>
-                    <li><strong>The content of your submission should be a zip file containing your agent (Only one <code>{your_agent}.py</code> file without evaluation code).</strong></li>
+                    <li><strong>Your submission should be a single .py file containing your agent (only one <code>{your_team}.py</code> file, without evaluation code).</strong></li>
                     <li>Example submissions:
                         <ul>
                             <li>For Track 1: <a href="https://github.com/tsinghua-fib-lab/AgentSocietyChallenge/blob/main/example/trackOneSubmission_example.zip" target="_blank">submission_1</a></li>

diff --git a/websocietysimulator/simulator.py b/websocietysimulator/simulator.py
@@ -121,17 +121,24 @@ def set_llm(self, llm: Union[LLMBase, list[LLMBase]]):
         self.llm = llm
         logger.info("LLM set")
 
-    def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = False, max_workers: int = None) -> List[Any]:
+    def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = False, max_workers: int = None, time_limitation: float = None) -> List[Any]:
         """
-        Run the simulation with optional multi-threading support.
+        Run the simulation with optional multi-threading support and time limitation.
         
         Args:
             number_of_tasks: Number of tasks to run. If None, run all tasks.
             enable_threading: Whether to enable multi-threading. Default is False.
             max_workers: Maximum number of threads to use. If None, will use min(32, number_of_tasks).
+            time_limitation: Time limit in minutes. If None, no time limit is applied.
         Returns:
             List of outputs from agents for each scenario.
         """
+        import time
+        from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError
+
+        start_time = time.time()
+        timeout_seconds = time_limitation * 60 if time_limitation else None
+
         logger.info("Running simulation")
         if not self.agent_class:
             raise RuntimeError("Agent class is not set. Use set_agent() to set it.")
@@ -145,6 +152,11 @@ def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = F
         if not enable_threading:
             self.simulation_outputs = []
             for index, task in enumerate(task_to_run):
+                # 检查是否超时
+                if timeout_seconds and (time.time() - start_time) > timeout_seconds:
+                    logger.warning(f"Time limit ({time_limitation} minutes) reached. Stopping simulation.")
+                    break
+
                 if isinstance(self.llm, list):
                     agent = self.agent_class(llm=self.llm[index%len(self.llm)])
                 else:
@@ -167,15 +179,17 @@ def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = F
                 logger.info(f"Simulation finished for task {index}")
         else:
             # 多线程处理
-            from concurrent.futures import ThreadPoolExecutor
             from threading import Lock
 
             log_lock = Lock()
             self.simulation_outputs = [None] * len(task_to_run)
 
             def process_task(task_index_tuple):
                 index, task = task_index_tuple
-                agent = self.agent_class(llm=self.llm)
+                if isinstance(self.llm, list):
+                    agent = self.agent_class(llm=self.llm[index%len(self.llm)])
+                else:
+                    agent = self.agent_class(llm=self.llm)
                 agent.set_interaction_tool(self.interaction_tool)
                 agent.insert_task(task)
 
@@ -195,7 +209,7 @@ def process_task(task_index_tuple):
                     logger.info(f"Simulation finished for task {index}")
 
                 self.simulation_outputs[index] = result
-                return result
+                return index, result
 
             # 确定线程数
             if max_workers is None:
@@ -204,10 +218,29 @@ def process_task(task_index_tuple):
                 max_workers = min(max_workers, len(task_to_run))
 
             logger.info(f"Running with {max_workers} threads")
+
             with ThreadPoolExecutor(max_workers=max_workers) as executor:
-                list(executor.map(process_task, enumerate(task_to_run)))
+                # 提交所有任务
+                future_to_index = {
+                    executor.submit(process_task, (i, task)): i 
+                    for i, task in enumerate(task_to_run)
+                }
+
+                try:
+                    # 等待所有任务完成或达到时间限制
+                    for future in as_completed(future_to_index, timeout=timeout_seconds):
+                        try:
+                            index, result = future.result()
+                            self.simulation_outputs[index] = result
+                        except Exception as e:
+                            logger.error(f"Task failed with error: {str(e)}")
+                except TimeoutError:
+                    logger.error(f"Time limit ({time_limitation} minutes) reached.")
+                    raise TimeoutError
 
         logger.info("Simulation finished")
+        # 过滤掉None值（未完成的任务）
+        self.simulation_outputs = [output for output in self.simulation_outputs if output is not None]
         return self.simulation_outputs
 
     def evaluate(self) -> Dict[str, Any]: