diff --git a/README.md b/README.md
index e425ea4..e248152 100644
--- a/README.md
+++ b/README.md
@@ -178,7 +178,7 @@ evaluation_results = simulator.evaluate()
- [Recommendation Track](https://tsinghua-fib-lab.github.io/AgentSocietyChallenge/pages/recommendation-track.html)
- Please register your team first.
- When you submit your agent, please carefully **SELECT the TRACK you want to submit to.**
-- **The content of your submission should be a zip file containing your agent (Only one `{your_agent}.py` file without evaluation code).**
+- **The content of your submission should be a single .py file containing your agent (only one `{your_team}.py` file, without evaluation code).**
- Example submissions:
- For Track 1: [submission_1](example/trackOneSubmission_example.zip)
- For Track 2: [submission_2](example/trackTwoSubmission_example.zip)
diff --git a/docs/assets/data/behavior_leaderboard.csv b/docs/assets/data/behavior_leaderboard.csv
index df706f2..8d02351 100644
--- a/docs/assets/data/behavior_leaderboard.csv
+++ b/docs/assets/data/behavior_leaderboard.csv
@@ -1,3 +1,4 @@
Team name,Submission Time,Preference Estimation,Review Generation,Overall Quality
baseline,2024-12-31, 0.6879, 0.8098, 0.7489
RankMe, 2025-01-03, 0.6823, 0.8087, 0.7455
+伸腿瞪眼丸, 2025-01-06, 0.7966, 0.8700, 0.8333
diff --git a/docs/assets/data/recommendation_leaderboard.csv b/docs/assets/data/recommendation_leaderboard.csv
index af75aa8..82de16e 100644
--- a/docs/assets/data/recommendation_leaderboard.csv
+++ b/docs/assets/data/recommendation_leaderboard.csv
@@ -1,3 +1,5 @@
Team name,Submission Time,Top-1 hr,Top-3 hr,Top-5 hr,Overall hr
baseline, 2024-12-31, 0.0766, 0.2033, 0.3116, 0.1972
RankMe, 2025-01-04, 0.0683, 0.2050, 0.2833, 0.1855
+谭湘文, 2025-01-06, 0.4316, 0.6516, 0.7516, 0.6116
+KON, 2025-01-06, 0.3383, 0.5900, 0.6850, 0.5377
\ No newline at end of file
diff --git a/docs/pages/submission-guidelines.html b/docs/pages/submission-guidelines.html
index 984fa8a..cb45d83 100644
--- a/docs/pages/submission-guidelines.html
+++ b/docs/pages/submission-guidelines.html
@@ -288,7 +288,7 @@
Submit Your Agent
- Please register your team first. Registration
- Please carefully SELECT the TRACK you want to submit to.
- - The content of your submission should be a zip file containing your agent (Only one
{your_agent}.py
file without evaluation code).
+ - The content of your submission should be a single .py file containing your agent (only one
{your_team}.py
file, without evaluation code).
- Example submissions:
- For Track 1: submission_1
diff --git a/websocietysimulator/simulator.py b/websocietysimulator/simulator.py
index bae6ddc..b6e4890 100644
--- a/websocietysimulator/simulator.py
+++ b/websocietysimulator/simulator.py
@@ -121,17 +121,24 @@ def set_llm(self, llm: Union[LLMBase, list[LLMBase]]):
self.llm = llm
logger.info("LLM set")
- def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = False, max_workers: int = None) -> List[Any]:
+ def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = False, max_workers: int = None, time_limitation: float = None) -> List[Any]:
"""
- Run the simulation with optional multi-threading support.
+ Run the simulation with optional multi-threading support and time limitation.
Args:
number_of_tasks: Number of tasks to run. If None, run all tasks.
enable_threading: Whether to enable multi-threading. Default is False.
max_workers: Maximum number of threads to use. If None, will use min(32, number_of_tasks).
+ time_limitation: Time limit in minutes. If None, no time limit is applied.
Returns:
List of outputs from agents for each scenario.
"""
+ import time
+ from concurrent.futures import ThreadPoolExecutor, as_completed, TimeoutError
+
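+ # Track wall-clock time from the start; convert the limit from minutes to seconds (None = no limit)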
+ start_time = time.time()
+ timeout_seconds = time_limitation * 60 if time_limitation else None
+
logger.info("Running simulation")
if not self.agent_class:
raise RuntimeError("Agent class is not set. Use set_agent() to set it.")
@@ -145,6 +152,11 @@ def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = F
if not enable_threading:
self.simulation_outputs = []
for index, task in enumerate(task_to_run):
+ # Check whether the time limit has been exceeded
+ if timeout_seconds and (time.time() - start_time) > timeout_seconds:
+ logger.warning(f"Time limit ({time_limitation} minutes) reached. Stopping simulation.")
+ break
+
if isinstance(self.llm, list):
agent = self.agent_class(llm=self.llm[index%len(self.llm)])
else:
@@ -167,7 +179,6 @@ def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = F
logger.info(f"Simulation finished for task {index}")
else:
# Multi-threaded processing
- from concurrent.futures import ThreadPoolExecutor
from threading import Lock
log_lock = Lock()
@@ -175,7 +186,10 @@ def run_simulation(self, number_of_tasks: int = None, enable_threading: bool = F
def process_task(task_index_tuple):
index, task = task_index_tuple
- agent = self.agent_class(llm=self.llm)
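+ # Match the sequential path: round-robin tasks across LLMs when a list is supplied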
+ if isinstance(self.llm, list):
+ agent = self.agent_class(llm=self.llm[index%len(self.llm)])
+ else:
+ agent = self.agent_class(llm=self.llm)
agent.set_interaction_tool(self.interaction_tool)
agent.insert_task(task)
@@ -195,7 +209,7 @@ def process_task(task_index_tuple):
logger.info(f"Simulation finished for task {index}")
self.simulation_outputs[index] = result
- return result
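+ # Return the index alongside the result so outputs keep their task ordering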
+ return index, result
# Determine the number of worker threads
if max_workers is None:
@@ -204,10 +218,29 @@ def process_task(task_index_tuple):
max_workers = min(max_workers, len(task_to_run))
logger.info(f"Running with {max_workers} threads")
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
- list(executor.map(process_task, enumerate(task_to_run)))
+ # Submit all tasks to the executor
+ future_to_index = {
+ executor.submit(process_task, (i, task)): i
+ for i, task in enumerate(task_to_run)
+ }
+
+ try:
+ # Wait for all tasks to finish or for the time limit to be reached
+ for future in as_completed(future_to_index, timeout=timeout_seconds):
+ try:
+ index, result = future.result()
+ self.simulation_outputs[index] = result
+ except Exception as e:
+ logger.error(f"Task failed with error: {str(e)}")
+ except TimeoutError:
+ logger.error(f"Time limit ({time_limitation} minutes) reached.")
+ raise TimeoutError(f"Time limit ({time_limitation} minutes) reached")
logger.info("Simulation finished")
+ # Filter out None values (tasks that did not finish)
+ self.simulation_outputs = [output for output in self.simulation_outputs if output is not None]
return self.simulation_outputs
def evaluate(self) -> Dict[str, Any]:
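
A minimal usage sketch for the new `time_limitation` parameter (not part of the patch; `MyAgent`, `my_llm`, and the `Simulator(...)` constructor arguments are placeholders, and the import path is assumed from the repository layout):

```python
# Hypothetical usage of run_simulation() with the time_limitation
# parameter introduced by this patch. MyAgent / my_llm are placeholders;
# Simulator constructor arguments are omitted (see the project README).
from concurrent.futures import TimeoutError as FuturesTimeoutError
from websocietysimulator import Simulator  # assumed import path

simulator = Simulator(...)    # set up as described in the README
simulator.set_agent(MyAgent)  # placeholder agent class
simulator.set_llm(my_llm)     # one LLM, or a list to round-robin across tasks

# Sequential mode: the loop checks elapsed time before each task and
# stops gracefully, returning only the outputs completed so far.
outputs = simulator.run_simulation(number_of_tasks=100, time_limitation=30)

# Threaded mode: as_completed() enforces the limit as a hard timeout,
# so the call raises TimeoutError instead of returning partial results.
try:
    outputs = simulator.run_simulation(
        number_of_tasks=100,
        enable_threading=True,
        max_workers=8,
        time_limitation=30,
    )
except FuturesTimeoutError:
    ...  # handle the timeout; completed tasks were logged along the way
```

Note the asymmetry this patch introduces: the sequential path degrades gracefully (partial outputs are returned), while the threaded path raises, because `concurrent.futures.as_completed` signals its deadline by raising `TimeoutError` rather than by yielding whatever finished in time.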