From 20010d8aa241ed14fbd4e8e47d11121cca1510cc Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 11:40:29 +0100 Subject: [PATCH 01/11] init: add benchmarking script for standardized simulation performance metrics --- scripts/benchmark02.py | 140 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 scripts/benchmark02.py diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py new file mode 100644 index 0000000..5aafeac --- /dev/null +++ b/scripts/benchmark02.py @@ -0,0 +1,140 @@ +from dataclasses import dataclass +from typing import Dict +import time +import json +import platform +import psutil +import numpy as np +from loguru import logger +from robot_sf.gym_env.robot_env import RobotEnv +from robot_sf.gym_env.env_config import EnvSettings +from stable_baselines3 import PPO + + +@dataclass +class BenchmarkMetrics: + steps_per_second: float + avg_step_time_ms: float + total_episodes: int + system_info: Dict + config_hash: str + + def to_dict(self) -> Dict: + return { + "steps_per_second": self.steps_per_second, + "avg_step_time_ms": self.avg_step_time_ms, + "total_episodes": self.total_episodes, + "system_info": self.system_info, + "config_hash": self.config_hash, + } + + +def run_standardized_benchmark(num_steps: int = 10000) -> BenchmarkMetrics: + """Run a standardized simulation benchmark.""" + # Fixed configuration + env_config = EnvSettings() + env_config.sim_config.difficulty = 2 + env_config.sim_config.ped_density_by_difficulty = [0.02, 0.04, 0.08] + + # Initialize environment + env = RobotEnv(env_config) + model = PPO.load("./model/run_043", env=env) + + # Track timing + step_times = [] + episodes = 0 + obs = env.reset() + + for _ in range(num_steps): + start = time.perf_counter() + + action, _ = model.predict(obs, deterministic=True) + obs, _, done, _, _ = env.step(action) + + step_times.append(time.perf_counter() - start) + + if done: + episodes += 1 + obs = env.reset() + + # Calculate metrics + avg_step_time = np.mean(step_times) + steps_per_sec = 1.0 / avg_step_time + + # System info + system_info = { + "platform": platform.platform(), + "processor": platform.processor(), + "python_version": platform.python_version(), + "cpu_count": psutil.cpu_count(), + "memory_gb": psutil.virtual_memory().total / (1024**3), + } + + # Generate config hash + config_str = str(env_config.sim_config.__dict__) + config_hash = hash(config_str) + + return BenchmarkMetrics( + steps_per_second=steps_per_sec, + avg_step_time_ms=avg_step_time * 1000, + total_episodes=episodes, + system_info=system_info, + config_hash=str(config_hash), + ) + + +def save_benchmark_results( + benchmark_metrics: BenchmarkMetrics, baseline_file: str = "benchmark_baseline.json" +): + """Save benchmark results and compare to baseline.""" + + # Load baseline if exists + try: + with open(baseline_file, "r", encoding="utf-8") as f: + baseline = json.load(f) + except FileNotFoundError: + baseline = None + + # Current results + results = {"timestamp": time.time(), "metrics": benchmark_metrics.to_dict()} + + # Calculate relative performance + if baseline and baseline["metrics"]["config_hash"] == benchmark_metrics.config_hash: + relative_perf = ( + benchmark_metrics.steps_per_second / baseline["metrics"]["steps_per_second"] + ) + results["relative_performance"] = relative_perf + + # Save results + with open( + f"benchmark_results_{time.strftime('%Y%m%d_%H%M%S')}.json", + "w", + encoding="utf-8", + ) as f: + json.dump(results, f, indent=2) + + 
+def create_baseline(): + """Create a baseline benchmark for future comparisons.""" + baseline_metrics = run_standardized_benchmark() + + with open("benchmark_baseline.json", "w", encoding="utf-8") as f: + json.dump( + {"timestamp": time.time(), "metrics": baseline_metrics.to_dict()}, + f, + indent=2, + ) + + +if __name__ == "__main__": + logger.info("Running standardized benchmark...") + # Run benchmark + metrics = run_standardized_benchmark() + + logger.info(f"Steps per second: {metrics.steps_per_second:.2f}") + logger.info(f"Average step time: {metrics.avg_step_time_ms:.2f} ms") + logger.info(f"Total episodes: {metrics.total_episodes}") + + logger.info("Saving benchmark results...") + # Save and compare to baseline + save_benchmark_results(metrics) From 42368970833fcb79a79263ed1cf134538636b56e Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 14:56:44 +0100 Subject: [PATCH 02/11] refactor: enhance benchmark functionality with optional model loading and additional metrics --- scripts/benchmark02.py | 116 ++++++++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 43 deletions(-) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index 5aafeac..81ed25a 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict +from typing import Dict, Optional import time import json import platform @@ -13,11 +13,15 @@ @dataclass class BenchmarkMetrics: + """Metrics collected during benchmark runs""" + steps_per_second: float avg_step_time_ms: float total_episodes: int system_info: Dict config_hash: str + observation_space_info: Dict + used_random_actions: bool = False def to_dict(self) -> Dict: return { @@ -26,11 +30,20 @@ def to_dict(self) -> Dict: "total_episodes": self.total_episodes, "system_info": self.system_info, "config_hash": self.config_hash, + "observation_space_info": self.observation_space_info, + "used_random_actions": self.used_random_actions, } -def run_standardized_benchmark(num_steps: int = 10000) -> BenchmarkMetrics: - """Run a standardized simulation benchmark.""" +def run_standardized_benchmark( + num_steps: int = 2_000, model_path: Optional[str] = "./model/run_043" +) -> BenchmarkMetrics: + """Run a standardized simulation benchmark. + + Args: + num_steps: Number of simulation steps to run + model_path: Path to the model file. 
If None, uses random actions + """ # Fixed configuration env_config = EnvSettings() env_config.sim_config.difficulty = 2 @@ -38,25 +51,59 @@ def run_standardized_benchmark(num_steps: int = 10000) -> BenchmarkMetrics: # Initialize environment env = RobotEnv(env_config) - model = PPO.load("./model/run_043", env=env) + + # Record observation space info + obs_space_info = { + "drive_state_shape": env.observation_space["drive_state"].shape, + "rays_shape": env.observation_space["rays"].shape, + "drive_state_bounds": { + "low": env.observation_space["drive_state"].low.tolist(), + "high": env.observation_space["drive_state"].high.tolist(), + }, + } + + # Try to load model, fall back to random actions if fails + used_random_actions = False + if model_path: + try: + model = PPO.load(model_path, env=env) + logger.info("Successfully loaded model") + except (ValueError, Exception) as e: + logger.warning(f"Failed to load model: {e}") + logger.info("Falling back to random actions") + model = None + used_random_actions = True + else: + model = None + used_random_actions = True # Track timing step_times = [] episodes = 0 obs = env.reset() - for _ in range(num_steps): + logger.info("Starting benchmark run...") + for i in range(num_steps): start = time.perf_counter() - action, _ = model.predict(obs, deterministic=True) + # Get action from model or random + if model: + action, _ = model.predict(obs, deterministic=True) + else: + action = env.action_space.sample() + obs, _, done, _, _ = env.step(action) - step_times.append(time.perf_counter() - start) + step_time = time.perf_counter() - start + step_times.append(step_time) if done: episodes += 1 obs = env.reset() + if i % 1000 == 0: + logger.debug(f"Completed {i}/{num_steps} steps") + # Calculate metrics avg_step_time = np.mean(step_times) steps_per_sec = 1.0 / avg_step_time @@ -68,54 +115,26 @@ def run_standardized_benchmark(num_steps: int = 10000) -> BenchmarkMetrics: "python_version": platform.python_version(), "cpu_count": psutil.cpu_count(), "memory_gb": psutil.virtual_memory().total / (1024**3), + "cpu_freq": psutil.cpu_freq()._asdict() if psutil.cpu_freq() else None, } # Generate config hash config_str = str(env_config.sim_config.__dict__) - config_hash = hash(config_str) + config_hash = str(hash(config_str)) return BenchmarkMetrics( steps_per_second=steps_per_sec, avg_step_time_ms=avg_step_time * 1000, total_episodes=episodes, system_info=system_info, - config_hash=str(config_hash), + config_hash=config_hash, + observation_space_info=obs_space_info, + used_random_actions=used_random_actions, ) -def save_benchmark_results( - benchmark_metrics: BenchmarkMetrics, baseline_file: str = "benchmark_baseline.json" -): - """Save benchmark results and compare to baseline.""" - - # Load baseline if exists - try: - with open(baseline_file, "r", encoding="utf-8") as f: - baseline = json.load(f) - except FileNotFoundError: - baseline = None - - # Current results - results = {"timestamp": time.time(), "metrics": benchmark_metrics.to_dict()} - - # Calculate relative performance - if baseline and baseline["metrics"]["config_hash"] == benchmark_metrics.config_hash: - relative_perf = ( - benchmark_metrics.steps_per_second / baseline["metrics"]["steps_per_second"] - ) - results["relative_performance"] = relative_perf - - # Save results - with open( - f"benchmark_results_{time.strftime('%Y%m%d_%H%M%S')}.json", - "w", - encoding="utf-8", - ) as f: - json.dump(results, f, indent=2) - - def create_baseline(): - """Create a baseline benchmark for future comparisons.""" + 
"""Create a new baseline benchmark.""" baseline_metrics = run_standardized_benchmark() with open("benchmark_baseline.json", "w", encoding="utf-8") as f: @@ -126,15 +145,26 @@ def create_baseline(): ) +def save_benchmark_results(results: BenchmarkMetrics): + """Save benchmark results to a JSON file.""" + with open("benchmark_results.json", "w", encoding="utf-8") as f: + json.dump( + {"timestamp": time.time(), "metrics": results.to_dict()}, + f, + indent=2, + ) + + if __name__ == "__main__": logger.info("Running standardized benchmark...") + # Run benchmark metrics = run_standardized_benchmark() logger.info(f"Steps per second: {metrics.steps_per_second:.2f}") logger.info(f"Average step time: {metrics.avg_step_time_ms:.2f} ms") logger.info(f"Total episodes: {metrics.total_episodes}") + logger.info(f"Used random actions: {metrics.used_random_actions}") - logger.info("Saving benchmark results...") - # Save and compare to baseline + # Save results save_benchmark_results(metrics) From 68759c701083c7f0ca52b5c1a9183400bdf2e20d Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:17:23 +0100 Subject: [PATCH 03/11] feat: add environment info to benchmark metrics and improve JSON output formatting --- scripts/benchmark02.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index 81ed25a..28ae529 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -22,6 +22,7 @@ class BenchmarkMetrics: config_hash: str observation_space_info: Dict used_random_actions: bool = False + env_info: Dict = None def to_dict(self) -> Dict: return { @@ -32,6 +33,7 @@ def to_dict(self) -> Dict: "config_hash": self.config_hash, "observation_space_info": self.observation_space_info, "used_random_actions": self.used_random_actions, + "env_info": self.env_info, } @@ -118,6 +120,13 @@ def run_standardized_benchmark( "cpu_freq": psutil.cpu_freq()._asdict() if psutil.cpu_freq() else None, } + # Environment info + env_info = { + "difficulty": env_config.sim_config.difficulty, + "ped_density_by_difficulty": env_config.sim_config.ped_density_by_difficulty, + "map_name": list(env_config.map_pool.map_defs.keys()), + } + # Generate config hash config_str = str(env_config.sim_config.__dict__) config_hash = str(hash(config_str)) @@ -130,6 +139,7 @@ def run_standardized_benchmark( config_hash=config_hash, observation_space_info=obs_space_info, used_random_actions=used_random_actions, + env_info=env_info, ) @@ -139,17 +149,25 @@ def create_baseline(): with open("benchmark_baseline.json", "w", encoding="utf-8") as f: json.dump( - {"timestamp": time.time(), "metrics": baseline_metrics.to_dict()}, + { + "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), + "metrics": baseline_metrics.to_dict(), + }, f, indent=2, ) -def save_benchmark_results(results: BenchmarkMetrics): +def save_benchmark_results( + results: BenchmarkMetrics, json_file: str = "benchmark_results.json" +): """Save benchmark results to a JSON file.""" - with open("benchmark_results.json", "w", encoding="utf-8") as f: + with open(json_file, "w", encoding="utf-8") as f: json.dump( - {"timestamp": time.time(), "metrics": results.to_dict()}, + { + "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), + "metrics": results.to_dict(), + }, f, indent=2, ) From c2c5fad48090ef198e472a0b94b92b45a44d4bb6 Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:20:20 +0100 
Subject: [PATCH 04/11] feat: enhance benchmark results saving with optional appending to JSON file --- scripts/benchmark02.py | 72 +++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index 28ae529..f0ca797 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -143,34 +143,56 @@ def run_standardized_benchmark( ) -def create_baseline(): - """Create a new baseline benchmark.""" - baseline_metrics = run_standardized_benchmark() - - with open("benchmark_baseline.json", "w", encoding="utf-8") as f: - json.dump( - { - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), - "metrics": baseline_metrics.to_dict(), - }, - f, - indent=2, - ) - - def save_benchmark_results( - results: BenchmarkMetrics, json_file: str = "benchmark_results.json" + results: BenchmarkMetrics, + json_file: str = "benchmark_results.json", + append: bool = True, ): """Save benchmark results to a JSON file.""" - with open(json_file, "w", encoding="utf-8") as f: - json.dump( - { - "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), - "metrics": results.to_dict(), - }, - f, - indent=2, - ) + if append: + try: + with open(json_file, "r+", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, list): + data = [data] + data.append( + { + "timestamp": time.strftime( + "%Y-%m-%d %H:%M:%S", time.localtime() + ), + "metrics": results.to_dict(), + } + ) + f.seek(0) + json.dump(data, f, indent=2) + except FileNotFoundError: + with open(json_file, "w", encoding="utf-8") as f: + json.dump( + [ + { + "timestamp": time.strftime( + "%Y-%m-%d %H:%M:%S", time.localtime() + ), + "metrics": results.to_dict(), + } + ], + f, + indent=2, + ) + else: + with open(json_file, "w", encoding="utf-8") as f: + json.dump( + [ + { + "timestamp": time.strftime( + "%Y-%m-%d %H:%M:%S", time.localtime() + ), + "metrics": results.to_dict(), + } + ], + f, + indent=2, + ) if __name__ == "__main__": From 592f0f70c5e513412638e13fa09e8a8589bbbabd Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:21:30 +0100 Subject: [PATCH 05/11] feat: add benchmark results JSON file with detailed metrics and system information --- benchmark_results.json | 184 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 benchmark_results.json diff --git a/benchmark_results.json b/benchmark_results.json new file mode 100644 index 0000000..d5e7dbc --- /dev/null +++ b/benchmark_results.json @@ -0,0 +1,184 @@ +[ + { + "timestamp": "2024-11-26 15:16:51", + "metrics": { + "steps_per_second": 61.76119723423083, + "avg_step_time_ms": 16.191395969988662, + "total_episodes": 16, + "system_info": { + "platform": "macOS-15.1.1-arm64-arm-64bit", + "processor": "arm", + "python_version": "3.12.7", + "cpu_count": 14, + "memory_gb": 24.0, + "cpu_freq": { + "current": 4, + "min": 1, + "max": 4 + } + }, + "config_hash": "5070639080499893366", + "observation_space_info": { + "drive_state_shape": [ + 3, + 5 + ], + "rays_shape": [ + 3, + 272 + ], + "drive_state_bounds": { + "low": [ + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ] + ], + "high": [ + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ] + ] + } + }, + "used_random_actions": true, + "env_info": { + "difficulty": 2, + 
"ped_density_by_difficulty": [ + 0.02, + 0.04, + 0.08 + ], + "map_name": [ + "uni_campus_big" + ] + } + } + }, + { + "timestamp": "2024-11-26 15:20:11", + "metrics": { + "steps_per_second": 64.21469955469131, + "avg_step_time_ms": 15.572758370508382, + "total_episodes": 10, + "system_info": { + "platform": "macOS-15.1.1-arm64-arm-64bit", + "processor": "arm", + "python_version": "3.12.7", + "cpu_count": 14, + "memory_gb": 24.0, + "cpu_freq": { + "current": 4, + "min": 1, + "max": 4 + } + }, + "config_hash": "1929142745803813092", + "observation_space_info": { + "drive_state_shape": [ + 3, + 5 + ], + "rays_shape": [ + 3, + 272 + ], + "drive_state_bounds": { + "low": [ + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ] + ], + "high": [ + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ] + ] + } + }, + "used_random_actions": true, + "env_info": { + "difficulty": 2, + "ped_density_by_difficulty": [ + 0.02, + 0.04, + 0.08 + ], + "map_name": [ + "uni_campus_big" + ] + } + } + } +] \ No newline at end of file From 0c245fd16d77e4a79440b812deeafbc52ff92e85 Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:13:33 +0100 Subject: [PATCH 06/11] Update scripts/benchmark02.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/benchmark02.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index f0ca797..c99e855 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -22,7 +22,7 @@ class BenchmarkMetrics: config_hash: str observation_space_info: Dict used_random_actions: bool = False - env_info: Dict = None + env_info: Dict = field(default_factory=dict) def to_dict(self) -> Dict: return { From 4368e7acba0f0ce1ebe85ba7f5326559296b092d Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:14:11 +0100 Subject: [PATCH 07/11] Update scripts/benchmark02.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- scripts/benchmark02.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index c99e855..78b0288 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -165,6 +165,7 @@ def save_benchmark_results( ) f.seek(0) json.dump(data, f, indent=2) + f.truncate() except FileNotFoundError: with open(json_file, "w", encoding="utf-8") as f: json.dump( From 42f64602008caf15f8e6c5de22bde748b00261ab Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:14:44 +0100 Subject: [PATCH 08/11] Update scripts/benchmark02.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- scripts/benchmark02.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index 78b0288..cf3cb6d 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -70,7 +70,7 @@ def run_standardized_benchmark( try: model = PPO.load(model_path, env=env) logger.info("Successfully loaded model") - except (ValueError, Exception) as e: + except ValueError as e: logger.warning(f"Failed to load model: {e}") logger.info("Falling back to random actions") model = None From b473c0accc847a8706531f52d7d0cb7dca181ceb Mon Sep 17 00:00:00 2001 From: ll7 
<32880741+ll7@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:57:46 +0100 Subject: [PATCH 09/11] feat: enhance save_benchmark_results function with detailed docstring and logging --- scripts/benchmark02.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index cf3cb6d..d5f5335 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Dict, Optional import time import json @@ -148,7 +148,19 @@ def save_benchmark_results( json_file: str = "benchmark_results.json", append: bool = True, ): - """Save benchmark results to a JSON file.""" + """ + Save benchmark results to a JSON file. + + Parameters: + results (BenchmarkMetrics): The benchmark metrics to save. + json_file (str): The path to the JSON file where results will be saved. + Defaults to "benchmark_results.json". + append (bool): If True, append the results to the existing file. + If False, overwrite the file. Defaults to True. + + Raises: + FileNotFoundError: If the file does not exist and append is True, a new file will be created. + """ if append: try: with open(json_file, "r+", encoding="utf-8") as f: @@ -166,6 +178,7 @@ def save_benchmark_results( f.seek(0) json.dump(data, f, indent=2) f.truncate() + logger.info(f"Appended results to {json_file}") except FileNotFoundError: with open(json_file, "w", encoding="utf-8") as f: json.dump( @@ -180,6 +193,7 @@ def save_benchmark_results( f, indent=2, ) + logger.warning(f"Appending failed. Created new file {json_file}") else: with open(json_file, "w", encoding="utf-8") as f: json.dump( @@ -194,6 +208,7 @@ def save_benchmark_results( f, indent=2, ) + logger.info(f"Saved results to {json_file}") if __name__ == "__main__": From c2f6685d5fee6aa3279f98e240c21deb5ccee35a Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:24:41 +0100 Subject: [PATCH 10/11] feat: add additional benchmark results with detailed metrics and system information --- benchmark_results.json | 182 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) diff --git a/benchmark_results.json b/benchmark_results.json index d5e7dbc..1b93e88 100644 --- a/benchmark_results.json +++ b/benchmark_results.json @@ -180,5 +180,187 @@ ] } } + }, + { + "timestamp": "2024-11-26 16:49:39", + "metrics": { + "steps_per_second": 60.65170177469126, + "avg_step_time_ms": 16.48758354241727, + "total_episodes": 14, + "system_info": { + "platform": "macOS-15.1.1-arm64-arm-64bit", + "processor": "arm", + "python_version": "3.12.7", + "cpu_count": 14, + "memory_gb": 24.0, + "cpu_freq": { + "current": 4, + "min": 1, + "max": 4 + } + }, + "config_hash": "2211083461599127183", + "observation_space_info": { + "drive_state_shape": [ + 3, + 5 + ], + "rays_shape": [ + 3, + 272 + ], + "drive_state_bounds": { + "low": [ + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ] + ], + "high": [ + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ] + ] + } + }, + "used_random_actions": true, + "env_info": { + "difficulty": 2, + "ped_density_by_difficulty": [ + 0.02, + 0.04, + 0.08 + ], + "map_name": [ + "uni_campus_big" + ] + } + } + }, + { + "timestamp": "2024-11-26 16:58:13", + "metrics": { + "steps_per_second": 63.12528437719299, + 
"avg_step_time_ms": 15.841512792634603, + "total_episodes": 16, + "system_info": { + "platform": "macOS-15.1.1-arm64-arm-64bit", + "processor": "arm", + "python_version": "3.12.7", + "cpu_count": 14, + "memory_gb": 24.0, + "cpu_freq": { + "current": 4, + "min": 1, + "max": 4 + } + }, + "config_hash": "6694611064870271734", + "observation_space_info": { + "drive_state_shape": [ + 3, + 5 + ], + "rays_shape": [ + 3, + 272 + ], + "drive_state_bounds": { + "low": [ + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ], + [ + 0.0, + -1.0, + 0.0, + -1.0, + -1.0 + ] + ], + "high": [ + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ], + [ + 1.0, + 1.0, + 1.0, + 1.0, + 1.0 + ] + ] + } + }, + "used_random_actions": true, + "env_info": { + "difficulty": 2, + "ped_density_by_difficulty": [ + 0.02, + 0.04, + 0.08 + ], + "map_name": [ + "uni_campus_big" + ] + } + } } ] \ No newline at end of file From 46870d8a672391bce91b15f5786d62fef579f8f6 Mon Sep 17 00:00:00 2001 From: ll7 <32880741+ll7@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:31:43 +0100 Subject: [PATCH 11/11] feat: close environment and log completion message in benchmark run --- scripts/benchmark02.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/benchmark02.py b/scripts/benchmark02.py index d5f5335..347e01c 100644 --- a/scripts/benchmark02.py +++ b/scripts/benchmark02.py @@ -110,6 +110,9 @@ def run_standardized_benchmark( avg_step_time = np.mean(step_times) steps_per_sec = 1.0 / avg_step_time + env.close() + logger.info("Benchmark run complete. env closed. return metrics") + # System info system_info = { "platform": platform.platform(),