From 713e0fbcf86e6461c50e5161707ae3192d6ece74 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Thu, 1 Apr 2021 11:36:15 +0200 Subject: [PATCH 01/14] Fix reward output and reward calculation --- sumoGym/environment.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 4c99d0d..a763d4b 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -69,9 +69,11 @@ def __init__(self, seed=None): super(SUMOEnvironment, self).__init__() + self.render_mode = mode + self.save_path = None self.name = "SuMoGyM" np.random.seed(seed if seed is not None else 42) - self.seed = seed + self.seed_ = seed if radar_range is None: radar_range = [50, 9] # x and y self.radar_range = radar_range @@ -132,7 +134,7 @@ def start(self): ] self.sumoCmd.append("--seed") - self.sumoCmd.append(str(int(np.random.randint(0, 1000000))) if self.seed is None else f"{self.seed}") + self.sumoCmd.append(str(int(np.random.randint(0, 1000000))) if self.seed_ is None else f"{self.seed_}") traci.start(self.sumoCmd[:4]) @@ -159,8 +161,8 @@ def _inner_reset(self): """ # Changing configuration self._choose_random_simulation() - if self.seed is None: - self.update_seed() + if self.seed_ is None: + self.seed() # Loads traci configuration traci.load(self.sumoCmd[1:]) # Resetting configuration @@ -183,13 +185,17 @@ def _inner_reset(self): return self._get_observation() - def update_seed(self): + def seed(self, seed=None): """ :return: """ index = self.sumoCmd.index("--seed") - self.sumoCmd[index + 1] = str(int(np.random.randint(0, 1000000))) + if seed is None: + self.sumoCmd[index + 1] = str(int(np.random.randint(0, 1000000))) + else: + self.sumoCmd[index + 1] = str(int(seed)) + self.seed_ = seed def _setup_observation_space(self, x_range=50, y_range=9): """ @@ -397,7 +403,7 @@ def _inner_step(self, action): if "lane" in exc.args[0]: cause, reward, terminated = self._get_terminating_events(True, left_=True) self.render() - return self._get_observation(), sum(reward), terminated, {'cause': cause, 'cumulants': reward, + return self._get_observation(), reward, terminated, {'cause': cause, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, @@ -415,7 +421,7 @@ def _inner_step(self, action): # creating the images if render is true. self.render() - return self._get_observation(), sum(reward), terminated, {'cause': cause, 'cumulants': reward, + return self._get_observation(), reward, terminated, {'cause': cause, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, @@ -514,7 +520,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): else: temp_reward["speed"] = self.reward_dict[cause][1] # getting lane change reward. 
- if is_lane_change: + if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] elif cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] -1 From f5a3d5f7d35d7f9322479e12b23ef62b157033b9 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Thu, 1 Apr 2021 12:07:42 +0200 Subject: [PATCH 02/14] Add cumulated rewards --- sumoGym/environment.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index a763d4b..953af27 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -403,7 +403,8 @@ def _inner_step(self, action): if "lane" in exc.args[0]: cause, reward, terminated = self._get_terminating_events(True, left_=True) self.render() - return self._get_observation(), reward, terminated, {'cause': cause, + return self._get_observation(), sum(reward), terminated, {'cause': cause, + 'cumulants': reward, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, @@ -421,7 +422,8 @@ def _inner_step(self, action): # creating the images if render is true. self.render() - return self._get_observation(), reward, terminated, {'cause': cause, + return self._get_observation(), sum(reward), terminated, {'cause': cause, + 'cumulants': reward, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, From da81348d8aca849e0dc208b93d08d8dab73f5f1d Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Thu, 8 Apr 2021 10:43:30 +0200 Subject: [PATCH 03/14] Added reward changes making Q comparable --- sumoGym/environment.py | 48 ++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 953af27..744a42b 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -266,14 +266,29 @@ def _setup_reward_system(self, reward_type='features'): 'collision': [True, -100.0, False], # when causing collision 'slow': [True, -100.0, False], # when being too slow 'left_highway': [True, -100.0, False], # when leaving highway + 'speed': [False, 0.0, True], + # negative reward proportional to the difference from v_des + 'lane_change': [False, 0.0, True], # successful lane-change + 'keep_right': [False, 0.0, True], # whenever the rightmost available lane is used + 'follow_distance': [False, 0.0, True], + # whenever closer than required follow distance, + # proportional negative + 'cut_in_distance': [False, 0.0, True], # whenever it cuts in closer than it should. + 'type': reward_type} + + elif reward_type == 'positive': + self.reward_dict = {'success': [True, 0.0, False], # if successful episode + 'collision': [True, -1.0, False], # when causing collision + 'slow': [True, -1.0, False], # when being too slow + 'left_highway': [True, -1.0, False], # when leaving highway 'speed': [False, 1.0, True], # negative reward proportional to the difference from v_des 'lane_change': [False, 1.0, True], # successful lane-change 'keep_right': [False, 1.0, True], # whenever the available most right lane is used - 'follow_distance': [False, -1.0, True], + 'follow_distance': [False, 1.0, True], # whenever closer than required follow distance, # proportional negative - 'cut_in_distance': [False, -1.0, True], # whenever cuts in closer then should. + 'cut_in_distance': [False, 1.0, True], # whenever it cuts in closer than it should.
'type': reward_type} else: raise RuntimeError("Reward system can not be found") @@ -484,21 +499,26 @@ def _get_terminating_events(self, is_lane_change, left_=False): if temp_reward.get("keep_right", [False, False, False])[2]: if self.observation is not None: temp_reward["keep_right"] = self.reward_dict["keep_right"][1] if self.observation["lane_id"] == 0 or ( - self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else self.reward_dict["keep_right"][1] -1 + self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else \ + self.reward_dict["keep_right"][1] - 1 else: - temp_reward["keep_right"] = self.reward_dict[cause][1] if cause is not None else 0 + if cause is not None: + temp_reward["keep_right"] = self.reward_dict[cause][1] + else: + temp_reward["keep_right"] = 0 if temp_reward.get("follow_distance", [False, False, False])[2]: if self.observation is not None: follow_time = ( - (self.observation["FE"]["dx"] + self.observation["FE"]["dv"] * self.dt) / self.observation[ - "speed"] * 2) + (self.observation["FE"]["dx"] + self.observation["FE"]["dv"] * self.dt) / self.observation[ + "speed"] * 2) if follow_time < 1: - temp_reward["follow_distance"] = max(follow_time - 1, -1) + temp_reward["follow_distance"] = max(self.reward_dict["follow_distance"][1] + follow_time - 1, + self.reward_dict["follow_distance"][1] - 1) else: - temp_reward["follow_distance"] = 0 + temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1] elif cause is None: - temp_reward["follow_distance"] = 0 + temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1] else: temp_reward["follow_distance"] = self.reward_dict[cause][1] @@ -507,11 +527,13 @@ def _get_terminating_events(self, is_lane_change, left_=False): follow_time = ((-1 * self.observation["RE"]["dx"] - self.observation["RE"]["dv"] * self.dt) / self.observation["speed"] * 2) if follow_time < 0.5: - temp_reward["cut_in_distance"] = max(2 * (follow_time - 0.5), -1) + temp_reward["cut_in_distance"] = max( + self.reward_dict["cut_in_distance"][1] + 2 * (follow_time - 0.5), + self.reward_dict["cut_in_distance"][1] - 1) else: - temp_reward["cut_in_distance"] = 0 + temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] elif cause is None: - temp_reward["cut_in_distance"] = 0 + temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] else: temp_reward["cut_in_distance"] = self.reward_dict[cause][1] @@ -525,7 +547,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] elif cause is None: - temp_reward["lane_change"] = self.reward_dict["lane_change"][1] -1 + temp_reward["lane_change"] = self.reward_dict["lane_change"][1] - 1 else: temp_reward["lane_change"] = self.reward_dict[cause][1] # constructing the reward vector From 440bdcd471e50558a599bca539746b4b7d7c30b2 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Fri, 16 Apr 2021 08:33:13 +0200 Subject: [PATCH 04/14] Added terminated features evaluation corrected --- evaluation.py | 10 ++++++---- sumoGym/environment.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/evaluation.py b/evaluation.py index b123b6d..2994842 100644 --- a/evaluation.py +++ b/evaluation.py @@ -140,7 +140,7 @@ def plot_episode_stat(file): "lane_changes": sum(lane_changes) / len(lane_changes), "desired_speed_difference": speeds - desired_speeds, "keeping_right": keep_right, - "average_reward_per_step": r.mean(), + 
"average_reward_per_step": r.mean() if len(r.shape) < 2 else r.sum(-1).mean(), "cause": cause, "distance_before_lane_change": distance_before_lane_change, "distance_after_lane_change": distance_after_lane_change, @@ -208,9 +208,10 @@ def draw_causes(cause_dicts, labels): ax.bar_label(rects, label_type='center', color=text_color) ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1), loc='lower left', fontsize='small') + plt.tight_layout() def eval_full_statistics(global_statistics, save_figures_path=None): - eval_values = ["ego_speed", "desired_speed_difference", "follow_distance", "front_tiv", + eval_values = ["ego_speed", "desired_speed_difference", "front_tiv", "rear_tiv", "lane_changes", "keeping_right", "average_reward_per_step", "tiv_before_lane_change", "tiv_after_lane_change", ] if save_figures_path is not None and not os.path.exists(save_figures_path): @@ -261,13 +262,14 @@ def eval_full_statistics(global_statistics, save_figures_path=None): def draw_boxplot(data, labels, names): fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=False, sharey=True, figsize=(8, 12)) - # fig.suptitle("title") + fig.suptitle("Evaluating episodic behavior") plt.autoscale() for i, ax in enumerate(axes.flatten()): ax.boxplot(data[i], labels=labels[i], autorange=True, showfliers=True, notch=False, meanline=True, whis=[5, 95], sym="", vert=False) ax.set_title(names[i]) # ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center') + plt.tight_layout() def fig_plot(data, title, names): fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=True, sharey=True, figsize=(8, 12)) @@ -290,7 +292,7 @@ def fig_plot(data, title, names): if __name__ == "__main__": dir_of_eval = [ # "/cache/RL/training_with_policy/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210324_102545/", - "/cache/RL/Eval_fastrl/", + "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/", # "/cache/hdd/new_rewards/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210209_101340", ] for run in dir_of_eval: diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 744a42b..886bd49 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -277,7 +277,7 @@ def _setup_reward_system(self, reward_type='features'): 'type': reward_type} elif reward_type == 'positive': - self.reward_dict = {'success': [True, 0.0, False], # if successful episode + self.reward_dict = {'success': [True, 0.0, True], # if successful episode 'collision': [True, -1.0, False], # when causing collision 'slow': [True, -1.0, False], # when being too slow 'left_highway': [True, -1.0, False], # when leaving highway From 8eef37e96b3e64fdf678800df7f252fcdc3bbd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sz=C5=91ke=20L=C3=A1szl=C3=B3?= Date: Fri, 16 Apr 2021 10:30:04 +0200 Subject: [PATCH 05/14] correcting runner script --- docker-compose.yml | 4 +++- entry.sh | 17 +++++++++++++++++ start_docker.sh | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 entry.sh mode change 100644 => 100755 start_docker.sh diff --git a/docker-compose.yml b/docker-compose.yml index 8ae2e56..ec09210 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,8 @@ services: - QT_X11_NO_MITSHM=1 - USER=$USER - UID=$UID + - HOST_USER=$USER + - HOST_UID=$UID - _JAVA_OPTIONS=-Duser.home=/home/$USER/ # for Java based apps, i.e. 
PyCharm, CLion - NVIDIA_VISIBLE_DEVICES=all entrypoint: ["/entry.sh","true"] @@ -31,4 +33,4 @@ services: cap_add: - SYS_PTRACE security_opt: - - seccomp:unconfined \ No newline at end of file + - seccomp:unconfined diff --git a/entry.sh b/entry.sh new file mode 100644 index 0000000..cab16cb --- /dev/null +++ b/entry.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +if [ -z "$1" ]; then + START_SSH=false +else + START_SSH=$1 + shift +fi + +echo "$HOST_USER:x:$HOST_UID:" >> /etc/group +echo "docker:x:999:$HOST_USER" >> /etc/group +useradd -u $HOST_UID -g $HOST_UID -d /home/$HOST_USER -s /bin/bash -M $HOST_USER + +if [ "$START_SSH" = true ]; then + service ssh start +fi + +su $HOST_USER "$@" diff --git a/start_docker.sh b/start_docker.sh old mode 100644 new mode 100755 index 358d58c..2649112 --- a/start_docker.sh +++ b/start_docker.sh @@ -11,7 +11,7 @@ if [ ${#docker_list[@]} -gt 1 ]; then fi elif [ ${#docker_list[@]} -eq 0 ]; then echo "We have no running docker container, so we start one." - cd docker-compose -f ./docker-compose.yml run --name docker_sumo --rm --service-ports sumo + docker-compose -f ./docker-compose.yml run --name docker_sumo --rm --service-ports sumo else echo "We are entering the only running docker container." docker exec -it --user=$UID ${docker_list[0]} /bin/bash From 7aa3454fcfca02e5aa05387a21d4c124bae32a35 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Mon, 7 Jun 2021 13:23:13 +0200 Subject: [PATCH 06/14] Comment code --- evaluation.py | 177 +++++++++++++++++++---------------------- sumoGym/environment.py | 6 +- 2 files changed, 84 insertions(+), 99 deletions(-) diff --git a/evaluation.py b/evaluation.py index 2994842..79a17ae 100644 --- a/evaluation.py +++ b/evaluation.py @@ -1,5 +1,6 @@ """ -@author "Laszlo Szoke (CC-AD/ENG1-Bp)" +@author "Laszlo Szoke" +This script is used to generate the plots of the training evaluation """ import copy import glob @@ -11,6 +12,7 @@ import numpy as np """ +Indices of the attributes: FL 0 - dx 1 - dv @@ -39,8 +41,14 @@ def plot_episode_stat(file): + """ + Function to read and collect the data from file + :param file: path to the data + :return: dict of interesting attributes + """ with open(file, "br") as f: dict_ = pickle.load(f) + s = np.asarray(dict_["state"][:-1]) r = np.asarray(dict_["reward"]) cause = dict_["cause"] @@ -64,62 +72,7 @@ def plot_episode_stat(file): rear_visible = rear_ego_distances > -48 time_ = np.asarray(list(range(len(lanes)))) lanes = np.asarray(lanes) - # # Plotting front distances relative to the ego based on the different lanes - # plt.scatter(time_, front_right_distance, label="FR") - # plt.scatter(time_, front_left_distance, label="FL") - # plt.scatter(time_, front_ego_distance, label="FE") - # plt.legend() - # plt.xlabel("steps [-]") - # plt.ylabel("Distance [m]") - # plt.title("Front Distance") - # plt.show() - # # Plotting rear distances relative to the ego based on the different lanes - # plt.scatter(time_, rear_right_distances, label="RR") - # plt.scatter(time_, rear_left_distances, label="RL") - # plt.scatter(time_, rear_ego_distances, label="RE") - # plt.xlabel("steps [-]") - # plt.ylabel("Distance [m]") - # plt.legend() - # plt.title("Rear Distance") - # plt.show() - # # Plotting speed of the ego, desired speed and front vehicle speed - # plt.plot(speeds, label="ego") - # plt.plot(desired_speeds, label="desired") - # plt.plot(front_ego_speeds + speeds, label="front") - # plt.legend() - # plt.xlabel("steps [-]") - # plt.ylabel("Speed [m/s2]") - # plt.title("Speed values") - # plt.show() - # # 
Plotting the speed differences - # plt.plot(desired_speeds - speeds, label="desired") - # plt.plot(front_ego_speeds, label="front") - # plt.xlabel("steps [-]") - # plt.ylabel("Speed [m/s2]") - # plt.legend() - # plt.title("Speed Differences") - # plt.show() - # # Plotting speed - distance of rear vehicles - # plt.scatter((rear_ego_speeds + speeds)[rear_visible], -1*rear_ego_distances[rear_visible]) - # plt.scatter((rear_right_speeds + speeds)[rear_visible], -1*rear_right_distances[rear_visible]) - # plt.scatter((rear_left_speeds + speeds)[rear_visible], -1*rear_left_distances[rear_visible]) - # plt.title("Rear time_ till collision") - # plt.xlabel("Speed [m/s2]") - # plt.ylabel("Distance [m]") - # plt.show() - # Plotting speed - distance of front vehicles - # plt.scatter(speeds[front_visible], - # front_ego_distance[front_visible] / (front_ego_speeds[front_visible] + speeds[front_visible]), - # label="FE") - # plt.scatter(speeds[front_visible], - # front_left_distance[front_visible] / (front_left_speeds[front_visible] + speeds[front_visible]), - # label="FL") - # # plt.scatter(time_[front_visible], front_right_distance[front_visible]/(front_right_speeds[front_visible]+speeds[front_visible]), label="FR") - # # plt.scatter(time_[front_visible], front_left_distance[front_visible]/-1/front_left_speeds[front_visible], label="RL") - # plt.title("Time in-between front vehicle") - # plt.xlabel("Distance [m]") - # plt.ylabel("Time in-between vehicles [s]") - # plt.show() + # summing the correct situation when the ego is keeping right as much as it can. keep_right = (sum(lanes == 0) + sum(np.logical_and(lanes != 0, s[:, 13] == 1))) / len(lanes) lane_changes = lanes[1:] != lanes[:-1] @@ -152,6 +105,12 @@ def plot_episode_stat(file): def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): + """ + Function to collect all logs of the episodes + :param path_to_env_log: path to the directory of the env logs + :param extention: the file ending + :return: statistics of the folder + """ files = glob.glob(f'{os.path.join(path_to_env_log, extention)}') files.sort(key=os.path.getmtime) with open(f'{os.path.split(path_to_env_log)[0]}/args_eval.txt', 'r') as f: @@ -159,35 +118,59 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): statistics_in_folder = [] for filename in files: return_dict = plot_episode_stat(filename) - return_dict["weights"] = params.get("model", "") + " "+ decode_w_for_readable_names(params["w"]) + return_dict["weights"] = decode_w_for_readable_names(params.get("model", ""), params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder -def decode_w_for_readable_names(w): - - # if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]: - # w_string = "safe" - # elif w == [1.0, 1.0, 0.0, 0.0, 0.0, 0.0]: - # w_string = "safe and speedy" - # elif w == [1.0, 0.0, 1.0, 0.0, 0.0, 0.0]: - # w_string = "safe LC" - # elif w == [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]: - # w_string = "safe right" - # elif w == [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]: - # w_string = "safe follow" - # elif w == [1.0, 0.0, 0.0, 0.0, 0.0, 1.0]: - # w_string = "safe cut-in" - # elif w == [1.0, 0.0, 0.0, 0.0, 1.0, 1.0]: - # w_string = "safe follow cut-in" - # elif w == [1.0, 1.0, 0.0, 1.0, 1.0, 1.0]: - # w_string = "all but lc" - # else: - w_string = str(w) +def decode_w_for_readable_names(model_name, w): + """ + Function to decode the model name for the plot labels + :param model_name: name of the model we evaluate + :param w: weights of the preferences + :return: decoded name + """ + name = "FastRL" if 
model_name in "v1" else "Q" + if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]: + w_string = "Safe" + elif w == [0.0, 1.0, 0.0, 0.0, 0.0, 0.0]: + w_string = "Speed keeper" + elif w == [0.0, 0.0, 1.0, 0.0, 0.0, 0.0]: + w_string = "Lane changer" + elif w == [0.0, 0.0, 0.0, 1.0, 0.0, 0.0]: + w_string = "Right keeper" + elif w == [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]: + w_string = "Safe follower" + elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: + w_string = "No cut-in driver" + elif w == [1.0, 1.0, 0.0, 0.1, 0.5, 1.0]: + w_string = name + " Baseline" + elif w == [1.0, 2.0, 0.0, 0.0, 0.5, 1.0]: + w_string = "D" + elif w == [1.0, 0.0, -0.1, -0.1, 1.0, 1.0]: + w_string = "C" + elif w == [1.0, 2.0, -0.1, -0.5, 1.0, 1.0]: + w_string = "B" + elif w == [10.0, 2.0, -0.1, -0.5, 0.5, 1.0]: + w_string = "A" + elif w == [1.0, 1.0, 0.0, 1.0, 1.0, 1.0]: + w_string = "all but lc" + else: + w_string = str(w) return w_string + def draw_causes(cause_dicts, labels): + """ + Function to draw cause plot + :param cause_dicts: causes + :param labels: labels to plot + :return: + """ category_names = [str(key) for key in cause_dicts[0].keys()] - data = np.array(list(list(i.values()) for i in cause_dicts)) + dat = list(list(i.values()) for i in cause_dicts) + dat.reverse() + labels.reverse() + data = np.array(dat) data_cum = data.cumsum(axis=1) category_colors = plt.get_cmap('RdYlGn')( np.linspace(0.15, 0.85, data.shape[1])) @@ -196,7 +179,6 @@ def draw_causes(cause_dicts, labels): ax.invert_yaxis() ax.xaxis.set_visible(False) ax.set_xlim(0, np.sum(data, axis=1).max()) - for i, (colname, color) in enumerate(zip(category_names, category_colors)): widths = data[:, i] starts = data_cum[:, i] - widths @@ -211,6 +193,12 @@ def draw_causes(cause_dicts, labels): plt.tight_layout() def eval_full_statistics(global_statistics, save_figures_path=None): + """ + Function to plot all the collected data. 
+ :param global_statistics: list of parameters of the evals + :param save_figures_path: where to save the plots + :return: none + """ eval_values = ["ego_speed", "desired_speed_difference", "front_tiv", "rear_tiv", "lane_changes", "keeping_right", "average_reward_per_step", "tiv_before_lane_change", "tiv_after_lane_change", ] @@ -240,10 +228,10 @@ def eval_full_statistics(global_statistics, save_figures_path=None): # plt.hist(episode_stat, bins=min(episode_stat.size//10, 50), histtype="barstacked", density=True, label=name_list[-1], stacked=True) name_stat.append(episode_stat) global_statsss.append(name_stat) - global_names.append(name) + global_names.append(name.replace("_", " ", -1)) global_labels.append(name_list) - draw_causes(cause_list, global_labels[0]) + draw_causes(cause_list, copy.deepcopy(global_labels[0])) if save_figures_path is not None: plt.savefig(f'{save_figures_path}/cause_plot.jpg') plt.cla() @@ -261,6 +249,13 @@ def eval_full_statistics(global_statistics, save_figures_path=None): def draw_boxplot(data, labels, names): + """ + Function to draw the boxplots + :param data: data to plot + :param labels: labels for the plots + :param names: names of the models + :return: + """ fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=False, sharey=True, figsize=(8, 12)) fig.suptitle("Evaluating episodic behavior") plt.autoscale() @@ -272,32 +267,22 @@ def draw_boxplot(data, labels, names): plt.tight_layout() def fig_plot(data, title, names): + fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=True, sharey=True, figsize=(8, 12)) fig.suptitle(title) plt.autoscale() for i, ax in enumerate(axes.flatten()): - # Bulbasaur - # sns.histplot(data=data[i], - # bins='auto', - # kde=True, - # ax=ax, - # stat="probability", - # common_norm=True, - # common_bins=True, - # multiple="layer", - # label=names[i]) ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center') if __name__ == "__main__": dir_of_eval = [ - # "/cache/RL/training_with_policy/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210324_102545/", - "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/", - # "/cache/hdd/new_rewards/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210209_101340", + "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/" ] for run in dir_of_eval: global_stat = [] eval_dirs = os.listdir(run) + eval_dirs.sort() for dir_ in eval_dirs: if "eval" not in dir_: continue @@ -305,5 +290,3 @@ def fig_plot(data, title, names): global_stat.append(single_stat) eval_full_statistics(global_stat, ) # save_figures_path=os.path.join(dir_of_eval, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) - - print() diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 886bd49..952b61c 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -500,7 +500,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): if self.observation is not None: temp_reward["keep_right"] = self.reward_dict["keep_right"][1] if self.observation["lane_id"] == 0 or ( self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else \ - self.reward_dict["keep_right"][1] - 1 + self.reward_dict["keep_right"][1] - 1 else: if cause is not None: temp_reward["keep_right"] = self.reward_dict[cause][1] @@ -782,7 +782,9 @@ def _calculate_image_environment(self, flatten=True): # Drawing speed of the current car velocity = self.env_obs[car_id]['speed'] / 50 if self.egoID == car_id: - velocity = 1 - abs(self.env_obs[car_id]['speed'] - self.desired_speed) / self.desired_speed + 
velocity = 1 - abs(self.env_obs[car_id]['speed'] - self.desired_speed) / max(self.desired_speed, + self.env_obs[car_id][ + "speed"]) observation[0, self.x_range_grid + dx - l:self.x_range_grid + dx + l, self.y_range_grid + dy - w:self.y_range_grid + dy + w] += np.ones_like( observation[0, self.x_range_grid + dx - l:self.x_range_grid + dx + l, From 36641206fbcdd433b366136791a430c18666a37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Holl=C3=B3=20=C3=81ron?= Date: Tue, 5 Oct 2021 09:18:45 +0200 Subject: [PATCH 07/14] fix reward type changed --- sumoGym/run_env.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sumoGym/run_env.py b/sumoGym/run_env.py index bb8fbf4..bac081c 100644 --- a/sumoGym/run_env.py +++ b/sumoGym/run_env.py @@ -14,9 +14,9 @@ def main(): # Modify simulation_directory for your directory path env = gym.make('SUMOEnvironment-v0', simulation_directory='../basic_env', - type_os="image", + type_os="structured", type_as='discrete', - reward_type='speed', + reward_type='positive', mode='none', change_speed_interval=100, ) @@ -24,7 +24,7 @@ def main(): terminate = False while not terminate: # action = [float(input('next steering')), float(input('next vel_dif'))] - action = random.randint(0,8) + action = int(input())# random.randint(0,8) state, reward, terminate, info = env.step(action) time.sleep(0.1) if terminate: From 005e2dd9fd1980062df3589f7f77b3c8dfb8ba98 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Fri, 15 Oct 2021 11:59:00 +0000 Subject: [PATCH 08/14] current setting --- sumoGym/environment.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 952b61c..ad9c57a 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -262,10 +262,10 @@ def _setup_reward_system(self, reward_type='features'): if reward_type == "basic": raise NotImplementedError elif reward_type == 'features': - self.reward_dict = {'success': [True, 0.0, False], # if successful episode - 'collision': [True, -100.0, False], # when causing collision - 'slow': [True, -100.0, False], # when being too slow - 'left_highway': [True, -100.0, False], # when leaving highway + self.reward_dict = {'success': [True, 0.0, True], # if successful episode + 'collision': [True, -10.0, False], # when causing collision + 'slow': [True, -10.0, False], # when being too slow + 'left_highway': [True, -10.0, False], # when leaving highway 'speed': [False, 0.0, True], # negative reward proportional to the difference from v_des 'lane_change': [False, 0.0, True], # successful lane-change @@ -278,9 +278,9 @@ def _setup_reward_system(self, reward_type='features'): elif reward_type == 'positive': self.reward_dict = {'success': [True, 0.0, True], # if successful episode - 'collision': [True, -1.0, False], # when causing collision - 'slow': [True, -1.0, False], # when being too slow - 'left_highway': [True, -1.0, False], # when leaving highway + 'collision': [True, -10.0, False], # when causing collision + 'slow': [True, -10.0, False], # when being too slow + 'left_highway': [True, -10.0, False], # when leaving highway 'speed': [False, 1.0, True], # negative reward proportional to the difference from v_des 'lane_change': [False, 1.0, True], # successful lane-change From d55c04b310d1ae69fa61e28471c40d5fb42fe6ab Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Mon, 8 Nov 2021 10:05:29 +0100 Subject: [PATCH 09/14] change rewards --- sumoGym/environment.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index ad9c57a..226f635 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -505,7 +505,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): if cause is not None: temp_reward["keep_right"] = self.reward_dict[cause][1] else: - temp_reward["keep_right"] = 0 + temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1 if temp_reward.get("follow_distance", [False, False, False])[2]: if self.observation is not None: @@ -520,7 +520,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): elif cause is None: temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1] else: - temp_reward["follow_distance"] = self.reward_dict[cause][1] + temp_reward["follow_distance"] = 0 if temp_reward["type"] == "positive" else -1 if temp_reward.get("cut_in_distance", [False, False, False])[2]: if self.observation is not None: @@ -535,21 +535,21 @@ def _get_terminating_events(self, is_lane_change, left_=False): elif cause is None: temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] else: - temp_reward["cut_in_distance"] = self.reward_dict[cause][1] + temp_reward["cut_in_distance"] = 0 if temp_reward["type"] == "positive" else -1 # getting speed reward if cause is None: dv = abs(self.state['speed'] - self.desired_speed) temp_reward["speed"] = self.reward_dict["speed"][1] - dv / max(self.desired_speed, self.state["speed"]) else: - temp_reward["speed"] = self.reward_dict[cause][1] + temp_reward["speed"] = 0 if temp_reward["type"] == "positive" else -1 # getting lane change reward. if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] elif cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] - 1 else: - temp_reward["lane_change"] = self.reward_dict[cause][1] + temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 # constructing the reward vector reward = self.get_max_reward(temp_reward) * self.default_w From d13b2d25e51619f1a1f6acd228c8cb7cfc34881c Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Mon, 8 Nov 2021 10:35:31 +0100 Subject: [PATCH 10/14] rewards changes --- sumoGym/environment.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 226f635..cf49c5e 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -503,7 +503,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): self.reward_dict["keep_right"][1] - 1 else: if cause is not None: - temp_reward["keep_right"] = self.reward_dict[cause][1] + temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1 else: temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1 @@ -546,8 +546,10 @@ def _get_terminating_events(self, is_lane_change, left_=False): # getting lane change reward. 
if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] + # not terminating move reward elif cause is None: - temp_reward["lane_change"] = self.reward_dict["lane_change"][1] - 1 + temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 + # terminating reward else: temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 # constructing the reward vector From f10fc33896e9d9bbd1fc587c4b1ea43f66902408 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Mon, 15 Nov 2021 12:03:58 +0000 Subject: [PATCH 11/14] starting speed adjusted --- sumoGym/environment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index cf49c5e..c68f73b 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -174,8 +174,8 @@ def _inner_reset(self): self._refresh_environment() # Init lateral model # Setting a starting speed of the ego - self.state['speed'] = self.desired_speed - + self.state['speed'] = (self.desired_speed+self.state['speed'])/2 + if "continuous" in self.type_as: self.lateral_model = LateralModel( self.state, @@ -611,7 +611,7 @@ def _select_egos(self, number_of_egos=1): traci.vehicle.setRouteID(self.egoID, "r1") traci.vehicle.setSpeedFactor(self.egoID, 2) - traci.vehicle.setSpeed(self.egoID, self.desired_speed) + traci.vehicle.setSpeed(self.egoID, (traci.vehicle.getSpeed(self.egoID)+self.desired_speed)/2) traci.vehicle.setMaxSpeed(self.egoID, 50) traci.vehicle.subscribeContext(self.egoID, tc.CMD_GET_VEHICLE_VARIABLE, dist=self.radar_range[0], From 941ad01ef94656649714d384de3b2fdb81f83c01 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Wed, 24 Nov 2021 13:46:44 +0000 Subject: [PATCH 12/14] eval adjustments --- evaluation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/evaluation.py b/evaluation.py index 79a17ae..64707e5 100644 --- a/evaluation.py +++ b/evaluation.py @@ -118,7 +118,7 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): statistics_in_folder = [] for filename in files: return_dict = plot_episode_stat(filename) - return_dict["weights"] = decode_w_for_readable_names(params.get("model", ""), params["w"]) + return_dict["weights"] = decode_w_for_readable_names(params.get("model_version", ""), params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder @@ -142,7 +142,7 @@ def decode_w_for_readable_names(model_name, w): w_string = "Safe follower" elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: w_string = "No cut-in driver" - elif w == [1.0, 1.0, 0.0, 0.1, 0.5, 1.0]: + elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: w_string = name + " Baseline" elif w == [1.0, 2.0, 0.0, 0.0, 0.5, 1.0]: w_string = "D" @@ -152,7 +152,7 @@ def decode_w_for_readable_names(model_name, w): w_string = "B" elif w == [10.0, 2.0, -0.1, -0.5, 0.5, 1.0]: w_string = "A" - elif w == [1.0, 1.0, 0.0, 1.0, 1.0, 1.0]: + elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: w_string = "all but lc" else: w_string = str(w) @@ -277,8 +277,11 @@ def fig_plot(data, title, names): if __name__ == "__main__": dir_of_eval = [ - "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/" - ] + #"/cache/plotting/20211018_080302", + #"/cache/plotting/20211122_075322", + "/cache/plotting/compare", + ] + import time for run in dir_of_eval: global_stat = [] eval_dirs = os.listdir(run) @@ -289,4 +292,4 @@ def fig_plot(data, title, names): single_stat = 
plot_evaluation_statistics(os.path.join(run, dir_, "env")) global_stat.append(single_stat) eval_full_statistics(global_stat, - ) # save_figures_path=os.path.join(dir_of_eval, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) + save_figures_path=os.path.join(run, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) From a9ca948e839265ba00d107f47ab6609ceada4fbb Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Wed, 24 Nov 2021 14:58:41 +0100 Subject: [PATCH 13/14] adjusted evaluator script --- evaluation.py | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/evaluation.py b/evaluation.py index 64707e5..2887c16 100644 --- a/evaluation.py +++ b/evaluation.py @@ -118,10 +118,19 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): statistics_in_folder = [] for filename in files: return_dict = plot_episode_stat(filename) - return_dict["weights"] = decode_w_for_readable_names(params.get("model_version", ""), params["w"]) + model_name = "" + model_version = params.get('model_version', "") + use_double = params.get('use_double_model', False) + if model_version is not None: + if model_version in 'v1': + model_name = "DFRL agent" if use_double else 'FastRL agent' + elif model_version in 'q': + model_name = 'Q agent' + return_dict["weights"] = decode_w_for_readable_names(model_name=model_name, w=params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder + def decode_w_for_readable_names(model_name, w): """ Function to decode the model name for the plot labels @@ -129,7 +138,7 @@ def decode_w_for_readable_names(model_name, w): :param w: weights of the preferences :return: decoded name """ - name = "FastRL" if model_name in "v1" else "Q" + if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]: w_string = "Safe" elif w == [0.0, 1.0, 0.0, 0.0, 0.0, 0.0]: @@ -143,21 +152,22 @@ def decode_w_for_readable_names(model_name, w): elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: w_string = "No cut-in driver" elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: - w_string = name + " Baseline" - elif w == [1.0, 2.0, 0.0, 0.0, 0.5, 1.0]: - w_string = "D" - elif w == [1.0, 0.0, -0.1, -0.1, 1.0, 1.0]: - w_string = "C" - elif w == [1.0, 2.0, -0.1, -0.5, 1.0, 1.0]: - w_string = "B" - elif w == [10.0, 2.0, -0.1, -0.5, 0.5, 1.0]: - w_string = "A" + w_string = model_name + " Baseline" + elif w == [1.0, 0.0, -0.5, -0.5, 1.0, 1.0]: + w_string = model_name + " D" + elif w == [1.0, 1.0, 0.5, 0.0, 1.0, 1.0]: + w_string = model_name + " C" + elif w == [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]: + w_string = model_name + " B" + elif w == [1.0, 1.0, -0.5, 0.0, 1.0, 1.0]: + w_string = model_name + " A" elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: w_string = "all but lc" else: w_string = str(w) - return w_string + return w_string + def draw_causes(cause_dicts, labels): """ @@ -192,6 +202,7 @@ def draw_causes(cause_dicts, labels): loc='lower left', fontsize='small') plt.tight_layout() + def eval_full_statistics(global_statistics, save_figures_path=None): """ Function to plot all the collected data. 
@@ -214,9 +225,8 @@ def eval_full_statistics(global_statistics, save_figures_path=None): cause_list = [] for i, item in enumerate(global_statistics): episode_stat = [] - cause_dict = { "collision": 0, "slow": 0, None: 0} + cause_dict = {"collision": 0, "slow": 0, None: 0} for episode in item: - cause_dict[episode["cause"]] += 1 episode_stat.append( @@ -266,8 +276,8 @@ def draw_boxplot(data, labels, names): # ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center') plt.tight_layout() -def fig_plot(data, title, names): +def fig_plot(data, title, names): fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=True, sharey=True, figsize=(8, 12)) fig.suptitle(title) plt.autoscale() @@ -277,11 +287,12 @@ def fig_plot(data, title, names): if __name__ == "__main__": dir_of_eval = [ - #"/cache/plotting/20211018_080302", - #"/cache/plotting/20211122_075322", + # "/cache/plotting/20211018_080302", + # "/cache/plotting/20211122_075322", "/cache/plotting/compare", - ] + ] import time + for run in dir_of_eval: global_stat = [] eval_dirs = os.listdir(run) @@ -292,4 +303,5 @@ def fig_plot(data, title, names): single_stat = plot_evaluation_statistics(os.path.join(run, dir_, "env")) global_stat.append(single_stat) eval_full_statistics(global_stat, - save_figures_path=os.path.join(run, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) + save_figures_path=os.path.join(run, + f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) From 7b1da52a05ee70b09447588e7989a7e3d7dd2d45 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Wed, 8 Dec 2021 15:05:53 +0000 Subject: [PATCH 14/14] eval script adopted --- evaluation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/evaluation.py b/evaluation.py index 2887c16..176ee9d 100644 --- a/evaluation.py +++ b/evaluation.py @@ -123,9 +123,9 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): use_double = params.get('use_double_model', False) if model_version is not None: if model_version in 'v1': - model_name = "DFRL agent" if use_double else 'FastRL agent' + model_name = "DFRL -" if use_double else 'FastRL -' elif model_version in 'q': - model_name = 'Q agent' + model_name = 'Q - ' return_dict["weights"] = decode_w_for_readable_names(model_name=model_name, w=params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder @@ -152,7 +152,7 @@ def decode_w_for_readable_names(model_name, w): elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: w_string = "No cut-in driver" elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: - w_string = model_name + " Baseline" + w_string = model_name + " baseline" elif w == [1.0, 0.0, -0.5, -0.5, 1.0, 1.0]: w_string = model_name + " D" elif w == [1.0, 1.0, 0.5, 0.0, 1.0, 1.0]: @@ -185,7 +185,7 @@ def draw_causes(cause_dicts, labels): category_colors = plt.get_cmap('RdYlGn')( np.linspace(0.15, 0.85, data.shape[1])) - fig, ax = plt.subplots() + fig, ax = plt.subplots(figsize=(7,8)) ax.invert_yaxis() ax.xaxis.set_visible(False) ax.set_xlim(0, np.sum(data, axis=1).max())