From 713e0fbcf86e6461c50e5161707ae3192d6ece74 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Thu, 1 Apr 2021 11:36:15 +0200 Subject: [PATCH 01/14] Fix reward output and reward calculation --- sumoGym/environment.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 4c99d0d..a763d4b 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -69,9 +69,11 @@ def __init__(self, seed=None): super(SUMOEnvironment, self).__init__() + self.render_mode = mode + self.save_path = None self.name = "SuMoGyM" np.random.seed(seed if seed is not None else 42) - self.seed = seed + self.seed_ = seed if radar_range is None: radar_range = [50, 9] # x and y self.radar_range = radar_range @@ -132,7 +134,7 @@ def start(self): ] self.sumoCmd.append("--seed") - self.sumoCmd.append(str(int(np.random.randint(0, 1000000))) if self.seed is None else f"{self.seed}") + self.sumoCmd.append(str(int(np.random.randint(0, 1000000))) if self.seed_ is None else f"{self.seed_}") traci.start(self.sumoCmd[:4]) @@ -159,8 +161,8 @@ def _inner_reset(self): """ # Changing configuration self._choose_random_simulation() - if self.seed is None: - self.update_seed() + if self.seed_ is None: + self.seed() # Loads traci configuration traci.load(self.sumoCmd[1:]) # Resetting configuration @@ -183,13 +185,17 @@ def _inner_reset(self): return self._get_observation() - def update_seed(self): + def seed(self, seed=None): """ :return: """ index = self.sumoCmd.index("--seed") - self.sumoCmd[index + 1] = str(int(np.random.randint(0, 1000000))) + if seed is None: + self.sumoCmd[index + 1] = str(int(np.random.randint(0, 1000000))) + else: + self.sumoCmd[index + 1] = str(int(seed)) + self.seed_ = seed def _setup_observation_space(self, x_range=50, y_range=9): """ @@ -397,7 +403,7 @@ def _inner_step(self, action): if "lane" in exc.args[0]: cause, reward, terminated = self._get_terminating_events(True, left_=True) self.render() - return self._get_observation(), sum(reward), terminated, {'cause': cause, 'cumulants': reward, + return self._get_observation(), reward, terminated, {'cause': cause, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, @@ -415,7 +421,7 @@ def _inner_step(self, action): # creating the images if render is true. self.render() - return self._get_observation(), sum(reward), terminated, {'cause': cause, 'cumulants': reward, + return self._get_observation(), reward, terminated, {'cause': cause, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, @@ -514,7 +520,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): else: temp_reward["speed"] = self.reward_dict[cause][1] # getting lane change reward. 
- if is_lane_change: + if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] elif cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] -1 From f5a3d5f7d35d7f9322479e12b23ef62b157033b9 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Thu, 1 Apr 2021 12:07:42 +0200 Subject: [PATCH 02/14] Add cumulated rewards --- sumoGym/environment.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index a763d4b..953af27 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -403,7 +403,8 @@ def _inner_step(self, action): if "lane" in exc.args[0]: cause, reward, terminated = self._get_terminating_events(True, left_=True) self.render() - return self._get_observation(), reward, terminated, {'cause': cause, + return self._get_observation(), sum(reward), terminated, {'cause': cause, + 'cumulants': reward, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, @@ -421,7 +422,8 @@ def _inner_step(self, action): # creating the images if render is true. self.render() - return self._get_observation(), reward, terminated, {'cause': cause, + return self._get_observation(), sum(reward), terminated, {'cause': cause, + 'cumulants': reward, 'velocity': self.state['speed'], 'distance': self.state['x_position'] - self.ego_start_position, From da81348d8aca849e0dc208b93d08d8dab73f5f1d Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Thu, 8 Apr 2021 10:43:30 +0200 Subject: [PATCH 03/14] Added reward changes making Q comparable --- sumoGym/environment.py | 48 ++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 953af27..744a42b 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -266,14 +266,29 @@ def _setup_reward_system(self, reward_type='features'): 'collision': [True, -100.0, False], # when causing collision 'slow': [True, -100.0, False], # when being too slow 'left_highway': [True, -100.0, False], # when leaving highway + 'speed': [False, 0.0, True], + # negative reward proportional to the difference from v_des + 'lane_change': [False, 0.0, True], # successful lane-change + 'keep_right': [False, 0.0, True], # whenever the rightmost available lane is used + 'follow_distance': [False, 0.0, True], + # whenever closer than required follow distance, + # proportional negative + 'cut_in_distance': [False, 0.0, True], # whenever it cuts in closer than it should. + 'type': reward_type} + + elif reward_type == 'positive': + self.reward_dict = {'success': [True, 0.0, False], # if successful episode + 'collision': [True, -1.0, False], # when causing collision + 'slow': [True, -1.0, False], # when being too slow + 'left_highway': [True, -1.0, False], # when leaving highway 'speed': [False, 1.0, True], # negative reward proportional to the difference from v_des 'lane_change': [False, 1.0, True], # successful lane-change 'keep_right': [False, 1.0, True], # whenever the available most right lane is used - 'follow_distance': [False, -1.0, True], + 'follow_distance': [False, 1.0, True], # whenever closer than required follow distance, # proportional negative - 'cut_in_distance': [False, -1.0, True], # whenever cuts in closer then should. + 'cut_in_distance': [False, 1.0, True], # whenever it cuts in closer than it should.
'type': reward_type} else: raise RuntimeError("Reward system can not be found") @@ -484,21 +499,26 @@ def _get_terminating_events(self, is_lane_change, left_=False): if temp_reward.get("keep_right", [False, False, False])[2]: if self.observation is not None: temp_reward["keep_right"] = self.reward_dict["keep_right"][1] if self.observation["lane_id"] == 0 or ( - self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else self.reward_dict["keep_right"][1] -1 + self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else \ + self.reward_dict["keep_right"][1] - 1 else: - temp_reward["keep_right"] = self.reward_dict[cause][1] if cause is not None else 0 + if cause is not None: + temp_reward["keep_right"] = self.reward_dict[cause][1] + else: + temp_reward["keep_right"] = 0 if temp_reward.get("follow_distance", [False, False, False])[2]: if self.observation is not None: follow_time = ( - (self.observation["FE"]["dx"] + self.observation["FE"]["dv"] * self.dt) / self.observation[ - "speed"] * 2) + (self.observation["FE"]["dx"] + self.observation["FE"]["dv"] * self.dt) / self.observation[ + "speed"] * 2) if follow_time < 1: - temp_reward["follow_distance"] = max(follow_time - 1, -1) + temp_reward["follow_distance"] = max(self.reward_dict["follow_distance"][1] + follow_time - 1, + self.reward_dict["follow_distance"][1] - 1) else: - temp_reward["follow_distance"] = 0 + temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1] elif cause is None: - temp_reward["follow_distance"] = 0 + temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1] else: temp_reward["follow_distance"] = self.reward_dict[cause][1] @@ -507,11 +527,13 @@ def _get_terminating_events(self, is_lane_change, left_=False): follow_time = ((-1 * self.observation["RE"]["dx"] - self.observation["RE"]["dv"] * self.dt) / self.observation["speed"] * 2) if follow_time < 0.5: - temp_reward["cut_in_distance"] = max(2 * (follow_time - 0.5), -1) + temp_reward["cut_in_distance"] = max( + self.reward_dict["cut_in_distance"][1] + 2 * (follow_time - 0.5), + self.reward_dict["cut_in_distance"][1] - 1) else: - temp_reward["cut_in_distance"] = 0 + temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] elif cause is None: - temp_reward["cut_in_distance"] = 0 + temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] else: temp_reward["cut_in_distance"] = self.reward_dict[cause][1] @@ -525,7 +547,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] elif cause is None: - temp_reward["lane_change"] = self.reward_dict["lane_change"][1] -1 + temp_reward["lane_change"] = self.reward_dict["lane_change"][1] - 1 else: temp_reward["lane_change"] = self.reward_dict[cause][1] # constructing the reward vector From 440bdcd471e50558a599bca539746b4b7d7c30b2 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Fri, 16 Apr 2021 08:33:13 +0200 Subject: [PATCH 04/14] Added terminated features evaluation corrected --- evaluation.py | 10 ++++++---- sumoGym/environment.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/evaluation.py b/evaluation.py index b123b6d..2994842 100644 --- a/evaluation.py +++ b/evaluation.py @@ -140,7 +140,7 @@ def plot_episode_stat(file): "lane_changes": sum(lane_changes) / len(lane_changes), "desired_speed_difference": speeds - desired_speeds, "keeping_right": keep_right, - "average_reward_per_step": r.mean(), + 
"average_reward_per_step": r.mean() if len(r.shape) < 2 else r.sum(-1).mean(), "cause": cause, "distance_before_lane_change": distance_before_lane_change, "distance_after_lane_change": distance_after_lane_change, @@ -208,9 +208,10 @@ def draw_causes(cause_dicts, labels): ax.bar_label(rects, label_type='center', color=text_color) ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1), loc='lower left', fontsize='small') + plt.tight_layout() def eval_full_statistics(global_statistics, save_figures_path=None): - eval_values = ["ego_speed", "desired_speed_difference", "follow_distance", "front_tiv", + eval_values = ["ego_speed", "desired_speed_difference", "front_tiv", "rear_tiv", "lane_changes", "keeping_right", "average_reward_per_step", "tiv_before_lane_change", "tiv_after_lane_change", ] if save_figures_path is not None and not os.path.exists(save_figures_path): @@ -261,13 +262,14 @@ def eval_full_statistics(global_statistics, save_figures_path=None): def draw_boxplot(data, labels, names): fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=False, sharey=True, figsize=(8, 12)) - # fig.suptitle("title") + fig.suptitle("Evaluating episodic behavior") plt.autoscale() for i, ax in enumerate(axes.flatten()): ax.boxplot(data[i], labels=labels[i], autorange=True, showfliers=True, notch=False, meanline=True, whis=[5, 95], sym="", vert=False) ax.set_title(names[i]) # ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center') + plt.tight_layout() def fig_plot(data, title, names): fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=True, sharey=True, figsize=(8, 12)) @@ -290,7 +292,7 @@ def fig_plot(data, title, names): if __name__ == "__main__": dir_of_eval = [ # "/cache/RL/training_with_policy/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210324_102545/", - "/cache/RL/Eval_fastrl/", + "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/", # "/cache/hdd/new_rewards/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210209_101340", ] for run in dir_of_eval: diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 744a42b..886bd49 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -277,7 +277,7 @@ def _setup_reward_system(self, reward_type='features'): 'type': reward_type} elif reward_type == 'positive': - self.reward_dict = {'success': [True, 0.0, False], # if successful episode + self.reward_dict = {'success': [True, 0.0, True], # if successful episode 'collision': [True, -1.0, False], # when causing collision 'slow': [True, -1.0, False], # when being too slow 'left_highway': [True, -1.0, False], # when leaving highway From 8eef37e96b3e64fdf678800df7f252fcdc3bbd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sz=C5=91ke=20L=C3=A1szl=C3=B3?= Date: Fri, 16 Apr 2021 10:30:04 +0200 Subject: [PATCH 05/14] correcting runner script --- docker-compose.yml | 4 +++- entry.sh | 17 +++++++++++++++++ start_docker.sh | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 entry.sh mode change 100644 => 100755 start_docker.sh diff --git a/docker-compose.yml b/docker-compose.yml index 8ae2e56..ec09210 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,8 @@ services: - QT_X11_NO_MITSHM=1 - USER=$USER - UID=$UID + - HOST_USER=$USER + - HOST_UID=$UID - _JAVA_OPTIONS=-Duser.home=/home/$USER/ # for Java based apps, i.e. 
PyCharm, CLion - NVIDIA_VISIBLE_DEVICES=all entrypoint: ["/entry.sh","true"] @@ -31,4 +33,4 @@ services: cap_add: - SYS_PTRACE security_opt: - - seccomp:unconfined \ No newline at end of file + - seccomp:unconfined diff --git a/entry.sh b/entry.sh new file mode 100644 index 0000000..cab16cb --- /dev/null +++ b/entry.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +if [ -z "$1" ]; then + START_SSH=false +else + START_SSH=$1 + shift +fi + +echo "$HOST_USER:x:$HOST_UID:" >> /etc/group +echo "docker:x:999:$HOST_USER" >> /etc/group +useradd -u $HOST_UID -g $HOST_UID -d /home/$HOST_USER -s /bin/bash -M $HOST_USER + +if [ "$START_SSH" = true ]; then + service ssh start +fi + +su $HOST_USER "$@" diff --git a/start_docker.sh b/start_docker.sh old mode 100644 new mode 100755 index 358d58c..2649112 --- a/start_docker.sh +++ b/start_docker.sh @@ -11,7 +11,7 @@ if [ ${#docker_list[@]} -gt 1 ]; then fi elif [ ${#docker_list[@]} -eq 0 ]; then echo "We have no running docker container, so we start one." - cd docker-compose -f ./docker-compose.yml run --name docker_sumo --rm --service-ports sumo + docker-compose -f ./docker-compose.yml run --name docker_sumo --rm --service-ports sumo else echo "We are entering the only running docker container." docker exec -it --user=$UID ${docker_list[0]} /bin/bash From 7aa3454fcfca02e5aa05387a21d4c124bae32a35 Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Mon, 7 Jun 2021 13:23:13 +0200 Subject: [PATCH 06/14] Comment code --- evaluation.py | 177 +++++++++++++++++++---------------------- sumoGym/environment.py | 6 +- 2 files changed, 84 insertions(+), 99 deletions(-) diff --git a/evaluation.py b/evaluation.py index 2994842..79a17ae 100644 --- a/evaluation.py +++ b/evaluation.py @@ -1,5 +1,6 @@ """ -@author "Laszlo Szoke (CC-AD/ENG1-Bp)" +@author "Laszlo Szoke" +This script is used to generate the plots of the training evaluation """ import copy import glob @@ -11,6 +12,7 @@ import numpy as np """ +Indices of the attributes: FL 0 - dx 1 - dv @@ -39,8 +41,14 @@ def plot_episode_stat(file): + """ + Function to read and collect the data from file + :param file: path to the data + :return: dict of interesting attributes + """ with open(file, "br") as f: dict_ = pickle.load(f) + s = np.asarray(dict_["state"][:-1]) r = np.asarray(dict_["reward"]) cause = dict_["cause"] @@ -64,62 +72,7 @@ def plot_episode_stat(file): rear_visible = rear_ego_distances > -48 time_ = np.asarray(list(range(len(lanes)))) lanes = np.asarray(lanes) - # # Plotting front distances relative to the ego based on the different lanes - # plt.scatter(time_, front_right_distance, label="FR") - # plt.scatter(time_, front_left_distance, label="FL") - # plt.scatter(time_, front_ego_distance, label="FE") - # plt.legend() - # plt.xlabel("steps [-]") - # plt.ylabel("Distance [m]") - # plt.title("Front Distance") - # plt.show() - # # Plotting rear distances relative to the ego based on the different lanes - # plt.scatter(time_, rear_right_distances, label="RR") - # plt.scatter(time_, rear_left_distances, label="RL") - # plt.scatter(time_, rear_ego_distances, label="RE") - # plt.xlabel("steps [-]") - # plt.ylabel("Distance [m]") - # plt.legend() - # plt.title("Rear Distance") - # plt.show() - # # Plotting speed of the ego, desired speed and front vehicle speed - # plt.plot(speeds, label="ego") - # plt.plot(desired_speeds, label="desired") - # plt.plot(front_ego_speeds + speeds, label="front") - # plt.legend() - # plt.xlabel("steps [-]") - # plt.ylabel("Speed [m/s2]") - # plt.title("Speed values") - # plt.show() - # # 
Plotting the speed differences - # plt.plot(desired_speeds - speeds, label="desired") - # plt.plot(front_ego_speeds, label="front") - # plt.xlabel("steps [-]") - # plt.ylabel("Speed [m/s2]") - # plt.legend() - # plt.title("Speed Differences") - # plt.show() - # # Plotting speed - distance of rear vehicles - # plt.scatter((rear_ego_speeds + speeds)[rear_visible], -1*rear_ego_distances[rear_visible]) - # plt.scatter((rear_right_speeds + speeds)[rear_visible], -1*rear_right_distances[rear_visible]) - # plt.scatter((rear_left_speeds + speeds)[rear_visible], -1*rear_left_distances[rear_visible]) - # plt.title("Rear time_ till collision") - # plt.xlabel("Speed [m/s2]") - # plt.ylabel("Distance [m]") - # plt.show() - # Plotting speed - distance of front vehicles - # plt.scatter(speeds[front_visible], - # front_ego_distance[front_visible] / (front_ego_speeds[front_visible] + speeds[front_visible]), - # label="FE") - # plt.scatter(speeds[front_visible], - # front_left_distance[front_visible] / (front_left_speeds[front_visible] + speeds[front_visible]), - # label="FL") - # # plt.scatter(time_[front_visible], front_right_distance[front_visible]/(front_right_speeds[front_visible]+speeds[front_visible]), label="FR") - # # plt.scatter(time_[front_visible], front_left_distance[front_visible]/-1/front_left_speeds[front_visible], label="RL") - # plt.title("Time in-between front vehicle") - # plt.xlabel("Distance [m]") - # plt.ylabel("Time in-between vehicles [s]") - # plt.show() + # summing the correct situation when the ego is keeping right as much as it can. keep_right = (sum(lanes == 0) + sum(np.logical_and(lanes != 0, s[:, 13] == 1))) / len(lanes) lane_changes = lanes[1:] != lanes[:-1] @@ -152,6 +105,12 @@ def plot_episode_stat(file): def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): + """ + Function to collect all logs of the episodes + :param path_to_env_log: path to the directory of the env logs + :param extention: the file ending + :return: statistics of the folder + """ files = glob.glob(f'{os.path.join(path_to_env_log, extention)}') files.sort(key=os.path.getmtime) with open(f'{os.path.split(path_to_env_log)[0]}/args_eval.txt', 'r') as f: @@ -159,35 +118,59 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): statistics_in_folder = [] for filename in files: return_dict = plot_episode_stat(filename) - return_dict["weights"] = params.get("model", "") + " "+ decode_w_for_readable_names(params["w"]) + return_dict["weights"] = decode_w_for_readable_names(params.get("model", ""), params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder -def decode_w_for_readable_names(w): - - # if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]: - # w_string = "safe" - # elif w == [1.0, 1.0, 0.0, 0.0, 0.0, 0.0]: - # w_string = "safe and speedy" - # elif w == [1.0, 0.0, 1.0, 0.0, 0.0, 0.0]: - # w_string = "safe LC" - # elif w == [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]: - # w_string = "safe right" - # elif w == [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]: - # w_string = "safe follow" - # elif w == [1.0, 0.0, 0.0, 0.0, 0.0, 1.0]: - # w_string = "safe cut-in" - # elif w == [1.0, 0.0, 0.0, 0.0, 1.0, 1.0]: - # w_string = "safe follow cut-in" - # elif w == [1.0, 1.0, 0.0, 1.0, 1.0, 1.0]: - # w_string = "all but lc" - # else: - w_string = str(w) +def decode_w_for_readable_names(model_name, w): + """ + Function to decode the model name for the plot labels + :param model_name: name of the model we evaluate + :param w: weights of the preferences + :return: decoded name + """ + name = "FastRL" if 
model_name in "v1" else "Q" + if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]: + w_string = "Safe" + elif w == [0.0, 1.0, 0.0, 0.0, 0.0, 0.0]: + w_string = "Speed keeper" + elif w == [0.0, 0.0, 1.0, 0.0, 0.0, 0.0]: + w_string = "Lane changer" + elif w == [0.0, 0.0, 0.0, 1.0, 0.0, 0.0]: + w_string = "Right keeper" + elif w == [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]: + w_string = "Safe follower" + elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: + w_string = "No cut-in driver" + elif w == [1.0, 1.0, 0.0, 0.1, 0.5, 1.0]: + w_string = name + " Baseline" + elif w == [1.0, 2.0, 0.0, 0.0, 0.5, 1.0]: + w_string = "D" + elif w == [1.0, 0.0, -0.1, -0.1, 1.0, 1.0]: + w_string = "C" + elif w == [1.0, 2.0, -0.1, -0.5, 1.0, 1.0]: + w_string = "B" + elif w == [10.0, 2.0, -0.1, -0.5, 0.5, 1.0]: + w_string = "A" + elif w == [1.0, 1.0, 0.0, 1.0, 1.0, 1.0]: + w_string = "all but lc" + else: + w_string = str(w) return w_string + def draw_causes(cause_dicts, labels): + """ + Function to draw cause plot + :param cause_dicts: causes + :param labels: labels to plot + :return: + """ category_names = [str(key) for key in cause_dicts[0].keys()] - data = np.array(list(list(i.values()) for i in cause_dicts)) + dat = list(list(i.values()) for i in cause_dicts) + dat.reverse() + labels.reverse() + data = np.array(dat) data_cum = data.cumsum(axis=1) category_colors = plt.get_cmap('RdYlGn')( np.linspace(0.15, 0.85, data.shape[1])) @@ -196,7 +179,6 @@ def draw_causes(cause_dicts, labels): ax.invert_yaxis() ax.xaxis.set_visible(False) ax.set_xlim(0, np.sum(data, axis=1).max()) - for i, (colname, color) in enumerate(zip(category_names, category_colors)): widths = data[:, i] starts = data_cum[:, i] - widths @@ -211,6 +193,12 @@ def draw_causes(cause_dicts, labels): plt.tight_layout() def eval_full_statistics(global_statistics, save_figures_path=None): + """ + Function to plot all the collected data. 
+ :param global_statistics: list of parameters of the evals + :param save_figures_path: where to save the plots + :return: none + """ eval_values = ["ego_speed", "desired_speed_difference", "front_tiv", "rear_tiv", "lane_changes", "keeping_right", "average_reward_per_step", "tiv_before_lane_change", "tiv_after_lane_change", ] @@ -240,10 +228,10 @@ def eval_full_statistics(global_statistics, save_figures_path=None): # plt.hist(episode_stat, bins=min(episode_stat.size//10, 50), histtype="barstacked", density=True, label=name_list[-1], stacked=True) name_stat.append(episode_stat) global_statsss.append(name_stat) - global_names.append(name) + global_names.append(name.replace("_", " ", -1)) global_labels.append(name_list) - draw_causes(cause_list, global_labels[0]) + draw_causes(cause_list, copy.deepcopy(global_labels[0])) if save_figures_path is not None: plt.savefig(f'{save_figures_path}/cause_plot.jpg') plt.cla() @@ -261,6 +249,13 @@ def eval_full_statistics(global_statistics, save_figures_path=None): def draw_boxplot(data, labels, names): + """ + Function to draw the boxplots + :param data: data to plot + :param labels: labels for the plots + :param names: names of the models + :return: + """ fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=False, sharey=True, figsize=(8, 12)) fig.suptitle("Evaluating episodic behavior") plt.autoscale() @@ -272,32 +267,22 @@ def draw_boxplot(data, labels, names): plt.tight_layout() def fig_plot(data, title, names): + fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=True, sharey=True, figsize=(8, 12)) fig.suptitle(title) plt.autoscale() for i, ax in enumerate(axes.flatten()): - # Bulbasaur - # sns.histplot(data=data[i], - # bins='auto', - # kde=True, - # ax=ax, - # stat="probability", - # common_norm=True, - # common_bins=True, - # multiple="layer", - # label=names[i]) ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center') if __name__ == "__main__": dir_of_eval = [ - # "/cache/RL/training_with_policy/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210324_102545/", - "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/", - # "/cache/hdd/new_rewards/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210209_101340", + "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/" ] for run in dir_of_eval: global_stat = [] eval_dirs = os.listdir(run) + eval_dirs.sort() for dir_ in eval_dirs: if "eval" not in dir_: continue @@ -305,5 +290,3 @@ def fig_plot(data, title, names): global_stat.append(single_stat) eval_full_statistics(global_stat, ) # save_figures_path=os.path.join(dir_of_eval, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) - - print() diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 886bd49..952b61c 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -500,7 +500,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): if self.observation is not None: temp_reward["keep_right"] = self.reward_dict["keep_right"][1] if self.observation["lane_id"] == 0 or ( self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else \ - self.reward_dict["keep_right"][1] - 1 + self.reward_dict["keep_right"][1] - 1 else: if cause is not None: temp_reward["keep_right"] = self.reward_dict[cause][1] @@ -782,7 +782,9 @@ def _calculate_image_environment(self, flatten=True): # Drawing speed of the current car velocity = self.env_obs[car_id]['speed'] / 50 if self.egoID == car_id: - velocity = 1 - abs(self.env_obs[car_id]['speed'] - self.desired_speed) / self.desired_speed + 
velocity = 1 - abs(self.env_obs[car_id]['speed'] - self.desired_speed) / max(self.desired_speed, + self.env_obs[car_id][ + "speed"]) observation[0, self.x_range_grid + dx - l:self.x_range_grid + dx + l, self.y_range_grid + dy - w:self.y_range_grid + dy + w] += np.ones_like( observation[0, self.x_range_grid + dx - l:self.x_range_grid + dx + l, From 36641206fbcdd433b366136791a430c18666a37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Holl=C3=B3=20=C3=81ron?= Date: Tue, 5 Oct 2021 09:18:45 +0200 Subject: [PATCH 07/14] fix reward type changed --- sumoGym/run_env.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sumoGym/run_env.py b/sumoGym/run_env.py index bb8fbf4..bac081c 100644 --- a/sumoGym/run_env.py +++ b/sumoGym/run_env.py @@ -14,9 +14,9 @@ def main(): # Modify simulation_directory for your directory path env = gym.make('SUMOEnvironment-v0', simulation_directory='../basic_env', - type_os="image", + type_os="structured", type_as='discrete', - reward_type='speed', + reward_type='positive', mode='none', change_speed_interval=100, ) @@ -24,7 +24,7 @@ def main(): terminate = False while not terminate: # action = [float(input('next steering')), float(input('next vel_dif'))] - action = random.randint(0,8) + action = int(input())# random.randint(0,8) state, reward, terminate, info = env.step(action) time.sleep(0.1) if terminate: From 005e2dd9fd1980062df3589f7f77b3c8dfb8ba98 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Fri, 15 Oct 2021 11:59:00 +0000 Subject: [PATCH 08/14] current setting --- sumoGym/environment.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 952b61c..ad9c57a 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -262,10 +262,10 @@ def _setup_reward_system(self, reward_type='features'): if reward_type == "basic": raise NotImplementedError elif reward_type == 'features': - self.reward_dict = {'success': [True, 0.0, False], # if successful episode - 'collision': [True, -100.0, False], # when causing collision - 'slow': [True, -100.0, False], # when being too slow - 'left_highway': [True, -100.0, False], # when leaving highway + self.reward_dict = {'success': [True, 0.0, True], # if successful episode + 'collision': [True, -10.0, False], # when causing collision + 'slow': [True, -10.0, False], # when being too slow + 'left_highway': [True, -10.0, False], # when leaving highway 'speed': [False, 0.0, True], # negative reward proportional to the difference from v_des 'lane_change': [False, 0.0, True], # successful lane-change @@ -278,9 +278,9 @@ def _setup_reward_system(self, reward_type='features'): elif reward_type == 'positive': self.reward_dict = {'success': [True, 0.0, True], # if successful episode - 'collision': [True, -1.0, False], # when causing collision - 'slow': [True, -1.0, False], # when being too slow - 'left_highway': [True, -1.0, False], # when leaving highway + 'collision': [True, -10.0, False], # when causing collision + 'slow': [True, -10.0, False], # when being too slow + 'left_highway': [True, -10.0, False], # when leaving highway 'speed': [False, 1.0, True], # negative reward proportional to the difference from v_des 'lane_change': [False, 1.0, True], # successful lane-change From d55c04b310d1ae69fa61e28471c40d5fb42fe6ab Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Mon, 8 Nov 2021 10:05:29 +0100 Subject: [PATCH 09/14] change rewards --- sumoGym/environment.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 
deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index ad9c57a..226f635 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -505,7 +505,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): if cause is not None: temp_reward["keep_right"] = self.reward_dict[cause][1] else: - temp_reward["keep_right"] = 0 + temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1 if temp_reward.get("follow_distance", [False, False, False])[2]: if self.observation is not None: @@ -520,7 +520,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): elif cause is None: temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1] else: - temp_reward["follow_distance"] = self.reward_dict[cause][1] + temp_reward["follow_distance"] = 0 if temp_reward["type"] == "positive" else -1 if temp_reward.get("cut_in_distance", [False, False, False])[2]: if self.observation is not None: @@ -535,21 +535,21 @@ def _get_terminating_events(self, is_lane_change, left_=False): elif cause is None: temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] else: - temp_reward["cut_in_distance"] = self.reward_dict[cause][1] + temp_reward["cut_in_distance"] = 0 if temp_reward["type"] == "positive" else -1 # getting speed reward if cause is None: dv = abs(self.state['speed'] - self.desired_speed) temp_reward["speed"] = self.reward_dict["speed"][1] - dv / max(self.desired_speed, self.state["speed"]) else: - temp_reward["speed"] = self.reward_dict[cause][1] + temp_reward["speed"] = 0 if temp_reward["type"] == "positive" else -1 # getting lane change reward. if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] elif cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] - 1 else: - temp_reward["lane_change"] = self.reward_dict[cause][1] + temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 # constructing the reward vector reward = self.get_max_reward(temp_reward) * self.default_w From d13b2d25e51619f1a1f6acd228c8cb7cfc34881c Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Mon, 8 Nov 2021 10:35:31 +0100 Subject: [PATCH 10/14] rewards changes --- sumoGym/environment.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index 226f635..cf49c5e 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -503,7 +503,7 @@ def _get_terminating_events(self, is_lane_change, left_=False): self.reward_dict["keep_right"][1] - 1 else: if cause is not None: - temp_reward["keep_right"] = self.reward_dict[cause][1] + temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1 else: temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1 @@ -546,8 +546,10 @@ def _get_terminating_events(self, is_lane_change, left_=False): # getting lane change reward. 
if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] + # not terminating move reward elif cause is None: - temp_reward["lane_change"] = self.reward_dict["lane_change"][1] - 1 + temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 + # terminating reward else: temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 # constructing the reward vector From f10fc33896e9d9bbd1fc587c4b1ea43f66902408 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Mon, 15 Nov 2021 12:03:58 +0000 Subject: [PATCH 11/14] starting speed adjusted --- sumoGym/environment.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sumoGym/environment.py b/sumoGym/environment.py index cf49c5e..c68f73b 100644 --- a/sumoGym/environment.py +++ b/sumoGym/environment.py @@ -174,8 +174,8 @@ def _inner_reset(self): self._refresh_environment() # Init lateral model # Setting a starting speed of the ego - self.state['speed'] = self.desired_speed - + self.state['speed'] = (self.desired_speed+self.state['speed'])/2 + if "continuous" in self.type_as: self.lateral_model = LateralModel( self.state, @@ -611,7 +611,7 @@ def _select_egos(self, number_of_egos=1): traci.vehicle.setRouteID(self.egoID, "r1") traci.vehicle.setSpeedFactor(self.egoID, 2) - traci.vehicle.setSpeed(self.egoID, self.desired_speed) + traci.vehicle.setSpeed(self.egoID, (traci.vehicle.getSpeed(self.egoID)+self.desired_speed)/2) traci.vehicle.setMaxSpeed(self.egoID, 50) traci.vehicle.subscribeContext(self.egoID, tc.CMD_GET_VEHICLE_VARIABLE, dist=self.radar_range[0], From 941ad01ef94656649714d384de3b2fdb81f83c01 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Wed, 24 Nov 2021 13:46:44 +0000 Subject: [PATCH 12/14] eval adjustments --- evaluation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/evaluation.py b/evaluation.py index 79a17ae..64707e5 100644 --- a/evaluation.py +++ b/evaluation.py @@ -118,7 +118,7 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): statistics_in_folder = [] for filename in files: return_dict = plot_episode_stat(filename) - return_dict["weights"] = decode_w_for_readable_names(params.get("model", ""), params["w"]) + return_dict["weights"] = decode_w_for_readable_names(params.get("model_version", ""), params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder @@ -142,7 +142,7 @@ def decode_w_for_readable_names(model_name, w): w_string = "Safe follower" elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: w_string = "No cut-in driver" - elif w == [1.0, 1.0, 0.0, 0.1, 0.5, 1.0]: + elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: w_string = name + " Baseline" elif w == [1.0, 2.0, 0.0, 0.0, 0.5, 1.0]: w_string = "D" @@ -152,7 +152,7 @@ def decode_w_for_readable_names(model_name, w): w_string = "B" elif w == [10.0, 2.0, -0.1, -0.5, 0.5, 1.0]: w_string = "A" - elif w == [1.0, 1.0, 0.0, 1.0, 1.0, 1.0]: + elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: w_string = "all but lc" else: w_string = str(w) @@ -277,8 +277,11 @@ def fig_plot(data, title, names): if __name__ == "__main__": dir_of_eval = [ - "/cache/RL/training_with_policy/FastRLv1_SuMoGyM_discrete/tits/" - ] + #"/cache/plotting/20211018_080302", + #"/cache/plotting/20211122_075322", + "/cache/plotting/compare", + ] + import time for run in dir_of_eval: global_stat = [] eval_dirs = os.listdir(run) @@ -289,4 +292,4 @@ def fig_plot(data, title, names): single_stat = 
plot_evaluation_statistics(os.path.join(run, dir_, "env")) global_stat.append(single_stat) eval_full_statistics(global_stat, - ) # save_figures_path=os.path.join(dir_of_eval, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) + save_figures_path=os.path.join(run, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) From a9ca948e839265ba00d107f47ab6609ceada4fbb Mon Sep 17 00:00:00 2001 From: Szoke Laszlo Date: Wed, 24 Nov 2021 14:58:41 +0100 Subject: [PATCH 13/14] adjusted evaluator script --- evaluation.py | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/evaluation.py b/evaluation.py index 64707e5..2887c16 100644 --- a/evaluation.py +++ b/evaluation.py @@ -118,10 +118,19 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): statistics_in_folder = [] for filename in files: return_dict = plot_episode_stat(filename) - return_dict["weights"] = decode_w_for_readable_names(params.get("model_version", ""), params["w"]) + model_name = "" + model_version = params.get('model_version', "") + use_double = params.get('use_double_model', False) + if model_version is not None: + if model_version in 'v1': + model_name = "DFRL agent" if use_double else 'FastRL agent' + elif model_version in 'q': + model_name = 'Q agent' + return_dict["weights"] = decode_w_for_readable_names(model_name=model_name, w=params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder + def decode_w_for_readable_names(model_name, w): """ Function to decode the model name for the plot labels @@ -129,7 +138,7 @@ def decode_w_for_readable_names(model_name, w): :param w: weights of the preferences :return: decoded name """ - name = "FastRL" if model_name in "v1" else "Q" + if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]: w_string = "Safe" elif w == [0.0, 1.0, 0.0, 0.0, 0.0, 0.0]: @@ -143,21 +152,22 @@ def decode_w_for_readable_names(model_name, w): elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: w_string = "No cut-in driver" elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: - w_string = name + " Baseline" - elif w == [1.0, 2.0, 0.0, 0.0, 0.5, 1.0]: - w_string = "D" - elif w == [1.0, 0.0, -0.1, -0.1, 1.0, 1.0]: - w_string = "C" - elif w == [1.0, 2.0, -0.1, -0.5, 1.0, 1.0]: - w_string = "B" - elif w == [10.0, 2.0, -0.1, -0.5, 0.5, 1.0]: - w_string = "A" + w_string = model_name + " Baseline" + elif w == [1.0, 0.0, -0.5, -0.5, 1.0, 1.0]: + w_string = model_name + " D" + elif w == [1.0, 1.0, 0.5, 0.0, 1.0, 1.0]: + w_string = model_name + " C" + elif w == [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]: + w_string = model_name + " B" + elif w == [1.0, 1.0, -0.5, 0.0, 1.0, 1.0]: + w_string = model_name + " A" elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: w_string = "all but lc" else: w_string = str(w) - return w_string + return w_string + def draw_causes(cause_dicts, labels): """ @@ -192,6 +202,7 @@ def draw_causes(cause_dicts, labels): loc='lower left', fontsize='small') plt.tight_layout() + def eval_full_statistics(global_statistics, save_figures_path=None): """ Function to plot all the collected data. 
@@ -214,9 +225,8 @@ def eval_full_statistics(global_statistics, save_figures_path=None): cause_list = [] for i, item in enumerate(global_statistics): episode_stat = [] - cause_dict = { "collision": 0, "slow": 0, None: 0} + cause_dict = {"collision": 0, "slow": 0, None: 0} for episode in item: - cause_dict[episode["cause"]] += 1 episode_stat.append( @@ -266,8 +276,8 @@ def draw_boxplot(data, labels, names): # ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center') plt.tight_layout() -def fig_plot(data, title, names): +def fig_plot(data, title, names): fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=True, sharey=True, figsize=(8, 12)) fig.suptitle(title) plt.autoscale() @@ -277,11 +287,12 @@ def fig_plot(data, title, names): if __name__ == "__main__": dir_of_eval = [ - #"/cache/plotting/20211018_080302", - #"/cache/plotting/20211122_075322", + # "/cache/plotting/20211018_080302", + # "/cache/plotting/20211122_075322", "/cache/plotting/compare", - ] + ] import time + for run in dir_of_eval: global_stat = [] eval_dirs = os.listdir(run) @@ -292,4 +303,5 @@ def fig_plot(data, title, names): single_stat = plot_evaluation_statistics(os.path.join(run, dir_, "env")) global_stat.append(single_stat) eval_full_statistics(global_stat, - save_figures_path=os.path.join(run, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) + save_figures_path=os.path.join(run, + f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}")) From 7b1da52a05ee70b09447588e7989a7e3d7dd2d45 Mon Sep 17 00:00:00 2001 From: "Szoke Laszlo (XC-AD/ENG1-Bp)" Date: Wed, 8 Dec 2021 15:05:53 +0000 Subject: [PATCH 14/14] eval script adopted --- evaluation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/evaluation.py b/evaluation.py index 2887c16..176ee9d 100644 --- a/evaluation.py +++ b/evaluation.py @@ -123,9 +123,9 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): use_double = params.get('use_double_model', False) if model_version is not None: if model_version in 'v1': - model_name = "DFRL agent" if use_double else 'FastRL agent' + model_name = "DFRL -" if use_double else 'FastRL -' elif model_version in 'q': - model_name = 'Q agent' + model_name = 'Q - ' return_dict["weights"] = decode_w_for_readable_names(model_name=model_name, w=params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder @@ -152,7 +152,7 @@ def decode_w_for_readable_names(model_name, w): elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]: w_string = "No cut-in driver" elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]: - w_string = model_name + " Baseline" + w_string = model_name + " baseline" elif w == [1.0, 0.0, -0.5, -0.5, 1.0, 1.0]: w_string = model_name + " D" elif w == [1.0, 1.0, 0.5, 0.0, 1.0, 1.0]: @@ -185,7 +185,7 @@ def draw_causes(cause_dicts, labels): category_colors = plt.get_cmap('RdYlGn')( np.linspace(0.15, 0.85, data.shape[1])) - fig, ax = plt.subplots() + fig, ax = plt.subplots(figsize=(7,8)) ax.invert_yaxis() ax.xaxis.set_visible(False) ax.set_xlim(0, np.sum(data, axis=1).max())