diff --git a/docker-compose.yml b/docker-compose.yml
index 8ae2e56..ec09210 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -16,6 +16,8 @@ services:
       - QT_X11_NO_MITSHM=1
       - USER=$USER
       - UID=$UID
+      - HOST_USER=$USER
+      - HOST_UID=$UID
       - _JAVA_OPTIONS=-Duser.home=/home/$USER/ # for Java based apps, i.e. PyCharm, CLion
       - NVIDIA_VISIBLE_DEVICES=all
     entrypoint: ["/entry.sh","true"]
@@ -31,4 +33,4 @@ services:
     cap_add:
       - SYS_PTRACE
     security_opt:
-      - seccomp:unconfined
\ No newline at end of file
+      - seccomp:unconfined
diff --git a/entry.sh b/entry.sh
new file mode 100644
index 0000000..cab16cb
--- /dev/null
+++ b/entry.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+if [ -z "$1" ]; then
+  START_SSH=false
+else
+  START_SSH=$1
+  shift
+fi
+
+echo "$HOST_USER:x:$HOST_UID:" >> /etc/group
+echo "docker:x:999:$HOST_USER" >> /etc/group
+useradd -u $HOST_UID -g $HOST_UID -d /home/$HOST_USER -s /bin/bash -M $HOST_USER
+
+if [ "$START_SSH" = true ]; then
+  service ssh start
+fi
+
+su $HOST_USER "$@"
diff --git a/evaluation.py b/evaluation.py
index b123b6d..176ee9d 100644
--- a/evaluation.py
+++ b/evaluation.py
@@ -1,5 +1,6 @@
 """
-@author "Laszlo Szoke (CC-AD/ENG1-Bp)"
+@author "Laszlo Szoke"
+This script is used to generate the plots of the training evaluation
 """
 import copy
 import glob
@@ -11,6 +12,7 @@
 import numpy as np
 
 """
+Indices of the attributes:
 FL
 0 - dx
 1 - dv
@@ -39,8 +41,14 @@ def plot_episode_stat(file):
+    """
+    Function to read and collect the data from file
+    :param file: path to the data
+    :return: dict of interesting attributes
+    """
     with open(file, "br") as f:
         dict_ = pickle.load(f)
+
     s = np.asarray(dict_["state"][:-1])
     r = np.asarray(dict_["reward"])
     cause = dict_["cause"]
@@ -64,62 +72,7 @@ def plot_episode_stat(file):
     rear_visible = rear_ego_distances > -48
     time_ = np.asarray(list(range(len(lanes))))
     lanes = np.asarray(lanes)
-    # # Plotting front distances relative to the ego based on the different lanes
-    # plt.scatter(time_, front_right_distance, label="FR")
-    # plt.scatter(time_, front_left_distance, label="FL")
-    # plt.scatter(time_, front_ego_distance, label="FE")
-    # plt.legend()
-    # plt.xlabel("steps [-]")
-    # plt.ylabel("Distance [m]")
-    # plt.title("Front Distance")
-    # plt.show()
-    # # Plotting rear distances relative to the ego based on the different lanes
-    # plt.scatter(time_, rear_right_distances, label="RR")
-    # plt.scatter(time_, rear_left_distances, label="RL")
-    # plt.scatter(time_, rear_ego_distances, label="RE")
-    # plt.xlabel("steps [-]")
-    # plt.ylabel("Distance [m]")
-    # plt.legend()
-    # plt.title("Rear Distance")
-    # plt.show()
-    # # Plotting speed of the ego, desired speed and front vehicle speed
-    # plt.plot(speeds, label="ego")
-    # plt.plot(desired_speeds, label="desired")
-    # plt.plot(front_ego_speeds + speeds, label="front")
-    # plt.legend()
-    # plt.xlabel("steps [-]")
-    # plt.ylabel("Speed [m/s2]")
-    # plt.title("Speed values")
-    # plt.show()
-    # # Plotting the speed differences
-    # plt.plot(desired_speeds - speeds, label="desired")
-    # plt.plot(front_ego_speeds, label="front")
-    # plt.xlabel("steps [-]")
-    # plt.ylabel("Speed [m/s2]")
-    # plt.legend()
-    # plt.title("Speed Differences")
-    # plt.show()
-    # # Plotting speed - distance of rear vehicles
-    # plt.scatter((rear_ego_speeds + speeds)[rear_visible], -1*rear_ego_distances[rear_visible])
-    # plt.scatter((rear_right_speeds + speeds)[rear_visible], -1*rear_right_distances[rear_visible])
-    # plt.scatter((rear_left_speeds + speeds)[rear_visible], -1*rear_left_distances[rear_visible])
plt.title("Rear time_ till collision") - # plt.xlabel("Speed [m/s2]") - # plt.ylabel("Distance [m]") - # plt.show() - # Plotting speed - distance of front vehicles - # plt.scatter(speeds[front_visible], - # front_ego_distance[front_visible] / (front_ego_speeds[front_visible] + speeds[front_visible]), - # label="FE") - # plt.scatter(speeds[front_visible], - # front_left_distance[front_visible] / (front_left_speeds[front_visible] + speeds[front_visible]), - # label="FL") - # # plt.scatter(time_[front_visible], front_right_distance[front_visible]/(front_right_speeds[front_visible]+speeds[front_visible]), label="FR") - # # plt.scatter(time_[front_visible], front_left_distance[front_visible]/-1/front_left_speeds[front_visible], label="RL") - # plt.title("Time in-between front vehicle") - # plt.xlabel("Distance [m]") - # plt.ylabel("Time in-between vehicles [s]") - # plt.show() + # summing the correct situation when the ego is keeping right as much as it can. keep_right = (sum(lanes == 0) + sum(np.logical_and(lanes != 0, s[:, 13] == 1))) / len(lanes) lane_changes = lanes[1:] != lanes[:-1] @@ -140,7 +93,7 @@ def plot_episode_stat(file): "lane_changes": sum(lane_changes) / len(lane_changes), "desired_speed_difference": speeds - desired_speeds, "keeping_right": keep_right, - "average_reward_per_step": r.mean(), + "average_reward_per_step": r.mean() if len(r.shape) < 2 else r.sum(-1).mean(), "cause": cause, "distance_before_lane_change": distance_before_lane_change, "distance_after_lane_change": distance_after_lane_change, @@ -152,6 +105,12 @@ def plot_episode_stat(file): def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): + """ + Function to collect all logs of the episodes + :param path_to_env_log: path to the directory of the env logs + :param extention: the file ending + :return: statistics of the folder + """ files = glob.glob(f'{os.path.join(path_to_env_log, extention)}') files.sort(key=os.path.getmtime) with open(f'{os.path.split(path_to_env_log)[0]}/args_eval.txt', 'r') as f: @@ -159,44 +118,77 @@ def plot_evaluation_statistics(path_to_env_log, extention="*.pkl"): statistics_in_folder = [] for filename in files: return_dict = plot_episode_stat(filename) - return_dict["weights"] = params.get("model", "") + " "+ decode_w_for_readable_names(params["w"]) + model_name = "" + model_version = params.get('model_version', "") + use_double = params.get('use_double_model', False) + if model_version is not None: + if model_version in 'v1': + model_name = "DFRL -" if use_double else 'FastRL -' + elif model_version in 'q': + model_name = 'Q - ' + return_dict["weights"] = decode_w_for_readable_names(model_name=model_name, w=params["w"]) statistics_in_folder.append(return_dict) return statistics_in_folder -def decode_w_for_readable_names(w): - - # if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]: - # w_string = "safe" - # elif w == [1.0, 1.0, 0.0, 0.0, 0.0, 0.0]: - # w_string = "safe and speedy" - # elif w == [1.0, 0.0, 1.0, 0.0, 0.0, 0.0]: - # w_string = "safe LC" - # elif w == [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]: - # w_string = "safe right" - # elif w == [1.0, 0.0, 0.0, 0.0, 1.0, 0.0]: - # w_string = "safe follow" - # elif w == [1.0, 0.0, 0.0, 0.0, 0.0, 1.0]: - # w_string = "safe cut-in" - # elif w == [1.0, 0.0, 0.0, 0.0, 1.0, 1.0]: - # w_string = "safe follow cut-in" - # elif w == [1.0, 1.0, 0.0, 1.0, 1.0, 1.0]: - # w_string = "all but lc" - # else: - w_string = str(w) - - return w_string + +def decode_w_for_readable_names(model_name, w): + """ + Function to decode the model name for the plot labels + 
+    :param model_name: name of the model we evaluate
+    :param w: weights of the preferences
+    :return: decoded name
+    """
+
+    if w == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0]:
+        w_string = "Safe"
+    elif w == [0.0, 1.0, 0.0, 0.0, 0.0, 0.0]:
+        w_string = "Speed keeper"
+    elif w == [0.0, 0.0, 1.0, 0.0, 0.0, 0.0]:
+        w_string = "Lane changer"
+    elif w == [0.0, 0.0, 0.0, 1.0, 0.0, 0.0]:
+        w_string = "Right keeper"
+    elif w == [0.0, 0.0, 0.0, 0.0, 1.0, 0.0]:
+        w_string = "Safe follower"
+    elif w == [0.0, 0.0, 0.0, 0.0, 0.0, 1.0]:
+        w_string = "No cut-in driver"
+    elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]:
+        w_string = model_name + " baseline"
+    elif w == [1.0, 0.0, -0.5, -0.5, 1.0, 1.0]:
+        w_string = model_name + " D"
+    elif w == [1.0, 1.0, 0.5, 0.0, 1.0, 1.0]:
+        w_string = model_name + " C"
+    elif w == [1.0, 1.0, 0.0, 0.0, 1.0, 1.0]:
+        w_string = model_name + " B"
+    elif w == [1.0, 1.0, -0.5, 0.0, 1.0, 1.0]:
+        w_string = model_name + " A"
+    elif w == [1.0, 1.0, -0.5, 0.5, 0.5, 0.5]:
+        w_string = "all but lc"
+    else:
+        w_string = str(w)
+
+    return w_string
+
+
 def draw_causes(cause_dicts, labels):
+    """
+    Function to draw cause plot
+    :param cause_dicts: causes
+    :param labels: labels to plot
+    :return:
+    """
     category_names = [str(key) for key in cause_dicts[0].keys()]
-    data = np.array(list(list(i.values()) for i in cause_dicts))
+    dat = list(list(i.values()) for i in cause_dicts)
+    dat.reverse()
+    labels.reverse()
+    data = np.array(dat)
     data_cum = data.cumsum(axis=1)
     category_colors = plt.get_cmap('RdYlGn')(
         np.linspace(0.15, 0.85, data.shape[1]))
-    fig, ax = plt.subplots()
+    fig, ax = plt.subplots(figsize=(7,8))
     ax.invert_yaxis()
     ax.xaxis.set_visible(False)
     ax.set_xlim(0, np.sum(data, axis=1).max())
-
     for i, (colname, color) in enumerate(zip(category_names, category_colors)):
         widths = data[:, i]
         starts = data_cum[:, i] - widths
@@ -208,9 +200,17 @@ def draw_causes(cause_dicts, labels):
         ax.bar_label(rects, label_type='center', color=text_color)
     ax.legend(ncol=len(category_names), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
+    plt.tight_layout()
+
 
 def eval_full_statistics(global_statistics, save_figures_path=None):
-    eval_values = ["ego_speed", "desired_speed_difference", "follow_distance", "front_tiv",
+    """
+    Function to plot all the collected data.
+    :param global_statistics: list of parameters of the evals
+    :param save_figures_path: where to save the plots
+    :return: none
+    """
+    eval_values = ["ego_speed", "desired_speed_difference", "front_tiv",
                    "rear_tiv", "lane_changes", "keeping_right", "average_reward_per_step",
                    "tiv_before_lane_change", "tiv_after_lane_change", ]
     if save_figures_path is not None and not os.path.exists(save_figures_path):
@@ -225,9 +225,8 @@ def eval_full_statistics(global_statistics, save_figures_path=None):
     cause_list = []
     for i, item in enumerate(global_statistics):
         episode_stat = []
-        cause_dict = { "collision": 0, "slow": 0, None: 0}
+        cause_dict = {"collision": 0, "slow": 0, None: 0}
         for episode in item:
-
             cause_dict[episode["cause"]] += 1
             episode_stat.append(
@@ -239,10 +238,10 @@ def eval_full_statistics(global_statistics, save_figures_path=None):
         # plt.hist(episode_stat, bins=min(episode_stat.size//10, 50), histtype="barstacked", density=True, label=name_list[-1], stacked=True)
         name_stat.append(episode_stat)
         global_statsss.append(name_stat)
-        global_names.append(name)
+        global_names.append(name.replace("_", " ", -1))
         global_labels.append(name_list)
 
-    draw_causes(cause_list, global_labels[0])
+    draw_causes(cause_list, copy.deepcopy(global_labels[0]))
     if save_figures_path is not None:
         plt.savefig(f'{save_figures_path}/cause_plot.jpg')
         plt.cla()
@@ -260,48 +259,49 @@ def eval_full_statistics(global_statistics, save_figures_path=None):
 def draw_boxplot(data, labels, names):
+    """
+    Function to draw the boxplots
+    :param data: data to plot
+    :param labels: labels for the plots
+    :param names: names of the models
+    :return:
+    """
     fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=False, sharey=True, figsize=(8, 12))
-    # fig.suptitle("title")
+    fig.suptitle("Evaluating episodic behavior")
     plt.autoscale()
     for i, ax in enumerate(axes.flatten()):
         ax.boxplot(data[i], labels=labels[i], autorange=True, showfliers=True, notch=False, meanline=True,
                    whis=[5, 95], sym="", vert=False)
         ax.set_title(names[i])
         # ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center')
+    plt.tight_layout()
+
+
 def fig_plot(data, title, names):
     fig, axes = plt.subplots(data.__len__() // 2, 2, sharex=True, sharey=True, figsize=(8, 12))
     fig.suptitle(title)
     plt.autoscale()
     for i, ax in enumerate(axes.flatten()):
-        # Bulbasaur
-        # sns.histplot(data=data[i],
-        #              bins='auto',
-        #              kde=True,
-        #              ax=ax,
-        #              stat="probability",
-        #              common_norm=True,
-        #              common_bins=True,
-        #              multiple="layer",
-        #              label=names[i])
         ax.annotate(names[i], (0.5, 0.9), xycoords='axes fraction', va='center', ha='center')
 
 
 if __name__ == "__main__":
     dir_of_eval = [
-        # "/cache/RL/training_with_policy/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210324_102545/",
-        "/cache/RL/Eval_fastrl/",
-        # "/cache/hdd/new_rewards/Qnetwork_SimpleMLP_SuMoGyM_discrete/20210209_101340",
+        # "/cache/plotting/20211018_080302",
+        # "/cache/plotting/20211122_075322",
+        "/cache/plotting/compare",
     ]
+    import time
+
     for run in dir_of_eval:
         global_stat = []
         eval_dirs = os.listdir(run)
+        eval_dirs.sort()
         for dir_ in eval_dirs:
             if "eval" not in dir_:
                 continue
             single_stat = plot_evaluation_statistics(os.path.join(run, dir_, "env"))
             global_stat.append(single_stat)
         eval_full_statistics(global_stat,
-                             )  # save_figures_path=os.path.join(dir_of_eval, f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}"))
-
-    print()
+                             save_figures_path=os.path.join(run,
+                                                            f"plots_{time.strftime('%Y%m%d_%H%M%S', time.gmtime())}"))
diff --git a/start_docker.sh b/start_docker.sh
old mode 100644
new mode 100755
index 358d58c..2649112
--- a/start_docker.sh
+++ b/start_docker.sh
@@ -11,7 +11,7 @@ if [ ${#docker_list[@]} -gt 1 ]; then
     fi
 elif [ ${#docker_list[@]} -eq 0 ]; then
     echo "We have no running docker container, so we start one."
-    cd docker-compose -f ./docker-compose.yml run --name docker_sumo --rm --service-ports sumo
+    docker-compose -f ./docker-compose.yml run --name docker_sumo --rm --service-ports sumo
 else
     echo "We are entering the only running docker container."
     docker exec -it --user=$UID ${docker_list[0]} /bin/bash
diff --git a/sumoGym/environment.py b/sumoGym/environment.py
index 4c99d0d..c68f73b 100644
--- a/sumoGym/environment.py
+++ b/sumoGym/environment.py
@@ -69,9 +69,11 @@ def __init__(self,
                  seed=None):
         super(SUMOEnvironment, self).__init__()
+        self.render_mode = mode
+        self.save_path = None
         self.name = "SuMoGyM"
         np.random.seed(seed if seed is not None else 42)
-        self.seed = seed
+        self.seed_ = seed
         if radar_range is None:
             radar_range = [50, 9]  # x and y
         self.radar_range = radar_range
@@ -132,7 +134,7 @@ def start(self):
         ]
 
         self.sumoCmd.append("--seed")
-        self.sumoCmd.append(str(int(np.random.randint(0, 1000000))) if self.seed is None else f"{self.seed}")
+        self.sumoCmd.append(str(int(np.random.randint(0, 1000000))) if self.seed_ is None else f"{self.seed_}")
 
         traci.start(self.sumoCmd[:4])
@@ -159,8 +161,8 @@ def _inner_reset(self):
         """
         # Changing configuration
         self._choose_random_simulation()
-        if self.seed is None:
-            self.update_seed()
+        if self.seed_ is None:
+            self.seed()
         # Loads traci configuration
         traci.load(self.sumoCmd[1:])
         # Resetting configuration
@@ -172,8 +174,8 @@ def _inner_reset(self):
         self._refresh_environment()
         # Init lateral model
         # Setting a starting speed of the ego
-        self.state['speed'] = self.desired_speed
-
+        self.state['speed'] = (self.desired_speed+self.state['speed'])/2
+
         if "continuous" in self.type_as:
             self.lateral_model = LateralModel(
                 self.state,
@@ -183,13 +185,17 @@ def _inner_reset(self):
         return self._get_observation()
 
-    def update_seed(self):
+    def seed(self, seed=None):
         """
 
         :return:
         """
         index = self.sumoCmd.index("--seed")
-        self.sumoCmd[index + 1] = str(int(np.random.randint(0, 1000000)))
+        if seed is None:
+            self.sumoCmd[index + 1] = str(int(np.random.randint(0, 1000000)))
+        else:
+            self.sumoCmd[index + 1] = str(int(seed))
+        self.seed_ = seed
 
     def _setup_observation_space(self, x_range=50, y_range=9):
         """
@@ -256,18 +262,33 @@ def _setup_reward_system(self, reward_type='features'):
         if reward_type == "basic":
             raise NotImplementedError
         elif reward_type == 'features':
-            self.reward_dict = {'success': [True, 0.0, False],  # if successful episode
-                                'collision': [True, -100.0, False],  # when causing collision
-                                'slow': [True, -100.0, False],  # when being too slow
-                                'left_highway': [True, -100.0, False],  # when leaving highway
+            self.reward_dict = {'success': [True, 0.0, True],  # if successful episode
+                                'collision': [True, -10.0, False],  # when causing collision
+                                'slow': [True, -10.0, False],  # when being too slow
+                                'left_highway': [True, -10.0, False],  # when leaving highway
+                                'speed': [False, 0.0, True],
+                                # negative reward proportional to the difference from v_des
+                                'lane_change': [False, 0.0, True],  # successful lane-change
+                                'keep_right': [False, 0.0, True],  # whenever the available most right lane is used
+                                'follow_distance': [False, 0.0, True],
+                                # whenever closer than required follow distance,
+                                # proportional negative
+                                'cut_in_distance': [False, 0.0, True],  # whenever it cuts in closer than it should.
+                                'type': reward_type}
+
+        elif reward_type == 'positive':
+            self.reward_dict = {'success': [True, 0.0, True],  # if successful episode
+                                'collision': [True, -10.0, False],  # when causing collision
+                                'slow': [True, -10.0, False],  # when being too slow
+                                'left_highway': [True, -10.0, False],  # when leaving highway
                                 'speed': [False, 1.0, True],
                                 # negative reward proportional to the difference from v_des
                                 'lane_change': [False, 1.0, True],  # successful lane-change
                                 'keep_right': [False, 1.0, True],  # whenever the available most right lane is used
-                                'follow_distance': [False, -1.0, True],
+                                'follow_distance': [False, 1.0, True],
                                 # whenever closer than required follow distance,
                                 # proportional negative
-                                'cut_in_distance': [False, -1.0, True],  # whenever cuts in closer then should.
+                                'cut_in_distance': [False, 1.0, True],  # whenever it cuts in closer than it should.
                                 'type': reward_type}
         else:
             raise RuntimeError("Reward system can not be found")
@@ -397,7 +418,8 @@ def _inner_step(self, action):
             if "lane" in exc.args[0]:
                 cause, reward, terminated = self._get_terminating_events(True, left_=True)
                 self.render()
-                return self._get_observation(), sum(reward), terminated, {'cause': cause, 'cumulants': reward,
+                return self._get_observation(), sum(reward), terminated, {'cause': cause,
+                                                                           'cumulants': reward,
                                                                            'velocity': self.state['speed'],
                                                                            'distance': self.state['x_position'] -
                                                                                        self.ego_start_position,
@@ -415,7 +437,8 @@ def _inner_step(self, action):
         # creating the images if render is true.
         self.render()
 
-        return self._get_observation(), sum(reward), terminated, {'cause': cause, 'cumulants': reward,
+        return self._get_observation(), sum(reward), terminated, {'cause': cause,
+                                                                   'cumulants': reward,
                                                                    'velocity': self.state['speed'],
                                                                    'distance': self.state['x_position'] -
                                                                                self.ego_start_position,
@@ -476,50 +499,59 @@ def _get_terminating_events(self, is_lane_change, left_=False):
         if temp_reward.get("keep_right", [False, False, False])[2]:
             if self.observation is not None:
                 temp_reward["keep_right"] = self.reward_dict["keep_right"][1] if self.observation["lane_id"] == 0 or (
-                        self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else self.reward_dict["keep_right"][1] -1
+                        self.observation["ER"] == 1 and self.observation["RE"]["dv"] < 1) else \
+                    self.reward_dict["keep_right"][1] - 1
             else:
-                temp_reward["keep_right"] = self.reward_dict[cause][1] if cause is not None else 0
+                if cause is not None:
+                    temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1
+                else:
+                    temp_reward["keep_right"] = 0 if temp_reward["type"] == "positive" else -1
         if temp_reward.get("follow_distance", [False, False, False])[2]:
             if self.observation is not None:
                 follow_time = (
-                    (self.observation["FE"]["dx"] + self.observation["FE"]["dv"] * self.dt) / self.observation[
-                        "speed"] * 2)
+                        (self.observation["FE"]["dx"] + self.observation["FE"]["dv"] * self.dt) / self.observation[
+                            "speed"] * 2)
                 if follow_time < 1:
-                    temp_reward["follow_distance"] = max(follow_time - 1, -1)
+                    temp_reward["follow_distance"] = max(self.reward_dict["follow_distance"][1] + follow_time - 1,
+                                                         self.reward_dict["follow_distance"][1] - 1)
                 else:
-                    temp_reward["follow_distance"] = 0
+                    temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1]
             elif cause is None:
-                temp_reward["follow_distance"] = 0
+                temp_reward["follow_distance"] = self.reward_dict["follow_distance"][1]
             else:
-                temp_reward["follow_distance"] = self.reward_dict[cause][1]
+                temp_reward["follow_distance"] = 0 if temp_reward["type"] == "positive" else -1
temp_reward.get("cut_in_distance", [False, False, False])[2]: if self.observation is not None: follow_time = ((-1 * self.observation["RE"]["dx"] - self.observation["RE"]["dv"] * self.dt) / self.observation["speed"] * 2) if follow_time < 0.5: - temp_reward["cut_in_distance"] = max(2 * (follow_time - 0.5), -1) + temp_reward["cut_in_distance"] = max( + self.reward_dict["cut_in_distance"][1] + 2 * (follow_time - 0.5), + self.reward_dict["cut_in_distance"][1] - 1) else: - temp_reward["cut_in_distance"] = 0 + temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] elif cause is None: - temp_reward["cut_in_distance"] = 0 + temp_reward["cut_in_distance"] = self.reward_dict["cut_in_distance"][1] else: - temp_reward["cut_in_distance"] = self.reward_dict[cause][1] + temp_reward["cut_in_distance"] = 0 if temp_reward["type"] == "positive" else -1 # getting speed reward if cause is None: dv = abs(self.state['speed'] - self.desired_speed) temp_reward["speed"] = self.reward_dict["speed"][1] - dv / max(self.desired_speed, self.state["speed"]) else: - temp_reward["speed"] = self.reward_dict[cause][1] + temp_reward["speed"] = 0 if temp_reward["type"] == "positive" else -1 # getting lane change reward. - if is_lane_change: + if is_lane_change and cause is None: temp_reward["lane_change"] = self.reward_dict["lane_change"][1] + # not terminating move reward elif cause is None: - temp_reward["lane_change"] = self.reward_dict["lane_change"][1] -1 + temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 + # terminating reward else: - temp_reward["lane_change"] = self.reward_dict[cause][1] + temp_reward["lane_change"] = 0 if temp_reward["type"] == "positive" else -1 # constructing the reward vector reward = self.get_max_reward(temp_reward) * self.default_w @@ -579,7 +611,7 @@ def _select_egos(self, number_of_egos=1): traci.vehicle.setRouteID(self.egoID, "r1") traci.vehicle.setSpeedFactor(self.egoID, 2) - traci.vehicle.setSpeed(self.egoID, self.desired_speed) + traci.vehicle.setSpeed(self.egoID, (traci.vehicle.getSpeed(self.egoID)+self.desired_speed)/2) traci.vehicle.setMaxSpeed(self.egoID, 50) traci.vehicle.subscribeContext(self.egoID, tc.CMD_GET_VEHICLE_VARIABLE, dist=self.radar_range[0], @@ -752,7 +784,9 @@ def _calculate_image_environment(self, flatten=True): # Drawing speed of the current car velocity = self.env_obs[car_id]['speed'] / 50 if self.egoID == car_id: - velocity = 1 - abs(self.env_obs[car_id]['speed'] - self.desired_speed) / self.desired_speed + velocity = 1 - abs(self.env_obs[car_id]['speed'] - self.desired_speed) / max(self.desired_speed, + self.env_obs[car_id][ + "speed"]) observation[0, self.x_range_grid + dx - l:self.x_range_grid + dx + l, self.y_range_grid + dy - w:self.y_range_grid + dy + w] += np.ones_like( observation[0, self.x_range_grid + dx - l:self.x_range_grid + dx + l, diff --git a/sumoGym/run_env.py b/sumoGym/run_env.py index bb8fbf4..bac081c 100644 --- a/sumoGym/run_env.py +++ b/sumoGym/run_env.py @@ -14,9 +14,9 @@ def main(): # Modify simulation_directory for your directory path env = gym.make('SUMOEnvironment-v0', simulation_directory='../basic_env', - type_os="image", + type_os="structured", type_as='discrete', - reward_type='speed', + reward_type='positive', mode='none', change_speed_interval=100, ) @@ -24,7 +24,7 @@ def main(): terminate = False while not terminate: # action = [float(input('next steering')), float(input('next vel_dif'))] - action = random.randint(0,8) + action = int(input())# random.randint(0,8) state, reward, terminate, info 
         state, reward, terminate, info = env.step(action)
         time.sleep(0.1)
         if terminate: