diff --git a/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py b/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py
index a3f797f2..8c026bf3 100644
--- a/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py
+++ b/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py
@@ -181,7 +181,7 @@ def __init__(self, sensors, actuators, handler, config=None):
         self.env = gym.make(self.env_name, **self.environment.environment)
         self.inference_file = params.inference["params"]["inference_file"]
 
-        observation, _ = self.env.reset()
+        observation = self.env.reset()
 
         self.step = 1
         self.state = "".join(map(str, observation))
diff --git a/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml b/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml
index 1f189826..3602e7d5 100644
--- a/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml
+++ b/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml
@@ -6,8 +6,8 @@ settings:
   environment_set: environments # gazebo_environments, carla_environments
   env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking
   agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot
-  actions: simple # simple, medium, hard, test
-  states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points)
+  actions: hard # simple, medium, hard, test
+  states: sp5 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points)
   rewards: followline_center # rewards_followline_center
   framework: TensorFlow # TensorFlow, Pytorch
   total_episodes: 5
@@ -26,13 +26,13 @@ retraining:
 
 inference:
   dqn:
-    inference_file: models/gazebo/rl_models/dqn/DQN_sp_16x16_LAPCOMPLETED_Max5334_Epoch596_inTime20230310-223500.model
+    inference_file: /home/ruben/Desktop/my-BehaviorMetrics/behavior_metrics/models/rl_models/dqn/sp5/superhard/DQN_sp_16x16_LAPCOMPLETED_Max1274374_inTime20230605-000135_Epoch401.model
 
 algorithm:
   dqn:
     alpha: 0.8
     gamma: 0.9
-    epsilon: 0.99
+    epsilon: 0.000001
     epsilon_discount: 0.9986
     epsilon_min: 0.05
     model_name: DQN_sp_16x16
@@ -64,29 +64,37 @@ states:
   sp3:
     0: [5, 15, 22]
   sp5:
-    0: [3, 5, 10, 15, 20]
+    0: [13, 25, 60, 110, 160]
+  sp7:
+    0: [13, 15, 20, 25, 30, 40, 50]
   spn:
     0: [10]
 
 actions:
   simple:
-    0: [3, 0]
-    1: [2, 1]
-    2: [2, -1]
+    0: [5, 0]
+    1: [0.5, 0.2]
+    2: [0.5, -0.2]
   medium:
-    0: [3, 0]
-    1: [2, 1]
-    2: [2, -1]
-    3: [1, 1.5]
-    4: [1, -1.5]
+    0: [1, 0]
+    1: [1, 1]
+    2: [1, -1]
+    3: [1, 0.5]
+    4: [1, -0.5]
   hard:
-    0: [3, 0]
-    1: [2, 1]
-    2: [2, -1]
-    3: [1.5, 1]
-    4: [1.5, -1]
-    5: [1, -1.5]
-    6: [1, -1.5]
+    0: [1, 0]
+    1: [3, 0]
+    2: [6, 0]
+    3: [3, -1]
+    4: [3, 1]
+    5: [2, -0.5]
+    6: [2, 0.5]
+    7: [1.5, -1]
+    8: [1.5, 1]
+    9: [1, -0.5]
+    10: [1, 0.5]
+    11: [0.5, -0.5]
+    12: [0.5, 0.5]
 
   test:
     0: [0, 0]
diff --git a/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py b/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py
index ef4e8780..849b06e1 100644
--- a/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py
+++ b/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py
@@ -34,5 +34,4 @@ def inference(self, state):
                 0
             ]
         else:
-            return self.model.predict([int(state)], verbose=0)[0]
-
+            return self.model(np.array(state).reshape(1, 5))[0]
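The reset change in brain_f1_follow_line_dqn.py matches the classic gym API, where env.reset() returns only the observation; gymnasium (and gym >= 0.26) instead returns an (observation, info) tuple, which is what the removed unpacking expected. A minimal sketch of a reset helper that tolerates both generations (reset_env is an illustrative name, not code from this repository):

def reset_env(env):
    # gym >= 0.26 / gymnasium: reset() -> (observation, info)
    # classic gym:             reset() -> observation
    result = env.reset()
    if isinstance(result, tuple) and len(result) == 2:
        return result[0]
    return result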
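Switching states from sp1 to sp5 feeds the network the line's position at five sampling points instead of one, and the new offsets [13, 25, 60, 110, 160] reach much deeper into the camera frame than the old [3, 5, 10, 15, 20]. A sketch of this simplified perception, assuming each offset is an image row at which the line's horizontal position is measured (the binary-mask input and the -1 sentinel are illustrative assumptions, not the repository's exact code):

import numpy as np

SP5_ROWS = [13, 25, 60, 110, 160]  # row offsets from the new sp5 entry

def simplified_state(mask, rows=SP5_ROWS):
    # mask: 2-D array, nonzero where the line is detected
    points = []
    for r in rows:
        xs = np.nonzero(mask[r])[0]
        points.append(int(xs.mean()) if xs.size else -1)  # -1: line not visible
    return points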
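The hard action set grows from 7 entries (including a duplicated 6: [1, -1.5]) to 13, adding a fast straight-line action at speed 6 while keeping the sharper turns slow. Each entry is a [linear_velocity, angular_velocity] pair, as in the other F1 follow-line brains; a sketch of turning an action index into a motor command (apply_action is illustrative, and the sendV/sendW calls assume the Behavior Metrics motors interface):

HARD_ACTIONS = {
    0: [1, 0], 1: [3, 0], 2: [6, 0],
    3: [3, -1], 4: [3, 1], 5: [2, -0.5], 6: [2, 0.5],
    7: [1.5, -1], 8: [1.5, 1], 9: [1, -0.5], 10: [1, 0.5],
    11: [0.5, -0.5], 12: [0.5, 0.5],
}

def apply_action(motors, action):
    v, w = HARD_ACTIONS[action]
    motors.sendV(v)  # linear velocity
    motors.sendW(w)  # angular velocity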
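Lowering epsilon from 0.99 to 0.000001 makes the epsilon-greedy policy effectively deterministic, which is the right setting for inference: at 0.99 the agent would pick a random action 99% of the time instead of exploiting the trained Q-values. An illustrative selection rule (a sketch, not the repository's exact implementation):

import random
import numpy as np

def select_action(q_values, epsilon=0.000001):
    if random.random() < epsilon:
        return random.randrange(len(q_values))  # explore: almost never taken here
    return int(np.argmax(q_values))             # exploit the learned Q-values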
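In dqn_f1.py the forward pass now calls the Keras model directly on a (1, 5) batch built from the five-point state, rather than model.predict on a single int; the direct call avoids predict()'s per-step overhead, though it returns a tensor rather than a numpy array and relies on numpy being imported in that module. A standalone sketch, assuming from the DQN_sp_16x16 model name two 16-unit hidden layers (the real code loads the trained model from inference_file instead):

import numpy as np
import tensorflow as tf

# Stand-in for the trained network: same 5-value input, one Q-value per hard action.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu", input_shape=(5,)),
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(13),
])

state = [3, 5, 8, 10, 12]               # placeholder sp5 observation
batch = np.array(state).reshape(1, 5)   # batch of one, shape (1, 5)
q_values = model(batch)[0]              # direct call, as in the new inference()
action = int(np.argmax(q_values))       # greedy action index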