Fix DQN F1 Gazebo inference bugs

JdeRobot · Nov 18, 2023 · ca05f4c
1 parent c9e327e
commit ca05f4c
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 23 deletions.
diff --git a/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py b/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py
@@ -181,7 +181,7 @@ def __init__(self, sensors, actuators, handler, config=None):
         self.env = gym.make(self.env_name, **self.environment.environment)
 
         self.inference_file = params.inference["params"]["inference_file"]
-        observation, _ = self.env.reset()
+        observation = self.env.reset()
         self.step = 1
         self.state = "".join(map(str, observation))
 

diff --git a/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml b/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml
@@ -6,8 +6,8 @@ settings:
   environment_set: environments # gazebo_environments, carla_environments
   env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking
   agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot
-  actions: simple # simple, medium, hard, test
-  states: sp1 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points)
+  actions: hard # simple, medium, hard, test
+  states: sp5 # image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), sp5 (simplified perception with 5 points), sp7 (simplified perception with 7 points), spn (simplified perception with n points)
   rewards: followline_center # rewards_followline_center
   framework: TensorFlow # TensorFlow, Pytorch
   total_episodes: 5
@@ -26,13 +26,13 @@ retraining:
 
 inference:
   dqn:
-    inference_file: models/gazebo/rl_models/dqn/DQN_sp_16x16_LAPCOMPLETED_Max5334_Epoch596_inTime20230310-223500.model
+    inference_file: /home/ruben/Desktop/my-BehaviorMetrics/behavior_metrics/models/rl_models/dqn/sp5/superhard/DQN_sp_16x16_LAPCOMPLETED_Max1274374_inTime20230605-000135_Epoch401.model
 
 algorithm:
   dqn:
     alpha: 0.8
     gamma: 0.9
-    epsilon: 0.99
+    epsilon: 0.000001
     epsilon_discount: 0.9986
     epsilon_min: 0.05
     model_name: DQN_sp_16x16
@@ -64,29 +64,41 @@ states:
   sp3:
     0: [5, 15, 22]
   sp5:
-    0: [3, 5, 10, 15, 20]
+    0: [13, 25, 60, 110, 160]
+  sp7:
+    0: [ 13, 15, 20, 25, 30, 40, 50 ]
   spn:
     0: [10]
 
 actions:
   simple:
-    0: [3, 0]
-    1: [2, 1]
-    2: [2, -1]
+    0: [5, 0]
+    1: [0.5, 0.2]
+    2: [0.5, -0.2]
   medium:
-    0: [3, 0]
-    1: [2, 1]
-    2: [2, -1]
-    3: [1, 1.5]
-    4: [1, -1.5]
+    0: [ 1, 0 ]
+    1: [ 1, 1 ]
+    2: [ 1, -1 ]
+    3: [ 1, 0.5 ]
+    4: [ 1, -0.5 ]
   hard:
-    0: [3, 0]
-    1: [2, 1]
-    2: [2, -1]
-    3: [1.5, 1]
-    4: [1.5, -1]
-    5: [1, -1.5]
-    6: [1, -1.5]
+    0: [1, 0]
+    1: [3, 0]
+    2: [6, 0]
+    3: [3, -1]
+    4: [3, 1]
+    5: [2, -0.5]
+    6: [2, 0.5]
+    7: [1.5, -1]
+    8: [1.5, 1]
+    9: [1, -0.5]
+    10: [1, 0.5]
+    11: [0.5, -0.5]
+    12: [0.5, 0.5]
+
+
+
+
   test:
     0: [0, 0]
 

diff --git a/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py b/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py
@@ -34,5 +34,4 @@ def inference(self, state):
                 0
             ]
         else:
-            return self.model.predict([int(state)], verbose=0)[0]
-
+            return self.model(np.array(state).reshape(1, 5))[0]
@@ -34,5 +34,4 @@ def inference(self, state):
                 ]
             else:
-                return self.model.predict([int(state)], verbose=0)[0]
+                return self.model(np.array(state).reshape(1, 5))[0]