fixes for gazebo f1 follow line algorithms comparison

JdeRobot · Dec 20, 2023 · 955b9c2 · 955b9c2
1 parent 06b7286
commit 955b9c2
Show file tree

Hide file tree

Showing 32 changed files with 44 additions and 65 deletions.
diff --git a/behavior_metrics/20231220-202146.bag.active b/behavior_metrics/20231220-202146.bag.active
diff --git a/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py b/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_dqn.py
@@ -183,7 +183,7 @@ def __init__(self, sensors, actuators, handler, config=None):
         self.inference_file = params.inference["params"]["inference_file"]
         observation = self.env.reset()
         self.step = 1
-        self.state = "".join(map(str, observation))
+        self.state = observation[0]
 
         self.inferencer = InferencerWrapper("dqn", self.inference_file, env=config_file)
 
@@ -238,9 +238,7 @@ def execute(self):
         # Execute the action and get feedback
         observation, reward, done, info = self.env.step(action, self.step)
         self.step += 1
-
-        self.state = "".join(map(str, observation))
-
+        self.state = observation
         image = self.camera.getImage().data
 
         self.update_frame('frame_0', image)

diff --git a/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_qlearn.py b/behavior_metrics/brains/gazebo/f1/brain_f1_follow_line_qlearn.py
@@ -44,7 +44,7 @@ def __init__(self, sensors, actuators, handler, config=None):
             'algorithm': 'qlearn',
             'environment': 'simple', 
             'agent': 'f1',
-            'filename': 'brains/gazebo/sf1/config/config_f1_qlearn.yaml'
+            'filename': 'brains/gazebo/f1/config/config_f1_qlearn.yaml'
         }
 
         f = open(args['filename'], "r")

diff --git a/behavior_metrics/brains/gazebo/f1/config/config_f1_qlearn.yaml b/behavior_metrics/brains/gazebo/f1/config/config_f1_qlearn.yaml
@@ -22,7 +22,7 @@ actions:
     actions_set: simple #simple
     available_actions:
       simple:
-        0: [ 3, 0 ]
+        0: [ 6, 0 ]
         1: [ 2, 1 ]
         2: [ 2, -1 ]
 

diff --git a/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_ddpg_f1_gazebo.yaml b/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_ddpg_f1_gazebo.yaml
@@ -29,8 +29,8 @@ retraining:
 
 inference:
   ddpg:
-    inference_ddpg_tf_actor_model_name: "/home/ruben/Desktop/RL-Studio/rl_studio/checkpoints/follow_line_gazebo_ddpg_f1_TensorFlow/v1to20/superfast-20231118-220658_IMPROVED_C-simple_S-sp5_A-continuous_MR-2025_E-3797/ACTOR"
-    inference_ddpg_tf_critic_model_name: "/home/ruben/Desktop/RL-Studio/rl_studio/checkpoints/follow_line_gazebo_ddpg_f1_TensorFlow/v1to20/superfast-20231118-220658_IMPROVED_C-simple_S-sp5_A-continuous_MR-2025_E-3797/CRITIC"
+    inference_ddpg_tf_actor_model_name: "/home/ruben/Desktop/my-BehaviorMetrics/behavior_metrics/models/gazebo/rl_models/ddpg/superfast-20231118-220658_IMPROVED_C-simple_S-sp5_A-continuous_MR-2025_E-3797/ACTOR"
+    inference_ddpg_tf_critic_model_name: "/home/ruben/Desktop/my-BehaviorMetrics/behavior_metrics/models/gazebo/rl_models/ddpg/superfast-20231118-220658_IMPROVED_C-simple_S-sp5_A-continuous_MR-2025_E-3797/CRITIC"
 
 algorithm:
   ddpg:

diff --git a/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml b/behavior_metrics/brains/gazebo/f1/config/config_inference_followline_dqn_f1_gazebo.yaml
@@ -7,7 +7,7 @@ settings:
   env: simple # simple, nurburgring, montreal, curves, simple_laser, manual, autoparking
   agent: f1 # f1, autoparkingRL, auto_carla, mountain_car, robot_mesh, cartpole, turtlebot
   actions: hard # simple, medium, hard, test
-  states: sp5 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points)
+  states: sp10 #image, sp1 (simplified perception with 1 point), sp3 (simplified perception with 3 points), spn (simplified perception with n points)
   rewards: followline_center # rewards_followline_center
   framework: TensorFlow # TensorFlow, Pytorch
   total_episodes: 5
@@ -26,7 +26,7 @@ retraining:
 
 inference:
   dqn:
-    inference_file: /home/ruben/Desktop/my-BehaviorMetrics/behavior_metrics/models/rl_models/dqn/sp5/superhard/DQN_sp_16x16_LAPCOMPLETED_Max1274374_inTime20230605-000135_Epoch401.model
+    inference_file: /home/ruben/Desktop/my-BehaviorMetrics/behavior_metrics/models/gazebo/rl_models/dqn/DQN_sp_16x16_LAPCOMPLETED_Max2951_Epoch267_inTime20231220-121204.model
 
 algorithm:
   dqn:
@@ -65,8 +65,8 @@ states:
     0: [5, 15, 22]
   sp5:
     0: [13, 25, 60, 110, 160]
-  sp7:
-    0: [ 13, 15, 20, 25, 30, 40, 50 ]
+  sp10:
+    0: [3, 15, 30, 45, 50, 70, 100, 120, 150, 190]
   spn:
     0: [10]
 
@@ -82,23 +82,15 @@ actions:
     3: [ 1, 0.5 ]
     4: [ 1, -0.5 ]
   hard:
-    0: [1, 0]
-    1: [3, 0]
-    2: [6, 0]
-    3: [3, -1]
-    4: [3, 1]
-    5: [2, -0.5]
-    6: [2, 0.5]
-    7: [1.5, -1]
-    8: [1.5, 1]
-    9: [1, -0.5]
-    10: [1, 0.5]
-    11: [0.5, -0.5]
-    12: [0.5, 0.5]
-
-
-
-
+    0: [7, 0]
+    1: [4, 1.5]
+    2: [4, -1.5]
+    3: [3, 1]
+    4: [3, -1]
+    5: [2, 1]
+    6: [2, -1]
+    7: [1, 0.5]
+    8: [1, -0.5]
   test:
     0: [0, 0]
 

diff --git a/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py b/behavior_metrics/brains/gazebo/f1/rl_utils/algorithms/dqn_f1.py
@@ -34,4 +34,4 @@ def inference(self, state):
                 0
             ]
         else:
-            return self.model(np.array(state).reshape(1, 5))[0]
+            return self.model(np.array([state]))[0]
diff --git a/behavior_metrics/brains/gazebo/f1/rl_utils/models/step.py b/behavior_metrics/brains/gazebo/f1/rl_utils/models/step.py
@@ -55,7 +55,7 @@ def step_followline_state_sp_actions_discretes(self, action, step):
         f1_image_camera, _ = self.f1gazeboimages.get_camera_info()
 
         ##==== get center
-        points_in_red_line, _ = self.simplifiedperception.processed_image(
+        points_in_red_line, centrals_normalized = self.simplifiedperception.processed_image(
             f1_image_camera.data, self.height, self.width, self.x_row, self.center_image
         )
         if self.state_space == "spn":
@@ -68,17 +68,17 @@ def step_followline_state_sp_actions_discretes(self, action, step):
 
         ##==== get State
         ##==== simplified perception as observation
-        state = self.simplifiedperception.calculate_observation(
-            points_in_red_line, self.center_image, self.pixel_region
-        )
+        # state = self.simplifiedperception.calculate_observation(
+        #     points_in_red_line, self.center_image, self.pixel_region
+        # )
 
         ##==== get Rewards
-        if self.reward_function == "followline_center":
-            reward, done = self.f1gazeborewards.rewards_followline_center(
-                center, self.rewards
-            )
+        # if self.reward_function == "followline_center":
+        #     reward, done = self.f1gazeborewards.rewards_followline_center(
+        #         center, self.rewards
+        #     )
 
-        return state, reward, done, {}
+        return centrals_normalized, 0, False, {}
 
     def step_followline_state_image_actions_continuous(self, action, step):
         self._gazebo_unpause()

diff --git a/behavior_metrics/configs/gazebo/DL-torch-lstm.yml b/behavior_metrics/configs/gazebo/DL-torch-lstm.yml
@@ -16,7 +16,7 @@ Behaviors:
                     Topic: '/F1ROS/cmd_vel'
                     MaxV: 3
                     MaxW: 0.3
-        BrainPath: 'brains/gazebo/ssf1/brain_f1_torch-lstm.py'
+        BrainPath: 'brains/gazebo/f1/brain_f1_torch-lstm.py'
         PilotTimeCycle: 50
         Parameters:
             Model: 'model_lstm_pilotnet_torch.ckpt'

diff --git a/behavior_metrics/configs/gazebo/default-multiple.yml b/behavior_metrics/configs/gazebo/default-multiple.yml
@@ -16,7 +16,7 @@ Behaviors:
                     Topic: '/F1ROS/cmd_vel'
                     MaxV: 3
                     MaxW: 0.3
-        BrainPath: ['brains/f1/brain_f1_follow_line_dqn.py', 'brains/f1/brain_f1_follow_line_qlearn.py', 'brains/f1/brain_f1_follow_line_ddpg.py']
+        BrainPath: ['brains/gazebo/f1/brain_f1_follow_line_ppo.py', 'brains/gazebo/f1/brain_f1_follow_line_dqn.py', 'brains/gazebo/f1/brain_f1_follow_line_qlearn.py', 'brains/gazebo/f1/brain_f1_follow_line_ddpg.py']
         PilotTimeCycle: 50
         Parameters:
 #            Model: ['model_deepest_lstm_cropped_250_norm_max_pooling.h5', 'model_deepest_lstm_cropped_250_norm_test.h5']
@@ -25,13 +25,14 @@ Behaviors:
             ImageNormalized: True
             PredictionsNormalized: True
             GPU: True
+        Environment: gazebo
         Type: 'f1'
     Experiment:
         Name: "Experiment name"
         Description: "Experiment description"
-        Timeout: [400, 600]
+        Timeout: [300, 500]
         UseWorldTimeouts: [50, 50]
-        Repetitions: 2
+        Repetitions: 3
     Simulation:
         World: ['/opt/jderobot/share/jderobot/gazebo/launch/simple_circuit.launch', '/opt/jderobot/share/jderobot/gazebo/launch/many_curves.launch']
         RealTimeUpdateRate: 1000

diff --git a/behavior_metrics/configs/gazebo/default-rl-dqn.yml b/behavior_metrics/configs/gazebo/default-rl-dqn.yml
@@ -21,6 +21,7 @@ Behaviors:
         PilotTimeCycle: 50
         Parameters:
             ImageTranform: ''
+        Environment: gazebo
         Type: 'f1'
     Simulation:
         World: /usr/local/gazebo/launch/simple_circuit.launch

diff --git a/behavior_metrics/configs/gazebo/default-rl-qlearn.yml b/behavior_metrics/configs/gazebo/default-rl-qlearn.yml
@@ -21,6 +21,7 @@ Behaviors:
         PilotTimeCycle: 50
         Parameters:
             ImageTranform: ''
+        Environment: gazebo
         Type: 'f1'
     Simulation:
         World: /usr/local/gazebo/launch/simple_circuit.launch

diff --git a/behavior_metrics/configs/gazebo/default-rl.yml b/behavior_metrics/configs/gazebo/default-rl.yml
@@ -17,7 +17,7 @@ Behaviors:
                     MaxV: 3
                     MaxW: 0.3
                     RL: True
-        BrainPath: 'brains/f1rl/train.py'
+        BrainPath: 'brains/gazebo/f1rl/train.py'
         PilotTimeCycle: 50
         Type: 'f1rl'
         Parameters:

diff --git a/...dqn/DQN_sp_16x16_LAPCOMPLETED_Max2951_Epoch267_inTime20231220-121204.model/fingerprint.pb b/...dqn/DQN_sp_16x16_LAPCOMPLETED_Max2951_Epoch267_inTime20231220-121204.model/fingerprint.pb
diff --git a/.../DQN_sp_16x16_LAPCOMPLETED_Max2951_Epoch267_inTime20231220-121204.model/keras_metadata.pb b/.../DQN_sp_16x16_LAPCOMPLETED_Max2951_Epoch267_inTime20231220-121204.model/keras_metadata.pb
@@ -0,0 +1,7 @@
+
+�*root"_tf_keras_network*�){"name": "model", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 10]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": []}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_1", "inbound_nodes": [[["dense", 0, 0, {}]]]}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 9, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_2", "inbound_nodes": [[["dense_1", 0, 0, {}]]]}], 
"input_layers": [["input_1", 0, 0]], "output_layers": [["dense_2", 0, 0]]}, "shared_object_id": 10, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, 10]}, "ndim": 2, "max_ndim": null, "min_ndim": null, "axes": {}}}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 10]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 10]}, "float32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 10]}, "float32", "input_1"]}, "keras_version": "2.11.0", "backend": "tensorflow", "model_config": {"class_name": "Functional", "config": {"name": "model", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 10]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}, "name": "input_1", "inbound_nodes": [], "shared_object_id": 0}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense", "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", 
"config": {}, "shared_object_id": 5}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_1", "inbound_nodes": [[["dense", 0, 0, {}]]], "shared_object_id": 6}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 9, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 7}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 8}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "name": "dense_2", "inbound_nodes": [[["dense_1", 0, 0, {}]]], "shared_object_id": 9}], "input_layers": [["input_1", 0, 0]], "output_layers": [["dense_2", 0, 0]]}}, "training_config": {"loss": "mean_squared_error", "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": true, "is_legacy_optimizer": false, "learning_rate": 0.004999999888241291, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2
+�root.layer-0"_tf_keras_input_layer*�{"class_name": "InputLayer", "name": "input_1", "dtype": "float32", "sparse": false, "ragged": false, "batch_input_shape": {"class_name": "__tuple__", "items": [null, 10]}, "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 10]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "input_1"}}2
+�root.layer_with_weights-0"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 2}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["input_1", 0, 0, {}]]], "shared_object_id": 3, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 10}}, "shared_object_id": 12}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 10]}}2
+�root.layer_with_weights-1"_tf_keras_layer*�{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 16, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 5}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense", 0, 0, {}]]], "shared_object_id": 6, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 16}}, "shared_object_id": 13}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16]}}2
+�root.layer_with_weights-2"_tf_keras_layer*�{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 9, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 7}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 8}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "inbound_nodes": [[["dense_1", 0, 0, {}]]], "shared_object_id": 9, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 16}}, "shared_object_id": 14}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 16]}}2
+�Uroot.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 15}2
diff --git a/...nTime20230310-223500.model/saved_model.pb → ...nTime20231220-121204.model/saved_model.pb b/...nTime20230310-223500.model/saved_model.pb → ...nTime20231220-121204.model/saved_model.pb
diff --git a/...ETED_Max2951_Epoch267_inTime20231220-121204.model/variables/variables.data-00000-of-00001 b/...ETED_Max2951_Epoch267_inTime20231220-121204.model/variables/variables.data-00000-of-00001
diff --git a/...16x16_LAPCOMPLETED_Max2951_Epoch267_inTime20231220-121204.model/variables/variables.index b/...16x16_LAPCOMPLETED_Max2951_Epoch267_inTime20231220-121204.model/variables/variables.index
diff --git a/...dqn/DQN_sp_16x16_LAPCOMPLETED_Max5334_Epoch596_inTime20230310-223500.model/fingerprint.pb b/...dqn/DQN_sp_16x16_LAPCOMPLETED_Max5334_Epoch596_inTime20230310-223500.model/fingerprint.pb
-Original file line number
+Diff line change
@@ -34,4 +34,4 @@ def inference(self, state):
                 ]
             else:
-                return self.model(np.array(state).reshape(1, 5))[0]
+                return self.model(np.array([state]))[0]