Skip to content

Commit

Permalink
Initial version working on ray cluster (does not return observations yet)

Browse files Browse the repository at this point in the history
  • Loading branch information
flimdejong committed Nov 20, 2024
1 parent 396e527 commit 15b0666
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 251 deletions.
71 changes: 61 additions & 10 deletions docker/runner/ray-cluster-combined.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
- key: kubernetes.io/hostname
operator: In
values:
- multinode-demo # Schedule on control-plane
- ray # Schedule on control-plane
containers:
- name: ray-head
image: roboteamtwente/ray:development
Expand Down Expand Up @@ -78,6 +78,8 @@ spec:
labels:
app: ray-worker
spec:
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
Expand All @@ -86,7 +88,13 @@ spec:
- key: kubernetes.io/hostname
operator: In
values:
- multinode-demo-m02 # Schedule on worker node
- ray-m02 # Schedule on worker node
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchLabels:
app: ray-worker
topologyKey: "kubernetes.io/hostname"
volumes:
- name: gradle-cache
emptyDir: {}
Expand All @@ -101,8 +109,8 @@ spec:
cpu: 500m
memory: 1Gi
limits:
cpu: 1000m
memory: 2Gi
cpu: 2000m
memory: 4Gi
env:
- name: LD_LIBRARY_PATH
value: /home/roboteam/build/release/lib
Expand All @@ -112,10 +120,10 @@ spec:
# Game Controller
- name: ssl-game-controller
image: robocupssl/ssl-game-controller:latest
args: ["-address", "0.0.0.0:8081"] # Changed from :8081 to explicitly bind to all interfaces
args: ["-address", "0.0.0.0:8081"]
ports:
- containerPort: 8081
protocol: TCP # Explicitly set protocol
protocol: TCP

# Primary AI
- name: roboteam-primary-ai
Expand Down Expand Up @@ -193,11 +201,11 @@ spec:
- "/home/roboteam/external/framework/build/bin/simulator-cli"
ports:
- containerPort: 10300
protocol: UDP # Simulator control port
protocol: UDP
- containerPort: 10301
protocol: TCP # Presumably TCP ports
protocol: TCP
- containerPort: 5558
protocol: TCP # ZMQ port
protocol: TCP
env:
- name: LD_LIBRARY_PATH
value: /home/roboteam/build/release/lib
Expand Down Expand Up @@ -244,4 +252,47 @@ spec:
- name: gc-interface
port: 8081
targetPort: 8081
nodePort: 30081 # Game controller interface
nodePort: 30081 # Game controller interface
- name: redis
port: 6379
targetPort: 6379
nodePort: 30679 # Choose an available port

---
apiVersion: v1
kind: Service
metadata:
name: roboteam-ray-cluster-head-svc
spec:
type: ClusterIP
selector:
app: ray-head
ports:
- name: redis
port: 6379
targetPort: 6379
- name: gcs
port: 10001
targetPort: 10001
- name: dashboard
port: 8265
targetPort: 8265
- name: serve
port: 8000
targetPort: 8000

---
apiVersion: v1
kind: Service
metadata:
name: roboteam-ray-worker-svc
spec:
selector:
app: ray-worker
ports:
- name: game-controller
port: 8081
targetPort: 8081
- name: simulator
port: 5558
targetPort: 5558
52 changes: 28 additions & 24 deletions roboteam_ai/src/RL/RL_Ray/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,23 @@ def verify_imports():
def main():
verify_imports()

# if not ray.is_initialized():
# ray.init(
# address="ray://192.168.49.2:31001",
# ignore_reinit_error=True,
# runtime_env={
# "env_vars": {
# "NUMPY_EXPERIMENTAL_ARRAY_FUNCTION": "0",

# },
# # "pip": [
# # "numpy==1.24.3",
# # "pyzmq==26.2.0"
# # ]
# }
# )

ray.init()
if not ray.is_initialized():
ray.init(
address=f"ray://192.168.49.2:31001",
ignore_reinit_error=True,
runtime_env={
"env_vars": {
"NUMPY_EXPERIMENTAL_ARRAY_FUNCTION": "0",

},
# "pip": [
# "numpy==1.24.3",
# "pyzmq==26.2.0"
# ]
}
)

# ray.init()

# We can set env_config here
def env_creator(env_config):
Expand All @@ -65,7 +65,13 @@ def env_creator(env_config):
.resources(num_gpus=0)
.env_runners(
num_env_runners=1,
num_envs_per_env_runner=1,
sample_timeout_s=None
)
# .api_stack(
# enable_rl_module_and_learner=True,
# enable_env_runner_and_connector_v2=True
# )
.debugging(
log_level="DEBUG",
seed=42
Expand All @@ -78,17 +84,15 @@ def env_creator(env_config):
algo = config.build()

for i in range(10):
print(f"\nStarting iteration {i}")
result = algo.train()
result.pop("config")
print("\nTraining metrics:")
print(f"Episode Reward Mean: {result.get('episode_reward_mean', 'N/A')}")
print(f"Episode Length Mean: {result.get('episode_len_mean', 'N/A')}")
print(f"Total Timesteps: {result.get('timesteps_total', 'N/A')}")
pprint(result)

if i % 5 == 0:
# Use save instead of save_to_path
checkpoint_dir = f"checkpoint_{i}"
os.makedirs(checkpoint_dir, exist_ok=True)
algo.save(checkpoint_dir)
print(f"Checkpoint saved in directory {checkpoint_dir}")

if __name__ == "__main__":
main()

Expand Down
47 changes: 32 additions & 15 deletions roboteam_ai/src/RL/env2.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ def __init__(self, config=None):
self.blue_score = 0 # Initialize blue score to zero

# Initialize the observation space
self.observation_space = spaces.Box(low=0, high=self.MAX_ROBOTS_US, shape=(15,), dtype=np.int32)
self.observation_space = spaces.Box(low=float('-inf'), high=float('inf'), shape=(1,15), dtype=np.float64)


# Action space: [attackers, defenders]
# Wallers will be automatically calculated
self.action_space = spaces.MultiDiscrete([self.MAX_ROBOTS_US + 1, self.MAX_ROBOTS_US + 1])
Expand Down Expand Up @@ -170,26 +169,38 @@ def get_observation(self):
"""
get_observation is meant to get the observation space (kinda like the state)
"""

# Get the robot grid representation
self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() # Matrix of 4 by 2 + 2 booleans
self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state()
print(f"Robot grid: {self.robot_grid}")
print(f"Yellow dribbling: {self.is_yellow_dribbling}, Blue dribbling: {self.is_blue_dribbling}")

# Get the ball location
self.ball_position, self.ball_quadrant = get_ball_state() # x,y coordinates, quadrant

self.ball_position, self.ball_quadrant = get_ball_state()
print(f"Ball position: {self.ball_position}, Ball quadrant: {self.ball_quadrant}")

robot_positions_flat = self.robot_grid.flatten()
# Convert and flatten robot positions to float64
robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements

# Use ball quadrant for observation
ball_quadrant = np.array([float(self.ball_quadrant)], dtype=np.float64) # 1 element

# Convert dribbling status to float64
is_yellow_dribbling = np.array([float(self.is_yellow_dribbling)], dtype=np.float64) # 1 element

# Convert `ball_position` (scalar) and `is_yellow_dribbling` (boolean) to compatible formats
ball_position = np.array([self.ball_quadrant]) # 1 element
is_yellow_dribbling = np.array([int(self.is_yellow_dribbling)]) # Convert boolean to int (0 or 1)
# Combine all parts into the observation array with padding
observation = np.concatenate([
robot_positions_flat, # 8 elements
ball_quadrant, # 1 element
is_yellow_dribbling, # 1 element
np.zeros(5, dtype=np.float64) # 5 elements to reach total of 15
])

# Combine all parts into a single 15-element observation array
# Pad with zeros if you need additional elements
observation = np.concatenate([robot_positions_flat, ball_position, is_yellow_dribbling, np.zeros(5)])
# Reshape to match expected shape (1, 15)
observation = observation.reshape(1, 15)

# Verify shape and dtype
assert observation.shape == (1, 15), f"Observation shape {observation.shape} != (1, 15)"
assert observation.dtype == np.float64, f"Observation dtype {observation.dtype} != float64"

return observation, self.calculate_reward()

Expand Down Expand Up @@ -230,7 +241,7 @@ def step(self, action):
observation_space, _ = self.reset()
truncated = self.is_truncated() # Determine if the episode was truncated, too much time or a yellow card

time.sleep(0.25) # DELAY FOR STEPS (ADJUST LATER)
time.sleep(0.1) # DELAY FOR STEPS (ADJUST LATER)

return observation_space, reward, done, truncated, {}

Expand Down Expand Up @@ -266,20 +277,26 @@ def reset(self, seed=None,**kwargs):
"""

# Teleport ball to middle position
print("Teleporting ball...")
teleport_ball(0,0)

# Reset referee state
print("Resetting referee state...")
reset_referee_state()

# Set blue team on right side + initiates kickoff
print("Starting game...")
start_game()

print("Getting observation...")
observation, _ = self.get_observation()

# Reset shaped_reward_given boolean
self.shaped_reward_given = False
self.is_yellow_dribbling = False
self.is_blue_dribbling = False

observation, _ = self.get_observation()
print("Reset complete!")
return observation,{}


Expand Down
Loading

0 comments on commit 15b0666

Please sign in to comment.