diff --git a/Dockerfile.ray b/Dockerfile.ray
index 828bfbccf..13cec8b14 100644
--- a/Dockerfile.ray
+++ b/Dockerfile.ray
@@ -5,7 +5,7 @@ FROM rayproject/ray:latest-py310
 
 # Install dependencies in a single layer to keep it cached
-RUN pip install torch==2.5.1 gymnasium numpy==1.24.3 ray[rllib]==2.38.0 pyzmq
+RUN pip install torch==2.5.1 gymnasium numpy==1.24.3 ray[rllib]==2.38.0 pyzmq protobuf websockets
 
 # Copy the entire roboteam root folder (including the roboteam_ai and roboteam_networking folders)
 COPY roboteam_ai /roboteam/roboteam_ai
diff --git a/README.md b/README.md
index 638d63d29..c208745a9 100644
--- a/README.md
+++ b/README.md
@@ -37,12 +37,14 @@ To enable Tracy
 - Information is in the tracy [docs](https://github.com/wolfpld/tracy)
 - Run AI
-
 ### Use of Ray
+Dockerfile.ray builds a Docker image that uses the Ray project's official image as a base and adds the required Python libraries and the roboteam RL code. Only build it if you want to deploy it to a cluster. Build the image with the following command from the root folder:
+
 - docker build -t roboteamtwente/ray:development -f Dockerfile.ray .
 Push it using the following command:
-- docker push roboteamtwente/ray:development
\ No newline at end of file
+
+- docker push roboteamtwente/ray:development
diff --git a/docker/runner/README.md b/docker/runner/README.md
index 966daffe1..6362b65c3 100644
--- a/docker/runner/README.md
+++ b/docker/runner/README.md
@@ -2,20 +2,23 @@
 In a Ray (or any distributed computing) cluster, the terms "head node" and "worker nodes" refer to the different roles that containers play in the cluster. The head node is the master node of a Ray cluster; you typically have exactly one. Worker nodes are the containers that execute the jobs, in parallel. You can have as many worker nodes as you want.
 -----------------------------------------------------------
-## Installing Kuberay:
-curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-helm repo add kuberay https://ray-project.github.io/kuberay-helm/
+
+## Installing Kuberay
+
+curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+helm repo add kuberay https://ray-project.github.io/kuberay-helm/
 helm repo update
 helm install kuberay-operator kuberay/kuberay-operator --namespace ray-system --create-namespace
 
 https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html
 The above source was used for creating the ray-cluster.yaml
 
-Installing kubernetes and minikubernetes, you can follow this guide to check out how to install and run them: https://medium.com/@areesmoon/installing-minikube-on-ubuntu-20-04-lts-focal-fossa-b10fad9d0511
+To install Kubernetes and Minikube, you can follow this guide: https://medium.com/@areesmoon/installing-minikube-on-ubuntu-20-04-lts-focal-fossa-b10fad9d0511
 
 Use 'pip install ray' and then 'pip show ray' to get your version of ray.
 
------------------------------------------------------------------------------------
+-----------------------------------------------------------
+
 After you have both kubernetes and ray, use the following command to create a cluster:
 kubectl apply -f ray-cluster.yaml
 This cluster launches a ray head node and one worker node. Launch the external simulator using kubectl apply -f simulator.yaml
@@ -24,11 +27,13 @@ This cluster launches a ray head node and one worker node. Launch the external s
 Use the following command to forward the needed port to the ray service:
 kubectl port-forward svc/roboteam-ray-cluster-head-nodeport 8265:8265
 This is the port that will be used inside ray_jobs.py, where we submit the jobs to ray.
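+
+ray_jobs.py itself is not shown here; a minimal sketch of such a submission script (the entrypoint and runtime_env values are assumptions, not the actual script) could look like this:
+
+```python
+from ray.job_submission import JobSubmissionClient
+
+# Assumes the port-forward above is active, so the dashboard is reachable locally
+client = JobSubmissionClient("http://127.0.0.1:8265")
+job_id = client.submit_job(
+    entrypoint="python roboteam_ai/src/RL/RL_Ray/train.py",  # hypothetical entrypoint
+    runtime_env={"working_dir": "."},
+)
+print(client.get_job_status(job_id))
+```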
+-----------------------------------------------------------
 ------------------------------------------------------
 ## Useful commands
+
 kubectl apply -f ray-cluster.yaml
 kubectl delete -f ray-cluster.yaml
 helm install kuberay-operator ray/kuberay-operator
 helm uninstall kuberay-operator
-kubectl port-forward svc/roboteam-ray-cluster-head-nodeport 8265:8265 6379:6379 10001:10001 8000:8000 &
\ No newline at end of file
+kubectl port-forward svc/roboteam-ray-cluster-head-nodeport 8265:8265 6379:6379 10001:10001 8000:8000 &
+minikube start -p ray --nodes 2 --memory 4000 --cpus 3
diff --git a/docker/runner/ray-cluster-combined.yaml b/docker/runner/ray-cluster-combined.yaml
index a054f98eb..c725a1e33 100644
--- a/docker/runner/ray-cluster-combined.yaml
+++ b/docker/runner/ray-cluster-combined.yaml
@@ -34,11 +34,11 @@ spec:
         - containerPort: 8000 # Serve
         resources:
           requests:
-            cpu: "500m"
-            memory: "1Gi"
-          limits:
             cpu: "1"
             memory: "2Gi"
+          limits:
+            cpu: "2"
+            memory: "4Gi"
         env:
         - name: POD_IP
           valueFrom:
@@ -70,7 +70,9 @@ spec:
   # Worker node configuration with integrated simulator
   workerGroupSpecs:
   - groupName: worker-group
-    replicas: 1
+    replicas: 1
+    minReplicas: 1 # Minimum number of worker replicas
+    maxReplicas: 1 # Optional: maximum number of worker replicas
     rayStartParams:
       num-cpus: "1"
     template:
@@ -78,7 +80,7 @@ spec:
       labels:
         app: ray-worker
     spec:
-      hostNetwork: true
+      hostNetwork: false
      dnsPolicy: ClusterFirstWithHostNet
      affinity:
        nodeAffinity:
@@ -109,8 +111,8 @@ spec:
            cpu: 500m
            memory: 1Gi
          limits:
-            cpu: 2000m
-            memory: 4Gi
+            cpu: 1500m
+            memory: 3Gi
        env:
        - name: LD_LIBRARY_PATH
          value: /home/roboteam/build/release/lib
@@ -187,10 +189,10 @@ spec:
        resources:
          requests:
            cpu: 120m
-            memory: 20Mi
+            memory: 40Mi
          limits:
            cpu: 150m
-            memory: 50Mi
+            memory: 100Mi
 
       # Simulator
       - name: erforce-simulator
@@ -295,4 +297,4 @@ spec:
     targetPort: 8081
   - name: simulator
     port: 5558
-    targetPort: 5558
\ No newline at end of file
+    targetPort: 5558
diff --git a/docker/runner/ray-cluster.yaml b/docker/runner/ray-cluster.yaml
index 79ee4ca40..f22063d61 100644
--- a/docker/runner/ray-cluster.yaml
+++ b/docker/runner/ray-cluster.yaml
@@ -8,39 +8,46 @@ spec:
   headGroupSpec:
     rayStartParams:
       dashboard-host: "0.0.0.0"
+      node-ip-address: "$(HOST_IP)"
    template:
      metadata:
        labels:
          app: ray-head
      spec:
        hostNetwork: false
+        # Add node affinity for the head node
+        affinity:
+          nodeAffinity:
+            requiredDuringSchedulingIgnoredDuringExecution:
+              nodeSelectorTerms:
+              - matchExpressions:
+                - key: kubernetes.io/hostname
+                  operator: In
+                  values:
+                  - multinode-demo
       containers:
       - name: ray-head
         image: roboteamtwente/ray:development
-        imagePullPolicy: Always # Always pull the latest image
+        imagePullPolicy: Always
        ports:
-        - containerPort: 8265 # dashboard port
-        - containerPort: 6379 # redis port
-        - containerPort: 10001 # GCS server port
-        - containerPort: 8000 # Serve port
+        - containerPort: 8265
+        - containerPort: 6379
+        - containerPort: 10001
+        - containerPort: 8000
        resources:
          requests:
            cpu: "500m"
-            memory: "1Gi" # Increased from 256Mi
+            memory: "1Gi"
          limits:
-            cpu: "1" # Changed from 600 (which was too high)
-            memory: "2Gi" # Increased from 512Mi
-
+            cpu: "1"
+            memory: "2Gi"
        env:
-        - name: SIMULATION_HOST
-          value: "127.0.0.1" # Using localhost since we're on host network
-        - name: VISION_PORT
-          value: "10020" # Match your simulator's vision port
-        - name: REFEREE_PORT
-          value: "10003" # Match your simulator's referee port
-
+        - name: HOST_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.hostIP
        command: ["/bin/bash", "-c", "--"]
"--"] - args: ["ray start --head --port=6379 --dashboard-host=0.0.0.0 --block"] + args: ["ray start --head --port=6379 --bind-address=0.0.0.0 --dashboard-host=0.0.0.0 --node-ip-address=$(HOST_IP) --block"] livenessProbe: exec: command: @@ -66,7 +73,7 @@ spec: # Worker node configuration workerGroupSpecs: - groupName: worker-group - replicas: 1 # Number of worker nodes + replicas: 1 rayStartParams: num-cpus: "1" template: @@ -75,10 +82,20 @@ spec: app: ray-worker spec: hostNetwork: true + # Replace pod anti-affinity with node affinity + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - multinode-demo-m02 containers: - name: ray-worker image: roboteamtwente/ray:development - imagePullPolicy: Always # Always pull the latest image + imagePullPolicy: Always resources: requests: cpu: 500m @@ -130,4 +147,27 @@ spec: - name: serve port: 8000 targetPort: 8000 - nodePort: 30800 # Serve \ No newline at end of file + nodePort: 30800 # Serve + +--- +apiVersion: v1 +kind: Service +metadata: + name: roboteam-ray-cluster-head-svc +spec: + type: ClusterIP + selector: + app: ray-head + ports: + - name: redis + port: 6379 + targetPort: 6379 + - name: gcs + port: 10001 + targetPort: 10001 + - name: dashboard + port: 8265 + targetPort: 8265 + - name: serve + port: 8000 + targetPort: 8000 \ No newline at end of file diff --git a/docker/runner/simulator.yaml b/docker/runner/simulator.yaml index ac5291b4f..76d3a0a0f 100644 --- a/docker/runner/simulator.yaml +++ b/docker/runner/simulator.yaml @@ -209,4 +209,20 @@ spec: targetPort: 8080 - name: gc port: 8081 - targetPort: 8081 \ No newline at end of file + targetPort: 8081 + - name: zmq + port: 5558 + targetPort: 5558 + protocol: TCP + - name: sim-control + port: 10300 + targetPort: 10300 + protocol: UDP + - name: vision + port: 10020 + targetPort: 10020 + protocol: UDP + - name: referee + port: 10003 + targetPort: 10003 + protocol: UDP \ No newline at end of file diff --git a/roboteam_ai/src/RL/RL_Ray/train.py b/roboteam_ai/src/RL/RL_Ray/train.py index 51c7254c2..f6ac38199 100644 --- a/roboteam_ai/src/RL/RL_Ray/train.py +++ b/roboteam_ai/src/RL/RL_Ray/train.py @@ -18,14 +18,7 @@ import warnings warnings.filterwarnings('ignore', category=DeprecationWarning) -def verify_imports(): - import numpy - import torch - print(f"Local NumPy version: {numpy.__version__}") - print(f"Local PyTorch version: {torch.__version__}") - def main(): - verify_imports() if not ray.is_initialized(): ray.init( @@ -34,30 +27,22 @@ def main(): runtime_env={ "env_vars": { "NUMPY_EXPERIMENTAL_ARRAY_FUNCTION": "0", - }, - # "pip": [ - # "numpy==1.24.3", - # "pyzmq==26.2.0" - # ] + "py_modules": [ + os.path.join(roboteam_ai_root, "roboteam_ai"), + os.path.join(roboteam_ai_root, "roboteam_networking") + ] } ) # ray.init() - # We can set env_config here def env_creator(env_config): return RoboTeamEnv(env_config) # This passes the config to your env # Register the environment register_env("RoboTeamEnv", env_creator) - # Create list of callbacks - callbacks = [ - JsonLoggerCallback(), - CSVLoggerCallback(), - ] - config = ( PPOConfig() .environment("RoboTeamEnv") @@ -65,32 +50,25 @@ def env_creator(env_config): .resources(num_gpus=0) .env_runners( num_env_runners=1, - num_envs_per_env_runner=1, - sample_timeout_s=None - ) -# .api_stack( -# enable_rl_module_and_learner=True, -# enable_env_runner_and_connector_v2=True -# ) + num_envs_per_env_runner=1, # If you use 
+            rollout_fragment_length=16,
+            sample_timeout_s=30,
+            create_env_on_local_worker=False) # Don't create an env on the local (driver) worker
+        .api_stack(
+            enable_rl_module_and_learner=True,
+            enable_env_runner_and_connector_v2=True)
         .debugging(
             log_level="DEBUG",
             seed=42
         )
-        #.callbacks(callbacks)
-        .evaluation(evaluation_interval=10)
     )
 
     print("Starting training...")
     algo = config.build()
 
     for i in range(10):
-        print(f"\nStarting iteration {i}")
         result = algo.train()
         result.pop("config")
-        print("\nTraining metrics:")
-        print(f"Episode Reward Mean: {result.get('episode_reward_mean', 'N/A')}")
-        print(f"Episode Length Mean: {result.get('episode_len_mean', 'N/A')}")
-        print(f"Total Timesteps: {result.get('timesteps_total', 'N/A')}")
         pprint(result)
 
 if __name__ == "__main__":
diff --git a/roboteam_ai/src/RL/env.py b/roboteam_ai/src/RL/env.py
index 30f0c0a7d..39f5c6aa3 100644
--- a/roboteam_ai/src/RL/env.py
+++ b/roboteam_ai/src/RL/env.py
@@ -34,7 +34,6 @@ class RoboTeamEnv(gymnasium.Env):
 
     def __init__(self, config=None):
         self.config = config or {} # Config placeholder
-
         self.MAX_ROBOTS_US = 10 # Define the number of robots that are present in each grid + ball location
@@ -161,7 +160,6 @@ def get_observation(self):
             'ball_position': self.ball_quadrant,
             'is_yellow_dribbling' : self.is_yellow_dribbling
         }
-        print("obs: ", observation_space)
 
         return observation_space, self.calculate_reward()
diff --git a/roboteam_ai/src/RL/env2.py b/roboteam_ai/src/RL/env2.py
index 912118b75..610c83737 100644
--- a/roboteam_ai/src/RL/env2.py
+++ b/roboteam_ai/src/RL/env2.py
@@ -44,7 +44,7 @@ def __init__(self, config=None):
         self.blue_score = 0 # Initialize blue score to zero
 
         # Initialize the observation space
-        self.observation_space = spaces.Box(low=float('-inf'), high=float('inf'), shape=(1,15), dtype=np.float64)
+        self.observation_space = spaces.Box(low=float('-inf'), high=float('inf'), shape=(15,), dtype=np.float64)
 
         # Action space: [attackers, defenders]
         # Wallers will be automatically calculated
@@ -70,34 +70,25 @@ def check_ball_placement(self):
         Function to teleport the ball to the designated position for ball placement if necessary.
""" # Get the current referee state - referee_state, referee_info = get_referee_state() # Assuming get_referee_state() is in scope - - # Extract the command from the referee state - self.ref_command = referee_info['command'] - print("ref command", self.ref_command) + get_referee_state() - # If ref gives command BALL_PLACEMENT_US OR BALL_PLACEMENT_THEM if (self.ref_command == 16 or self.ref_command == 17): - referee_state, referee_data = get_referee_state() - if referee_data["designated_position"]is not None: - self.x, self.y = referee_data["designated_position"]["x"]/1000, referee_data["designated_position"]["y"]/1000 + if self.x and self.y is not None: # Teleport the ball to the designated location teleport_ball(self.x, self.y) else: print("No designated position provided in referee state.") - def get_referee_state(self): """ - Function to globally import the referee state + Function to get referee state values """ - self.x,self.y, # Designated pos - self.yellow_yellow_cards, self.blue_yellow_cards, # yellow cards - self.ref_command, # Ref command, such as HALT, STOP - self.yellow_score, self.blue_score = get_referee_state() # Scores + self.yellow_score, self.blue_score, self.stage, self.ref_command, self.x, self.y = get_referee_state() + self.x = self.x/1000 + self.y = self.y/1000 def calculate_reward(self): """ @@ -171,13 +162,10 @@ def get_observation(self): """ # Get the robot grid representation self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() - print(f"Robot grid: {self.robot_grid}") - print(f"Yellow dribbling: {self.is_yellow_dribbling}, Blue dribbling: {self.is_blue_dribbling}") - + # Get the ball location self.ball_position, self.ball_quadrant = get_ball_state() - print(f"Ball position: {self.ball_position}, Ball quadrant: {self.ball_quadrant}") - + # Convert and flatten robot positions to float64 robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements @@ -195,14 +183,10 @@ def get_observation(self): np.zeros(5, dtype=np.float64) # 5 elements to reach total of 15 ]) - # Reshape to match expected shape (1, 15) - observation = observation.reshape(1, 15) - - # Verify shape and dtype - assert observation.shape == (1, 15), f"Observation shape {observation.shape} != (1, 15)" - assert observation.dtype == np.float64, f"Observation dtype {observation.dtype} != float64" + # Make sure it's flat + observation = observation.reshape(15,) - return observation, self.calculate_reward() + return observation def step(self, action): """ @@ -211,37 +195,38 @@ def step(self, action): """ # Only carry out "normal" loop if the game state is NORMAL_START (this indicates normal gameplay loop) - if self.ref_command == "RUNNING": # Maybe this needs to change to normal_start - - attackers, defenders = action - wallers = self.MAX_ROBOTS - (attackers + defenders) - - # Ensure non-negative values and total of 10 - attackers = max(0, min(attackers, self.MAX_ROBOTS)) - defenders = max(0, min(defenders, self.MAX_ROBOTS - attackers)) - wallers = self.MAX_ROBOTS - (attackers + defenders) - - # Sends the action command over proto to legacy AI - send_action_command(num_attacker=attackers, num_defender=defenders, num_waller= wallers) - + print(f"Step called with action: {action}") + + # Get current referee state at start of step + #referee_state, referee_info = get_referee_state() + #print(f"Step - Current referee state: {referee_state}") + + # if self.ref_command == "RUNNING": + # print("Game is RUNNING, executing action") + # attackers, defenders = action 
+        #     wallers = self.MAX_ROBOTS_US - (attackers + defenders)
+        #     send_action_command(num_attacker=attackers, num_defender=defenders, num_waller=wallers)
+        # else:
+        #     print(f"Game not RUNNING, current command: {self.ref_command}")
 
        # If the game is halted, stopped or ball placement is happening, execute this.
        # Logic to TP the ball if there is ball placement of either side
-        self.check_ball_placement() # Run the function to check if we need to TP the ball
-
-        reward = self.calculate_reward()
+        #self.check_ball_placement() # Run the function to check if we need to TP the ball
 
        # Update observation_space
-        observation_space,_ = self.get_observation()
+        observation_space = self.get_observation()
+
+        # Get reward
+        reward = self.calculate_reward()
 
        done = self.is_terminated()
-        print("isDone",done) # If task is completed (a goal was scored)
+        #print("isDone",done) # If task is completed (a goal was scored)
 
        if done:
            observation_space, _ = self.reset()
 
        truncated = self.is_truncated() # Determine if the episode was truncated, too much time or a yellow card
-        time.sleep(0.1) # DELAY FOR STEPS (ADJUST LATER)
+        #time.sleep(0.1) # DELAY FOR STEPS (ADJUST LATER)
 
        return observation_space, reward, done, truncated, {}
 
@@ -250,18 +235,11 @@ def is_terminated(self):
        """
        Activates when the task has been completed (or it failed because of opponent scoring a goal)
        """
-        referee_state, referee_info = get_referee_state() # Assuming get_referee_state() is in scope
-
-        self.ref_command = referee_info['command']
-        self.yellow_score = referee_state.yellow.score
-        self.blue_score = referee_state.blue.score
-
-        print("refcommand", self.ref_command)
-        print("blue", self.blue_score)
-        print("yellow", self.yellow_score)
 
        if self.ref_command == 0 and (self.yellow_score == 1 or self.blue_score == 1): # HALT command indicates that either team scored
            return True
+        else:
+            return False
 
    def is_truncated(self):
        """
@@ -269,13 +247,15 @@ def is_truncated(self):
        """
 
        # Implement logic to reset the game if no goal is scored
-        pass
+        return False
 
    def reset(self, seed=None,**kwargs):
        """
        The reset function resets the environment when a game is ended
        """
 
        # Teleport ball to middle position
        print("Teleporting ball...")
        teleport_ball(0,0)
 
@@ -289,7 +269,7 @@ def reset(self, seed=None,**kwargs):
        start_game()
 
        print("Getting observation...")
-        observation, _ = self.get_observation()
+        observation = self.get_observation()
 
        # Reset shaped_reward_given boolean
        self.shaped_reward_given = False
@@ -297,7 +277,7 @@ def reset(self, seed=None,**kwargs):
        self.is_blue_dribbling = False
 
        print("Reset complete!")
-        return observation,{}
+        return observation, {}
diff --git a/roboteam_ai/src/RL/env_external.py b/roboteam_ai/src/RL/env_external.py
new file mode 100644
index 000000000..5131b397f
--- /dev/null
+++ b/roboteam_ai/src/RL/env_external.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+"""
+Currently not in use; we might switch to this as a different way of looping through the environment.
+Right now it is entirely GPT-generated, so do not use it.
+""" + + +import numpy as np +from gymnasium import spaces +from ray.rllib.env.external_env import ExternalEnv +import time +import os +import sys +from google.protobuf.message import DecodeError + +# Make root folder /roboteam +current_dir = os.path.dirname(os.path.abspath(__file__)) +roboteam_path = os.path.abspath(os.path.join(current_dir, "../../..")) +sys.path.append(roboteam_path) + +from roboteam_ai.src.RL.src.sentActionCommand import send_action_command +from roboteam_ai.src.RL.src.getState import get_ball_state, get_robot_state, get_referee_state +from roboteam_ai.src.RL.src.teleportBall import teleport_ball +from roboteam_ai.src.RL.src.resetRefereeAPI import reset_referee_state +from roboteam_ai.src.RL.src.changeGameState import start_game + +class RoboTeamEnv(ExternalEnv): + def __init__(self, config=None): + # Initialize state variables + self.MAX_ROBOTS_US = 10 + self.robot_grid = np.zeros((4, 2), dtype=int) + self.ball_position = np.zeros((1,2)) + self.ball_quadrant = 4 + self.yellow_score = 0 + self.blue_score = 0 + self.x = 0 + self.y = 0 + self.yellow_yellow_cards = 0 + self.blue_yellow_cards = 0 + self.ref_command = "" + self.shaped_reward_given = False + self.is_yellow_dribbling = False + self.is_blue_dribbling = False + + # Define spaces before calling parent init + action_space = spaces.MultiDiscrete([self.MAX_ROBOTS_US + 1, self.MAX_ROBOTS_US + 1]) + observation_space = spaces.Box( + low=float('-inf'), + high=float('inf'), + shape=(15,), + dtype=np.float64 + ) + + # Initialize ExternalEnv parent with the spaces + super().__init__(action_space=action_space, observation_space=observation_space) + + # Add Gymnasium compatibility attributes + self._spec = None + self.metadata = { + 'render_modes': [], + 'render.modes': [], + 'render_fps': None + } + self.reward_range = (-float('inf'), float('inf')) + + @property + def unwrapped(self): + """Gymnasium compatibility: Return the base environment.""" + return self + + @property + def spec(self): + """Gymnasium compatibility: Return environment specification.""" + return self._spec + + @spec.setter + def spec(self, value): + """Gymnasium compatibility: Set environment specification.""" + self._spec = value + + def _reset_sim(self): + """Reset the simulation state""" + try: + print("Teleporting ball...") + teleport_ball(0, 0) + print("Sent command to teleport ball to (0, 0, 0.0)") + print("Resetting referee state...") + reset_referee_state() + print("Starting game...") + start_game() + + self.shaped_reward_given = False + self.is_yellow_dribbling = False + self.is_blue_dribbling = False + self.yellow_score = 0 + self.blue_score = 0 + except Exception as e: + print(f"Error resetting simulation: {e}") + + def _get_observation(self): + """Get the current observation from the simulator""" + try: + # Get robot and ball state + self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() + self.ball_position, self.ball_quadrant = get_ball_state() + + # Convert and flatten robot positions + robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements + ball_quadrant = np.array([float(self.ball_quadrant)], dtype=np.float64) # 1 element + is_yellow_dribbling = np.array([float(self.is_yellow_dribbling)], dtype=np.float64) # 1 element + + # Combine all parts + observation = np.concatenate([ + robot_positions_flat, # 8 elements + ball_quadrant, # 1 element + is_yellow_dribbling, # 1 element + np.zeros(5, dtype=np.float64) # 5 elements padding + ]) + + return observation.reshape(15,) + except 
Exception as e:
+            print(f"Error getting observation: {e}")
+            return np.zeros(15, dtype=np.float64)
+
+    def _execute_action(self, action):
+        """Execute the action in the simulator"""
+        try:
+            print(f"x {self.x}")
+            print(f"y {self.y}")
+            print(f"Stepping with action: {action}")
+
+            attackers, defenders = action
+
+            # Ensure non-negative values and total of MAX_ROBOTS_US
+            attackers = max(0, min(attackers, self.MAX_ROBOTS_US))
+            defenders = max(0, min(defenders, self.MAX_ROBOTS_US - attackers))
+            wallers = self.MAX_ROBOTS_US - (attackers + defenders)
+
+            # Send action to simulator
+            send_action_command(
+                num_attacker=attackers,
+                num_defender=defenders,
+                num_waller=wallers
+            )
+        except Exception as e:
+            print(f"Error executing action: {e}")
+
+    def _check_ball_placement(self):
+        """Handle ball placement commands"""
+        try:
+            # get_referee_state() now returns a flat tuple (see getState.py below)
+            _, _, _, self.ref_command, x, y = get_referee_state()
+
+            if (self.ref_command == 16 or self.ref_command == 17):
+                self.x = x / 1000
+                self.y = y / 1000
+                teleport_ball(self.x, self.y)
+        except Exception as e:
+            print(f"Error in ball placement: {e}")
+
+    def _calculate_reward(self):
+        """Calculate the reward for the current state"""
+        try:
+            goal_scored_reward = 0
+            if self.yellow_score == 1:
+                goal_scored_reward = 1
+            elif self.blue_score == 1:
+                goal_scored_reward = -1
+
+            shaped_reward = 0
+            if not self.shaped_reward_given and self.is_yellow_dribbling and (
+                    self.ball_quadrant == 1 or self.ball_quadrant == 3):
+                self.shaped_reward_given = True
+                shaped_reward = 0.1
+
+            return goal_scored_reward + shaped_reward
+        except Exception as e:
+            print(f"Error calculating reward: {e}")
+            return 0.0
+
+    def _is_terminated(self):
+        """Check if episode should terminate"""
+        try:
+            # Unpack the flat tuple returned by the updated get_referee_state()
+            self.yellow_score, self.blue_score, _, self.ref_command, _, _ = get_referee_state()
+
+            return (self.ref_command == 0 and
+                    (self.yellow_score == 1 or self.blue_score == 1))
+        except Exception as e:
+            print(f"Error checking termination: {e}")
+            return False
+
+    def run(self):
+        """Main control loop for the external environment."""
+        while True:
+            try:
+                # Start new episode
+                episode_id = self.start_episode()
+                self._reset_sim()
+                observation = self._get_observation()
+
+                while True:
+                    # Get action from policy
+                    action = self.get_action(episode_id, observation)
+
+                    # Handle ball placement
+                    self._check_ball_placement()
+
+                    # Execute action if game is running; ref_command is numeric now
+                    # (2 == NORMAL_START in the SSL referee Command enum)
+                    if self.ref_command == 2:
+                        self._execute_action(action)
+
+                    # Calculate reward before getting next observation
+                    reward = self._calculate_reward()
+                    self.log_returns(episode_id, reward)
+
+                    # Get next observation
+                    observation = self._get_observation()
+
+                    # Check if episode is done
+                    if self._is_terminated():
+                        self.end_episode(episode_id, observation)
+                        break
+
+                    time.sleep(0.1) # Control loop rate
+
+            except Exception as e:
+                print(f"Error in main loop: {e}")
+                time.sleep(1) # Wait before retrying
\ No newline at end of file
diff --git a/roboteam_ai/src/RL/src/getState.py b/roboteam_ai/src/RL/src/getState.py
index e040f19ce..7dd206848 100644
--- a/roboteam_ai/src/RL/src/getState.py
+++ b/roboteam_ai/src/RL/src/getState.py
@@ -21,16 +21,18 @@
 # from roboteam_networking.proto.ssl_gc_api_pb2 import Output as RefereeState # Alias for referee state
 from 
roboteam_networking.proto.messages_robocup_ssl_referee_pb2 import * # Alias for referee state -IS_IN_K8S = True +def is_kubernetes(): + """Detect if running in Kubernetes environment""" + return os.getenv('KUBERNETES_SERVICE_HOST') is not None def get_zmq_address(): """Get the appropriate ZMQ address based on environment""" - if IS_IN_K8S: + if is_kubernetes(): host = "roboteam-ray-worker-svc" - print("Running in Kubernetes, using service DNS") + #print("Running in Kubernetes, using service DNS") else: host = "localhost" - print("Running locally") + #print("Running locally") return f"tcp://{host}:5558" # Function to get the ball state @@ -46,13 +48,13 @@ def get_ball_state(): socket_world.setsockopt_string(zmq.SUBSCRIBE, "") zmq_address = get_zmq_address() - print(f"Connecting to ZMQ at: {zmq_address}") + #print(f"Connecting to ZMQ at: {zmq_address}") socket_world.connect(zmq_address) try: - print("Waiting for ZMQ message...") + #print("Waiting for ZMQ message...") message = socket_world.recv() - print("Received ZMQ message") + #print("Received ZMQ message") state = RoboState.FromString(message) if not len(state.processed_vision_packets): @@ -64,8 +66,8 @@ def get_ball_state(): ball_position[0] = world.ball.pos.x ball_position[1] = world.ball.pos.y - print("x",ball_position[0]) - print("y",ball_position[1]) + #print("x",ball_position[0]) + #print("y",ball_position[1]) if abs(ball_position[0]) <= CENTER_THRESHOLD and abs(ball_position[1]) <= CENTER_THRESHOLD: ball_quadrant = 4 # Center @@ -94,7 +96,7 @@ def get_robot_state(): socket_world.setsockopt_string(zmq.SUBSCRIBE, "") zmq_address = get_zmq_address() - print(f"Connecting to ZMQ at: {zmq_address}") + #print(f"Connecting to ZMQ at: {zmq_address}") socket_world.connect(zmq_address) try: @@ -150,59 +152,33 @@ def get_referee_state(): sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) try: - # Receive the data + sock.settimeout(5) message, _ = sock.recvfrom(4096) referee_state = Referee.FromString(message) - # Extract information from TeamInfo for Yellow and Blue teams - yellow_team_info = referee_state.yellow - blue_team_info = referee_state.blue - - # Get details from Yellow TeamInfo - yellow_team_data = { - "name": yellow_team_info.name, - "score": yellow_team_info.score, - "red_cards": yellow_team_info.red_cards, - "yellow_cards": yellow_team_info.yellow_cards, - "fouls": yellow_team_info.foul_counter, - "ball_placement_failures": yellow_team_info.ball_placement_failures - } - - # Get details from Blue TeamInfo - blue_team_data = { - "name": blue_team_info.name, - "score": blue_team_info.score, - "red_cards": blue_team_info.red_cards, - "yellow_cards": blue_team_info.yellow_cards, - "fouls": blue_team_info.foul_counter, - "ball_placement_failures": blue_team_info.ball_placement_failures - } - - # Return both the raw referee_state object and a dictionary of extracted details + # Get x,y values directly + x, y = 0, 0 # Default values if referee_state.HasField('designated_position'): - designated_position = { - "x": referee_state.designated_position.x, - "y": referee_state.designated_position.y - } - else: - designated_position = None - - # Return both the raw referee_state object and a dictionary of extracted details - return referee_state, { - "yellow_team": yellow_team_data, - "blue_team": blue_team_data, - "stage": referee_state.stage, - "command": referee_state.command, - "designated_position": designated_position - } - - except DecodeError: - print("Failed to decode referee state message") + x = 
referee_state.designated_position.x
+            y = referee_state.designated_position.y
+
+        return (
+            referee_state.yellow.score,
+            referee_state.blue.score,
+            referee_state.stage,
+            referee_state.command,
+            x,
+            y
+        )
+    except socket.timeout:
+        print("Referee state timeout, returning defaults")
+        return 0, 0, 0, 0, 0, 0  # Default values
     except Exception as e:
-        print(f"Error: {e}")
+        print(f"Error getting referee state: {e}")
+        return 0, 0, 0, 0, 0, 0  # Default values
     finally:
         sock.close()
-    
+
 if __name__ == "__main__":
     # Get robot state
     grid_array, yellow_team_dribbling, blue_team_dribbling = get_robot_state()
diff --git a/roboteam_ai/src/RL/src/resetReferee.py b/roboteam_ai/src/RL/src/resetReferee.py
index 76591bbd3..80d62ea20 100644
--- a/roboteam_ai/src/RL/src/resetReferee.py
+++ b/roboteam_ai/src/RL/src/resetReferee.py
@@ -5,6 +5,10 @@
 import time
 from google.protobuf.timestamp_pb2 import Timestamp
 
+"""
+NOT USED
+"""
+
 current_dir = os.path.dirname(os.path.abspath(__file__))
 roboteam_path = os.path.abspath(os.path.join(current_dir, "..", "..", "..", ".."))
 
diff --git a/roboteam_ai/src/RL/src/websocketHandler.py b/roboteam_ai/src/RL/src/websocketHandler.py
index d7d66126a..a3ed8838b 100644
--- a/roboteam_ai/src/RL/src/websocketHandler.py
+++ b/roboteam_ai/src/RL/src/websocketHandler.py
@@ -3,16 +3,18 @@
 import asyncio
 import json
 
+import os  # needed by is_kubernetes() below; assumed not already imported above this hunk
+
-IS_IN_K8S = True # We run it locally.
+def is_kubernetes():
+    """Detect if running in Kubernetes environment"""
+    return os.getenv('KUBERNETES_SERVICE_HOST') is not None
 
 def get_websocket_uri():
     """Get the appropriate URI based on the environment"""
-    if IS_IN_K8S:
+    if is_kubernetes():
         host = "roboteam-ray-worker-svc"
-        print("Running in Kubernetes, using service DNS")
+        #print("Running in Kubernetes, using service DNS")
     else:
         host = "localhost"
-        print("Running locally")
+        #print("Running locally")
     return f"ws://{host}:8081/api/control"
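For reference, a minimal sketch of a client for this control endpoint, assuming the `websockets` package added in Dockerfile.ray above (the JSON payload and response handling are assumptions, not the documented /api/control schema):

```python
import asyncio
import json

import websockets  # added to the image in Dockerfile.ray above

from roboteam_ai.src.RL.src.websocketHandler import get_websocket_uri

async def send_control_message(payload: dict) -> str:
    # Open a websocket to the interface and exchange one JSON message
    async with websockets.connect(get_websocket_uri()) as ws:
        await ws.send(json.dumps(payload))
        return await ws.recv()  # wait for a single reply

if __name__ == "__main__":
    # Hypothetical payload; the real schema is not shown in this diff
    print(asyncio.run(send_control_message({"command": "ping"})))
```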