diff --git a/Dockerfile.ray b/Dockerfile.ray
index 828bfbccf..13cec8b14 100644
--- a/Dockerfile.ray
+++ b/Dockerfile.ray
@@ -5,7 +5,7 @@ FROM rayproject/ray:latest-py310
 
 # Install dependencies in a single layer to keep it cached
-RUN pip install torch==2.5.1 gymnasium numpy==1.24.3 ray[rllib]==2.38.0 pyzmq
+RUN pip install torch==2.5.1 gymnasium numpy==1.24.3 ray[rllib]==2.38.0 pyzmq protobuf websockets
 
 # Copy the entire roboteam root folder (including the roboteam_ai and roboteam_networking folders)
 COPY roboteam_ai /roboteam/roboteam_ai
diff --git a/README.md b/README.md
index 638d63d29..c208745a9 100644
--- a/README.md
+++ b/README.md
@@ -37,12 +37,14 @@ To enable Tracy
 - Information is in the tracy [docs](https://github.com/wolfpld/tracy)
 - Run AI
-
 ### Use of Ray
+Dockerfile.ray builds a Docker image that uses the Ray project's official image as a base and adds the required Python libraries and the roboteam RL code. Only build it if you want to deploy it to a cluster. Build the image with the following command from the root folder:
+
 - docker build -t roboteamtwente/ray:development -f Dockerfile.ray .
 Push it using the following command:
-- docker push roboteamtwente/ray:development
\ No newline at end of file
+
+- docker push roboteamtwente/ray:development
diff --git a/docker/runner/README.md b/docker/runner/README.md
index 966daffe1..6362b65c3 100644
--- a/docker/runner/README.md
+++ b/docker/runner/README.md
@@ -2,20 +2,23 @@
 In a Ray (or any distributed computing) cluster, the terms "head node" and "worker nodes" refer to the different roles that containers play in the cluster. The head node is the master node of a Ray cluster; you typically have exactly one. Worker nodes are the containers that execute the jobs, in parallel. You can have as many worker nodes as you want.
 -----------------------------------------------------------
-## Installing Kuberay:
-curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
-helm repo add kuberay https://ray-project.github.io/kuberay-helm/
+
+## Installing Kuberay
+
+curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
+helm repo add kuberay https://ray-project.github.io/kuberay-helm/
 helm repo update
 helm install kuberay-operator kuberay/kuberay-operator --namespace ray-system --create-namespace
 
 https://docs.ray.io/en/latest/cluster/kubernetes/user-guides/config.html
 The above source was used for creating the ray-cluster.yaml
 
-Installing kubernetes and minikubernetes, you can follow this guide to check out how to install and run them: https://medium.com/@areesmoon/installing-minikube-on-ubuntu-20-04-lts-focal-fossa-b10fad9d0511
+To install Kubernetes and Minikube, you can follow this guide: https://medium.com/@areesmoon/installing-minikube-on-ubuntu-20-04-lts-focal-fossa-b10fad9d0511
 
 Use 'pip install ray' and then 'pip show ray' to get your version of ray.
 
------------------------------------------------------------------------------------
+-----------------------------------------------------------
+
 After you have both kubernetes and ray, use the following command to create a cluster:
 kubectl apply -f ray-cluster.yaml
 This cluster launches a ray head node and one worker node. Launch the external simulator using kubectl apply -f simulator.yaml
@@ -24,11 +27,13 @@ This cluster launches a ray head node and one worker node. Launch the external s
 Use the following command to forward the needed port to the ray service:
 kubectl port-forward svc/roboteam-ray-cluster-head-nodeport 8265:8265
 This is the port that will be used inside ray_jobs.py, where we submit the jobs to ray.
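+
+ray_jobs.py itself is not shown here; a minimal sketch of such a submission script (the entrypoint and runtime_env values are assumptions, not the actual script) could look like this:
+
+```python
+from ray.job_submission import JobSubmissionClient
+
+# Assumes the port-forward above is active, so the dashboard is reachable locally
+client = JobSubmissionClient("http://127.0.0.1:8265")
+job_id = client.submit_job(
+    entrypoint="python roboteam_ai/src/RL/RL_Ray/train.py",  # hypothetical entrypoint
+    runtime_env={"working_dir": "."},
+)
+print(client.get_job_status(job_id))
+```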
+-----------------------------------------------------------
 ------------------------------------------------------
 ## Useful commands
+
 kubectl apply -f ray-cluster.yaml
 kubectl delete -f ray-cluster.yaml
 helm install kuberay-operator ray/kuberay-operator
 helm uninstall kuberay-operator
-kubectl port-forward svc/roboteam-ray-cluster-head-nodeport 8265:8265 6379:6379 10001:10001 8000:8000 &
\ No newline at end of file
+kubectl port-forward svc/roboteam-ray-cluster-head-nodeport 8265:8265 6379:6379 10001:10001 8000:8000 &
+minikube start -p ray --nodes 2 --memory 4000 --cpus 3
diff --git a/docker/runner/ray-cluster-combined.yaml b/docker/runner/ray-cluster-combined.yaml
index a054f98eb..c725a1e33 100644
--- a/docker/runner/ray-cluster-combined.yaml
+++ b/docker/runner/ray-cluster-combined.yaml
@@ -34,11 +34,11 @@ spec:
         - containerPort: 8000 # Serve
         resources:
           requests:
-            cpu: "500m"
-            memory: "1Gi"
-          limits:
             cpu: "1"
             memory: "2Gi"
+          limits:
+            cpu: "2"
+            memory: "4Gi"
         env:
         - name: POD_IP
           valueFrom:
@@ -70,7 +70,9 @@ spec:
   # Worker node configuration with integrated simulator
   workerGroupSpecs:
   - groupName: worker-group
-    replicas: 1
+    replicas: 1
+    minReplicas: 1 # Minimum number of worker replicas
+    maxReplicas: 1 # Optional: maximum number of worker replicas
     rayStartParams:
       num-cpus: "1"
     template:
@@ -78,7 +80,7 @@ spec:
       labels:
         app: ray-worker
     spec:
-      hostNetwork: true
+      hostNetwork: false
      dnsPolicy: ClusterFirstWithHostNet
      affinity:
        nodeAffinity:
@@ -109,8 +111,8 @@ spec:
            cpu: 500m
            memory: 1Gi
          limits:
-            cpu: 2000m
-            memory: 4Gi
+            cpu: 1500m
+            memory: 3Gi
        env:
        - name: LD_LIBRARY_PATH
          value: /home/roboteam/build/release/lib
@@ -187,10 +189,10 @@ spec:
        resources:
          requests:
            cpu: 120m
-            memory: 20Mi
+            memory: 40Mi
          limits:
            cpu: 150m
-            memory: 50Mi
+            memory: 100Mi
 
       # Simulator
       - name: erforce-simulator
@@ -295,4 +297,4 @@ spec:
     targetPort: 8081
   - name: simulator
     port: 5558
-    targetPort: 5558
\ No newline at end of file
+    targetPort: 5558
diff --git a/docker/runner/ray-cluster.yaml b/docker/runner/ray-cluster.yaml
index 79ee4ca40..f22063d61 100644
--- a/docker/runner/ray-cluster.yaml
+++ b/docker/runner/ray-cluster.yaml
@@ -8,39 +8,46 @@ spec:
   headGroupSpec:
     rayStartParams:
       dashboard-host: "0.0.0.0"
+      node-ip-address: "$(HOST_IP)"
    template:
      metadata:
        labels:
          app: ray-head
      spec:
        hostNetwork: false
+        # Add node affinity for the head node
+        affinity:
+          nodeAffinity:
+            requiredDuringSchedulingIgnoredDuringExecution:
+              nodeSelectorTerms:
+              - matchExpressions:
+                - key: kubernetes.io/hostname
+                  operator: In
+                  values:
+                  - multinode-demo
       containers:
       - name: ray-head
         image: roboteamtwente/ray:development
-        imagePullPolicy: Always # Always pull the latest image
+        imagePullPolicy: Always
        ports:
-        - containerPort: 8265 # dashboard port
-        - containerPort: 6379 # redis port
-        - containerPort: 10001 # GCS server port
-        - containerPort: 8000 # Serve port
+        - containerPort: 8265
+        - containerPort: 6379
+        - containerPort: 10001
+        - containerPort: 8000
        resources:
          requests:
            cpu: "500m"
-            memory: "1Gi" # Increased from 256Mi
+            memory: "1Gi"
          limits:
-            cpu: "1" # Changed from 600 (which was too high)
-            memory: "2Gi" # Increased from 512Mi
-
+            cpu: "1"
+            memory: "2Gi"
        env:
-        - name: SIMULATION_HOST
-          value: "127.0.0.1" # Using localhost since we're on host network
-        - name: VISION_PORT
-          value: "10020" # Match your simulator's vision port
-        - name: REFEREE_PORT
-          value: "10003" # Match your simulator's referee port
-
+        - name: HOST_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.hostIP
        command: ["/bin/bash", "-c", "--"]
"--"] - args: ["ray start --head --port=6379 --dashboard-host=0.0.0.0 --block"] + args: ["ray start --head --port=6379 --bind-address=0.0.0.0 --dashboard-host=0.0.0.0 --node-ip-address=$(HOST_IP) --block"] livenessProbe: exec: command: @@ -66,7 +73,7 @@ spec: # Worker node configuration workerGroupSpecs: - groupName: worker-group - replicas: 1 # Number of worker nodes + replicas: 1 rayStartParams: num-cpus: "1" template: @@ -75,10 +82,20 @@ spec: app: ray-worker spec: hostNetwork: true + # Replace pod anti-affinity with node affinity + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - multinode-demo-m02 containers: - name: ray-worker image: roboteamtwente/ray:development - imagePullPolicy: Always # Always pull the latest image + imagePullPolicy: Always resources: requests: cpu: 500m @@ -130,4 +147,27 @@ spec: - name: serve port: 8000 targetPort: 8000 - nodePort: 30800 # Serve \ No newline at end of file + nodePort: 30800 # Serve + +--- +apiVersion: v1 +kind: Service +metadata: + name: roboteam-ray-cluster-head-svc +spec: + type: ClusterIP + selector: + app: ray-head + ports: + - name: redis + port: 6379 + targetPort: 6379 + - name: gcs + port: 10001 + targetPort: 10001 + - name: dashboard + port: 8265 + targetPort: 8265 + - name: serve + port: 8000 + targetPort: 8000 \ No newline at end of file diff --git a/docker/runner/simulator.yaml b/docker/runner/simulator.yaml index ac5291b4f..76d3a0a0f 100644 --- a/docker/runner/simulator.yaml +++ b/docker/runner/simulator.yaml @@ -209,4 +209,20 @@ spec: targetPort: 8080 - name: gc port: 8081 - targetPort: 8081 \ No newline at end of file + targetPort: 8081 + - name: zmq + port: 5558 + targetPort: 5558 + protocol: TCP + - name: sim-control + port: 10300 + targetPort: 10300 + protocol: UDP + - name: vision + port: 10020 + targetPort: 10020 + protocol: UDP + - name: referee + port: 10003 + targetPort: 10003 + protocol: UDP \ No newline at end of file diff --git a/roboteam_ai/src/RL/RL_Ray/train.py b/roboteam_ai/src/RL/RL_Ray/train.py index 51c7254c2..f6ac38199 100644 --- a/roboteam_ai/src/RL/RL_Ray/train.py +++ b/roboteam_ai/src/RL/RL_Ray/train.py @@ -18,14 +18,7 @@ import warnings warnings.filterwarnings('ignore', category=DeprecationWarning) -def verify_imports(): - import numpy - import torch - print(f"Local NumPy version: {numpy.__version__}") - print(f"Local PyTorch version: {torch.__version__}") - def main(): - verify_imports() if not ray.is_initialized(): ray.init( @@ -34,30 +27,22 @@ def main(): runtime_env={ "env_vars": { "NUMPY_EXPERIMENTAL_ARRAY_FUNCTION": "0", - }, - # "pip": [ - # "numpy==1.24.3", - # "pyzmq==26.2.0" - # ] + "py_modules": [ + os.path.join(roboteam_ai_root, "roboteam_ai"), + os.path.join(roboteam_ai_root, "roboteam_networking") + ] } ) # ray.init() - # We can set env_config here def env_creator(env_config): return RoboTeamEnv(env_config) # This passes the config to your env # Register the environment register_env("RoboTeamEnv", env_creator) - # Create list of callbacks - callbacks = [ - JsonLoggerCallback(), - CSVLoggerCallback(), - ] - config = ( PPOConfig() .environment("RoboTeamEnv") @@ -65,32 +50,25 @@ def env_creator(env_config): .resources(num_gpus=0) .env_runners( num_env_runners=1, - num_envs_per_env_runner=1, - sample_timeout_s=None - ) -# .api_stack( -# enable_rl_module_and_learner=True, -# enable_env_runner_and_connector_v2=True -# ) + num_envs_per_env_runner=1, # If you use 
+            rollout_fragment_length=16,
+            sample_timeout_s=30,
+            create_env_on_local_worker=False) # Don't create an env on the local (driver) worker
+        .api_stack(
+            enable_rl_module_and_learner=True,
+            enable_env_runner_and_connector_v2=True)
         .debugging(
             log_level="DEBUG",
             seed=42
         )
-        #.callbacks(callbacks)
-        .evaluation(evaluation_interval=10)
     )
 
     print("Starting training...")
     algo = config.build()
 
     for i in range(10):
-        print(f"\nStarting iteration {i}")
         result = algo.train()
         result.pop("config")
-        print("\nTraining metrics:")
-        print(f"Episode Reward Mean: {result.get('episode_reward_mean', 'N/A')}")
-        print(f"Episode Length Mean: {result.get('episode_len_mean', 'N/A')}")
-        print(f"Total Timesteps: {result.get('timesteps_total', 'N/A')}")
         pprint(result)
 
 if __name__ == "__main__":
diff --git a/roboteam_ai/src/RL/env.py b/roboteam_ai/src/RL/env.py
index 30f0c0a7d..39f5c6aa3 100644
--- a/roboteam_ai/src/RL/env.py
+++ b/roboteam_ai/src/RL/env.py
@@ -34,7 +34,6 @@ class RoboTeamEnv(gymnasium.Env):
 
     def __init__(self, config=None):
         self.config = config or {} # Config placeholder
-
         self.MAX_ROBOTS_US = 10 # Define the number of robots that are present in each grid + ball location
@@ -161,7 +160,6 @@ def get_observation(self):
             'ball_position': self.ball_quadrant,
             'is_yellow_dribbling' : self.is_yellow_dribbling
         }
-        print("obs: ", observation_space)
 
         return observation_space, self.calculate_reward()
diff --git a/roboteam_ai/src/RL/env2.py b/roboteam_ai/src/RL/env2.py
index 912118b75..610c83737 100644
--- a/roboteam_ai/src/RL/env2.py
+++ b/roboteam_ai/src/RL/env2.py
@@ -44,7 +44,7 @@ def __init__(self, config=None):
         self.blue_score = 0 # Initialize blue score to zero
 
         # Initialize the observation space
-        self.observation_space = spaces.Box(low=float('-inf'), high=float('inf'), shape=(1,15), dtype=np.float64)
+        self.observation_space = spaces.Box(low=float('-inf'), high=float('inf'), shape=(15,), dtype=np.float64)
 
         # Action space: [attackers, defenders]
         # Wallers will be automatically calculated
@@ -70,34 +70,25 @@ def check_ball_placement(self):
         Function to teleport the ball to the designated position for ball placement if necessary.
""" # Get the current referee state - referee_state, referee_info = get_referee_state() # Assuming get_referee_state() is in scope - - # Extract the command from the referee state - self.ref_command = referee_info['command'] - print("ref command", self.ref_command) + get_referee_state() - # If ref gives command BALL_PLACEMENT_US OR BALL_PLACEMENT_THEM if (self.ref_command == 16 or self.ref_command == 17): - referee_state, referee_data = get_referee_state() - if referee_data["designated_position"]is not None: - self.x, self.y = referee_data["designated_position"]["x"]/1000, referee_data["designated_position"]["y"]/1000 + if self.x and self.y is not None: # Teleport the ball to the designated location teleport_ball(self.x, self.y) else: print("No designated position provided in referee state.") - def get_referee_state(self): """ - Function to globally import the referee state + Function to get referee state values """ - self.x,self.y, # Designated pos - self.yellow_yellow_cards, self.blue_yellow_cards, # yellow cards - self.ref_command, # Ref command, such as HALT, STOP - self.yellow_score, self.blue_score = get_referee_state() # Scores + self.yellow_score, self.blue_score, self.stage, self.ref_command, self.x, self.y = get_referee_state() + self.x = self.x/1000 + self.y = self.y/1000 def calculate_reward(self): """ @@ -171,13 +162,10 @@ def get_observation(self): """ # Get the robot grid representation self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() - print(f"Robot grid: {self.robot_grid}") - print(f"Yellow dribbling: {self.is_yellow_dribbling}, Blue dribbling: {self.is_blue_dribbling}") - + # Get the ball location self.ball_position, self.ball_quadrant = get_ball_state() - print(f"Ball position: {self.ball_position}, Ball quadrant: {self.ball_quadrant}") - + # Convert and flatten robot positions to float64 robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements @@ -195,14 +183,10 @@ def get_observation(self): np.zeros(5, dtype=np.float64) # 5 elements to reach total of 15 ]) - # Reshape to match expected shape (1, 15) - observation = observation.reshape(1, 15) - - # Verify shape and dtype - assert observation.shape == (1, 15), f"Observation shape {observation.shape} != (1, 15)" - assert observation.dtype == np.float64, f"Observation dtype {observation.dtype} != float64" + # Make sure it's flat + observation = observation.reshape(15,) - return observation, self.calculate_reward() + return observation def step(self, action): """ @@ -211,37 +195,38 @@ def step(self, action): """ # Only carry out "normal" loop if the game state is NORMAL_START (this indicates normal gameplay loop) - if self.ref_command == "RUNNING": # Maybe this needs to change to normal_start - - attackers, defenders = action - wallers = self.MAX_ROBOTS - (attackers + defenders) - - # Ensure non-negative values and total of 10 - attackers = max(0, min(attackers, self.MAX_ROBOTS)) - defenders = max(0, min(defenders, self.MAX_ROBOTS - attackers)) - wallers = self.MAX_ROBOTS - (attackers + defenders) - - # Sends the action command over proto to legacy AI - send_action_command(num_attacker=attackers, num_defender=defenders, num_waller= wallers) - + print(f"Step called with action: {action}") + + # Get current referee state at start of step + #referee_state, referee_info = get_referee_state() + #print(f"Step - Current referee state: {referee_state}") + + # if self.ref_command == "RUNNING": + # print("Game is RUNNING, executing action") + # attackers, defenders = action 
+        #     wallers = self.MAX_ROBOTS_US - (attackers + defenders)
+        #     send_action_command(num_attacker=attackers, num_defender=defenders, num_waller=wallers)
+        # else:
+        #     print(f"Game not RUNNING, current command: {self.ref_command}")
 
        # If the game is halted, stopped or ball placement is happening, execute this.
        # Logic to TP the ball if there is ball placement of either side
-        self.check_ball_placement() # Run the function to check if we need to TP the ball
-
-        reward = self.calculate_reward()
+        #self.check_ball_placement() # Run the function to check if we need to TP the ball
 
        # Update observation_space
-        observation_space,_ = self.get_observation()
+        observation_space = self.get_observation()
+
+        # Get reward
+        reward = self.calculate_reward()
 
        done = self.is_terminated()
-        print("isDone",done) # If task is completed (a goal was scored)
+        #print("isDone",done) # If task is completed (a goal was scored)
 
        if done:
            observation_space, _ = self.reset()
 
        truncated = self.is_truncated() # Determine if the episode was truncated, too much time or a yellow card
-        time.sleep(0.1) # DELAY FOR STEPS (ADJUST LATER)
+        #time.sleep(0.1) # DELAY FOR STEPS (ADJUST LATER)
 
        return observation_space, reward, done, truncated, {}
 
@@ -250,18 +235,11 @@ def is_terminated(self):
        """
        Activates when the task has been completed (or it failed because of opponent scoring a goal)
        """
-        referee_state, referee_info = get_referee_state() # Assuming get_referee_state() is in scope
-
-        self.ref_command = referee_info['command']
-        self.yellow_score = referee_state.yellow.score
-        self.blue_score = referee_state.blue.score
-
-        print("refcommand", self.ref_command)
-        print("blue", self.blue_score)
-        print("yellow", self.yellow_score)
 
        if self.ref_command == 0 and (self.yellow_score == 1 or self.blue_score == 1): # HALT command indicates that either team scored
            return True
+        else:
+            return False
 
    def is_truncated(self):
        """
@@ -269,13 +247,15 @@ def is_truncated(self):
        """
 
        # Implement logic to reset the game if no goal is scored
-        pass
+        return False
 
    def reset(self, seed=None,**kwargs):
        """
        The reset function resets the environment when a game is ended
        """
 
        # Teleport ball to middle position
        print("Teleporting ball...")
        teleport_ball(0,0)
 
@@ -289,7 +269,7 @@ def reset(self, seed=None,**kwargs):
        start_game()
 
        print("Getting observation...")
-        observation, _ = self.get_observation()
+        observation = self.get_observation()
 
        # Reset shaped_reward_given boolean
        self.shaped_reward_given = False
@@ -297,7 +277,7 @@ def reset(self, seed=None,**kwargs):
        self.is_blue_dribbling = False
 
        print("Reset complete!")
-        return observation,{}
+        return observation, {}
diff --git a/roboteam_ai/src/RL/env_external.py b/roboteam_ai/src/RL/env_external.py
new file mode 100644
index 000000000..5131b397f
--- /dev/null
+++ b/roboteam_ai/src/RL/env_external.py
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+"""
+Currently not in use; we might switch to this as a different way of looping through the environment.
+Right now it is entirely GPT-generated, so do not use it.
+""" + + +import numpy as np +from gymnasium import spaces +from ray.rllib.env.external_env import ExternalEnv +import time +import os +import sys +from google.protobuf.message import DecodeError + +# Make root folder /roboteam +current_dir = os.path.dirname(os.path.abspath(__file__)) +roboteam_path = os.path.abspath(os.path.join(current_dir, "../../..")) +sys.path.append(roboteam_path) + +from roboteam_ai.src.RL.src.sentActionCommand import send_action_command +from roboteam_ai.src.RL.src.getState import get_ball_state, get_robot_state, get_referee_state +from roboteam_ai.src.RL.src.teleportBall import teleport_ball +from roboteam_ai.src.RL.src.resetRefereeAPI import reset_referee_state +from roboteam_ai.src.RL.src.changeGameState import start_game + +class RoboTeamEnv(ExternalEnv): + def __init__(self, config=None): + # Initialize state variables + self.MAX_ROBOTS_US = 10 + self.robot_grid = np.zeros((4, 2), dtype=int) + self.ball_position = np.zeros((1,2)) + self.ball_quadrant = 4 + self.yellow_score = 0 + self.blue_score = 0 + self.x = 0 + self.y = 0 + self.yellow_yellow_cards = 0 + self.blue_yellow_cards = 0 + self.ref_command = "" + self.shaped_reward_given = False + self.is_yellow_dribbling = False + self.is_blue_dribbling = False + + # Define spaces before calling parent init + action_space = spaces.MultiDiscrete([self.MAX_ROBOTS_US + 1, self.MAX_ROBOTS_US + 1]) + observation_space = spaces.Box( + low=float('-inf'), + high=float('inf'), + shape=(15,), + dtype=np.float64 + ) + + # Initialize ExternalEnv parent with the spaces + super().__init__(action_space=action_space, observation_space=observation_space) + + # Add Gymnasium compatibility attributes + self._spec = None + self.metadata = { + 'render_modes': [], + 'render.modes': [], + 'render_fps': None + } + self.reward_range = (-float('inf'), float('inf')) + + @property + def unwrapped(self): + """Gymnasium compatibility: Return the base environment.""" + return self + + @property + def spec(self): + """Gymnasium compatibility: Return environment specification.""" + return self._spec + + @spec.setter + def spec(self, value): + """Gymnasium compatibility: Set environment specification.""" + self._spec = value + + def _reset_sim(self): + """Reset the simulation state""" + try: + print("Teleporting ball...") + teleport_ball(0, 0) + print("Sent command to teleport ball to (0, 0, 0.0)") + print("Resetting referee state...") + reset_referee_state() + print("Starting game...") + start_game() + + self.shaped_reward_given = False + self.is_yellow_dribbling = False + self.is_blue_dribbling = False + self.yellow_score = 0 + self.blue_score = 0 + except Exception as e: + print(f"Error resetting simulation: {e}") + + def _get_observation(self): + """Get the current observation from the simulator""" + try: + # Get robot and ball state + self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() + self.ball_position, self.ball_quadrant = get_ball_state() + + # Convert and flatten robot positions + robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements + ball_quadrant = np.array([float(self.ball_quadrant)], dtype=np.float64) # 1 element + is_yellow_dribbling = np.array([float(self.is_yellow_dribbling)], dtype=np.float64) # 1 element + + # Combine all parts + observation = np.concatenate([ + robot_positions_flat, # 8 elements + ball_quadrant, # 1 element + is_yellow_dribbling, # 1 element + np.zeros(5, dtype=np.float64) # 5 elements padding + ]) + + return observation.reshape(15,) + except 
Exception as e:
+            print(f"Error getting observation: {e}")
+            return np.zeros(15, dtype=np.float64)
+
+    def _execute_action(self, action):
+        """Execute the action in the simulator"""
+        try:
+            print(f"x {self.x}")
+            print(f"y {self.y}")
+            print(f"Stepping with action: {action}")
+
+            attackers, defenders = action
+
+            # Ensure non-negative values and total of MAX_ROBOTS_US
+            attackers = max(0, min(attackers, self.MAX_ROBOTS_US))
+            defenders = max(0, min(defenders, self.MAX_ROBOTS_US - attackers))
+            wallers = self.MAX_ROBOTS_US - (attackers + defenders)
+
+            # Send action to simulator
+            send_action_command(
+                num_attacker=attackers,
+                num_defender=defenders,
+                num_waller=wallers
+            )
+        except Exception as e:
+            print(f"Error executing action: {e}")
+
+    def _check_ball_placement(self):
+        """Handle ball placement commands"""
+        try:
+            # get_referee_state() now returns a flat tuple (see getState.py below)
+            _, _, _, self.ref_command, x, y = get_referee_state()
+
+            if (self.ref_command == 16 or self.ref_command == 17):
+                self.x = x / 1000
+                self.y = y / 1000
+                teleport_ball(self.x, self.y)
+        except Exception as e:
+            print(f"Error in ball placement: {e}")
+
+    def _calculate_reward(self):
+        """Calculate the reward for the current state"""
+        try:
+            goal_scored_reward = 0
+            if self.yellow_score == 1:
+                goal_scored_reward = 1
+            elif self.blue_score == 1:
+                goal_scored_reward = -1
+
+            shaped_reward = 0
+            if not self.shaped_reward_given and self.is_yellow_dribbling and (
+                    self.ball_quadrant == 1 or self.ball_quadrant == 3):
+                self.shaped_reward_given = True
+                shaped_reward = 0.1
+
+            return goal_scored_reward + shaped_reward
+        except Exception as e:
+            print(f"Error calculating reward: {e}")
+            return 0.0
+
+    def _is_terminated(self):
+        """Check if episode should terminate"""
+        try:
+            # Unpack the flat tuple returned by the updated get_referee_state()
+            self.yellow_score, self.blue_score, _, self.ref_command, _, _ = get_referee_state()
+
+            return (self.ref_command == 0 and
+                    (self.yellow_score == 1 or self.blue_score == 1))
+        except Exception as e:
+            print(f"Error checking termination: {e}")
+            return False
+
+    def run(self):
+        """Main control loop for the external environment."""
+        while True:
+            try:
+                # Start new episode
+                episode_id = self.start_episode()
+                self._reset_sim()
+                observation = self._get_observation()
+
+                while True:
+                    # Get action from policy
+                    action = self.get_action(episode_id, observation)
+
+                    # Handle ball placement
+                    self._check_ball_placement()
+
+                    # Execute action if game is running; ref_command is numeric now
+                    # (2 == NORMAL_START in the SSL referee Command enum)
+                    if self.ref_command == 2:
+                        self._execute_action(action)
+
+                    # Calculate reward before getting next observation
+                    reward = self._calculate_reward()
+                    self.log_returns(episode_id, reward)
+
+                    # Get next observation
+                    observation = self._get_observation()
+
+                    # Check if episode is done
+                    if self._is_terminated():
+                        self.end_episode(episode_id, observation)
+                        break
+
+                    time.sleep(0.1) # Control loop rate
+
+            except Exception as e:
+                print(f"Error in main loop: {e}")
+                time.sleep(1) # Wait before retrying
\ No newline at end of file
diff --git a/roboteam_ai/src/RL/src/getState.py b/roboteam_ai/src/RL/src/getState.py
index e040f19ce..7dd206848 100644
--- a/roboteam_ai/src/RL/src/getState.py
+++ b/roboteam_ai/src/RL/src/getState.py
@@ -21,16 +21,18 @@
 # from roboteam_networking.proto.ssl_gc_api_pb2 import Output as RefereeState # Alias for referee state
 from 
roboteam_networking.proto.messages_robocup_ssl_referee_pb2 import * # Alias for referee state -IS_IN_K8S = True +def is_kubernetes(): + """Detect if running in Kubernetes environment""" + return os.getenv('KUBERNETES_SERVICE_HOST') is not None def get_zmq_address(): """Get the appropriate ZMQ address based on environment""" - if IS_IN_K8S: + if is_kubernetes(): host = "roboteam-ray-worker-svc" - print("Running in Kubernetes, using service DNS") + #print("Running in Kubernetes, using service DNS") else: host = "localhost" - print("Running locally") + #print("Running locally") return f"tcp://{host}:5558" # Function to get the ball state @@ -46,13 +48,13 @@ def get_ball_state(): socket_world.setsockopt_string(zmq.SUBSCRIBE, "") zmq_address = get_zmq_address() - print(f"Connecting to ZMQ at: {zmq_address}") + #print(f"Connecting to ZMQ at: {zmq_address}") socket_world.connect(zmq_address) try: - print("Waiting for ZMQ message...") + #print("Waiting for ZMQ message...") message = socket_world.recv() - print("Received ZMQ message") + #print("Received ZMQ message") state = RoboState.FromString(message) if not len(state.processed_vision_packets): @@ -64,8 +66,8 @@ def get_ball_state(): ball_position[0] = world.ball.pos.x ball_position[1] = world.ball.pos.y - print("x",ball_position[0]) - print("y",ball_position[1]) + #print("x",ball_position[0]) + #print("y",ball_position[1]) if abs(ball_position[0]) <= CENTER_THRESHOLD and abs(ball_position[1]) <= CENTER_THRESHOLD: ball_quadrant = 4 # Center @@ -94,7 +96,7 @@ def get_robot_state(): socket_world.setsockopt_string(zmq.SUBSCRIBE, "") zmq_address = get_zmq_address() - print(f"Connecting to ZMQ at: {zmq_address}") + #print(f"Connecting to ZMQ at: {zmq_address}") socket_world.connect(zmq_address) try: @@ -150,59 +152,33 @@ def get_referee_state(): sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) try: - # Receive the data + sock.settimeout(5) message, _ = sock.recvfrom(4096) referee_state = Referee.FromString(message) - # Extract information from TeamInfo for Yellow and Blue teams - yellow_team_info = referee_state.yellow - blue_team_info = referee_state.blue - - # Get details from Yellow TeamInfo - yellow_team_data = { - "name": yellow_team_info.name, - "score": yellow_team_info.score, - "red_cards": yellow_team_info.red_cards, - "yellow_cards": yellow_team_info.yellow_cards, - "fouls": yellow_team_info.foul_counter, - "ball_placement_failures": yellow_team_info.ball_placement_failures - } - - # Get details from Blue TeamInfo - blue_team_data = { - "name": blue_team_info.name, - "score": blue_team_info.score, - "red_cards": blue_team_info.red_cards, - "yellow_cards": blue_team_info.yellow_cards, - "fouls": blue_team_info.foul_counter, - "ball_placement_failures": blue_team_info.ball_placement_failures - } - - # Return both the raw referee_state object and a dictionary of extracted details + # Get x,y values directly + x, y = 0, 0 # Default values if referee_state.HasField('designated_position'): - designated_position = { - "x": referee_state.designated_position.x, - "y": referee_state.designated_position.y - } - else: - designated_position = None - - # Return both the raw referee_state object and a dictionary of extracted details - return referee_state, { - "yellow_team": yellow_team_data, - "blue_team": blue_team_data, - "stage": referee_state.stage, - "command": referee_state.command, - "designated_position": designated_position - } - - except DecodeError: - print("Failed to decode referee state message") + x = 
referee_state.designated_position.x
+            y = referee_state.designated_position.y
+
+        return (
+            referee_state.yellow.score,
+            referee_state.blue.score,
+            referee_state.stage,
+            referee_state.command,
+            x,
+            y
+        )
+    except socket.timeout:
+        print("Referee state timeout, returning defaults")
+        return 0, 0, 0, 0, 0, 0  # Default values
     except Exception as e:
-        print(f"Error: {e}")
+        print(f"Error getting referee state: {e}")
+        return 0, 0, 0, 0, 0, 0  # Default values
     finally:
         sock.close()
-    
+
 if __name__ == "__main__":
     # Get robot state
     grid_array, yellow_team_dribbling, blue_team_dribbling = get_robot_state()
diff --git a/roboteam_ai/src/RL/src/resetReferee.py b/roboteam_ai/src/RL/src/resetReferee.py
index 76591bbd3..80d62ea20 100644
--- a/roboteam_ai/src/RL/src/resetReferee.py
+++ b/roboteam_ai/src/RL/src/resetReferee.py
@@ -5,6 +5,10 @@
 import time
 from google.protobuf.timestamp_pb2 import Timestamp
 
+"""
+NOT USED
+"""
+
 current_dir = os.path.dirname(os.path.abspath(__file__))
 roboteam_path = os.path.abspath(os.path.join(current_dir, "..", "..", "..", ".."))
 
diff --git a/roboteam_ai/src/RL/src/websocketHandler.py b/roboteam_ai/src/RL/src/websocketHandler.py
index d7d66126a..a3ed8838b 100644
--- a/roboteam_ai/src/RL/src/websocketHandler.py
+++ b/roboteam_ai/src/RL/src/websocketHandler.py
@@ -3,16 +3,18 @@
 import asyncio
 import json
 
+import os  # needed by is_kubernetes() below; assumed not already imported above this hunk
+
-IS_IN_K8S = True # We run it locally.
+def is_kubernetes():
+    """Detect if running in Kubernetes environment"""
+    return os.getenv('KUBERNETES_SERVICE_HOST') is not None
 
 def get_websocket_uri():
     """Get the appropriate URI based on the environment"""
-    if IS_IN_K8S:
+    if is_kubernetes():
         host = "roboteam-ray-worker-svc"
-        print("Running in Kubernetes, using service DNS")
+        #print("Running in Kubernetes, using service DNS")
     else:
         host = "localhost"
-        print("Running locally")
+        #print("Running locally")
     return f"ws://{host}:8081/api/control"
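For reference, a minimal sketch of a client for this control endpoint, assuming the `websockets` package added in Dockerfile.ray above (the JSON payload and response handling are assumptions, not the documented /api/control schema):

```python
import asyncio
import json

import websockets  # added to the image in Dockerfile.ray above

from roboteam_ai.src.RL.src.websocketHandler import get_websocket_uri

async def send_control_message(payload: dict) -> str:
    # Open a websocket to the interface and exchange one JSON message
    async with websockets.connect(get_websocket_uri()) as ws:
        await ws.send(json.dumps(payload))
        return await ws.recv()  # wait for a single reply

if __name__ == "__main__":
    # Hypothetical payload; the real schema is not shown in this diff
    print(asyncio.run(send_control_message({"command": "ping"})))
```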