diff --git a/roboteam_ai/src/RL/RL_Ray/README.md b/roboteam_ai/src/RL/RL_Ray/README.md
deleted file mode 100644
index 44eb6a09d..000000000
--- a/roboteam_ai/src/RL/RL_Ray/README.md
+++ /dev/null
@@ -1 +0,0 @@
-RL_ray is meant to house the full RL stack that runs on Ray/Rlib
\ No newline at end of file
diff --git a/roboteam_ai/src/RL/RL_Ray/example_ray_job.py b/roboteam_ai/src/RL/RL_Ray/example_ray_job.py
deleted file mode 100644
index e78121a8f..000000000
--- a/roboteam_ai/src/RL/RL_Ray/example_ray_job.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import ray
-#Case 1: the function will be distributed across the ray cluster
-@ray.remote
-def compute_square(x):
-    return x * x
-
-#Case 2: the class will be distributes across th ray cluster
-@ray.remote
-class Counter:
-    def __init__(self):
-        self.count = 0
-
-    def increment(self):
-        self.count += 1
-        return self.count
-
-#Testing case 1:
-future = compute_square.remote(5)
-result = ray.get(future)
-print(f"Result: {result}")
-
-#submiting multiple tasks in parallel
-futures = [compute_square.remote(i) for i in range(10)]
-results = ray.get(futures)
-print(f"Results: {results}")
-#Testing case 2, using the class:
-counter = Counter.remote()
-future_count = counter.increment.remote()
-print(f"Count: {ray.get(future_count)}")
diff --git a/roboteam_ai/src/RL/RL_Ray/ray_jobs.py b/roboteam_ai/src/RL/RL_Ray/ray_jobs.py
deleted file mode 100644
index 2d781dc9a..000000000
--- a/roboteam_ai/src/RL/RL_Ray/ray_jobs.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import ray
-import requests
-
-#connect to the head node, to submit jobs
-#ray.init(address=os.getenv('RAY_ADDRESS'))
-
-#@ray.remote
-#def run_simulation(simulation_name, port):
-#    print(f"Running simulation {simulation_name} on port {port}")
-#    return f"Simulation {simulation_name} completed on port {port}"
-
-#result = ray.get(compute_square.remote(4))
-#print(result)
-
-#result
-#iiii
-job_payload = {
-    "entrypoint": "python3 example_ray_job.py"
-}
-
-response = requests.post("http://localhost:8265/api/jobs/", json=job_payload)
-print(response.json())
diff --git a/roboteam_ai/src/RL/env_external.py b/roboteam_ai/src/RL/env_external.py
deleted file mode 100644
index 5131b397f..000000000
--- a/roboteam_ai/src/RL/env_external.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Currently not in use, might start using it to go to a different way of looping through environment,
-right now completely GPT'ed, do not use.
-""" - - -import numpy as np -from gymnasium import spaces -from ray.rllib.env.external_env import ExternalEnv -import time -import os -import sys -from google.protobuf.message import DecodeError - -# Make root folder /roboteam -current_dir = os.path.dirname(os.path.abspath(__file__)) -roboteam_path = os.path.abspath(os.path.join(current_dir, "../../..")) -sys.path.append(roboteam_path) - -from roboteam_ai.src.RL.src.sentActionCommand import send_action_command -from roboteam_ai.src.RL.src.getState import get_ball_state, get_robot_state, get_referee_state -from roboteam_ai.src.RL.src.teleportBall import teleport_ball -from roboteam_ai.src.RL.src.resetRefereeAPI import reset_referee_state -from roboteam_ai.src.RL.src.changeGameState import start_game - -class RoboTeamEnv(ExternalEnv): - def __init__(self, config=None): - # Initialize state variables - self.MAX_ROBOTS_US = 10 - self.robot_grid = np.zeros((4, 2), dtype=int) - self.ball_position = np.zeros((1,2)) - self.ball_quadrant = 4 - self.yellow_score = 0 - self.blue_score = 0 - self.x = 0 - self.y = 0 - self.yellow_yellow_cards = 0 - self.blue_yellow_cards = 0 - self.ref_command = "" - self.shaped_reward_given = False - self.is_yellow_dribbling = False - self.is_blue_dribbling = False - - # Define spaces before calling parent init - action_space = spaces.MultiDiscrete([self.MAX_ROBOTS_US + 1, self.MAX_ROBOTS_US + 1]) - observation_space = spaces.Box( - low=float('-inf'), - high=float('inf'), - shape=(15,), - dtype=np.float64 - ) - - # Initialize ExternalEnv parent with the spaces - super().__init__(action_space=action_space, observation_space=observation_space) - - # Add Gymnasium compatibility attributes - self._spec = None - self.metadata = { - 'render_modes': [], - 'render.modes': [], - 'render_fps': None - } - self.reward_range = (-float('inf'), float('inf')) - - @property - def unwrapped(self): - """Gymnasium compatibility: Return the base environment.""" - return self - - @property - def spec(self): - """Gymnasium compatibility: Return environment specification.""" - return self._spec - - @spec.setter - def spec(self, value): - """Gymnasium compatibility: Set environment specification.""" - self._spec = value - - def _reset_sim(self): - """Reset the simulation state""" - try: - print("Teleporting ball...") - teleport_ball(0, 0) - print("Sent command to teleport ball to (0, 0, 0.0)") - print("Resetting referee state...") - reset_referee_state() - print("Starting game...") - start_game() - - self.shaped_reward_given = False - self.is_yellow_dribbling = False - self.is_blue_dribbling = False - self.yellow_score = 0 - self.blue_score = 0 - except Exception as e: - print(f"Error resetting simulation: {e}") - - def _get_observation(self): - """Get the current observation from the simulator""" - try: - # Get robot and ball state - self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() - self.ball_position, self.ball_quadrant = get_ball_state() - - # Convert and flatten robot positions - robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements - ball_quadrant = np.array([float(self.ball_quadrant)], dtype=np.float64) # 1 element - is_yellow_dribbling = np.array([float(self.is_yellow_dribbling)], dtype=np.float64) # 1 element - - # Combine all parts - observation = np.concatenate([ - robot_positions_flat, # 8 elements - ball_quadrant, # 1 element - is_yellow_dribbling, # 1 element - np.zeros(5, dtype=np.float64) # 5 elements padding - ]) - - return observation.reshape(15,) - except 
-            print(f"Error getting observation: {e}")
-            return np.zeros(15, dtype=np.float64)
-
-    def _execute_action(self, action):
-        """Execute the action in the simulator"""
-        try:
-            print(f"x {self.x}")
-            print(f"y {self.y}")
-            print(f"Stepping with action: {action}")
-
-            attackers, defenders = action
-
-            # Ensure non-negative values and total of MAX_ROBOTS_US
-            attackers = max(0, min(attackers, self.MAX_ROBOTS_US))
-            defenders = max(0, min(defenders, self.MAX_ROBOTS_US - attackers))
-            wallers = self.MAX_ROBOTS_US - (attackers + defenders)
-
-            # Send action to simulator
-            send_action_command(
-                num_attacker=attackers,
-                num_defender=defenders,
-                num_waller=wallers
-            )
-        except Exception as e:
-            print(f"Error executing action: {e}")
-
-    def _check_ball_placement(self):
-        """Handle ball placement commands"""
-        try:
-            referee_state, referee_info = get_referee_state()
-            self.ref_command = referee_info['command']
-
-            if (self.ref_command == 16 or self.ref_command == 17):
-                if referee_info["designated_position"] is not None:
-                    self.x = referee_info["designated_position"]["x"]/1000
-                    self.y = referee_info["designated_position"]["y"]/1000
-                    teleport_ball(self.x, self.y)
-        except Exception as e:
-            print(f"Error in ball placement: {e}")
-
-    def _calculate_reward(self):
-        """Calculate the reward for the current state"""
-        try:
-            goal_scored_reward = 0
-            if self.yellow_score == 1:
-                goal_scored_reward = 1
-            elif self.blue_score == 1:
-                goal_scored_reward = -1
-
-            shaped_reward = 0
-            if not self.shaped_reward_given and self.is_yellow_dribbling and (
-                    self.ball_quadrant == 1 or self.ball_quadrant == 3):
-                self.shaped_reward_given = True
-                shaped_reward = 0.1
-
-            return goal_scored_reward + shaped_reward
-        except Exception as e:
-            print(f"Error calculating reward: {e}")
-            return 0.0
-
-    def _is_terminated(self):
-        """Check if episode should terminate"""
-        try:
-            referee_state, referee_info = get_referee_state()
-            self.ref_command = referee_info['command']
-            self.yellow_score = referee_state.yellow.score
-            self.blue_score = referee_state.blue.score
-
-            return (self.ref_command == 0 and
-                    (self.yellow_score == 1 or self.blue_score == 1))
-        except Exception as e:
-            print(f"Error checking termination: {e}")
-            return False
-
-    def run(self):
-        """Main control loop for the external environment."""
-        while True:
-            try:
-                # Start new episode
-                episode_id = self.start_episode()
-                self._reset_sim()
-                observation = self._get_observation()
-
-                while True:
-                    # Get action from policy
-                    action = self.get_action(episode_id, observation)
-
-                    # Handle ball placement
-                    self._check_ball_placement()
-
-                    # Execute action if game is running
-                    if self.ref_command == "RUNNING":
-                        self._execute_action(action)
-
-                    # Calculate reward before getting next observation
-                    reward = self._calculate_reward()
-                    self.log_returns(episode_id, reward)
-
-                    # Get next observation
-                    observation = self._get_observation()
-
-                    # Check if episode is done
-                    if self._is_terminated():
-                        self.end_episode(episode_id, observation)
-                        break
-
-                    time.sleep(0.1)  # Control loop rate
-
-            except Exception as e:
-                print(f"Error in main loop: {e}")
-                time.sleep(1)  # Wait before retrying
\ No newline at end of file
diff --git a/roboteam_ai/src/RL/not_used/getBallState.py b/roboteam_ai/src/RL/not_used/getBallState.py
deleted file mode 100644
index 752e0e117..000000000
--- a/roboteam_ai/src/RL/not_used/getBallState.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import sys
-import os
-import zmq
-from google.protobuf.message import DecodeError
-import numpy as np
-
-'''
-GetBallState.py is a script to get the state of the ball (xy pos)
-This data will be fed into the RL. It uses the proto:State object.
-'''
-
-# Make sure to go back to the main roboteam directory
-current_dir = os.path.dirname(os.path.abspath(__file__))
-roboteam_path = os.path.abspath(os.path.join(current_dir, "..", "..", "..", ".."))
-
-# Add to sys.path
-sys.path.append(roboteam_path)
-
-# Now import the generated protobuf classes
-from roboteam_networking.proto.State_pb2 import State
-
-def get_ball_state():
-
-    # Initialize array
-    data_array = np.zeros((1, 2))  # Array of 1 row, with xy values
-
-    context = zmq.Context()
-    socket_world = context.socket(zmq.SUB)
-    socket_world.setsockopt_string(zmq.SUBSCRIBE, "")
-    socket_world.connect("tcp://127.0.0.1:5558")
-
-    while True:
-        try:
-            message = socket_world.recv()
-            state = State.FromString(message)
-
-            if not len(state.processed_vision_packets):
-                continue
-
-            world = state.last_seen_world
-
-            # Get ball information
-            if world.HasField("ball"):
-                # Determine which half the ball is in
-                if world.ball.pos.x < 0:
-                    data_array[0] = [1, 0]  # Left half
-                    left_count = 1
-                    right_count = 0
-                else:
-                    data_array[0] = [0, 1]  # Right half
-                    left_count = 0
-                    right_count = 1
-
-            print(data_array)
-
-        except DecodeError:
-            print("Failed to decode protobuf message")
-        except zmq.ZMQError as e:
-            print(f"ZMQ Error: {e}")
-
-    return data_array, left_count, right_count
-
-if __name__ == "__main__":
-    get_ball_state()
\ No newline at end of file
diff --git a/roboteam_ai/src/RL/not_used/getRobotState.py b/roboteam_ai/src/RL/not_used/getRobotState.py
deleted file mode 100644
index c7044478c..000000000
--- a/roboteam_ai/src/RL/not_used/getRobotState.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import sys
-import os
-import zmq
-from google.protobuf.message import DecodeError
-import numpy as np
-
-'''
-GetRobotState.py is a script to get the state of the robots. It uses the proto:State object.
-It returns an array of 20 rows by 2 columns (10 vs 10 excluding keepers) of the current robot positions
-
-Right now it also creates an array of all robot positions but we don't do anything with that.
-'''
-
-# Make sure to go back to the main roboteam directory
-current_dir = os.path.dirname(os.path.abspath(__file__))
-roboteam_path = os.path.abspath(os.path.join(current_dir, "..", "..", "..", ".."))
-
-# Add to sys.path
-sys.path.append(roboteam_path)
-
-# Now import the generated protobuf classes
-from roboteam_networking.proto.State_pb2 import State
-
-def get_robot_state():
-
-    # Initialize array
-    robot_data = np.zeros((20, 2))  # Array of 20 rows, each filled with xy values
-
-    # Initialize counters
-    left_count_yellow = 0
-    right_count_yellow = 0
-    left_count_blue = 0
-    right_count_blue = 0
-
-    context = zmq.Context()
-    socket_world = context.socket(zmq.SUB)
-    socket_world.setsockopt_string(zmq.SUBSCRIBE, "")
-    socket_world.connect("tcp://127.0.0.1:5558")
-
-    try:
-        message = socket_world.recv()
-        state = State.FromString(message)
-
-        if not len(state.processed_vision_packets):
-            return robot_data, left_count_yellow, right_count_yellow, left_count_blue, right_count_blue
-
-        world = state.last_seen_world
-
-        # Process yellow robots
-        for i in range(10):  # loop 10 times
-            bot = world.yellow[i+1]  # Skip keeper
-            robot_data[i] = [bot.pos.x, bot.pos.y]
-            if bot.pos.x <= 0:
-                left_count_yellow += 1
-            else:
-                right_count_yellow += 1
-
-        # Process blue robots
-        for i in range(10):  # loop 10 times
-            bot = world.blue[i+1]  # Skip keeper
-            robot_data[i+10] = [bot.pos.x, bot.pos.y]
-            if bot.pos.x <= 0:
-                left_count_blue += 1
-            else:
-                right_count_blue += 1
-
-        print(robot_data)
-        print(f"Yellow robots: Left - {left_count_yellow}, Right - {right_count_yellow}")
-        print(f"Blue robots: Left - {left_count_blue}, Right - {right_count_blue}")
-
-    except DecodeError:
-        print("Failed to decode protobuf message")
-    except zmq.ZMQError as e:
-        print(f"ZMQ Error: {e}")
-    finally:
-        socket_world.close()
-        context.term()
-
-    return left_count_yellow, right_count_yellow, left_count_blue, right_count_blue
-
-if __name__ == "__main__":
-    get_robot_state()
\ No newline at end of file
diff --git a/roboteam_ai/src/RL/trainRay.py b/roboteam_ai/src/RL/trainRay.py
deleted file mode 100644
index 7f0e973b9..000000000
--- a/roboteam_ai/src/RL/trainRay.py
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env python3.8
-
-from env2 import RoboTeamEnv  # Ensure this points to your environment
-import ray
-import numpy as np
-from ray.rllib.algorithms.ppo import PPOConfig
-from ray.rllib.algorithms.callbacks import DefaultCallbacks
-import os
-import time
-
-class CustomEvalCallbacks(DefaultCallbacks):
-    eval_env_creator = None
-    eval_freq = 70
-    n_eval_episodes = 3
-
-    @classmethod
-    def set_parameters(cls, eval_env_creator, eval_freq=70, n_eval_episodes=3):
-        cls.eval_env_creator = eval_env_creator
-        cls.eval_freq = eval_freq
-        cls.n_eval_episodes = n_eval_episodes
-
-    def on_train_result(self, *, algorithm, result: dict, **kwargs):
-        # Use class variables
-        if result["timesteps_total"] % self.eval_freq == 0:
-            eval_rewards = []
-            eval_env = self.eval_env_creator()
-            for _ in range(self.n_eval_episodes):
-                episode_reward = 0
-                obs, _ = eval_env.reset()
-                done = False
-                while not done:
-                    action = algorithm.compute_single_action(obs)
-                    obs, reward, done, truncated, _ = eval_env.step(action)
-                    time.sleep(0.25)
-                    episode_reward += reward
-                eval_rewards.append(episode_reward)
-
-            mean_reward = np.mean(eval_rewards)
-            std_reward = np.std(eval_rewards)
-
-            # Save the model if it is the best so far
-            if mean_reward > self.best_mean_reward:
-                self.best_mean_reward = mean_reward
-                checkpoint_dir = algorithm.save("models/best_model_PPO")
- print(f"New best model saved at {checkpoint_dir} with mean reward {mean_reward:.2f}") - - # Log evaluation metrics - result['custom_metrics']["mean_eval_reward"] = mean_reward - result['custom_metrics']["std_eval_reward"] = std_reward - print(f"Evaluation at step {result['timesteps_total']}: Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}") - -def main(): - # Set parameters for CustomEvalCallbacks - eval_env_creator = lambda _: RoboTeamEnv(env_config={}) # Modified to accept one argument - CustomEvalCallbacks.set_parameters(eval_env_creator=eval_env_creator, eval_freq=20, n_eval_episodes=1) - - # Initialize Ray and configure PPO algorithm - ray.init(ignore_reinit_error=True) - config = PPOConfig() - config.environment(RoboTeamEnv, env_config={}) - config.framework("torch") - config.env_runners(num_env_runners=1) - config.training( - train_batch_size=2048, - lr=0.0003, - gamma=0.99, - ) - config.sgd_minibatch_size = 64 - config.gae_lambda = 0.95 - config.clip_param = 0.2 - config.entropy_coeff = 0.0 - config.vf_loss_coeff = 0.5 - config.callbacks(CustomEvalCallbacks) - - trainer = config.build() - - total_timesteps = 5000 - train_batch_size = config.train_batch_size - total_iterations = total_timesteps // train_batch_size - - for iteration in range(total_iterations): - result = trainer.train() - mean_reward = result['episode_reward_mean'] - print(f"Iteration {iteration + 1}: Mean reward = {mean_reward}") - - ray.shutdown() - -if __name__ == '__main__': - main() diff --git a/roboteam_ai/src/RL/README.md b/roboteam_ai/src/rl/README.md similarity index 100% rename from roboteam_ai/src/RL/README.md rename to roboteam_ai/src/rl/README.md diff --git a/roboteam_ai/src/RL/env.py b/roboteam_ai/src/rl/env.py similarity index 96% rename from roboteam_ai/src/RL/env.py rename to roboteam_ai/src/rl/env.py index 39f5c6aa3..83203d688 100644 --- a/roboteam_ai/src/RL/env.py +++ b/roboteam_ai/src/rl/env.py @@ -16,14 +16,14 @@ sys.path.append(roboteam_path) # Now import the functions -from roboteam_ai.src.RL.src.sentActionCommand import send_action_command -from roboteam_ai.src.RL.src.getState import get_ball_state, get_robot_state, get_referee_state -from roboteam_ai.src.RL.src.teleportBall import teleport_ball -from roboteam_ai.src.RL.src.resetRefereeAPI import reset_referee_state -from roboteam_ai.src.RL.src.changeGameState import start_game +from roboteam_ai.src.rl.src.sentActionCommand import send_action_command +from roboteam_ai.src.rl.src.getState import get_ball_state, get_robot_state, get_referee_state +from roboteam_ai.src.rl.src.teleportBall import teleport_ball +from roboteam_ai.src.rl.src.resetRefereeAPI import reset_referee_state +from roboteam_ai.src.rl.src.changeGameState import start_game """ -This environment file is in the form of a gymnasium environment. +This environment file is in the form of a gymnasium environment. It is used for SB3 We are yellow and we play against blue. 
 Yellow cards do not stop the game, but maybe in the future it is nice to implement a punishment
diff --git a/roboteam_ai/src/RL/env2.py b/roboteam_ai/src/rl/env_ray.py
similarity index 93%
rename from roboteam_ai/src/RL/env2.py
rename to roboteam_ai/src/rl/env_ray.py
index 939b6a86b..898af2963 100644
--- a/roboteam_ai/src/RL/env2.py
+++ b/roboteam_ai/src/rl/env_ray.py
@@ -16,11 +16,11 @@ sys.path.append(roboteam_path)
 
 # Now import the functions
-from roboteam_ai.src.RL.src.sentActionCommand import send_action_command
-from roboteam_ai.src.RL.src.getState import get_ball_state, get_robot_state, get_referee_state
-from roboteam_ai.src.RL.src.teleportBall import teleport_ball
-from roboteam_ai.src.RL.src.resetRefereeAPI import reset_referee_state
-from roboteam_ai.src.RL.src.changeGameState import start_game
+from roboteam_ai.src.rl.src.sentActionCommand import send_action_command
+from roboteam_ai.src.rl.src.getState import get_ball_state, get_robot_state, get_referee_state
+from roboteam_ai.src.rl.src.teleportBall import teleport_ball
+from roboteam_ai.src.rl.src.resetRefereeAPI import reset_referee_state
+from roboteam_ai.src.rl.src.changeGameState import start_game
 
 """
 This environment file is in the form of a gymnasium environment.
@@ -218,11 +218,7 @@ def step(self, action):
 
             # If STOP, we truncate
             elif self.ref_command in (0, 1): # HALT or STOP
-                if self.ref_command == 0 and (self.yellow_score > 0 or self.blue_score > 0):
-                    print('Goal is scored, resetting game state')
-                    break
-                print("Game truncated due to false goal or random STOP")
-                break
+                break # Code will break if it hits HALT or STOP, then it checks if it truncated or a goal was scored.
 
             elif self.ref_command in (16, 17): # Ball placement
                 self.teleport_ball_with_check(self.x, self.y)
@@ -244,6 +240,10 @@ def is_terminated(self):
         """
         if self.ref_command == 0 and (self.yellow_score == 1 or self.blue_score == 1): # HALT command indicates that either team scored
+            if self.yellow_score == 1:
+                print("Yellow team scored")
+            elif self.blue_score == 1:
+                print("Blue team scored")
             return True
         else:
             return False
@@ -251,13 +251,15 @@ def is_terminated(self):
 
     def is_truncated(self):
         """
        is_truncated checks if game should end prematurely:
-        - On HALT command with no goals scored 
+        - On HALT command with no goals scored
         - On STOP command
         """
         if self.ref_command == 0: # HALT
             if (self.yellow_score == 0 and self.blue_score == 0):
+                print("Game truncated, goal wasn't accepted")
                 return True
         if self.ref_command == 1: # STOP
+            print("Game truncated, random STOP called")
             return True
         return False
diff --git a/roboteam_ai/src/RL/RL_Ray/simulator.yaml b/roboteam_ai/src/rl/simulator.yaml
similarity index 100%
rename from roboteam_ai/src/RL/RL_Ray/simulator.yaml
rename to roboteam_ai/src/rl/simulator.yaml
diff --git a/roboteam_ai/src/RL/src/changeGameState.py b/roboteam_ai/src/rl/src/changeGameState.py
similarity index 100%
rename from roboteam_ai/src/RL/src/changeGameState.py
rename to roboteam_ai/src/rl/src/changeGameState.py
diff --git a/roboteam_ai/src/RL/src/getState.py b/roboteam_ai/src/rl/src/getState.py
similarity index 100%
rename from roboteam_ai/src/RL/src/getState.py
rename to roboteam_ai/src/rl/src/getState.py
diff --git a/roboteam_ai/src/RL/src/resetReferee.py b/roboteam_ai/src/rl/src/resetReferee.py
similarity index 100%
rename from roboteam_ai/src/RL/src/resetReferee.py
rename to roboteam_ai/src/rl/src/resetReferee.py
diff --git a/roboteam_ai/src/RL/src/resetRefereeAPI.py b/roboteam_ai/src/rl/src/resetRefereeAPI.py
similarity index 100%
rename from roboteam_ai/src/RL/src/resetRefereeAPI.py
rename to roboteam_ai/src/rl/src/resetRefereeAPI.py
diff --git a/roboteam_ai/src/RL/src/sentActionCommand.py b/roboteam_ai/src/rl/src/sentActionCommand.py
similarity index 100%
rename from roboteam_ai/src/RL/src/sentActionCommand.py
rename to roboteam_ai/src/rl/src/sentActionCommand.py
diff --git a/roboteam_ai/src/RL/src/teleportBall.py b/roboteam_ai/src/rl/src/teleportBall.py
similarity index 100%
rename from roboteam_ai/src/RL/src/teleportBall.py
rename to roboteam_ai/src/rl/src/teleportBall.py
diff --git a/roboteam_ai/src/RL/src/websocketHandler.py b/roboteam_ai/src/rl/src/websocketHandler.py
similarity index 100%
rename from roboteam_ai/src/RL/src/websocketHandler.py
rename to roboteam_ai/src/rl/src/websocketHandler.py
diff --git a/roboteam_ai/src/RL/train.py b/roboteam_ai/src/rl/train.py
similarity index 100%
rename from roboteam_ai/src/RL/train.py
rename to roboteam_ai/src/rl/train.py
diff --git a/roboteam_ai/src/RL/RL_Ray/train.py b/roboteam_ai/src/rl/train_ray.py
similarity index 96%
rename from roboteam_ai/src/RL/RL_Ray/train.py
rename to roboteam_ai/src/rl/train_ray.py
index b532c677d..a27f4ef2a 100644
--- a/roboteam_ai/src/RL/RL_Ray/train.py
+++ b/roboteam_ai/src/rl/train_ray.py
@@ -9,11 +9,11 @@ from ray.tune.registry import register_env
 
 # Add roboteam_ai to Python path
-roboteam_ai_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../.."))
+roboteam_ai_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
 print(f"Adding to PYTHONPATH: {roboteam_ai_root}")  # Debug print
 sys.path.append(roboteam_ai_root)
 
-from roboteam_ai.src.RL.env2 import RoboTeamEnv
+from roboteam_ai.src.rl.env_ray import RoboTeamEnv
 
 import warnings
 warnings.filterwarnings('ignore', category=DeprecationWarning)
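
For reference, below is a minimal sketch of how the relocated environment could be wired into RLlib after this rename, mirroring the post-rename import path and the register_env import visible in the train_ray.py hunk above. The environment name "roboteam_env", the env_config-based constructor call, and the PPO settings are illustrative assumptions, not values taken from this diff:

    import ray
    from ray.rllib.algorithms.ppo import PPOConfig
    from ray.tune.registry import register_env

    # Post-rename import path, as shown in the final hunk of this diff
    from roboteam_ai.src.rl.env_ray import RoboTeamEnv

    ray.init(ignore_reinit_error=True)

    # Register the env under a lookup name ("roboteam_env" is an assumed label);
    # the env_config keyword follows the constructor usage seen in the removed trainRay.py
    register_env("roboteam_env", lambda cfg: RoboTeamEnv(env_config=cfg))

    config = (
        PPOConfig()
        .environment("roboteam_env", env_config={})
        .framework("torch")
    )
    algo = config.build()
    print(algo.train())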