Skip to content

Commit

Permalink
Initial version working on ray cluster (does not return observations yet)

Browse files Browse the repository at this point in the history
  • Loading branch information
flimdejong committed Nov 20, 2024
1 parent 396e527 commit 15b0666
Show file tree
Hide file tree
Showing 7 changed files with 262 additions and 251 deletions.
71 changes: 61 additions & 10 deletions docker/runner/ray-cluster-combined.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
- key: kubernetes.io/hostname
operator: In
values:
- multinode-demo # Schedule on control-plane
- ray # Schedule on control-plane
containers:
- name: ray-head
image: roboteamtwente/ray:development
Expand Down Expand Up @@ -78,6 +78,8 @@ spec:
labels:
app: ray-worker
spec:
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
Expand All @@ -86,7 +88,13 @@ spec:
- key: kubernetes.io/hostname
operator: In
values:
- multinode-demo-m02 # Schedule on worker node
- ray-m02 # Schedule on worker node
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchLabels:
app: ray-worker
topologyKey: "kubernetes.io/hostname"
volumes:
- name: gradle-cache
emptyDir: {}
Expand All @@ -101,8 +109,8 @@ spec:
cpu: 500m
memory: 1Gi
limits:
cpu: 1000m
memory: 2Gi
cpu: 2000m
memory: 4Gi
env:
- name: LD_LIBRARY_PATH
value: /home/roboteam/build/release/lib
Expand All @@ -112,10 +120,10 @@ spec:
# Game Controller
- name: ssl-game-controller
image: robocupssl/ssl-game-controller:latest
args: ["-address", "0.0.0.0:8081"] # Changed from :8081 to explicitly bind to all interfaces
args: ["-address", "0.0.0.0:8081"]
ports:
- containerPort: 8081
protocol: TCP # Explicitly set protocol
protocol: TCP

# Primary AI
- name: roboteam-primary-ai
Expand Down Expand Up @@ -193,11 +201,11 @@ spec:
- "/home/roboteam/external/framework/build/bin/simulator-cli"
ports:
- containerPort: 10300
protocol: UDP # Simulator control port
protocol: UDP
- containerPort: 10301
protocol: TCP # Presumably TCP ports
protocol: TCP
- containerPort: 5558
protocol: TCP # ZMQ port
protocol: TCP
env:
- name: LD_LIBRARY_PATH
value: /home/roboteam/build/release/lib
Expand Down Expand Up @@ -244,4 +252,47 @@ spec:
- name: gc-interface
port: 8081
targetPort: 8081
nodePort: 30081 # Game controller interface
nodePort: 30081 # Game controller interface
- name: redis
port: 6379
targetPort: 6379
nodePort: 30679 # Choose an available port

---
apiVersion: v1
kind: Service
metadata:
name: roboteam-ray-cluster-head-svc
spec:
type: ClusterIP
selector:
app: ray-head
ports:
- name: redis
port: 6379
targetPort: 6379
- name: gcs
port: 10001
targetPort: 10001
- name: dashboard
port: 8265
targetPort: 8265
- name: serve
port: 8000
targetPort: 8000

---
apiVersion: v1
kind: Service
metadata:
name: roboteam-ray-worker-svc
spec:
selector:
app: ray-worker
ports:
- name: game-controller
port: 8081
targetPort: 8081
- name: simulator
port: 5558
targetPort: 5558
52 changes: 28 additions & 24 deletions roboteam_ai/src/RL/RL_Ray/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,23 @@ def verify_imports():
def main():
verify_imports()

# if not ray.is_initialized():
# ray.init(
# address="ray://192.168.49.2:31001",
# ignore_reinit_error=True,
# runtime_env={
# "env_vars": {
# "NUMPY_EXPERIMENTAL_ARRAY_FUNCTION": "0",

# },
# # "pip": [
# # "numpy==1.24.3",
# # "pyzmq==26.2.0"
# # ]
# }
# )

ray.init()
if not ray.is_initialized():
ray.init(
address=f"ray://192.168.49.2:31001",
ignore_reinit_error=True,
runtime_env={
"env_vars": {
"NUMPY_EXPERIMENTAL_ARRAY_FUNCTION": "0",

},
# "pip": [
# "numpy==1.24.3",
# "pyzmq==26.2.0"
# ]
}
)

# ray.init()

# We can set env_config here
def env_creator(env_config):
Expand All @@ -65,7 +65,13 @@ def env_creator(env_config):
.resources(num_gpus=0)
.env_runners(
num_env_runners=1,
num_envs_per_env_runner=1,
sample_timeout_s=None
)
# .api_stack(
# enable_rl_module_and_learner=True,
# enable_env_runner_and_connector_v2=True
# )
.debugging(
log_level="DEBUG",
seed=42
Expand All @@ -78,17 +84,15 @@ def env_creator(env_config):
algo = config.build()

for i in range(10):
print(f"\nStarting iteration {i}")
result = algo.train()
result.pop("config")
print("\nTraining metrics:")
print(f"Episode Reward Mean: {result.get('episode_reward_mean', 'N/A')}")
print(f"Episode Length Mean: {result.get('episode_len_mean', 'N/A')}")
print(f"Total Timesteps: {result.get('timesteps_total', 'N/A')}")
pprint(result)

if i % 5 == 0:
# Use save instead of save_to_path
checkpoint_dir = f"checkpoint_{i}"
os.makedirs(checkpoint_dir, exist_ok=True)
algo.save(checkpoint_dir)
print(f"Checkpoint saved in directory {checkpoint_dir}")

if __name__ == "__main__":
main()

Expand Down
47 changes: 32 additions & 15 deletions roboteam_ai/src/RL/env2.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,8 @@ def __init__(self, config=None):
self.blue_score = 0 # Initialize blue score to zero

# Initialize the observation space
self.observation_space = spaces.Box(low=0, high=self.MAX_ROBOTS_US, shape=(15,), dtype=np.int32)
self.observation_space = spaces.Box(low=float('-inf'), high=float('inf'), shape=(1,15), dtype=np.float64)


# Action space: [attackers, defenders]
# Wallers will be automatically calculated
self.action_space = spaces.MultiDiscrete([self.MAX_ROBOTS_US + 1, self.MAX_ROBOTS_US + 1])
Expand Down Expand Up @@ -170,26 +169,38 @@ def get_observation(self):
"""
get_observation is meant to get the observation space (kinda like the state)
"""

# Get the robot grid representation
self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state() # Matrix of 4 by 2 + 2 booleans
self.robot_grid, self.is_yellow_dribbling, self.is_blue_dribbling = get_robot_state()
print(f"Robot grid: {self.robot_grid}")
print(f"Yellow dribbling: {self.is_yellow_dribbling}, Blue dribbling: {self.is_blue_dribbling}")

# Get the ball location
self.ball_position, self.ball_quadrant = get_ball_state() # x,y coordinates, quadrant

self.ball_position, self.ball_quadrant = get_ball_state()
print(f"Ball position: {self.ball_position}, Ball quadrant: {self.ball_quadrant}")

robot_positions_flat = self.robot_grid.flatten()
# Convert and flatten robot positions to float64
robot_positions_flat = self.robot_grid.astype(np.float64).flatten() # 8 elements

# Use ball quadrant for observation
ball_quadrant = np.array([float(self.ball_quadrant)], dtype=np.float64) # 1 element

# Convert dribbling status to float64
is_yellow_dribbling = np.array([float(self.is_yellow_dribbling)], dtype=np.float64) # 1 element

# Convert `ball_position` (scalar) and `is_yellow_dribbling` (boolean) to compatible formats
ball_position = np.array([self.ball_quadrant]) # 1 element
is_yellow_dribbling = np.array([int(self.is_yellow_dribbling)]) # Convert boolean to int (0 or 1)
# Combine all parts into the observation array with padding
observation = np.concatenate([
robot_positions_flat, # 8 elements
ball_quadrant, # 1 element
is_yellow_dribbling, # 1 element
np.zeros(5, dtype=np.float64) # 5 elements to reach total of 15
])

# Combine all parts into a single 15-element observation array
# Pad with zeros if you need additional elements
observation = np.concatenate([robot_positions_flat, ball_position, is_yellow_dribbling, np.zeros(5)])
# Reshape to match expected shape (1, 15)
observation = observation.reshape(1, 15)

# Verify shape and dtype
assert observation.shape == (1, 15), f"Observation shape {observation.shape} != (1, 15)"
assert observation.dtype == np.float64, f"Observation dtype {observation.dtype} != float64"

return observation, self.calculate_reward()

Expand Down Expand Up @@ -230,7 +241,7 @@ def step(self, action):
observation_space, _ = self.reset()
truncated = self.is_truncated() # Determine if the episode was truncated, too much time or a yellow card

time.sleep(0.25) # DELAY FOR STEPS (ADJUST LATER)
time.sleep(0.1) # DELAY FOR STEPS (ADJUST LATER)

return observation_space, reward, done, truncated, {}

Expand Down Expand Up @@ -266,20 +277,26 @@ def reset(self, seed=None,**kwargs):
"""

# Teleport ball to middle position
print("Teleporting ball...")
teleport_ball(0,0)

# Reset referee state
print("Resetting referee state...")
reset_referee_state()

# Set blue team on right side + initiates kickoff
print("Starting game...")
start_game()

print("Getting observation...")
observation, _ = self.get_observation()

# Reset shaped_reward_given boolean
self.shaped_reward_given = False
self.is_yellow_dribbling = False
self.is_blue_dribbling = False

observation, _ = self.get_observation()
print("Reset complete!")
return observation,{}


Expand Down
Loading

0 comments on commit 15b0666

Please sign in to comment.