Skip to content

Commit

Permalink
Add logging for missing node_id in ray_nodes_idle_duration_ms_by_id and set default value
Browse files Browse the repository at this point in the history

Signed-off-by: Mimi Liao <[email protected]>
  • Loading branch information
mimiliaogo committed Nov 25, 2024
1 parent 06858c0 commit be86afd
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 3 deletions.
4 changes: 3 additions & 1 deletion python/ray/autoscaler/_private/load_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,6 @@ def update(
self.static_resources_by_ip[ip] = static_resources
self.raylet_id_by_ip[ip] = raylet_id
self.cluster_full_of_actors_detected = cluster_full_of_actors_detected
self.ray_nodes_last_used_time_by_ip[ip] = node_last_used_time_s

if not waiting_bundles:
waiting_bundles = []
Expand All @@ -122,6 +121,9 @@ def update(
self.dynamic_resources_by_ip[ip] = dynamic_resources_update

now = time.time()
self.ray_nodes_last_used_time_by_ip[ip] = (
node_last_used_time_s if node_last_used_time_s else now
)
self.last_heartbeat_time_by_ip[ip] = now
self.waiting_bundles = waiting_bundles
self.infeasible_bundles = infeasible_bundles
Expand Down
12 changes: 10 additions & 2 deletions python/ray/autoscaler/_private/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,13 @@ def update_load_metrics(self):
else:
ip = resource_message.node_manager_address

idle_duration_ms = ray_nodes_idle_duration_ms_by_id[node_id]
idle_duration_ms = 0.0
if node_id in ray_nodes_idle_duration_ms_by_id:
idle_duration_ms = ray_nodes_idle_duration_ms_by_id[node_id]
else:
logger.warning(
f"node_id {node_id} not found in ray_nodes_idle_duration_ms_by_id"
)

self.load_metrics.update(
ip,
Expand All @@ -331,7 +337,9 @@ def update_load_metrics(self):
infeasible_bundles,
pending_placement_groups,
cluster_full,
time.time() - idle_duration_ms / 1000, # last_used_time
time.time()
- idle_duration_ms
/ 1000, # node_last_used_time_s = now - idle_duration
)
if self.readonly_config:
self.readonly_config["available_node_types"].update(mirror_node_types)
Expand Down

0 comments on commit be86afd

Please sign in to comment.