Skip to content

Commit

Permalink
Fix ping-and-up test for Den-launched clusters
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexandra Belousov authored and Alexandra Belousov committed Jan 15, 2025
1 parent e722b62 commit 1597c5d
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion runhouse/resources/hardware/on_demand_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,23 @@ def _update_from_sky_status(self, dryrun: bool = False):
cluster_dict = self._sky_status(refresh=not dryrun)
self._populate_connection_from_status_dict(cluster_dict)

def _update_from_den_status(self):
    """Refresh this cluster's connection info from its status record in Den.

    Fetches ``/resource/<cluster_uri>/cluster/status`` from the RNS API
    server and, if a status entry with a cluster config is present, hands
    that config to ``DenLauncher._update_from_den_response`` to apply to
    this cluster.

    The refresh is best-effort: when the response body is not JSON, carries
    no ``data`` entries, or the first entry lacks ``resource_info`` /
    ``cluster_config``, the method returns without modifying the cluster.
    """
    cluster_uri = rns_client.format_rns_address(self.rns_address or self.name)

    resp = requests.get(
        f"{rns_client.api_server_url}/resource/{cluster_uri}/cluster/status",
        headers=rns_client.request_headers(),
    )

    try:
        payload = resp.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid
        # JSON (e.g. an HTML error page); there is nothing to update from.
        return

    cluster_den_status = payload.get("data", None)
    if not cluster_den_status:
        return

    # Only the first status entry is consulted. Guard each level so that a
    # missing or null "resource_info" does not raise AttributeError.
    resource_info = cluster_den_status[0].get("resource_info") or {}
    cluster_dict = resource_info.get("cluster_config")
    if cluster_dict is None:
        # NOTE(review): assumes the launcher helper cannot do anything
        # useful with a missing config - confirm against DenLauncher.
        return

    DenLauncher._update_from_den_response(cluster=self, config=cluster_dict)

def get_instance_type(self):
"""Returns instance type of the cluster."""
if self.instance_type and "--" in self.instance_type: # K8s specific syntax
Expand Down Expand Up @@ -792,6 +809,9 @@ def _ping(self, timeout=5, retry=False):
return True

if retry:
self._update_from_sky_status(dryrun=False)
if self.launcher == LauncherType.LOCAL:
self._update_from_sky_status(dryrun=False)
else:
self._update_from_den_status()
return super()._ping(timeout=timeout, retry=False)
return False

0 comments on commit 1597c5d

Please sign in to comment.