[Core] Add v6e TPU Head Resource Autoscaling Support (ray-project#48201)
Signed-off-by: Ryan O'Leary <[email protected]>
Signed-off-by: hjiang <[email protected]>
ryanaoleary authored and dentiny committed Dec 7, 2024
1 parent 0a58eff commit f2dc181
Showing 2 changed files with 75 additions and 3 deletions.
python/ray/autoscaler/_private/kuberay/utils.py: 4 additions & 3 deletions
@@ -11,6 +11,7 @@
     "tpu-v5-lite-device": "v5e",
     "tpu-v5-lite-podslice": "v5e",
     "tpu-v5p-slice": "v5p",
+    "tpu-v6e-slice": "v6e",
 }


@@ -102,9 +103,9 @@ def tpu_node_selectors_to_type(topology: str, accelerator: str) -> Optional[str]
         # Reduce e.g. "2x2x2" to 8
         chip_dimensions = [int(chip_count) for chip_count in topology.split("x")]
         num_chips = reduce(lambda x, y: x * y, chip_dimensions)
-        default_num_cores_per_chip = 2
-        if generation == "v5e":
-            default_num_cores_per_chip = 1
+        default_num_cores_per_chip = 1
+        if generation == "v4" or generation == "v5p":
+            default_num_cores_per_chip = 2
         num_cores = num_chips * default_num_cores_per_chip
         return f"{generation}-{num_cores}"
     return None
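
For intuition: the helper multiplies out the topology string to a chip count, scales by cores per chip (two for v4 and v5p, one for v5e and v6e), and joins the result with the generation name. A minimal standalone sketch of that arithmetic, using a hypothetical helper name rather than Ray's actual function:

from functools import reduce

def tpu_type_from_topology(generation: str, topology: str) -> str:
    # "16x16" -> [16, 16] -> 256 chips.
    chip_dimensions = [int(d) for d in topology.split("x")]
    num_chips = reduce(lambda x, y: x * y, chip_dimensions)
    # v4 and v5p expose two cores per chip; v5e and v6e expose one.
    cores_per_chip = 2 if generation in ("v4", "v5p") else 1
    return f"{generation}-{num_chips * cores_per_chip}"

assert tpu_type_from_topology("v6e", "16x16") == "v6e-256"
assert tpu_type_from_topology("v5p", "2x2x4") == "v5p-32"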
python/ray/tests/kuberay/test_autoscaling_config.py: 71 additions & 0 deletions
@@ -16,6 +16,8 @@
     _get_custom_resources,
 )
+
+from ray.autoscaler._private.kuberay.utils import tpu_node_selectors_to_type

 AUTOSCALING_CONFIG_MODULE_PATH = "ray.autoscaler._private.kuberay.autoscaling_config"


@@ -402,6 +404,75 @@ def _fetch_ray_cr_from_k8s(self) -> Dict[str, Any]:
     assert out == {"ok-key": "ok-value"}


+TPU_TYPES_ARGS = ",".join(
+    [
+        "accelerator",
+        "topology",
+        "expected_tpu_type",
+    ]
+)
+TPU_TYPES_DATA = (
+    []
+    if platform.system() == "Windows"
+    else [
+        pytest.param(
+            "tpu-v4-podslice",
+            None,
+            None,
+            id="tpu-none-topology",
+        ),
+        pytest.param(
+            None,
+            "2x2x2",
+            None,
+            id="tpu-none-accelerator",
+        ),
+        pytest.param(
+            "tpu-v4-podslice",
+            "2x2x2",
+            "v4-16",
+            id="tpu-v4-test",
+        ),
+        pytest.param(
+            "tpu-v5-lite-device",
+            "2x2",
+            "v5e-4",
+            id="tpu-v5e-device-test",
+        ),
+        pytest.param(
+            "tpu-v5-lite-podslice",
+            "2x4",
+            "v5e-8",
+            id="tpu-v5e-podslice-test",
+        ),
+        pytest.param(
+            "tpu-v5p-slice",
+            "2x2x4",
+            "v5p-32",
+            id="tpu-v5p-test",
+        ),
+        pytest.param(
+            "tpu-v6e-slice",
+            "16x16",
+            "v6e-256",
+            id="tpu-v6e-test",
+        ),
+    ]
+)
+
+
+@pytest.mark.skipif(platform.system() == "Windows", reason="Not relevant.")
+@pytest.mark.parametrize(TPU_TYPES_ARGS, TPU_TYPES_DATA)
+def test_tpu_node_selectors_to_type(
+    accelerator: str, topology: str, expected_tpu_type: str
+):
+    """Verify that tpu_node_selectors_to_type correctly returns TPU type from
+    TPU nodeSelectors.
+    """
+    tpu_type = tpu_node_selectors_to_type(topology, accelerator)
+    assert expected_tpu_type == tpu_type
+
+
 TPU_PARAM_ARGS = ",".join(
     [
         "ray_cr_in",
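
The hunk continues beyond this point (collapsed in this view). For a quick sanity check outside the test suite, the patched helper can be called directly; this mirrors the new tpu-v6e-test case and assumes a Ray checkout where the KubeRay utils module is importable:

from ray.autoscaler._private.kuberay.utils import tpu_node_selectors_to_type

# 16x16 -> 256 chips at one core each -> "v6e-256".
assert tpu_node_selectors_to_type("16x16", "tpu-v6e-slice") == "v6e-256"

# Missing topology falls through to None (mirrors the tpu-none-topology case).
assert tpu_node_selectors_to_type(None, "tpu-v4-podslice") is None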
