Skip to content

Commit

Permalink
chore: make final updates to include Ray v2.42.0 support on SDK, make…
Browse files Browse the repository at this point in the history
… 2.42.0 the default Ray version over 2.33.0

FUTURE_COPYBARA_INTEGRATE_REVIEW=#4990 from googleapis:release-please--branches--main dc59b4d
PiperOrigin-RevId: 734649560
  • Loading branch information
vertex-sdk-bot authored and copybara-github committed Mar 7, 2025
1 parent 80cfc2f commit 98f5403
Show file tree
Hide file tree
Showing 10 changed files with 109 additions and 62 deletions.
13 changes: 13 additions & 0 deletions .kokoro/presubmit/unit_ray_2-42.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Format: //devtools/kokoro/config/proto/build.proto

# Run unit tests for Ray 2.42.0 on Python 3.10
env_vars: {
key: "NOX_SESSION"
value: "unit_ray(ray='2.42.0')"
}

# Run unit tests in parallel, splitting up by file
env_vars: {
key: "PYTEST_ADDOPTS"
value: "-n=auto --dist=loadscope"
}
6 changes: 3 additions & 3 deletions google/cloud/aiplatform/vertex_ray/client_builder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -47,7 +47,7 @@ def __init__(
persistent_resource_id,
" failed to start Head node properly.",
)
if ray.__version__ == "2.33.0":
if ray.__version__ in ("2.42.0", "2.33.0"):
super().__init__(
dashboard_url=dashboard_uri,
python_version=ray_client_context.python_version,
Expand All @@ -69,7 +69,7 @@ def __init__(
else:
raise ImportError(
f"[Ray on Vertex AI]: Unsupported version {ray.__version__}."
+ "Only 2.33.0 and 2.9.3 are supported."
+ "Only 2.42.0, 2.33.0, and 2.9.3 are supported."
)
self.persistent_resource_id = persistent_resource_id
self.vertex_sdk_version = str(VERTEX_SDK_VERSION)
Expand Down
16 changes: 8 additions & 8 deletions google/cloud/aiplatform/vertex_ray/cluster_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
def create_ray_cluster(
head_node_type: Optional[resources.Resources] = resources.Resources(),
python_version: Optional[str] = "3.10",
ray_version: Optional[str] = "2.33",
ray_version: Optional[str] = "2.42",
network: Optional[str] = None,
service_account: Optional[str] = None,
cluster_name: Optional[str] = None,
Expand All @@ -76,17 +76,17 @@ def create_ray_cluster(
head_node_type = Resources(
machine_type="n1-standard-8",
node_count=1,
accelerator_type="NVIDIA_TESLA_K80",
accelerator_type="NVIDIA_TESLA_T4",
accelerator_count=1,
custom_image="us-docker.pkg.dev/my-project/ray-cpu-image.2.9:latest", # Optional
custom_image="us-docker.pkg.dev/my-project/ray-cpu-image.2.33:latest", # Optional
)
worker_node_types = [Resources(
machine_type="n1-standard-8",
node_count=2,
accelerator_type="NVIDIA_TESLA_K80",
accelerator_type="NVIDIA_TESLA_T4",
accelerator_count=1,
custom_image="us-docker.pkg.dev/my-project/ray-gpu-image.2.9:latest", # Optional
custom_image="us-docker.pkg.dev/my-project/ray-gpu-image.2.33:latest", # Optional
)]
cluster_resource_name = vertex_ray.create_ray_cluster(
Expand All @@ -95,7 +95,7 @@ def create_ray_cluster(
service_account="[email protected]", # Optional
cluster_name="my-cluster-name", # Optional
worker_node_types=worker_node_types,
ray_version="2.9",
ray_version="2.33",
)
After a ray cluster is set up, you can call
Expand All @@ -109,7 +109,7 @@ def create_ray_cluster(
head_node_type: The head node resource. Resources.node_count must be 1.
If not set, default value of Resources() class will be used.
python_version: Python version for the ray cluster.
ray_version: Ray version for the ray cluster. Default is 2.33.0.
ray_version: Ray version for the ray cluster. Default is 2.42.0.
network: Virtual private cloud (VPC) network. For Ray Client, VPC
peering is required to connect to the Ray Cluster managed in the
Vertex API service. For Ray Job API, VPC network is not required
Expand Down Expand Up @@ -162,7 +162,7 @@ def create_ray_cluster(
local_ray_verion = _validation_utils.get_local_ray_version()
if ray_version != local_ray_verion:
if custom_images is None and head_node_type.custom_image is None:
install_ray_version = "2.33.0"
install_ray_version = "2.42.0"
logging.info(
"[Ray on Vertex]: Local runtime has Ray version %s"
", but the requested cluster runtime has %s. Please "
Expand Down
8 changes: 4 additions & 4 deletions google/cloud/aiplatform/vertex_ray/dashboard_sdk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,15 +29,15 @@ def get_job_submission_client_cluster_info(
"""A vertex_ray implementation of get_job_submission_client_cluster_info().
Implements
https://github.com/ray-project/ray/blob/ray-2.33.0/python/ray/dashboard/modules/dashboard_sdk.py#L84
https://github.com/ray-project/ray/blob/ray-2.42.0/python/ray/dashboard/modules/dashboard_sdk.py#L84
This will be called from the Ray Job API Python client.
Args:
address: Address without the module prefix `vertex_ray` but otherwise
the same format as passed to ray.init(address="vertex_ray://...").
*args: Reminder of positional args that might be passed down from
*args: Remainder of positional args that might be passed down from
the framework.
**kwargs: Reminder of keyword args that might be passed down from
**kwargs: Remainder of keyword args that might be passed down from
the framework.
Returns:
Expand Down
17 changes: 11 additions & 6 deletions google/cloud/aiplatform/vertex_ray/util/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,22 @@ class Resources:
class NodeImages:
"""Custom images for a ray cluster.
We currently support Ray v2.9 and v2.33 and python v3.10.
We currently support Ray v2.9, v2.33, v2.42 and python v3.10.
We also support python v3.11 for Ray v2.42.
The custom images must be extended from the following base images:
"{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-9.py310:latest",
"{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest",
"{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-33.py310:latest", or
"{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-33.py310:latest". In
"{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-33.py310:latest",
"{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-33.py310:latest",
"{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-42.py310:latest",
"{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-42.py310:latest",
"{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-42.py311:latest", or
"{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-42.py311:latest". In
order to use custom images, you need to specify both head and worker images.
Attributes:
head: image for head node (eg. us-docker.pkg.dev/my-project/ray-cpu.2-9.py310-tf:latest).
worker: image for all worker nodes (eg. us-docker.pkg.dev/my-project/ray-gpu.2-9.py310-tf:latest).
head: image for head node (eg. us-docker.pkg.dev/my-project/ray-cpu.2-33.py310-tf:latest).
worker: image for all worker nodes (eg. us-docker.pkg.dev/my-project/ray-gpu.2-33.py310-tf:latest).
"""

head: str = None
Expand Down Expand Up @@ -147,7 +152,7 @@ class Cluster:
the cluster.
state: Describes the cluster state (defined in PersistentResource.State).
python_version: Python version for the ray cluster (e.g. "3.10").
ray_version: Ray version for the ray cluster (e.g. "2.9").
ray_version: Ray version for the ray cluster (e.g. "2.33").
head_node_type: The head node resource. Resources.node_count must be 1.
If not set, by default it is a CPU node with machine_type of n1-standard-8.
worker_node_types: The list of Resources of the worker nodes. Should not
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def unit_genai_minimal_dependencies(session):


@nox.session(python="3.10")
@nox.parametrize("ray", ["2.9.3", "2.33.0"])
@nox.parametrize("ray", ["2.9.3", "2.33.0", "2.42.0"])
def unit_ray(session, ray):
# Install all test dependencies, then install this package in-place.

Expand Down
15 changes: 7 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,19 +101,18 @@
preview_extra_require = []

ray_extra_require = [
# Cluster only supports 2.9.3 and 2.33.0. Keep 2.4.0 for our testing environment.
# Cluster only supports 2.9.3, 2.33.0, and 2.42.0. Keep 2.4.0 for our
# testing environment.
# Note that testing is submitting a job in a cluster with Ray 2.9.3 remotely.
(
"ray[default] >= 2.4, <= 2.33.0,!= 2.5.*,!= 2.6.*,!= 2.7.*,!="
" 2.8.*,!=2.9.0,!=2.9.1,!=2.9.2, !=2.10.*, !=2.11.*, !=2.12.*, !=2.13.*, !="
" 2.14.*, !=2.15.*, !=2.16.*, !=2.17.*, !=2.18.*, !=2.19.*, !=2.20.*, !="
" 2.21.*, !=2.22.*, !=2.23.*, !=2.24.*, !=2.25.*, !=2.26.*, !=2.27.*, !="
" 2.28.*, !=2.29.*, !=2.30.*, !=2.31.*, !=2.32.*; python_version<'3.11'"
"ray[default] >= 2.9.3, <= 2.42.0,!=2.10.*, !=2.11.*, !=2.12.*, !="
" 2.13.*, !=2.14.*, !=2.15.*, !=2.16.*, !=2.17.*, !=2.18.*, !=2.19.*, !="
" 2.20.*, !=2.21.*, !=2.22.*, !=2.23.*, !=2.24.*, !=2.25.*, !=2.26.*, !="
" 2.27.*, !=2.28.*, !=2.29.*, !=2.30.*, !=2.31.*, !=2.32.*; !=2.34.*, !="
" 2.35.*, !=2.36.*, !=2.37.*, ,!=2.38.*, !=2.39.*, !=2.40.*, !=2.41.*"
),
# To avoid ImportError: cannot import name 'packaging' from 'pkg_resources'
"setuptools < 70.0.0",
# Ray Data v2.4 in Python 3.11 is broken, but got fixed in Ray v2.5.
"ray[default] >= 2.5, <= 2.33.0; python_version=='3.11'",
"google-cloud-bigquery-storage",
"google-cloud-bigquery",
"pandas >= 1.0.0",
Expand Down
13 changes: 13 additions & 0 deletions testing/constraints-ray-2.42.0.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
ray==2.42.0
# Below constraints are inherited from constraints-3.10.txt
google-api-core
proto-plus==1.22.3
protobuf
mock==4.0.2
google-cloud-storage==2.2.1 # Increased for kfp 2.0 compatibility
packaging==24.1 # Increased to unbreak canonicalize_version error (b/377774673)
grpcio-testing==1.34.0
mlflow==1.30.1 # Pinned to speed up installation
pytest-xdist==3.3.1 # Pinned to unbreak unit tests
IPython # Added to test supernova rich html buttons

73 changes: 42 additions & 31 deletions tests/unit/vertex_ray/test_cluster_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def cluster_eq(returned_cluster, expected_cluster):
assert returned_cluster.state == expected_cluster.state


@pytest.mark.parametrize("ray_version", ["2.9", "2.33"])
@pytest.mark.parametrize("ray_version", ["2.9", "2.33", "2.42"])
@pytest.mark.usefixtures("google_auth_mock", "get_project_number_mock")
class TestClusterManagement:
def setup_method(self, ray_version):
Expand All @@ -317,14 +317,16 @@ def test_create_ray_cluster_1_pool_gpu_success(
assert tc.ClusterConstants.TEST_VERTEX_RAY_PR_ADDRESS == cluster_name

test_persistent_resource = tc.ClusterConstants.TEST_REQUEST_RUNNING_1_POOL

if ray_version == "2.9":
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
elif ray_version == "2.33":
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
else:
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_42
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = head_node_image

request = persistent_resource_service.CreatePersistentResourceRequest(
parent=tc.ProjectConstants.TEST_PARENT,
Expand Down Expand Up @@ -388,14 +390,17 @@ def test_create_ray_cluster_1_pool_gpu_with_labels_success(
test_persistent_resource = (
tc.ClusterConstants.TEST_REQUEST_RUNNING_1_POOL_WITH_LABELS
)

if ray_version == "2.9":
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
elif ray_version == "2.33":
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
else:
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_42

test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = head_node_image

request = persistent_resource_service.CreatePersistentResourceRequest(
parent=tc.ProjectConstants.TEST_PARENT,
Expand Down Expand Up @@ -449,20 +454,23 @@ def test_create_ray_cluster_2_pools_success(
)

test_persistent_resource = tc.ClusterConstants.TEST_REQUEST_RUNNING_2_POOLS

if ray_version == "2.9":
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_CPU_IMAGE_2_9
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"worker-pool1"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
head_node_image = tc.ClusterConstants.TEST_CPU_IMAGE_2_9
worker_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
elif ray_version == "2.33":
head_node_image = tc.ClusterConstants.TEST_CPU_IMAGE_2_33
worker_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
else:
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_CPU_IMAGE_2_33
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"worker-pool1"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
head_node_image = tc.ClusterConstants.TEST_CPU_IMAGE_2_42
worker_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_42

test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = head_node_image
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"worker-pool1"
] = worker_node_image

assert tc.ClusterConstants.TEST_VERTEX_RAY_PR_ADDRESS == cluster_name
request = persistent_resource_service.CreatePersistentResourceRequest(
Expand Down Expand Up @@ -526,14 +534,17 @@ def test_create_ray_cluster_byosa_success(
assert tc.ClusterConstants.TEST_VERTEX_RAY_PR_ADDRESS == cluster_name

test_persistent_resource = tc.ClusterConstants.TEST_REQUEST_RUNNING_1_POOL_BYOSA

if ray_version == "2.9":
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_9
elif ray_version == "2.33":
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
else:
test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = tc.ClusterConstants.TEST_GPU_IMAGE_2_33
head_node_image = tc.ClusterConstants.TEST_GPU_IMAGE_2_42

test_persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
"head-node"
] = head_node_image

request = persistent_resource_service.CreatePersistentResourceRequest(
parent=tc.ProjectConstants.TEST_PARENT,
Expand Down
8 changes: 7 additions & 1 deletion tests/unit/vertex_ray/test_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
)

predictionrayversion = pytest.mark.skipif(
ray.__version__ != "2.9.3", reason="Not currently supported on Ray 2.33"
ray.__version__ != "2.9.3", reason="Not currently supported on Ray 2.33 or Ray 2.42"
)


Expand Down Expand Up @@ -123,6 +123,12 @@ class ClusterConstants:
TEST_GPU_IMAGE_2_33 = (
"us-docker.pkg.dev/vertex-ai/training/ray-gpu.2-33.py310:latest"
)
TEST_CPU_IMAGE_2_42 = (
"us-docker.pkg.dev/vertex-ai/training/ray-cpu.2-42.py310:latest"
)
TEST_GPU_IMAGE_2_42 = (
"us-docker.pkg.dev/vertex-ai/training/ray-gpu.2-42.py310:latest"
)
TEST_CUSTOM_IMAGE = "us-docker.pkg.dev/my-project/ray-custom-image.2.9:latest"
TEST_PSC_NETWORK_ATTACHMENT = "my-network-attachment"
# RUNNING Persistent Cluster w/o Ray
Expand Down

0 comments on commit 98f5403

Please sign in to comment.