From 45ffe6eb99d96488fdec187bb47a4a78d9b5ee92 Mon Sep 17 00:00:00 2001
From: zygi
Date: Wed, 9 Nov 2022 22:02:22 +0000
Subject: [PATCH] Update GCP autoscaler to support v4-8 TPU nodes (#30065)

TPU v4s are now Generally Available, so there is no reason to exclude them.
When configuring GCP autoscaler TPU nodes, valid accelerator types are
whitelisted. TPU v4-8 nodes are not in the whitelist, even though they behave
exactly the same as, e.g., TPU v3-8 nodes.

Signed-off-by: GitHub
---
 .../cluster/vms/references/ray-cluster-configuration.rst | 2 +-
 python/ray/autoscaler/_private/gcp/config.py             | 4 ++--
 python/ray/autoscaler/gcp/tpu.yaml                       | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/source/cluster/vms/references/ray-cluster-configuration.rst b/doc/source/cluster/vms/references/ray-cluster-configuration.rst
index 7f817271c77d..5db973be7392 100644
--- a/doc/source/cluster/vms/references/ray-cluster-configuration.rst
+++ b/doc/source/cluster/vms/references/ray-cluster-configuration.rst
@@ -1207,7 +1207,7 @@ Full configuration
 TPU Configuration
 ~~~~~~~~~~~~~~~~~
 
-It is possible to use `TPU VMs `_ on GCP. Currently, `TPU pods `_ (TPUs other than v2-8 and v3-8) are not supported.
+It is possible to use `TPU VMs `_ on GCP. Currently, `TPU pods `_ (TPUs other than v2-8, v3-8 and v4-8) are not supported.
 
 Before using a config with TPUs, ensure that the `TPU API is enabled for your GCP project `_.
 
diff --git a/python/ray/autoscaler/_private/gcp/config.py b/python/ray/autoscaler/_private/gcp/config.py
index cf008ac473e0..a5f8510aca04 100644
--- a/python/ray/autoscaler/_private/gcp/config.py
+++ b/python/ray/autoscaler/_private/gcp/config.py
@@ -69,9 +69,9 @@ def get_node_type(node: dict) -> GCPNodeType:
 
     if "machineType" not in node and "acceleratorType" in node:
         # remove after TPU pod support is added!
-        if node["acceleratorType"] not in ("v2-8", "v3-8"):
+        if node["acceleratorType"] not in ("v2-8", "v3-8", "v4-8"):
             raise ValueError(
-                "For now, only v2-8' and 'v3-8' accelerator types are "
+                "For now, only 'v2-8', 'v3-8' and 'v4-8' accelerator types are "
                 "supported. Support for TPU pods will be added in the future."
             )
 
diff --git a/python/ray/autoscaler/gcp/tpu.yaml b/python/ray/autoscaler/gcp/tpu.yaml
index 001dcd87c56a..1e75a5d5f2ad 100644
--- a/python/ray/autoscaler/gcp/tpu.yaml
+++ b/python/ray/autoscaler/gcp/tpu.yaml
@@ -28,7 +28,7 @@ available_node_types:
         min_workers: 7
         resources: {"TPU": 1} # use TPU custom resource in your code
         node_config:
-            # Only v2-8 and v3-8 accelerator types are currently supported.
+            # Only v2-8, v3-8 and v4-8 accelerator types are currently supported.
             # Support for TPU pods will be added in the future.
             acceleratorType: v2-8
            runtimeVersion: v2-alpha
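
Note for users updating their own cluster configs: with this patch applied, a worker node type
using the newly whitelisted v4-8 accelerator could look like the sketch below. This is a
hypothetical fragment modeled on the tpu.yaml example touched above, not part of the patch;
the node type name (ray_tpu_v4) and the runtimeVersion value are assumptions and should be
checked against the GCP TPU VM documentation for your project.

    # Hypothetical cluster config fragment (modeled on python/ray/autoscaler/gcp/tpu.yaml).
    # ray_tpu_v4 and runtimeVersion are assumptions, not taken from this patch.
    available_node_types:
        ray_tpu_v4:
            min_workers: 1
            resources: {"TPU": 1}  # use the TPU custom resource in your code
            node_config:
                # v4-8 is accepted after this patch; larger v4 slices (TPU pods) are still rejected.
                acceleratorType: v4-8
                runtimeVersion: tpu-vm-v4-base  # assumption: v4 TPU VMs use a different runtime image than v2-alpha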