From f69fdb53ad0196d8cb92aac5918810e5de6eb3cf Mon Sep 17 00:00:00 2001 From: Hendrik Makait Date: Fri, 15 Nov 2024 13:01:48 +0100 Subject: [PATCH] Auto-chunking for ERA5 loading --- tests/geospatial/workloads/atmospheric_circulation.py | 2 +- tests/geospatial/workloads/climatology.py | 2 ++ tests/geospatial/workloads/rechunking.py | 1 + tests/geospatial/workloads/regridding.py | 1 + 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/geospatial/workloads/atmospheric_circulation.py b/tests/geospatial/workloads/atmospheric_circulation.py index 61a308fc6b..3fb82108d8 100644 --- a/tests/geospatial/workloads/atmospheric_circulation.py +++ b/tests/geospatial/workloads/atmospheric_circulation.py @@ -11,7 +11,7 @@ def atmospheric_circulation( ) -> Delayed: ds = xr.open_zarr( "gs://weatherbench2/datasets/era5/1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr", - chunks={}, + chunks={"time": "auto"}, ) if scale == "small": # 852.56 GiB (small) diff --git a/tests/geospatial/workloads/climatology.py b/tests/geospatial/workloads/climatology.py index bfc13f80bc..36bfe2ba8c 100644 --- a/tests/geospatial/workloads/climatology.py +++ b/tests/geospatial/workloads/climatology.py @@ -72,6 +72,7 @@ def rechunk_map_blocks( # Load dataset ds = xr.open_zarr( "gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721.zarr", + chunks={"time": "auto"}, ) if scale == "small": @@ -122,6 +123,7 @@ def highlevel_api( # Load dataset ds = xr.open_zarr( "gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721.zarr", + chunks={"time": "auto"}, ) if scale == "small": diff --git a/tests/geospatial/workloads/rechunking.py b/tests/geospatial/workloads/rechunking.py index 9a20994aee..b5d3ca5f5f 100644 --- a/tests/geospatial/workloads/rechunking.py +++ b/tests/geospatial/workloads/rechunking.py @@ -11,6 +11,7 @@ def era5_rechunking( ) -> Delayed: ds = xr.open_zarr( "gs://weatherbench2/datasets/era5/1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr", + chunks={"time": "auto"}, ).drop_encoding() if scale == "small": diff --git a/tests/geospatial/workloads/regridding.py b/tests/geospatial/workloads/regridding.py index 307c37f53d..0cba3449dd 100644 --- a/tests/geospatial/workloads/regridding.py +++ b/tests/geospatial/workloads/regridding.py @@ -13,6 +13,7 @@ def xesmf( ) -> Delayed: ds = xr.open_zarr( "gs://weatherbench2/datasets/era5/1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr", + chunks={"time": "auto"}, ) # Fixed time range and variable as the interesting part of this benchmark scales with the # regridding matrix