From 2b89520d713aad6ef5b1e1e3898e60b87d2d36a8 Mon Sep 17 00:00:00 2001
From: David Hassell
Date: Wed, 20 Mar 2024 11:05:43 +0000
Subject: [PATCH] collapse bound performance

---
 Changelog.rst |  2 ++
 cf/field.py   | 41 ++++++++++++++++++++++++++++++-----------
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/Changelog.rst b/Changelog.rst
index 7b6dbd7576..c09f1d0802 100644
--- a/Changelog.rst
+++ b/Changelog.rst
@@ -9,6 +9,8 @@ version NEXT
   to regrid the vertical axis in logarithmic coordinates to
   `cf.Field.regrids` and `cf.Field.regridc`
   (https://github.com/NCAS-CMS/cf-python/issues/715)
+* Improve `cf.Field.collapse` performance by lazily computing reduced
+  axis coordinates (https://github.com/NCAS-CMS/cf-python/issues/741)
 * Fix misleading error message when it is not possible to create area
   weights requested from `cf.Field.collapse`
   (https://github.com/NCAS-CMS/cf-python/issues/731)
diff --git a/cf/field.py b/cf/field.py
index 571a6c1fb1..fcab8900c8 100644
--- a/cf/field.py
+++ b/cf/field.py
@@ -7149,14 +7149,37 @@ def collapse(
                 if dim is None:
                     continue
 
-                # Create a new dimension coordinate for this axis
+                # Create new dimension coordinate bounds
                 if dim.has_bounds():
-                    bounds_data = [dim.bounds.datum(0), dim.bounds.datum(-1)]
+                    b = dim.bounds.data
                 else:
-                    bounds_data = [dim.datum(0), dim.datum(-1)]
+                    b = dim.data
 
-                units = dim.Units
+                cached_elements = b._get_cached_elements()
+                try:
+                    # Try to set the new bounds from cached values
+                    bounds_data = Data(
+                        [[cached_elements[0], cached_elements[-1]]],
+                        dtype=b.dtype,
+                        units=b.Units,
+                    )
+                except KeyError:
+                    # Otherwise create the new bounds lazily
+                    ndim = b.ndim
+                    bounds_data = Data.concatenate(
+                        [
+                            b[(slice(0, 1, 1),) * ndim],
+                            b[(slice(-1, None, 1),) * ndim],
+                        ],
+                        axis=-1,
+                        copy=False,
+                    )
+                    if ndim == 1:
+                        bounds_data.insert_dimension(0, inplace=True)
 
+                bounds = self._Bounds(data=bounds_data)
+
+                # Create a new dimension coordinate value
                 if coordinate == "min":
                     coordinate = "minimum"
                     print(
@@ -7171,21 +7194,17 @@ def collapse(
                     )
 
                 if coordinate == "mid_range":
-                    data = Data(
-                        [(bounds_data[0] + bounds_data[1]) * 0.5], units=units
-                    )
+                    data = bounds_data.mean(axes=1, weights=None, squeeze=True)
                 elif coordinate == "minimum":
-                    data = dim.data.min()
+                    data = dim.data.min(squeeze=False)
                 elif coordinate == "maximum":
-                    data = dim.data.max()
+                    data = dim.data.max(squeeze=False)
                 else:
                     raise ValueError(
                         "Can't collapse: Bad parameter value: "
                         f"coordinate={coordinate!r}"
                     )
 
-                bounds = self._Bounds(data=Data([bounds_data], units=units))
-
                 dim.set_data(data, copy=False)
                 dim.set_bounds(bounds, copy=False)
 