From 8f4d2f71e1802e4a2fdc4b56380518c604c439b0 Mon Sep 17 00:00:00 2001
From: Tom White
Date: Mon, 4 Nov 2024 09:24:48 +0000
Subject: [PATCH] Free memory earlier (before writing result to Zarr) by changing function scope (#606)

---
 cubed/primitive/blockwise.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/cubed/primitive/blockwise.py b/cubed/primitive/blockwise.py
index 3a9d760b..53817e10 100644
--- a/cubed/primitive/blockwise.py
+++ b/cubed/primitive/blockwise.py
@@ -77,16 +77,8 @@ def apply_blockwise(out_coords: List[int], *, config: BlockwiseSpec) -> None:
     # lithops needs params to be lists not tuples, so convert back
     out_coords_tuple = tuple(out_coords)
 
-    # get array chunks for input keys, preserving any nested list structure
-    args = []
-    get_chunk_config = partial(get_chunk, config=config)
-    out_key = ("out",) + out_coords_tuple  # array name is ignored by key_function
-    in_keys = config.key_function(out_key)
-    for in_key in in_keys:
-        arg = map_nested(get_chunk_config, in_key)
-        args.append(arg)
+    results = get_results_in_different_scope(out_coords, config=config)
 
-    results = config.function(*args)
     # if blockwise function is a regular function (not a generator) that doesn't return multiple values then make it iterable
     if not inspect.isgeneratorfunction(config.function) and not isinstance(
         results, tuple
@@ -107,6 +99,24 @@ def apply_blockwise(out_coords: List[int], *, config: BlockwiseSpec) -> None:
             config.writes_list[i].open()[out_chunk_key] = result
 
 
+def get_results_in_different_scope(out_coords: List[int], *, config: BlockwiseSpec):
+    # wrap function call in a function so that args go out of scope (and free memory) as soon as results are returned
+
+    # lithops needs params to be lists not tuples, so convert back
+    out_coords_tuple = tuple(out_coords)
+
+    # get array chunks for input keys, preserving any nested list structure
+    args = []
+    get_chunk_config = partial(get_chunk, config=config)
+    out_key = ("out",) + out_coords_tuple  # array name is ignored by key_function
+    in_keys = config.key_function(out_key)
+    for in_key in in_keys:
+        arg = map_nested(get_chunk_config, in_key)
+        args.append(arg)
+
+    return config.function(*args)
+
+
 def key_to_slices(
     key: Tuple[int, ...], arr: T_ZarrArray, chunks: Optional[T_Chunks] = None
 ) -> Tuple[slice, ...]: