From 0d953e97bf35b9a1d0de49b7ccd1a9b642211405 Mon Sep 17 00:00:00 2001 From: devsjc <47188100+devsjc@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:30:31 +0100 Subject: [PATCH] fix(sat): Prevent error with existing zarr coord store (#122) * fix(sat): Prevent error with existing zarr coord store * fix(sat-etl): Fix removal of downloaded scans --- containers/sat/download_process_sat.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/containers/sat/download_process_sat.py b/containers/sat/download_process_sat.py index 396419b..19da70f 100644 --- a/containers/sat/download_process_sat.py +++ b/containers/sat/download_process_sat.py @@ -541,7 +541,7 @@ def _rewrite_zarr_times(output_name: str) -> None: # Need to remove these encodings to avoid chunking del ds.time.encoding["chunks"] del ds.time.encoding["preferred_chunks"] - ds.to_zarr(f"{output_name.split('.zarr')[0]}_coord.zarr", consolidated=True) + ds.to_zarr(f"{output_name.split('.zarr')[0]}_coord.zarr", consolidated=True, mode="w") # Remove current time ones shutil.rmtree(f"{output_name}/time/") # Add new time ones @@ -627,15 +627,14 @@ def run(args: argparse.Namespace) -> None: if len(scan_times) > cpu_count(): log.debug(f"Concurrency: {cpu_count()}") pool = Pool(max(cpu_count(), 10)) # EUMDAC only allows for 10 concurrent requests - raw_paths = pool.starmap( + results: list[list[pathlib.Path]] = pool.starmap( download_scans, [(sat_config, folder, scan_time, token) for scan_time in scan_times], ) pool.close() pool.join() - raw_paths = list(itertools.chain(raw_paths)) + raw_paths.extend(list(itertools.chain(*results))) else: - raw_paths = [] for scan_time in scan_times: result: list[pathlib.Path] = download_scans(sat_config, folder, scan_time, token) if len(result) > 0: