From 1d7df04d750a31ab2f0615a0a3d189d4030eb436 Mon Sep 17 00:00:00 2001 From: Philip Yoon Date: Tue, 13 Aug 2024 14:20:40 -0700 Subject: [PATCH] #924: Fixed reprocessing mode query using date range and frame id --- data_subscriber/cslc/cslc_query.py | 13 ++++++------- .../cslc_query_reproc_dates_frameid_k4_test.json | 16 ++++++++++++++++ .../cslc_query_reproc_dates_k2_test.json | 2 +- tests/scenarios/cslc_query_test.py | 2 ++ 4 files changed, 25 insertions(+), 8 deletions(-) create mode 100644 tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json diff --git a/data_subscriber/cslc/cslc_query.py b/data_subscriber/cslc/cslc_query.py index 297ae332..c31e4e29 100644 --- a/data_subscriber/cslc/cslc_query.py +++ b/data_subscriber/cslc/cslc_query.py @@ -396,10 +396,6 @@ def query_cmr(self, args, token, cmr, settings, timerange, now): if args.native_id is not None: all_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, args.native_id) - # Query by frame range and date range. Both must exist. 
- elif self.args.frame_id is not None and args.start_date is not None and args.end_date is not None: - all_granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange) - # Reprocessing by date range is a two-step process: # 1) Query CMR for all CSLC files in the date range specified and create list of granules with unique frame_ids # 2) Process each granule as if they were passed in as native_id @@ -407,7 +403,10 @@ def query_cmr(self, args, token, cmr, settings, timerange, now): all_granules = [] # First get all CSLC files in the range specified - granules = asyncio.run(async_query_cmr(args, token, cmr, settings, timerange, now)) + if self.args.frame_id is not None: + granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange) + else: + granules = asyncio.run(async_query_cmr(args, token, cmr, settings, timerange, now)) # Then create a unique set of frame_ids that we need to query for frame_id_map = defaultdict(str) @@ -419,9 +418,9 @@ def query_cmr(self, args, token, cmr, settings, timerange, now): new_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, native_id) all_granules.extend(new_granules) else: - raise Exception("Reprocessing mode requires 1) a native_id 2) frame range and date range or 3) a date range to be specified.") + raise Exception("Reprocessing mode requires either a native_id or a date range to be specified.") - else: + else: # Forward processing if self.args.frame_id is not None: all_granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange) else: diff --git a/tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json b/tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json new file mode 100644 index 00000000..5fd2351b --- /dev/null +++ b/tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json @@ -0,0 +1,16 @@ +{ "description": "This file specifies expected files to be downloaded in a DISP-S1 reprocessing scenario", 
"comment": "Reprocessing by date range and frame id", + "processing_mode": "reprocessing", + "param_type": "date_range", + "frame_id": 36540, + "k": 4, + "m": 1, + "validation_data": { + "2024-04-29T04:23:22Z, 2024-04-29T04:24:22Z": { + "f36540_a438": 27, + "f36540_a426": 27, + "f36540_a414": 27, + "f36540_a402": 27 + } + } +} diff --git a/tests/scenarios/cslc_query_reproc_dates_k2_test.json b/tests/scenarios/cslc_query_reproc_dates_k2_test.json index 6bcb83c9..d1cbf154 100644 --- a/tests/scenarios/cslc_query_reproc_dates_k2_test.json +++ b/tests/scenarios/cslc_query_reproc_dates_k2_test.json @@ -6,7 +6,7 @@ "m": 1, "validation_data": { "2023-05-01T08:00:00Z, 2023-05-01T08:01:00Z": {}, - "2023-12-01T07:00:00Z, 2023-12-01T07:10:00Z": { + "2024-04-29T04:23:22Z, 2024-04-29T04:24:22Z": { "f26694_a449": 27, "f26694_a448": 27, "f26693_a449": 27, diff --git a/tests/scenarios/cslc_query_test.py b/tests/scenarios/cslc_query_test.py index 0cfdf027..92867149 100644 --- a/tests/scenarios/cslc_query_test.py +++ b/tests/scenarios/cslc_query_test.py @@ -146,6 +146,8 @@ def run_query(args, authorization): start_date = date_range.split(",")[0].strip() end_date = date_range.split(",")[1].strip() current_args = query_arguments + [f"--start-date={start_date}", f"--end-date={end_date}", f"--job-queue={job_queue[proc_mode]}"] + if "frame_id" in j: + current_args.append(f"--frame-id={j['frame_id']}") query_and_validate(current_args, date_range, validation_data) elif (proc_mode == "historical"):