From 1d7df04d750a31ab2f0615a0a3d189d4030eb436 Mon Sep 17 00:00:00 2001
From: Philip Yoon <philip.j.yoon@jpl.nasa.gov>
Date: Tue, 13 Aug 2024 14:20:40 -0700
Subject: [PATCH] #924: Fixed reprocessing mode query using date range and
 frame id

---
 data_subscriber/cslc/cslc_query.py               | 13 ++++++-------
 .../cslc_query_reproc_dates_frameid_k4_test.json | 16 ++++++++++++++++
 .../cslc_query_reproc_dates_k2_test.json         |  2 +-
 tests/scenarios/cslc_query_test.py               |  2 ++
 4 files changed, 25 insertions(+), 8 deletions(-)
 create mode 100644 tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json

diff --git a/data_subscriber/cslc/cslc_query.py b/data_subscriber/cslc/cslc_query.py
index 297ae332..c31e4e29 100644
--- a/data_subscriber/cslc/cslc_query.py
+++ b/data_subscriber/cslc/cslc_query.py
@@ -396,10 +396,6 @@ def query_cmr(self, args, token, cmr, settings, timerange, now):
             if args.native_id is not None:
                 all_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, args.native_id)
 
-            # Query by frame range and date range. Both must exist.
-            elif self.args.frame_id is not None and args.start_date is not None and args.end_date is not None:
-                all_granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange)
-
             # Reprocessing by date range is a two-step process:
             # 1) Query CMR for all CSLC files in the date range specified and create list of granules with unique frame_ids
             # 2) Process each granule as if they were passed in as native_id
@@ -407,7 +403,10 @@ def query_cmr(self, args, token, cmr, settings, timerange, now):
                 all_granules = []
 
                 # First get all CSLC files in the range specified
-                granules = asyncio.run(async_query_cmr(args, token, cmr, settings, timerange, now))
+                if self.args.frame_id is not None:
+                    granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange)
+                else:
+                    granules = asyncio.run(async_query_cmr(args, token, cmr, settings, timerange, now))
 
                 # Then create a unique set of frame_ids that we need to query for
                 frame_id_map = defaultdict(str)
@@ -419,9 +418,9 @@ def query_cmr(self, args, token, cmr, settings, timerange, now):
                     new_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, native_id)
                     all_granules.extend(new_granules)
             else:
-                raise Exception("Reprocessing mode requires 1) a native_id 2) frame range and date range or 3) a date range to be specified.")
+                raise Exception("Reprocessing mode requires either a native_id or a date range to be specified.")
 
-        else:
+        else: # Forward processing
             if self.args.frame_id is not None:
                 all_granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange)
             else:
diff --git a/tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json b/tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json
new file mode 100644
index 00000000..5fd2351b
--- /dev/null
+++ b/tests/scenarios/cslc_query_reproc_dates_frameid_k4_test.json
@@ -0,0 +1,16 @@
+{ "description": "This file specifies expected files to be downloaded in a DISP-S1 reprocessing scenario",
+  "comment": "Reprocessing by date range and frame id",
+  "processing_mode": "reprocessing",
+  "param_type": "date_range",
+  "frame_id": 36540,
+  "k": 4,
+  "m": 1,
+  "validation_data": {
+    "2024-04-29T04:23:22Z, 2024-04-29T04:24:22Z": {
+      "f36540_a438": 27,
+      "f36540_a426": 27,
+      "f36540_a414": 27,
+      "f36540_a402": 27
+    }
+  }
+}
diff --git a/tests/scenarios/cslc_query_reproc_dates_k2_test.json b/tests/scenarios/cslc_query_reproc_dates_k2_test.json
index 6bcb83c9..d1cbf154 100644
--- a/tests/scenarios/cslc_query_reproc_dates_k2_test.json
+++ b/tests/scenarios/cslc_query_reproc_dates_k2_test.json
@@ -6,7 +6,7 @@
   "m": 1,
   "validation_data": {
     "2023-05-01T08:00:00Z, 2023-05-01T08:01:00Z": {},
-    "2023-12-01T07:00:00Z, 2023-12-01T07:10:00Z": {
+    "2024-04-29T04:23:22Z, 2024-04-29T04:24:22Z": {
       "f26694_a449": 27,
       "f26694_a448": 27,
       "f26693_a449": 27,
diff --git a/tests/scenarios/cslc_query_test.py b/tests/scenarios/cslc_query_test.py
index 0cfdf027..92867149 100644
--- a/tests/scenarios/cslc_query_test.py
+++ b/tests/scenarios/cslc_query_test.py
@@ -146,6 +146,8 @@ def run_query(args, authorization):
                 start_date = date_range.split(",")[0].strip()
                 end_date = date_range.split(",")[1].strip()
                 current_args = query_arguments + [f"--start-date={start_date}", f"--end-date={end_date}",  f"--job-queue={job_queue[proc_mode]}"]
+                if  "frame_id" in j:
+                    current_args.append(f"--frame-id={j['frame_id']}")
                 query_and_validate(current_args, date_range, validation_data)
 
     elif (proc_mode == "historical"):