#964: Made partial fix where, when date range and frame_id are specified, it processes just one frame instead of multiple
nasa · Aug 23, 2024 · 00371b4 · 00371b4
1 parent 24c52c8
commit 00371b4
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 11 deletions.
diff --git a/data_subscriber/cslc/cslc_query.py b/data_subscriber/cslc/cslc_query.py
@@ -397,23 +397,27 @@ def query_cmr(self, args, token, cmr, settings, timerange, now):
                 all_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, args.native_id)
 
             # Reprocessing by date range is a two-step process:
-            # 1) Query CMR for all CSLC files in the date range specified and create list of granules with unique frame_ids
+            # 1) Query CMR for all CSLC files in the date range specified and create list of granules with unique frame_ids per acq day index
             # 2) Process each granule as if they were passed in as native_id
             elif args.start_date is not None and args.end_date is not None:
                 all_granules = []
+                frame_id_map = defaultdict(str)
 
                 # First get all CSLC files in the range specified
+                # If the frame id is specified, we know what it should be
                 if self.args.frame_id is not None:
                     granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange)
+                    for granule in granules:
+                        frame_id_map[self.args.frame_id] = granule["granule_id"]
+
+                # If just the date range was specified, we could have up to two frame ids per burst
                 else:
                     granules = asyncio.run(async_query_cmr(args, token, cmr, settings, timerange, now))
+                    for granule in granules:
+                        _, _, _, frame_ids = parse_cslc_native_id(granule["granule_id"], self.burst_to_frames, self.disp_burst_map_hist)
+                        for frame_id in frame_ids:
+                            frame_id_map[frame_id] = granule["granule_id"]
 
-                # Then create a unique set of frame_ids that we need to query for
-                frame_id_map = defaultdict(str)
-                for granule in granules:
-                    _, _, _, frame_ids = parse_cslc_native_id(granule["granule_id"], self.burst_to_frames, self.disp_burst_map_hist)
-                    for frame_id in frame_ids:
-                        frame_id_map[frame_id] = granule["granule_id"]
                 for frame_id, native_id in frame_id_map.items():
                     new_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, native_id)
                     all_granules.extend(new_granules)

diff --git a/tests/scenarios/cslc_query_reproc_dates_frameid_34996_test.json b/tests/scenarios/cslc_query_reproc_dates_frameid_34996_test.json
@@ -11,6 +11,12 @@
       "f34996_a2772": 27,
       "f34996_a2760": 27,
       "f34996_a2748": 27
+    },
+    "2024-01-05T02:00:00Z, 2024-02-15T02:30:11Z": {
+      "f34996_a2784": 27,
+      "f34996_a2772": 27,
+      "f34996_a2760": 27,
+      "f34996_a2748": 27
     }
   }
 }
diff --git a/tests/scenarios/cslc_query_reproc_dates_k2_test.json b/tests/scenarios/cslc_query_reproc_dates_k2_test.json
@@ -7,10 +7,8 @@
   "validation_data": {
     "2023-05-01T08:00:00Z, 2023-05-01T08:01:00Z": {},
     "2024-04-29T04:23:22Z, 2024-04-29T04:24:22Z": {
-      "f26694_a449": 27,
-      "f26694_a448": 27,
-      "f26693_a449": 27,
-      "f26693_a448": 27
+      "f36540_a438": 27,
+      "f36540_a426": 27
     },
     "2023-12-01T08:00:00Z, 2023-12-01T09:00:00Z": {
       "f26694_a449": 27,