From 00371b4e82b04499868afb2180c1178d095af404 Mon Sep 17 00:00:00 2001 From: Philip Yoon Date: Fri, 23 Aug 2024 16:05:29 -0700 Subject: [PATCH] #964: Partial fix: when both a date range and frame_id are specified, process just that one frame instead of multiple frames --- data_subscriber/cslc/cslc_query.py | 18 +++++++++++------- ..._query_reproc_dates_frameid_34996_test.json | 6 ++++++ .../cslc_query_reproc_dates_k2_test.json | 6 ++---- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/data_subscriber/cslc/cslc_query.py b/data_subscriber/cslc/cslc_query.py index 7292c3e9..32212259 100644 --- a/data_subscriber/cslc/cslc_query.py +++ b/data_subscriber/cslc/cslc_query.py @@ -397,23 +397,27 @@ def query_cmr(self, args, token, cmr, settings, timerange, now): all_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, args.native_id) # Reprocessing by date range is a two-step process: - # 1) Query CMR for all CSLC files in the date range specified and create list of granules with unique frame_ids + # 1) Query CMR for all CSLC files in the date range specified and create list of granules with unique frame_ids per acq day index # 2) Process each granule as if they were passed in as native_id elif args.start_date is not None and args.end_date is not None: all_granules = [] + frame_id_map = defaultdict(str) # First get all CSLC files in the range specified + # If the frame id is specified, we know what it should be if self.args.frame_id is not None: granules = self.query_cmr_by_frame_and_dates(args, token, cmr, settings, now, timerange) + for granule in granules: + frame_id_map[self.args.frame_id] = granule["granule_id"] + + # If just the date range was specified, we could have up to two frame ids per burst else: granules = asyncio.run(async_query_cmr(args, token, cmr, settings, timerange, now)) + for granule in granules: + _, _, _, frame_ids = parse_cslc_native_id(granule["granule_id"], self.burst_to_frames, self.disp_burst_map_hist) + for 
frame_id in frame_ids: + frame_id_map[frame_id] = granule["granule_id"] - # Then create a unique set of frame_ids that we need to query for - frame_id_map = defaultdict(str) - for granule in granules: - _, _, _, frame_ids = parse_cslc_native_id(granule["granule_id"], self.burst_to_frames, self.disp_burst_map_hist) - for frame_id in frame_ids: - frame_id_map[frame_id] = granule["granule_id"] for frame_id, native_id in frame_id_map.items(): new_granules = self.query_cmr_by_native_id(args, token, cmr, settings, now, native_id) all_granules.extend(new_granules) diff --git a/tests/scenarios/cslc_query_reproc_dates_frameid_34996_test.json b/tests/scenarios/cslc_query_reproc_dates_frameid_34996_test.json index ddbc06a5..325ab49b 100644 --- a/tests/scenarios/cslc_query_reproc_dates_frameid_34996_test.json +++ b/tests/scenarios/cslc_query_reproc_dates_frameid_34996_test.json @@ -11,6 +11,12 @@ "f34996_a2772": 27, "f34996_a2760": 27, "f34996_a2748": 27 + }, + "2024-01-05T02:00:00Z, 2024-02-15T02:30:11Z": { + "f34996_a2784": 27, + "f34996_a2772": 27, + "f34996_a2760": 27, + "f34996_a2748": 27 } } } diff --git a/tests/scenarios/cslc_query_reproc_dates_k2_test.json b/tests/scenarios/cslc_query_reproc_dates_k2_test.json index d1cbf154..447978b2 100644 --- a/tests/scenarios/cslc_query_reproc_dates_k2_test.json +++ b/tests/scenarios/cslc_query_reproc_dates_k2_test.json @@ -7,10 +7,8 @@ "validation_data": { "2023-05-01T08:00:00Z, 2023-05-01T08:01:00Z": {}, "2024-04-29T04:23:22Z, 2024-04-29T04:24:22Z": { - "f26694_a449": 27, - "f26694_a448": 27, - "f26693_a449": 27, - "f26693_a448": 27 + "f36540_a438": 27, + "f36540_a426": 27 }, "2023-12-01T08:00:00Z, 2023-12-01T09:00:00Z": { "f26694_a449": 27,