Skip to content

Commit

Permalink
Merge pull request #39 from nsidc/remove-hard-coded-atm1b-itrf-ranges
Browse files Browse the repository at this point in the history
Remove hard-coded ITRF ranges for ATM1B data
  • Loading branch information
trey-stafford authored Oct 15, 2024
2 parents c24993b + a6b2e06 commit 30f1349
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 123 deletions.
1 change: 0 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
`DatasetSearchParameters`.
- `transform_itrf` will calculate plate for source ITRF if not given with
`target_epoch`.
- If the ATM1B qfit file header does not contain any mention of the ITRF, source it from hard-coded ranges based on date.
- Add support for ILATM1B v2 and BLATM1B v1.

# v0.2.0
Expand Down
97 changes: 15 additions & 82 deletions src/nsidc/iceflow/data/atm1b.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
from __future__ import annotations

import collections
import datetime as dt
import logging
import re
from enum import Enum
from pathlib import Path
from typing import cast

import h5py
import numpy as np
import pandas as pd
import pandera as pa
from gps_timemachine.gps import leap_seconds
from loguru import logger
from numpy.typing import DTypeLike

from nsidc.iceflow.data.models import ATM1BDataFrame
Expand Down Expand Up @@ -315,99 +312,35 @@ def _qfit_file_header(filepath: Path) -> str:
raise RuntimeError(err)


# NOTE: See the docstring of `_infer_qfit_itrf` for more context about these
# hard-coded ITRF date-ranges.
# TODO: a typed-dict might be better here.
GranuleItrfRange = collections.namedtuple("GranuleItrfRange", "start end itrf")
ATM1B_GRANULE_ITRFS = [
GranuleItrfRange(dt.date(1993, 6, 23), dt.date(1996, 6, 5), "ITRF93"),
GranuleItrfRange(dt.date(1997, 4, 25), dt.date(1997, 5, 28), "ITRF94"),
GranuleItrfRange(dt.date(1998, 6, 27), dt.date(1999, 5, 25), "ITRF96"),
GranuleItrfRange(dt.date(2000, 5, 12), dt.date(2001, 5, 27), "ITRF97"),
GranuleItrfRange(dt.date(2001, 12, 18), dt.date(2002, 11, 21), "ITRF2000"),
GranuleItrfRange(dt.date(2002, 11, 22), dt.date(2002, 11, 22), "ITRF97"),
GranuleItrfRange(dt.date(2002, 11, 23), dt.date(2002, 12, 13), "ITRF2000"),
GranuleItrfRange(dt.date(2002, 12, 14), dt.date(2002, 12, 14), "ITRF97"),
GranuleItrfRange(dt.date(2002, 12, 15), dt.date(2007, 5, 11), "ITRF2000"),
GranuleItrfRange(dt.date(2007, 9, 10), dt.date(2011, 5, 16), "ITRF2005"),
GranuleItrfRange(dt.date(2011, 10, 12), dt.date(2018, 5, 1), "ITRF2008"),
]


def _qfit_itrf_from_date(date: dt.date) -> str:
"""Return a strting representing the ITRF for a qfit file based on date.
This function should only be used for looking up the ITRF for qfit files that
lack a header. It is based on a hard-coded list with no formal provenance.
"""

def _find(gdt, i, lower, upper) -> str | None:
# Binary search for the correct ITRF to make this fast. If we
# do a naive linear search, it can run up to 10x longer.
g = ATM1B_GRANULE_ITRFS[i]
if gdt < g.start:
if i > lower:
return _find(gdt, (i - lower) // 2, lower, i)
else:
return None
elif gdt <= g.end:
itrf_str = g.itrf
return cast(str, itrf_str)
elif i < (upper - 1):
return _find(gdt, (upper + i) // 2, i, upper)
else:
return None

lower, upper = (0, len(ATM1B_GRANULE_ITRFS))
i = (upper - lower) // 2
result = _find(date, i, lower, upper)
if result is None:
err_msg = f"Failed to find ITRF for {date=}"
raise RuntimeError(err_msg)

return result


def _infer_qfit_itrf(filepath: Path, date: dt.date) -> str:
def _infer_qfit_itrf(filepath: Path) -> str:
"""Takes an ILATM1B/BLATM1B qfit filepath and returns a string representing
the ITRF.
This function infers the ITRF based on the qfit file header, which is
described here:
https://nsidc.org/sites/nsidc.org/files/files/ReadMe_qfit.txt
Failing that, this function falls back to a hard-coded ITRF based on the
date.
The string we extract the ITRF from looks like this:
`./091109_aa_l12_cfm_itrf05_18may10_palm_roth_amu2`
From which we extract an ITRF of `ITRF2005` from the `_itrf05_` bit.
This function does its "best" based on prior work, and places trust in that
prior work. However, this should be more closely reviewd. See
https://github.com/nsidc/iceflow/issues/35.
According to Michael Studinger (see
https://github.com/nsidc/iceflow/issues/35#issuecomment-2408619586), This
string represents the "GPS trajectory that was used to reference the lidar
data and that has the ITRF epoch in its file name."
"""
header = _qfit_file_header(filepath)
results = re.finditer(r"itrf\d{2,4}", header)
itrfs = list({result.group() for result in results})

itrf_for_date = _qfit_itrf_from_date(date)

if len(itrfs) == 1:
itrf_from_qfit_header = _normalize_itrf_str(itrfs[0])
if itrf_for_date != itrf_from_qfit_header:
warn_msg = (
f"ITRF in qfit header for {filepath=} is inconsistent with expected ITRF for {date=}."
f" qfit header has {itrf_from_qfit_header}. Expected {itrf_for_date}."
f" Using the ITRF found in the header: {itrf_from_qfit_header}"
)
logger.warning(warn_msg)

return itrf_from_qfit_header

warn_msg = (
f"Failed to find ITRF in qfit header file for {filepath}."
f" Falling back on hard-coded ITRF for {date=}: {itrf_for_date}"
)
logger.warning(warn_msg)

return itrf_for_date
err_msg = f"Failed to extract ITRF from qfit header: {filepath}"
raise RuntimeError(err_msg)


def _extract_itrf_from_h5_file(filepath: Path) -> str:
Expand All @@ -419,11 +352,11 @@ def _extract_itrf_from_h5_file(filepath: Path) -> str:
return itrf


def extract_itrf(filepath: Path, date: dt.date) -> str:
def extract_itrf(filepath: Path) -> str:
ext = filepath.suffix

if ext == ".qi":
return _infer_qfit_itrf(filepath, date=date)
return _infer_qfit_itrf(filepath)
elif ext == ".h5":
return _extract_itrf_from_h5_file(filepath)

Expand Down Expand Up @@ -514,7 +447,7 @@ def atm1b_data(filepath: Path) -> ATM1BDataFrame:
file_date = _blatm1bv1_date(filename)
data = _atm1b_qfit_data(filepath, file_date)

itrf = extract_itrf(filepath, date=file_date)
itrf = extract_itrf(filepath)
data["ITRF"] = itrf

data = data.set_index("utc_datetime")
Expand Down
2 changes: 0 additions & 2 deletions tests/integration/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,4 @@ def test_atm1b_blatm1b(tmp_path):
output_dir=tmp_path,
)

# Untransformed ITRF is expected to be ITRF2000 based on hard-coded ranges
# (qfit header lacks ITRF info)
assert (results_blamt1b_v2_2014.ITRF == "ITRF2000").all()
38 changes: 0 additions & 38 deletions tests/unit/test_atm1b_itrf_range.py

This file was deleted.

0 comments on commit 30f1349

Please sign in to comment.