nsidc · trey-stafford · Oct 16, 2024 · Aug 28, 2024 · Aug 28, 2024 · Oct 15, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,7 @@
 - `transform_itrf` will calculate plate for source ITRF if not given with
   `target_epoch`.
 - Add support for ILATM1B v2 and BLATM1B v1.
+- Add support for ILVIS2.
 
 # v0.2.0
 

diff --git a/src/nsidc/iceflow/data/ilvis2.py b/src/nsidc/iceflow/data/ilvis2.py
@@ -0,0 +1,217 @@
+from __future__ import annotations
+
+import datetime as dt
+import re
+from collections import namedtuple
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import pandera as pa
+
+from nsidc.iceflow.data.models import ILVIS2DataFrame
+
+# The user guide indicates ILVIS2 data uses ITRF2000 as a reference frame:
+# https://nsidc.org/sites/default/files/documents/user-guide/ilvis2-v001-userguide.pdf
+ILVIS2_ITRF = "ITRF2000"
+
+Field = namedtuple("Field", ["name", "type", "scale_factor"])
+
+"""
+See: https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS104.html
+
+Note: The LVIS site (above) and NSIDC data files use different names
+for the fields. The list of field tuples below matches the
+documentation on the LVIS site. This is done to simplify the code and
+ease the mental mapping of the v1.0.4 and v2.0.2b fields to the
+database.  The mapping between the names used below (same as LVIS
+docs) and the field names used in the NSIDC files is:
+
+
+  CLON = LONGITUDE_CENTROID
+  CLAT = LATITUDE_CENTROID
+  ZC = ELEVATION_CENTROID
+  GLON = LONGITUDE_LOW
+  GLAT = LATITUDE_LOW
+  ZG = ELEVATION_LOW
+  HLON = LONGITUDE_HIGH
+  HLAT = LATITUDE_HIGH
+  ZH = ELEVATION_HIGH
+
+"""
+ILVIS2_V104_FIELDS = [
+    Field("LFID", None, np.uint64),
+    Field("SHOTNUMBER", None, np.uint64),
+    Field("TIME", 10**6, np.uint64),
+    Field("CLON", 10**6, np.int64),
+    Field("CLAT", 10**6, np.int64),
+    Field("ZC", 10**6, np.int64),
+    Field("GLON", None, np.float64),
+    Field("GLAT", None, np.float64),
+    Field("ZG", None, np.float64),
+    Field("HLON", 10**6, np.int64),
+    Field("HLAT", 10**6, np.int64),
+    Field("ZH", 10**6, np.int64),
+]
+
+"""
+See: https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS202.html
+Note: Version 2.0.2b was used for Greenland 2017
+"""
+ILVIS2_V202b_FIELDS = [
+    Field("LFID", None, np.uint64),
+    Field("SHOTNUMBER", None, np.uint64),
+    Field("TIME", 10**6, np.uint64),
+    Field("GLON", None, np.float64),
+    Field("GLAT", None, np.float64),
+    Field("ZG", None, np.float64),
+    Field("HLON", 10**6, np.int64),
+    Field("HLAT", 10**6, np.int64),
+    Field("ZH", 10**6, np.int64),
+    Field("TLON", 10**6, np.int64),
+    Field("TLAT", 10**6, np.int64),
+    Field("ZT", 10**6, np.int64),
+    Field("RH10", 10**3, np.int64),
+    Field("RH15", 10**3, np.int64),
+    Field("RH20", 10**3, np.int64),
+    Field("RH25", 10**3, np.int64),
+    Field("RH30", 10**3, np.int64),
+    Field("RH35", 10**3, np.int64),
+    Field("RH40", 10**3, np.int64),
+    Field("RH45", 10**3, np.int64),
+    Field("RH50", 10**3, np.int64),
+    Field("RH55", 10**3, np.int64),
+    Field("RH60", 10**3, np.int64),
+    Field("RH65", 10**3, np.int64),
+    Field("RH70", 10**3, np.int64),
+    Field("RH75", 10**3, np.int64),
+    Field("RH80", 10**3, np.int64),
+    Field("RH85", 10**3, np.int64),
+    Field("RH90", 10**3, np.int64),
+    Field("RH95", 10**3, np.int64),
+    Field("RH96", 10**3, np.int64),
+    Field("RH97", 10**3, np.int64),
+    Field("RH98", 10**3, np.int64),
+    Field("RH99", 10**3, np.int64),
+    Field("RH100", 10**3, np.int64),
+    Field("AZIMUTH", 10**3, np.int64),
+    Field("INCIDENT_ANGLE", 10**3, np.int64),
+    Field("RANGE", 10**3, np.int64),
+    Field("COMPLEXITY", 10**3, np.int64),
+    Field("CHANNEL_ZT", None, np.uint8),
+    Field("CHANNEL_ZG", None, np.uint8),
+    Field("CHANNEL_RH", None, np.uint8),
+]
+
+"""Names of fields that contain longitude values. The values in these
+fields will be shifted to the range [-180,180)."""
+ILVIS2_LONGITUDE_FIELD_NAMES = ["CLON", "GLON", "HLON", "TLON"]
+
+
+def _file_date(filename: str) -> dt.date:
+    """Return the datetime from the ILVIS2 filename."""
+    return dt.datetime.strptime(filename[9:18], "%Y_%m%d").date()
+
+
+def _shift_lon(lon):
+    """Shift longitude values from [0,360] to [-180,180]"""
+    if lon >= 180.0:
+        return lon - 360.0
+    return lon
+
+
+def _add_utc_datetime(df: pd.DataFrame, file_date) -> pd.DataFrame:
+    """Add a `utc_datetime` column to the DataFrame, with values
+    calculated from the given date and the `TIME` values in the
+    dataset (seconds of the day).
+    """
+    df["utc_datetime"] = pd.to_datetime(file_date)
+    df["utc_datetime"] = df["utc_datetime"] + pd.to_timedelta(df["TIME"], unit="s")
+
+    return df
+
+
+def _scale_and_convert(df: pd.DataFrame, fields):
+    """For any column in the list of Field named tuples, optionally scale
+    the corresponding column in the DataFrame and convert the column
+    type.
+    """
+    for name, scale_factor, dtype in fields:
+        if scale_factor is not None:
+            df.loc[:, name] *= scale_factor
+        if dtype != df.dtypes[name]:
+            df[name] = df[name].astype(dtype)
+
+    return df
+
+
+def _ilvis2_data(filepath: Path, file_date: dt.date, fields) -> pd.DataFrame:
+    """Return an ILVIS2 file DataFrame, performing all necessary
+    conversions / augmentation on the data.
+    """
+    field_names = [name for name, _, _ in fields]
+    df = pd.read_csv(filepath, sep=r"\s+", comment="#", names=field_names)
+
+    for col in ILVIS2_LONGITUDE_FIELD_NAMES:
+        if col in df.columns:
+            df[col] = df[col].apply(_shift_lon)
+
+    df = _add_utc_datetime(df, file_date)
+
+    df = _scale_and_convert(df, fields)
+
+    return df
+
+
+@pa.check_types()
+def ilvis2_data(filepath: Path) -> ILVIS2DataFrame:
+    """Return the ilvis2 data given a filepath.
+
+    Parameters
+    ----------
+    fn
+        The filename (str) to read. This can be a file in the LVIS2
+        v1.0.4 or v2.0.2b format.
+        https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS104.html
+        https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS202.html
+
+    Returns
+    -------
+    data
+        The ilvis2 (pandas.DataFrame) data.
+
+    """
+    filename = filepath.name
+    match = re.search(r"_\D{2}(\d{4})_", filename)
+    if not match:
+        err = f"Failed to recognize {filename} as ILVIS2 data."
+        raise RuntimeError(err)
+
+    year = int(match.group(1))
+
+    if year < 2017:
+        the_fields = ILVIS2_V104_FIELDS
+    else:
+        the_fields = ILVIS2_V202b_FIELDS
+
+    file_date = _file_date(filename)
+
+    data = _ilvis2_data(filepath, file_date, the_fields)
+    data["ITRF"] = ILVIS2_ITRF
+
+    # TODO: this data does not have a single set of latitude, longitude, and
+    # elevation fields. Instead, it has e.g., "CLON" and "GLON" and "HLON". In
+    # the original `valkyrie` service code, it looks like "GLON", "GLAT", and
+    # "ZG" cols were used as for the points stored in the valkyrie database and
+    # transformed by the ITRF transformation service. Ideally, we support
+    # consistent transformation of the ITRF across all lat/lon/elev
+    # fields. E.g., a user may be more interested in looking at the "CLON",
+    # "CLAT", and "ZC" fields instead.
+    # For now, we will replicate the behavior of `valkyrie`:
+    data["latitude"] = data["GLAT"]
+    data["longitude"] = data["GLON"]
+    data["elevation"] = data["ZG"]
+
+    data = data.set_index("utc_datetime")
+
+    return ILVIS2DataFrame(data)
diff --git a/src/nsidc/iceflow/data/models.py b/src/nsidc/iceflow/data/models.py
@@ -37,11 +37,75 @@ class ATM1BSchema(CommonDataColumnsSchema):
     pulse_width: Series[float] = pa.Field(nullable=True, coerce=True)
 
 
+# Note/TODO: the ILVIS2 data contain multiple sets of lat/lon/elev. The common
+# schema assumes one set of lat/lon/elev which is used for the ITRF
+# transformation code.
+class ILVIS2Schema(CommonDataColumnsSchema):
+    # Common columns
+    LFID: Series[float] = pa.Field(nullable=True, coerce=True)
+    SHOTNUMBER: Series[float] = pa.Field(nullable=True, coerce=True)
+    TIME: Series[float] = pa.Field(nullable=True, coerce=True)
+    ZG: Series[float] = pa.Field(nullable=True, coerce=True)
+    GLAT: Series[float] = pa.Field(nullable=True, coerce=True)
+    GLON: Series[float] = pa.Field(nullable=True, coerce=True)
+    HLAT: Series[float] = pa.Field(nullable=True, coerce=True)
+    HLON: Series[float] = pa.Field(nullable=True, coerce=True)
+    ZH: Series[float] = pa.Field(nullable=True, coerce=True)
+
+    # V104-specific
+    CLAT: Series[float] = pa.Field(nullable=True, coerce=True)
+    CLON: Series[float] = pa.Field(nullable=True, coerce=True)
+    ZC: Series[float] = pa.Field(nullable=True, coerce=True)
+
+    # V202B-specific
+    AZIMUTH: Series[float] = pa.Field(nullable=True, coerce=True)
+    CHANNEL_RH: Series[float] = pa.Field(nullable=True, coerce=True)
+    CHANNEL_ZG: Series[float] = pa.Field(nullable=True, coerce=True)
+    CHANNEL_ZT: Series[float] = pa.Field(nullable=True, coerce=True)
+    COMPLEXITY: Series[float] = pa.Field(nullable=True, coerce=True)
+    INCIDENT_ANGLE: Series[float] = pa.Field(nullable=True, coerce=True)
+    RANGE: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH10: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH15: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH20: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH25: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH30: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH35: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH40: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH45: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH50: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH55: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH60: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH65: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH70: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH75: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH80: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH85: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH90: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH95: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH96: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH97: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH98: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH99: Series[float] = pa.Field(nullable=True, coerce=True)
+    RH100: Series[float] = pa.Field(nullable=True, coerce=True)
+    TLAT: Series[float] = pa.Field(nullable=True, coerce=True)
+    TLON: Series[float] = pa.Field(nullable=True, coerce=True)
+    ZT: Series[float] = pa.Field(nullable=True, coerce=True)
+
+    class Config:
+        # This ensures all columns are present, regardless of the date. Granules
+        # before 2017 use the V104 fields and anything after uses the v202b
+        # fields. The data type for all values must be `float` because the null
+        # value is `np.nan` - a float.
+        add_missing_columns = True
+
+
 IceflowDataFrame = DataFrame[CommonDataColumnsSchema]
 ATM1BDataFrame = DataFrame[ATM1BSchema]
+ILVIS2DataFrame = DataFrame[ILVIS2Schema]
 
 ATM1BShortName = Literal["ILATM1B", "BLATM1B"]
-DatasetShortName = ATM1BShortName
+DatasetShortName = ATM1BShortName | Literal["ILVIS2"]
 
 
 class Dataset(pydantic.BaseModel):
@@ -64,6 +128,11 @@ class BLATM1BDataset(ATM1BDataset):
     version: Literal["1"] = "1"
 
 
+class ILVIS2Dataset(Dataset):
+    short_name: DatasetShortName = "ILVIS2"
+    version: Literal["1"] = "1"
+
+
 class BoundingBox(pydantic.BaseModel):
     lower_left_lon: float
     lower_left_lat: float

diff --git a/src/nsidc/iceflow/data/read.py b/src/nsidc/iceflow/data/read.py
@@ -4,20 +4,30 @@
 from pathlib import Path
 
 from nsidc.iceflow.data.atm1b import atm1b_data
+from nsidc.iceflow.data.ilvis2 import ilvis2_data
 from nsidc.iceflow.data.models import (
     ATM1BDataFrame,
     ATM1BDataset,
     Dataset,
     IceflowDataFrame,
+    ILVIS2DataFrame,
+    ILVIS2Dataset,
 )
 
 
 @functools.singledispatch
-def read_data(dataset: Dataset, _filepath: Path) -> IceflowDataFrame | ATM1BDataFrame:
+def read_data(
+    dataset: Dataset, _filepath: Path
+) -> IceflowDataFrame | ATM1BDataFrame | ILVIS2DataFrame:
     msg = f"{dataset=} not recognized."
     raise RuntimeError(msg)
 
 
 @read_data.register
 def _(_dataset: ATM1BDataset, filepath: Path) -> ATM1BDataFrame:
     return atm1b_data(filepath)
+
+
+@read_data.register
+def _(_dataset: ILVIS2Dataset, filepath: Path) -> ILVIS2DataFrame:
+    return ilvis2_data(filepath)
diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py
@@ -21,6 +21,7 @@
     DatasetSearchParameters,
     IceflowDataFrame,
     ILATM1BDataset,
+    ILVIS2Dataset,
 )
 
 
@@ -80,3 +81,23 @@ def test_atm1b_blatm1b(tmp_path):
     )
 
     assert (results_blamt1b_v2_2014.ITRF == "ITRF2000").all()
+
+
+def test_ivlis2(tmp_path):
+    common_bounding_box = BoundingBox(
+        lower_left_lon=-120.0,
+        lower_left_lat=-80.0,
+        upper_right_lon=-90.0,
+        upper_right_lat=-65.0,
+    )
+
+    results = fetch_iceflow_df(
+        dataset_search_params=DatasetSearchParameters(
+            dataset=ILVIS2Dataset(),
+            bounding_box=common_bounding_box,
+            temporal=(dt.datetime(2009, 10, 25, 15), dt.datetime(2009, 10, 25, 17)),
+        ),
+        output_dir=tmp_path,
+    )
+
+    assert (results.ITRF == "ITRF2000").all()