Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support ILVIS2 #40

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- `transform_itrf` will calculate plate for source ITRF if not given with
`target_epoch`.
- Add support for ILATM1B v2 and BLATM1B v1.
- Add support for ILVIS2.

# v0.2.0

Expand Down
217 changes: 217 additions & 0 deletions src/nsidc/iceflow/data/ilvis2.py
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of this code is copied directly from valkyrie, with some minor modifications.

valkyrie did not have any tests defined to ensure the logic here is sound - it would be nice to add some! Right now, the end-to-end test asserts that data can be found, downloaded, and read with this code.

Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
from __future__ import annotations

import datetime as dt
import re
from collections import namedtuple
from pathlib import Path

import numpy as np
import pandas as pd
import pandera as pa

from nsidc.iceflow.data.models import ILVIS2DataFrame

# The user guide indicates ILVIS2 data uses ITRF2000 as a reference frame:
# https://nsidc.org/sites/default/files/documents/user-guide/ilvis2-v001-userguide.pdf
ILVIS2_ITRF = "ITRF2000"

Field = namedtuple("Field", ["name", "type", "scale_factor"])

"""
See: https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS104.html

Note: The LVIS site (above) and NSIDC data files use different names
for the fields. The list of field tuples below matches the
documentation on the LVIS site. This is done to simplify the code and
ease the mental mapping of the v1.0.4 and v2.0.2b fields to the
database. The mapping between the names used below (same as LVIS
docs) and the field names used in the NSIDC files is:


CLON = LONGITUDE_CENTROID
CLAT = LATITUDE_CENTROID
ZC = ELEVATION_CENTROID
GLON = LONGITUDE_LOW
GLAT = LATITUDE_LOW
ZG = ELEVATION_LOW
HLON = LONGITUDE_HIGH
HLAT = LATITUDE_HIGH
ZH = ELEVATION_HIGH

"""
ILVIS2_V104_FIELDS = [
Field("LFID", None, np.uint64),
Field("SHOTNUMBER", None, np.uint64),
Field("TIME", 10**6, np.uint64),
Field("CLON", 10**6, np.int64),
Field("CLAT", 10**6, np.int64),
Field("ZC", 10**6, np.int64),
Field("GLON", None, np.float64),
Field("GLAT", None, np.float64),
Field("ZG", None, np.float64),
Field("HLON", 10**6, np.int64),
Field("HLAT", 10**6, np.int64),
Field("ZH", 10**6, np.int64),
]

"""
See: https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS202.html
Note: Version 2.0.2b was used for Greenland 2017
"""
ILVIS2_V202b_FIELDS = [
Field("LFID", None, np.uint64),
Field("SHOTNUMBER", None, np.uint64),
Field("TIME", 10**6, np.uint64),
Field("GLON", None, np.float64),
Field("GLAT", None, np.float64),
Field("ZG", None, np.float64),
Field("HLON", 10**6, np.int64),
Field("HLAT", 10**6, np.int64),
Field("ZH", 10**6, np.int64),
Field("TLON", 10**6, np.int64),
Field("TLAT", 10**6, np.int64),
Field("ZT", 10**6, np.int64),
Field("RH10", 10**3, np.int64),
Field("RH15", 10**3, np.int64),
Field("RH20", 10**3, np.int64),
Field("RH25", 10**3, np.int64),
Field("RH30", 10**3, np.int64),
Field("RH35", 10**3, np.int64),
Field("RH40", 10**3, np.int64),
Field("RH45", 10**3, np.int64),
Field("RH50", 10**3, np.int64),
Field("RH55", 10**3, np.int64),
Field("RH60", 10**3, np.int64),
Field("RH65", 10**3, np.int64),
Field("RH70", 10**3, np.int64),
Field("RH75", 10**3, np.int64),
Field("RH80", 10**3, np.int64),
Field("RH85", 10**3, np.int64),
Field("RH90", 10**3, np.int64),
Field("RH95", 10**3, np.int64),
Field("RH96", 10**3, np.int64),
Field("RH97", 10**3, np.int64),
Field("RH98", 10**3, np.int64),
Field("RH99", 10**3, np.int64),
Field("RH100", 10**3, np.int64),
Field("AZIMUTH", 10**3, np.int64),
Field("INCIDENT_ANGLE", 10**3, np.int64),
Field("RANGE", 10**3, np.int64),
Field("COMPLEXITY", 10**3, np.int64),
Field("CHANNEL_ZT", None, np.uint8),
Field("CHANNEL_ZG", None, np.uint8),
Field("CHANNEL_RH", None, np.uint8),
]

"""Names of fields that contain longitude values. The values in these
fields will be shifted to the range [-180,180)."""
ILVIS2_LONGITUDE_FIELD_NAMES = ["CLON", "GLON", "HLON", "TLON"]


def _file_date(filename: str) -> dt.date:
"""Return the datetime from the ILVIS2 filename."""
return dt.datetime.strptime(filename[9:18], "%Y_%m%d").date()


def _shift_lon(lon):
"""Shift longitude values from [0,360] to [-180,180]"""
if lon >= 180.0:
return lon - 360.0
return lon


def _add_utc_datetime(df: pd.DataFrame, file_date) -> pd.DataFrame:
"""Add a `utc_datetime` column to the DataFrame, with values
calculated from the given date and the `TIME` values in the
dataset (seconds of the day).
"""
df["utc_datetime"] = pd.to_datetime(file_date)
df["utc_datetime"] = df["utc_datetime"] + pd.to_timedelta(df["TIME"], unit="s")

return df


def _scale_and_convert(df: pd.DataFrame, fields):
"""For any column in the list of Field named tuples, optionally scale
the corresponding column in the DataFrame and convert the column
type.
"""
for name, scale_factor, dtype in fields:
if scale_factor is not None:
df.loc[:, name] *= scale_factor
if dtype != df.dtypes[name]:
df[name] = df[name].astype(dtype)

return df


def _ilvis2_data(filepath: Path, file_date: dt.date, fields) -> pd.DataFrame:
"""Return an ILVIS2 file DataFrame, performing all necessary
conversions / augmentation on the data.
"""
field_names = [name for name, _, _ in fields]
df = pd.read_csv(filepath, sep=r"\s+", comment="#", names=field_names)

for col in ILVIS2_LONGITUDE_FIELD_NAMES:
if col in df.columns:
df[col] = df[col].apply(_shift_lon)

df = _add_utc_datetime(df, file_date)

df = _scale_and_convert(df, fields)

return df


@pa.check_types()
def ilvis2_data(filepath: Path) -> ILVIS2DataFrame:
"""Return the ilvis2 data given a filepath.

Parameters
----------
fn
The filename (str) to read. This can be a file in the LVIS2
v1.0.4 or v2.0.2b format.
https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS104.html
https://lvis.gsfc.nasa.gov/Data/Data_Structure/DataStructure_LDS202.html

Returns
-------
data
The ilvis2 (pandas.DataFrame) data.

"""
filename = filepath.name
match = re.search(r"_\D{2}(\d{4})_", filename)
if not match:
err = f"Failed to recognize {filename} as ILVIS2 data."
raise RuntimeError(err)

year = int(match.group(1))

if year < 2017:
the_fields = ILVIS2_V104_FIELDS
else:
the_fields = ILVIS2_V202b_FIELDS

file_date = _file_date(filename)

data = _ilvis2_data(filepath, file_date, the_fields)
data["ITRF"] = ILVIS2_ITRF

# TODO: this data does not have a single set of latitude, longitude, and
# elevation fields. Instead, it has e.g., "CLON" and "GLON" and "HLON". In
# the original `valkyrie` service code, it looks like "GLON", "GLAT", and
# "ZG" cols were used as for the points stored in the valkyrie database and
# transformed by the ITRF transformation service. Ideally, we support
# consistent transformation of the ITRF across all lat/lon/elev
# fields. E.g., a user may be more interested in looking at the "CLON",
# "CLAT", and "ZC" fields instead.
# For now, we will replicate the behavior of `valkyrie`:
data["latitude"] = data["GLAT"]
data["longitude"] = data["GLON"]
data["elevation"] = data["ZG"]

data = data.set_index("utc_datetime")

return ILVIS2DataFrame(data)
71 changes: 70 additions & 1 deletion src/nsidc/iceflow/data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,75 @@ class ATM1BSchema(CommonDataColumnsSchema):
pulse_width: Series[float] = pa.Field(nullable=True, coerce=True)


# Note/TODO: the ILVIS2 data contain multiple sets of lat/lon/elev. The common
# schema assumes one set of lat/lon/elev which is used for the ITRF
# transformation code.
class ILVIS2Schema(CommonDataColumnsSchema):
# Common columns
LFID: Series[float] = pa.Field(nullable=True, coerce=True)
SHOTNUMBER: Series[float] = pa.Field(nullable=True, coerce=True)
TIME: Series[float] = pa.Field(nullable=True, coerce=True)
ZG: Series[float] = pa.Field(nullable=True, coerce=True)
GLAT: Series[float] = pa.Field(nullable=True, coerce=True)
GLON: Series[float] = pa.Field(nullable=True, coerce=True)
HLAT: Series[float] = pa.Field(nullable=True, coerce=True)
HLON: Series[float] = pa.Field(nullable=True, coerce=True)
ZH: Series[float] = pa.Field(nullable=True, coerce=True)

# V104-specific
CLAT: Series[float] = pa.Field(nullable=True, coerce=True)
CLON: Series[float] = pa.Field(nullable=True, coerce=True)
ZC: Series[float] = pa.Field(nullable=True, coerce=True)

# V202B-specific
AZIMUTH: Series[float] = pa.Field(nullable=True, coerce=True)
CHANNEL_RH: Series[float] = pa.Field(nullable=True, coerce=True)
CHANNEL_ZG: Series[float] = pa.Field(nullable=True, coerce=True)
CHANNEL_ZT: Series[float] = pa.Field(nullable=True, coerce=True)
COMPLEXITY: Series[float] = pa.Field(nullable=True, coerce=True)
INCIDENT_ANGLE: Series[float] = pa.Field(nullable=True, coerce=True)
RANGE: Series[float] = pa.Field(nullable=True, coerce=True)
RH10: Series[float] = pa.Field(nullable=True, coerce=True)
RH15: Series[float] = pa.Field(nullable=True, coerce=True)
RH20: Series[float] = pa.Field(nullable=True, coerce=True)
RH25: Series[float] = pa.Field(nullable=True, coerce=True)
RH30: Series[float] = pa.Field(nullable=True, coerce=True)
RH35: Series[float] = pa.Field(nullable=True, coerce=True)
RH40: Series[float] = pa.Field(nullable=True, coerce=True)
RH45: Series[float] = pa.Field(nullable=True, coerce=True)
RH50: Series[float] = pa.Field(nullable=True, coerce=True)
RH55: Series[float] = pa.Field(nullable=True, coerce=True)
RH60: Series[float] = pa.Field(nullable=True, coerce=True)
RH65: Series[float] = pa.Field(nullable=True, coerce=True)
RH70: Series[float] = pa.Field(nullable=True, coerce=True)
RH75: Series[float] = pa.Field(nullable=True, coerce=True)
RH80: Series[float] = pa.Field(nullable=True, coerce=True)
RH85: Series[float] = pa.Field(nullable=True, coerce=True)
RH90: Series[float] = pa.Field(nullable=True, coerce=True)
RH95: Series[float] = pa.Field(nullable=True, coerce=True)
RH96: Series[float] = pa.Field(nullable=True, coerce=True)
RH97: Series[float] = pa.Field(nullable=True, coerce=True)
RH98: Series[float] = pa.Field(nullable=True, coerce=True)
RH99: Series[float] = pa.Field(nullable=True, coerce=True)
RH100: Series[float] = pa.Field(nullable=True, coerce=True)
TLAT: Series[float] = pa.Field(nullable=True, coerce=True)
TLON: Series[float] = pa.Field(nullable=True, coerce=True)
ZT: Series[float] = pa.Field(nullable=True, coerce=True)

class Config:
# This ensures all columns are present, regardless of the date. Granules
# before 2017 use the V104 fields and anything after uses the v202b
# fields. The data type for all values must be `float` because the null
# value is `np.nan` - a float.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this class should be broken in two: one for pre-2017 and one for post-2017. The resulting df would just have the columns that are present in the source data.

The downside to this is that users would get dataframes with inconsistent columns for ILVIS2 data. At some point we want to look at harmonizing across all supported datasets, so that a user can easily compare elevations. Different datasets already have different sets of columns, so this is a bridge we'll have to cross soon

add_missing_columns = True


IceflowDataFrame = DataFrame[CommonDataColumnsSchema]
ATM1BDataFrame = DataFrame[ATM1BSchema]
ILVIS2DataFrame = DataFrame[ILVIS2Schema]

ATM1BShortName = Literal["ILATM1B", "BLATM1B"]
DatasetShortName = ATM1BShortName
DatasetShortName = ATM1BShortName | Literal["ILVIS2"]


class Dataset(pydantic.BaseModel):
Expand All @@ -64,6 +128,11 @@ class BLATM1BDataset(ATM1BDataset):
version: Literal["1"] = "1"


class ILVIS2Dataset(Dataset):
short_name: DatasetShortName = "ILVIS2"
version: Literal["1"] = "1"


class BoundingBox(pydantic.BaseModel):
lower_left_lon: float
lower_left_lat: float
Expand Down
12 changes: 11 additions & 1 deletion src/nsidc/iceflow/data/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,30 @@
from pathlib import Path

from nsidc.iceflow.data.atm1b import atm1b_data
from nsidc.iceflow.data.ilvis2 import ilvis2_data
from nsidc.iceflow.data.models import (
ATM1BDataFrame,
ATM1BDataset,
Dataset,
IceflowDataFrame,
ILVIS2DataFrame,
ILVIS2Dataset,
)


@functools.singledispatch
def read_data(dataset: Dataset, _filepath: Path) -> IceflowDataFrame | ATM1BDataFrame:
def read_data(
dataset: Dataset, _filepath: Path
) -> IceflowDataFrame | ATM1BDataFrame | ILVIS2DataFrame:
msg = f"{dataset=} not recognized."
raise RuntimeError(msg)


@read_data.register
def _(_dataset: ATM1BDataset, filepath: Path) -> ATM1BDataFrame:
return atm1b_data(filepath)


@read_data.register
def _(_dataset: ILVIS2Dataset, filepath: Path) -> ILVIS2DataFrame:
return ilvis2_data(filepath)
21 changes: 21 additions & 0 deletions tests/integration/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
DatasetSearchParameters,
IceflowDataFrame,
ILATM1BDataset,
ILVIS2Dataset,
)


Expand Down Expand Up @@ -80,3 +81,23 @@ def test_atm1b_blatm1b(tmp_path):
)

assert (results_blamt1b_v2_2014.ITRF == "ITRF2000").all()


def test_ivlis2(tmp_path):
common_bounding_box = BoundingBox(
lower_left_lon=-120.0,
lower_left_lat=-80.0,
upper_right_lon=-90.0,
upper_right_lat=-65.0,
)

results = fetch_iceflow_df(
dataset_search_params=DatasetSearchParameters(
dataset=ILVIS2Dataset(),
bounding_box=common_bounding_box,
temporal=(dt.datetime(2009, 10, 25, 15), dt.datetime(2009, 10, 25, 17)),
trey-stafford marked this conversation as resolved.
Show resolved Hide resolved
),
output_dir=tmp_path,
)

assert (results.ITRF == "ITRF2000").all()
Loading