Skip to content

Commit

Permalink
docstring cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
danielfromearth committed Dec 11, 2024
1 parent 40049e0 commit f388a00
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 57 deletions.
4 changes: 2 additions & 2 deletions ncompare/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def _cli(args: Optional[Sequence[str]]) -> argparse.Namespace:
Parameters
----------
args : None or list[str]
if None, then argparse will use sys.argv[1:]
args
if None, then argparse will use `sys.argv[1:]`
"""
parser = argparse.ArgumentParser(
description="Compare the variables contained within two different NetCDF datasets"
Expand Down
127 changes: 85 additions & 42 deletions ncompare/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,29 +63,29 @@ def compare(
file_xlsx: Union[str, Path] = "",
column_widths: Optional[tuple[Union[int, str], Union[int, str], Union[int, str]]] = None,
) -> None:
"""Compare the variables contained within two different NetCDF datasets.
"""Compare the variables contained within two different netCDF datasets.
Parameters
----------
nc_a : str
filepath to NetCDF4
nc_b : str
filepath to NetCDF4
only_diffs : bool, optional
nc_a
filepath to the first netCDF
nc_b
filepath to the second netCDF
only_diffs
Whether to show only the variables/attributes that are different between the two files
no_color : bool, default False
no_color
Turns off the use of ANSI escape character sequences for producing colored terminal text
show_chunks : bool, default False
show_chunks
Whether to include data chunk sizes in the displayed comparison of variables
show_attributes : bool, default False
show_attributes
Whether to include variable attributes in the displayed comparison of variables
file_text : str
file_text
filepath destination to save captured text output as a TXT file.
file_csv : str
file_csv
filepath destination to save comparison output as comma-separated values (CSV).
file_xlsx : str
file_xlsx
filepath destination to save comparison output as an Excel workbook.
column_widths : tuple[int | str, int | str, int | str], optional
column_widths
the width in number of characters for each column of the comparison table.
Returns
Expand Down Expand Up @@ -138,15 +138,20 @@ def run_through_comparisons(
show_chunks: bool,
show_attributes: bool,
) -> None:
"""Execute a series of comparisons between two NetCDF files.
"""Execute a series of comparisons between two netCDF files.
Parameters
----------
out
instance of Outputter
nc_a
path to the first netCDF file
nc_b
path to the second netCDF file
show_chunks
whether to include data chunk sizes in the displayed comparison of variables
show_attributes
whether to include variable attributes in the displayed comparison of variables
"""
# Show the dimensions of each file and evaluate differences.
out.print(Fore.LIGHTBLUE_EX + "\nRoot-level Dimensions:", add_to_history=True)
Expand Down Expand Up @@ -176,10 +181,14 @@ def walk_common_groups_tree(
Parameters
----------
top_a_name : str
top_a : netCDF4.Dataset or netCDF4.Group
top_b_name : str
top_b : netCDF4.Dataset or netCDF4.Group
top_a_name
name of the first group or dataset
top_a
the first group or dataset
top_b_name
name of the second group or dataset
top_b
the second group or dataset
Yields
------
Expand Down Expand Up @@ -231,7 +240,32 @@ def compare_two_nc_files(
show_chunks: bool = False,
show_attributes: bool = False,
) -> tuple[int, int, int]:
"""Go through all groups and all variables, and show them side by side - whether they align and where they don't."""
"""Go through all groups and all variables, and show them side by side,
highlighting whether they align and where they don't.
Parameters
----------
out
instance of Outputter
nc_one
path to the first dataset
nc_two
path to the second dataset
show_chunks
whether to include chunk sizes alongside variables
show_attributes
whether to include variable attributes
Returns
-------
tuple
int
number of entries only present in the first (left) dataset
int
number of entries only present in the second (right) dataset
int
number of entries present in both the first (left) and second (right) datasets
"""
out.side_by_side(" ", "File A", "File B", force_display_even_if_same=True)

num_var_diffs = {"left": 0, "right": 0, "both": 0}
Expand Down Expand Up @@ -296,6 +330,7 @@ def _print_group_details_side_by_side(
show_attributes: bool,
show_chunks: bool,
) -> None:
"""Align and display group details side by side."""
out.side_by_side(
" ",
" ",
Expand Down Expand Up @@ -353,24 +388,26 @@ def _print_var_properties_side_by_side(
v_b: VarProperties,
show_chunks: bool = False,
show_attributes: bool = False,
):
# Gather all variable property pairs first, before printing, so we can decide whether to highlight the variable header
) -> None:
"""Align and display variable properties side by side."""
# Gather all variable property pairs first, before printing,
# so we can decide whether to highlight the variable header.
pairs_to_check_and_show = [
(v_a.dtype, v_b.dtype),
(v_a.shape, v_b.shape),
]
if show_chunks:
pairs_to_check_and_show.append((v_a.chunking, v_b.chunking))
if show_attributes:
for attr_a_key, attr_a, attr_b_key, attr_b in get_and_check_variable_attributes(v_a, v_b):
# Check whether attr_a_key is empty, because it might be if the variable doesn't exist in File A.
for attr_a_key, attr_a, attr_b_key, attr_b in _get_and_check_variable_attributes(v_a, v_b):
# Check whether attr_a_key is empty,
# because it might be if the variable doesn't exist in File A.
pairs_to_check_and_show.append((attr_a, attr_b))
# Scale Factor
scale_factor_pair = get_and_check_variable_scale_factor(v_a, v_b)
scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b)
if scale_factor_pair:
pairs_to_check_and_show.append((scale_factor_pair[0], scale_factor_pair[1]))

# print(f"pairs_to_check_and_show === {pairs_to_check_and_show}")
there_is_a_difference = False
for pair in pairs_to_check_and_show:
if pair[0] != pair[1]:
Expand All @@ -382,7 +419,7 @@ def _print_var_properties_side_by_side(
# if there_is_a_difference:
# header_color = Fore.RED

# If all attributes are the same, and keep-only-diffs is set -> DONT print
# If all attributes are the same, and keep-only-diffs is set -> DON'T print
# If all attributes are the same, and keep-only-diffs is NOT set -> print
# If some attributes are different -> always print, regardless of keep-only-diffs
if there_is_a_difference or (not out.keep_only_diffs):
Expand All @@ -403,8 +440,9 @@ def _print_var_properties_side_by_side(
out.side_by_side("chunksize:", v_a.chunking, v_b.chunking, highlight_diff=True)
# Attributes
if show_attributes:
for attr_a_key, attr_a, attr_b_key, attr_b in get_and_check_variable_attributes(v_a, v_b):
# Check whether attr_a_key is empty, because it might be if the variable doesn't exist in File A.
for attr_a_key, attr_a, attr_b_key, attr_b in _get_and_check_variable_attributes(v_a, v_b):
# Check whether attr_a_key is empty,
# because it might be if the variable doesn't exist in File A.
out.side_by_side(
f"{attr_a_key if attr_a_key else attr_b_key}:",
attr_a,
Expand All @@ -413,12 +451,15 @@ def _print_var_properties_side_by_side(
)

# Scale Factor
scale_factor_pair = get_and_check_variable_scale_factor(v_a, v_b)
scale_factor_pair = _get_and_check_variable_scale_factor(v_a, v_b)
if scale_factor_pair:
out.side_by_side("sf:", scale_factor_pair[0], scale_factor_pair[1], highlight_diff=True)


def get_and_check_variable_scale_factor(v_a, v_b) -> Union[None, tuple[str, str]]:
def _get_and_check_variable_scale_factor(
v_a: VarProperties, v_b: VarProperties
) -> Union[None, tuple[str, str]]:
"""Get a string representation of the scale factor for two variables."""
if getattr(v_a.variable, "scale_factor", None):
sf_a = v_a.variable.scale_factor
else:
Expand All @@ -433,7 +474,10 @@ def get_and_check_variable_scale_factor(v_a, v_b) -> Union[None, tuple[str, str]
return None


def get_and_check_variable_attributes(v_a, v_b):
def _get_and_check_variable_attributes(
v_a: VarProperties, v_b: VarProperties
) -> Iterator[tuple[str, str, str, str]]:
"""Go through and yield each attribute pair for two variables."""
# Get the name of attributes if they exist
attrs_a_names = []
if v_a.attributes:
Expand All @@ -453,20 +497,14 @@ def _var_properties(group: Union[netCDF4.Dataset, netCDF4.Group], varname: str)
Parameters
----------
group : `netCDF4.Dataset` or netCDF4.Group object
varname : str
group
a dataset or group of variables
varname
the name of the variable
Returns
-------
netCDF4.Variable
str
dtype of variable values
tuple
shape of variable
tuple
chunking
dict
any other attributes for this variable
VarProperties
"""
if varname:
the_variable = group.variables[varname]
Expand All @@ -493,6 +531,7 @@ def _var_properties(group: Union[netCDF4.Dataset, netCDF4.Group], varname: str)


def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) -> str:
"""Get a string representation of the attribute value."""
if attribute_key and (attribute_key in varprops.attributes):
attr = varprops.attributes[attribute_key]
if isinstance(attr, Iterable) and not isinstance(attr, (str, float)):
Expand All @@ -508,6 +547,7 @@ def _get_attribute_value_as_str(varprops: VarProperties, attribute_key: str) ->


def _get_vars(nc_filepath: Union[str, Path], groupname: str) -> list:
"""Get a list of variables from a netCDF group."""
try:
grp = xr.open_dataset(nc_filepath, backend_kwargs={"group": groupname})
except OSError as err:
Expand All @@ -519,12 +559,15 @@ def _get_vars(nc_filepath: Union[str, Path], groupname: str) -> list:


def _get_groups(nc_filepath: Union[str, Path]) -> list:
    """Get a list of group names from a netCDF.

    Parameters
    ----------
    nc_filepath
        path to the netCDF file to open

    Returns
    -------
    list
        the keys of the opened dataset's ``groups`` mapping
    """
    # The list is materialized inside the context manager, so the result
    # does not depend on the dataset after it has been closed.
    with netCDF4.Dataset(nc_filepath) as nc_dataset:
        return list(nc_dataset.groups.keys())


def _get_dims(nc_filepath: Union[str, Path]) -> list:
"""Get a list of dimensions from a netCDF."""

def __get_dim_list(decode_times=True):
with xr.open_dataset(nc_filepath, decode_times=decode_times) as dataset:
return list(dataset.sizes.items())
Expand Down
Loading

0 comments on commit f388a00

Please sign in to comment.