Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files — browse the repository at this point in the history
Co-authored-by: Sandro Campos <[email protected]>
  • Loading branch information
delucchi-cmu and camposandro authored Nov 20, 2023
1 parent 4fdd21e commit 22d61c9
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/hipscat/catalog/partition_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def write_to_metadata_files(self, catalog_path: FilePointer, storage_options: di
"""Generate parquet metadata, using the known partitions.
Args:
catalog_path (str): base path for the catalog
catalog_path (FilePointer): base path for the catalog
storage_options (dict): dictionary that contains abstract filesystem credentials
"""
batches = [
Expand Down Expand Up @@ -103,7 +103,7 @@ def read_from_csv(cls, partition_info_file: FilePointer, storage_options: dict =
"""Read partition info from a `partition_info.csv` file to create an object
Args:
partition_info_file: FilePointer to the `partition_info.csv` file
partition_info_file (FilePointer): FilePointer to the `partition_info.csv` file
storage_options (dict): dictionary that contains abstract filesystem credentials
Returns:
Expand Down
8 changes: 5 additions & 3 deletions src/hipscat/io/parquet_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
from hipscat.io.file_io.file_pointer import get_fs, strip_leading_slash_for_pyarrow


def row_group_stat_single_value(row_group, stat_key):
def row_group_stat_single_value(row_group, stat_key: str):
"""Convenience method to find the min and max inside a statistics dictionary,
and raise an error if they're unequal.
Args:
row_group: dataset fragment row group
stat_key (str): column name of interest.
Returns:
The value of the specified row group statistic
"""
if stat_key not in row_group.statistics:
raise ValueError(f"row group doesn't have expected key {stat_key}")
Expand All @@ -28,7 +30,7 @@ def row_group_stat_single_value(row_group, stat_key):
return min_val


def write_parquet_metadata(catalog_path, storage_options: dict = None, output_path: str = None):
def write_parquet_metadata(catalog_path: str, storage_options: dict = None, output_path: str = None):
"""Generate parquet metadata, using the already-partitioned parquet files
for this catalog.
Expand Down Expand Up @@ -104,7 +106,7 @@ def write_parquet_metadata_for_batches(
write_parquet_metadata(temp_pq_file, storage_options=storage_options, output_path=output_path)


def read_row_group_fragments(metadata_file, storage_options: dict = None):
def read_row_group_fragments(metadata_file: str, storage_options: dict = None):
"""Generator for metadata fragment row groups in a parquet metadata file.
Args:
Expand Down
6 changes: 3 additions & 3 deletions src/hipscat/io/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def is_valid_catalog(pointer: FilePointer) -> bool:
return is_catalog_info_valid(pointer) and (is_partition_info_valid(pointer) or is_metadata_valid(pointer))


def is_catalog_info_valid(pointer: FilePointer):
def is_catalog_info_valid(pointer: FilePointer) -> bool:
"""Checks if catalog_info is valid for a given base catalog pointer
Args:
Expand All @@ -34,7 +34,7 @@ def is_catalog_info_valid(pointer: FilePointer):
return is_valid


def is_partition_info_valid(pointer: FilePointer):
def is_partition_info_valid(pointer: FilePointer) -> bool:
"""Checks if partition_info is valid for a given base catalog pointer
Args:
Expand All @@ -48,7 +48,7 @@ def is_partition_info_valid(pointer: FilePointer):
return partition_info_exists


def is_metadata_valid(pointer: FilePointer):
def is_metadata_valid(pointer: FilePointer) -> bool:
"""Checks if _metadata is valid for a given base catalog pointer
Args:
Expand Down

0 comments on commit 22d61c9

Please sign in to comment.