From 47cb40362efd2e360badc468e7d7e29bad5fa33e Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Thu, 3 Aug 2023 08:15:31 -0400 Subject: [PATCH 1/2] Address a handful of mypy warnings. --- .../association_catalog/association_catalog.py | 2 +- .../association_catalog_info.py | 8 ++++---- src/hipscat/inspection/almanac.py | 4 ++-- src/hipscat/inspection/almanac_info.py | 14 ++++++++------ src/hipscat/io/file_io/file_io.py | 4 +++- src/hipscat/io/file_io/file_pointer.py | 4 ++-- src/hipscat/io/paths.py | 6 +++--- 7 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/hipscat/catalog/association_catalog/association_catalog.py b/src/hipscat/catalog/association_catalog/association_catalog.py index 34f70bef..7e74a2bd 100644 --- a/src/hipscat/catalog/association_catalog/association_catalog.py +++ b/src/hipscat/catalog/association_catalog/association_catalog.py @@ -53,7 +53,7 @@ def _get_partition_join_info_from_pixels( raise TypeError("join_pixels must be of type PartitionJoinInfo or DataFrame") @classmethod - def _read_args(cls, catalog_base_dir: FilePointer) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]: + def _read_args(cls, catalog_base_dir: FilePointer) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]: # type: ignore[override] args = super()._read_args(catalog_base_dir) partition_join_info_file = paths.get_partition_join_info_pointer(catalog_base_dir) partition_join_info = PartitionJoinInfo.read_from_file(partition_join_info_file) diff --git a/src/hipscat/catalog/association_catalog/association_catalog_info.py b/src/hipscat/catalog/association_catalog/association_catalog_info.py index 9835e952..66f24afa 100644 --- a/src/hipscat/catalog/association_catalog/association_catalog_info.py +++ b/src/hipscat/catalog/association_catalog/association_catalog_info.py @@ -8,16 +8,16 @@ class AssociationCatalogInfo(BaseCatalogInfo): """Catalog Info for a HiPSCat Association Catalog""" - primary_catalog: str = None + primary_catalog: str | None = None """Catalog name for the primary (left) side of association""" - primary_column: str = None + primary_column: str | None = None """Column name in the primary (left) side of join""" - join_catalog: str = None + join_catalog: str | None = None """Catalog name for the joining (right) side of association""" - join_column: str = None + join_column: str | None = None """Column name in the joining (right) side of join""" required_fields = BaseCatalogInfo.required_fields + [ diff --git a/src/hipscat/inspection/almanac.py b/src/hipscat/inspection/almanac.py index ee890e11..b4a3ea84 100644 --- a/src/hipscat/inspection/almanac.py +++ b/src/hipscat/inspection/almanac.py @@ -178,7 +178,7 @@ def _init_catalog_links(self): else: # pragma: no cover warnings.warn(f"Unknown catalog type {catalog_entry.catalog_type}") - def _get_linked_catalog(self, linked_text, namespace) -> AlmanacInfo: + def _get_linked_catalog(self, linked_text, namespace) -> AlmanacInfo | None: """Find a catalog to be used for linking catalogs within the almanac. e.g. for an association table, we will have a primary and join catalog. @@ -214,7 +214,7 @@ def _get_linked_catalog(self, linked_text, namespace) -> AlmanacInfo: return None return self.entries[resolved_name] - def catalogs(self, include_deprecated=False, types: List[str] = None): + def catalogs(self, include_deprecated=False, types: List[str] | None = None): """Get names of catalogs in the almanac, matching the provided conditions. Catalogs must meet all criteria provided in order to be returned (e.g. diff --git a/src/hipscat/inspection/almanac_info.py b/src/hipscat/inspection/almanac_info.py index 54e34728..c088a028 100644 --- a/src/hipscat/inspection/almanac_info.py +++ b/src/hipscat/inspection/almanac_info.py @@ -20,10 +20,10 @@ class AlmanacInfo: catalog_path: str = "" catalog_name: str = "" catalog_type: str = "" - primary: str = None - join: str = None - primary_link: Self = None - join_link: Self = None + primary: str | None = None + join: str | None = None + primary_link: Self | None = None + join_link: Self | None = None sources: List[Self] = field(default_factory=list) objects: List[Self] = field(default_factory=list) margins: List[Self] = field(default_factory=list) @@ -38,7 +38,7 @@ class AlmanacInfo: catalog_info: dict = field(default_factory=dict) - catalog_info_object: BaseCatalogInfo = None + catalog_info_object: BaseCatalogInfo | None = None def __post_init__(self): if len(self.catalog_info): @@ -74,7 +74,9 @@ def get_default_dir() -> str: @classmethod def from_catalog_dir(cls, catalog_base_dir: str) -> Self: """Create almanac information from the catalog information found at the target directory""" - catalog_info = catalog_info_factory.from_catalog_dir(catalog_base_dir=catalog_base_dir) + catalog_info = catalog_info_factory.from_catalog_dir( + catalog_base_dir=file_io.get_file_pointer_from_path(catalog_base_dir) + ) args = { "catalog_path": catalog_base_dir, "catalog_name": catalog_info.catalog_name, diff --git a/src/hipscat/io/file_io/file_io.py b/src/hipscat/io/file_io/file_io.py index f6a5a0a3..506cf3d2 100644 --- a/src/hipscat/io/file_io/file_io.py +++ b/src/hipscat/io/file_io/file_io.py @@ -98,7 +98,9 @@ def read_parquet_metadata(file_pointer: FilePointer, **kwargs) -> pq.FileMetaDat return pq.read_metadata(file_pointer, **kwargs) -def write_parquet_metadata(schema: Any, file_pointer: FilePointer, metadata_collector: list = None, **kwargs): +def write_parquet_metadata( + schema: Any, file_pointer: FilePointer, metadata_collector: list | None = None, **kwargs +): """Write a metadata only parquet file from a schema Args: diff --git a/src/hipscat/io/file_io/file_pointer.py b/src/hipscat/io/file_io/file_pointer.py index 1c213315..9c8c95af 100644 --- a/src/hipscat/io/file_io/file_pointer.py +++ b/src/hipscat/io/file_io/file_pointer.py @@ -49,7 +49,7 @@ def is_regular_file(pointer: FilePointer) -> bool: return os.path.isfile(pointer) -def find_files_matching_path(*paths: str) -> List[FilePointer]: +def find_files_matching_path(pointer: FilePointer, *paths: str) -> List[FilePointer]: """Find files or directories matching the provided path parts. Args: @@ -58,7 +58,7 @@ def find_files_matching_path(*paths: str) -> List[FilePointer]: Returns: New file pointers to files found matching the path """ - matcher = append_paths_to_pointer(*paths) + matcher = append_paths_to_pointer(pointer, *paths) return [get_file_pointer_from_path(x) for x in glob.glob(matcher)] diff --git a/src/hipscat/io/paths.py b/src/hipscat/io/paths.py index 17375ec8..d45a31cf 100644 --- a/src/hipscat/io/paths.py +++ b/src/hipscat/io/paths.py @@ -45,13 +45,13 @@ def pixel_directory( FilePointer directory name """ norder = int(pixel_order) - if pixel_number is None and directory_number is None: - raise ValueError("One of pixel_number or directory_number is required to create pixel directory") if directory_number is not None: ndir = directory_number - else: + elif pixel_number is not None: npix = int(pixel_number) ndir = int(npix / 10_000) * 10_000 + else: + raise ValueError("One of pixel_number or directory_number is required to create pixel directory") return create_hive_directory_name( catalog_base_dir, [ORDER_DIRECTORY_PREFIX, DIR_DIRECTORY_PREFIX], From 9675de6f8dff74afd45d09dea2c282e29d3a1924 Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Thu, 3 Aug 2023 08:45:33 -0400 Subject: [PATCH 2/2] Address python-3.8-specific issues. --- .../catalog/association_catalog/association_catalog.py | 4 +++- .../catalog/association_catalog/association_catalog_info.py | 2 ++ src/hipscat/inspection/almanac.py | 2 ++ src/hipscat/inspection/almanac_info.py | 2 ++ src/hipscat/io/file_io/file_io.py | 2 ++ 5 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/hipscat/catalog/association_catalog/association_catalog.py b/src/hipscat/catalog/association_catalog/association_catalog.py index 7e74a2bd..85b4f58c 100644 --- a/src/hipscat/catalog/association_catalog/association_catalog.py +++ b/src/hipscat/catalog/association_catalog/association_catalog.py @@ -53,7 +53,9 @@ def _get_partition_join_info_from_pixels( raise TypeError("join_pixels must be of type PartitionJoinInfo or DataFrame") @classmethod - def _read_args(cls, catalog_base_dir: FilePointer) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]: # type: ignore[override] + def _read_args( + cls, catalog_base_dir: FilePointer + ) -> Tuple[CatalogInfoClass, JoinPixelInputTypes]: # type: ignore[override] args = super()._read_args(catalog_base_dir) partition_join_info_file = paths.get_partition_join_info_pointer(catalog_base_dir) partition_join_info = PartitionJoinInfo.read_from_file(partition_join_info_file) diff --git a/src/hipscat/catalog/association_catalog/association_catalog_info.py b/src/hipscat/catalog/association_catalog/association_catalog_info.py index 66f24afa..8fd33eed 100644 --- a/src/hipscat/catalog/association_catalog/association_catalog_info.py +++ b/src/hipscat/catalog/association_catalog/association_catalog_info.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from dataclasses import dataclass from hipscat.catalog.catalog_type import CatalogType diff --git a/src/hipscat/inspection/almanac.py b/src/hipscat/inspection/almanac.py index b4a3ea84..2a9e7598 100644 --- a/src/hipscat/inspection/almanac.py +++ b/src/hipscat/inspection/almanac.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import glob import os import warnings diff --git a/src/hipscat/inspection/almanac_info.py b/src/hipscat/inspection/almanac_info.py index c088a028..8222d7ae 100644 --- a/src/hipscat/inspection/almanac_info.py +++ b/src/hipscat/inspection/almanac_info.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import dataclasses import os from dataclasses import dataclass, field diff --git a/src/hipscat/io/file_io/file_io.py b/src/hipscat/io/file_io/file_io.py index 506cf3d2..d060e697 100644 --- a/src/hipscat/io/file_io/file_io.py +++ b/src/hipscat/io/file_io/file_io.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json import os import shutil