From 819ab5315824fd755b48c0a45b61b2e5395f2725 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:33:26 -0500 Subject: [PATCH 1/7] Try windows test workflow --- .github/workflows/testing-windows.yml | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/testing-windows.yml diff --git a/.github/workflows/testing-windows.yml b/.github/workflows/testing-windows.yml new file mode 100644 index 00000000..4b218d82 --- /dev/null +++ b/.github/workflows/testing-windows.yml @@ -0,0 +1,34 @@ +# This workflow will install Python dependencies and run tests in a Windows environment. +# This is intended to catch any file-system specific issues, and so runs less +# frequently than other test suites. + +name: Windows unit test + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + + runs-on: windows-latest + strategy: + matrix: + python-version: ['3.10'] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Run unit tests with pytest + run: | + python -m pytest tests \ No newline at end of file From 1dd47bc0ffd5c965ec4414c5931617997c79ab26 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:38:17 -0500 Subject: [PATCH 2/7] non-conditional on requirements. --- .github/workflows/testing-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing-windows.yml b/.github/workflows/testing-windows.yml index 4b218d82..c27bcc58 100644 --- a/.github/workflows/testing-windows.yml +++ b/.github/workflows/testing-windows.yml @@ -28,7 +28,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -e .[dev] - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install -r requirements.txt - name: Run unit tests with pytest run: | python -m pytest tests \ No newline at end of file From 901323cade3706c627b8e841b7dbcfe3fc3eb2b3 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com> Date: Wed, 4 Dec 2024 09:45:05 -0500 Subject: [PATCH 3/7] oops --- .github/workflows/testing-windows.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/testing-windows.yml b/.github/workflows/testing-windows.yml index c27bcc58..6d63bf88 100644 --- a/.github/workflows/testing-windows.yml +++ b/.github/workflows/testing-windows.yml @@ -28,7 +28,6 @@ jobs: run: | python -m pip install --upgrade pip pip install -e .[dev] - pip install -r requirements.txt - name: Run unit tests with pytest run: | python -m pytest tests \ No newline at end of file From 721859cabf0821fdac8d620c234de6533509480a Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi Date: Thu, 5 Dec 2024 10:39:48 -0500 Subject: [PATCH 4/7] Make tests windows-friendly. --- tests/hats/io/test_paths.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/hats/io/test_paths.py b/tests/hats/io/test_paths.py index 2bd48e58..49a42608 100644 --- a/tests/hats/io/test_paths.py +++ b/tests/hats/io/test_paths.py @@ -1,5 +1,7 @@ """Test pixel path creation""" +import os + import pytest from hats.io import paths @@ -8,14 +10,14 @@ def test_pixel_directory(): """Simple case with sensical inputs""" - expected = "/foo/dataset/Norder=0/Dir=0" + expected = os.path.join(os.sep, "foo", "dataset", "Norder=0", "Dir=0") result = paths.pixel_directory("/foo", 0, 5) - assert str(result) == expected + assert str(result) == str(expected) def test_pixel_directory_number(): """Simple case with sensical inputs""" - expected = "/foo/dataset/Norder=0/Dir=0" + expected = os.path.join(os.sep, "foo", "dataset", "Norder=0", "Dir=0") result = paths.pixel_directory("/foo", pixel_order=0, pixel_number=5, directory_number=0) assert str(result) == expected @@ -37,7 +39,7 @@ def test_pixel_directory_nonint(): def test_pixel_catalog_file(): """Simple case with sensical inputs""" - expected = "/foo/dataset/Norder=0/Dir=0/Npix=5.parquet" + expected = os.path.join(os.sep, "foo", "dataset", "Norder=0", "Dir=0", "Npix=5.parquet") result = paths.pixel_catalog_file("/foo", HealpixPixel(0, 5)) assert str(result) == expected @@ -58,7 +60,10 @@ def test_pixel_catalog_file_nonint(): def test_pixel_catalog_files(): - expected = ["/foo/dataset/Norder=0/Dir=0/Npix=5.parquet", "/foo/dataset/Norder=1/Dir=0/Npix=16.parquet"] + expected = [ + os.path.join(os.sep, "foo", "dataset", "Norder=0", "Dir=0", "Npix=5.parquet"), + os.path.join(os.sep, "foo", "dataset", "Norder=1", "Dir=0", "Npix=16.parquet"), + ] result = paths.pixel_catalog_files("/foo/", [HealpixPixel(0, 5), HealpixPixel(1, 16)]) assert expected == result @@ -106,6 +111,9 @@ def test_get_healpix_from_path(): result = paths.get_healpix_from_path("/foo/dataset/Norder=5/Dir=0/Npix=34.parquet") assert result == expected + result = paths.get_healpix_from_path("C:\\foo\\dataset\\Norder=5\\Dir=0\\Npix=34.parquet") + assert result == expected + result = paths.get_healpix_from_path("Norder=5/Dir=0/Npix=34.pq") assert result == expected From 724aec6ffd0421aa0b1c663040df1ea3a251dc70 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi Date: Thu, 5 Dec 2024 10:50:06 -0500 Subject: [PATCH 5/7] Coerce frame type (windows prefers int32) --- tests/hats/io/file_io/test_file_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/hats/io/file_io/test_file_io.py b/tests/hats/io/file_io/test_file_io.py index f63814b4..10971f53 100644 --- a/tests/hats/io/file_io/test_file_io.py +++ b/tests/hats/io/file_io/test_file_io.py @@ -103,10 +103,10 @@ def test_load_csv_to_pandas_generator_encoding(tmp_path): def test_write_df_to_csv(tmp_path): - random_df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD")) + random_df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list("ABCD")).astype(int) test_file_path = tmp_path / "test.csv" write_dataframe_to_csv(random_df, test_file_path, index=False) - loaded_df = pd.read_csv(test_file_path) + loaded_df = pd.read_csv(test_file_path).astype(int) pd.testing.assert_frame_equal(loaded_df, random_df) From 9e19447e6c8c7693f60b53f3df730d849e15c95c Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi Date: Thu, 5 Dec 2024 13:59:15 -0500 Subject: [PATCH 6/7] Path updates --- tests/hats/io/test_paths.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/hats/io/test_paths.py b/tests/hats/io/test_paths.py index 49a42608..b5749535 100644 --- a/tests/hats/io/test_paths.py +++ b/tests/hats/io/test_paths.py @@ -61,10 +61,10 @@ def test_pixel_catalog_file_nonint(): def test_pixel_catalog_files(): expected = [ - os.path.join(os.sep, "foo", "dataset", "Norder=0", "Dir=0", "Npix=5.parquet"), - os.path.join(os.sep, "foo", "dataset", "Norder=1", "Dir=0", "Npix=16.parquet"), + os.path.join("foo", "dataset", "Norder=0", "Dir=0", "Npix=5.parquet"), + os.path.join("foo", "dataset", "Norder=1", "Dir=0", "Npix=16.parquet"), ] - result = paths.pixel_catalog_files("/foo/", [HealpixPixel(0, 5), HealpixPixel(1, 16)]) + result = paths.pixel_catalog_files("foo", [HealpixPixel(0, 5), HealpixPixel(1, 16)]) assert expected == result From 711a171b849260873b3a4190b96bb85a16bf1201 Mon Sep 17 00:00:00 2001 From: Melissa DeLucchi Date: Thu, 5 Dec 2024 16:12:18 -0500 Subject: [PATCH 7/7] Remove zombie method. --- benchmarks/benchmarks.py | 4 --- src/hats/io/paths.py | 49 +------------------------------------ tests/hats/io/test_paths.py | 18 -------------- 3 files changed, 1 insertion(+), 70 deletions(-) diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 97f874fa..34460e87 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -11,7 +11,6 @@ import hats.pixel_math.healpix_shim as hp from hats.catalog import Catalog, PartitionInfo, TableProperties from hats.catalog.association_catalog.partition_join_info import PartitionJoinInfo -from hats.io.paths import pixel_catalog_files from hats.pixel_math import HealpixPixel from hats.pixel_tree import PixelAlignment, align_trees from hats.pixel_tree.pixel_tree import PixelTree @@ -66,9 +65,6 @@ def time_inner_pixel_alignment(self): def time_outer_pixel_alignment(self): align_trees(self.pixel_tree_1, self.pixel_tree_2, alignment_type="outer") - def time_paths_creation(self): - pixel_catalog_files("foo/", self.pixel_list) - class MetadataSuite: """Suite that generates catalog files and benchmarks the operations on them.""" diff --git a/src/hats/io/paths.py b/src/hats/io/paths.py index f79c4d7a..6156605c 100644 --- a/src/hats/io/paths.py +++ b/src/hats/io/paths.py @@ -4,7 +4,7 @@ import re from pathlib import Path -from typing import Dict, List +from typing import Dict from urllib.parse import urlencode from fsspec.implementations.http import HTTPFileSystem @@ -90,53 +90,6 @@ def get_healpix_from_path(path: str) -> HealpixPixel: return HealpixPixel(int(order), int(pixel)) -def pixel_catalog_files( - catalog_base_dir: str | Path | UPath | None, - pixels: List[HealpixPixel], - query_params: Dict | None = None, -) -> List[UPath]: - """Create a list of path *pointers* for pixel catalog files. This will not create the directory - or files. - - The catalog file names will take the HiPS standard form of:: - - /Norder=/Dir=/Npix=.parquet - - Where the directory number is calculated using integer division as:: - - (pixel_number/10000)*10000 - - Args: - catalog_base_dir (UPath): base directory of the catalog (includes catalog name) - pixels (List[HealpixPixel]): the healpix pixels to create pointers to - query_params (dict): Params to append to URL. Ex: {'cols': ['ra', 'dec'], 'fltrs': ['r>=10', 'g<18']} - - Returns (List[str]): - A list of paths to the pixels, in the same order as the input pixel list. - """ - catalog_base_dir = get_upath(catalog_base_dir) / DATASET_DIR - fs = catalog_base_dir.fs - base_path = str(catalog_base_dir) - if not base_path.endswith(fs.sep): - base_path += fs.sep - - url_params = "" - if isinstance(fs, HTTPFileSystem) and query_params: - url_params = dict_to_query_urlparams(query_params) - - return [ - base_path - + fs.sep.join( - [ - f"{PARTITION_ORDER}={pixel.order}", - f"{PARTITION_DIR}={pixel.dir}", - f"{PARTITION_PIXEL}={pixel.pixel}.parquet" + url_params, - ] - ) - for pixel in pixels - ] - - def dict_to_query_urlparams(query_params: Dict | None = None) -> str: """Converts a dictionary to a url query parameter string diff --git a/tests/hats/io/test_paths.py b/tests/hats/io/test_paths.py index b5749535..df31d9b8 100644 --- a/tests/hats/io/test_paths.py +++ b/tests/hats/io/test_paths.py @@ -59,24 +59,6 @@ def test_pixel_catalog_file_nonint(): paths.pixel_catalog_file("/foo", "zero", "five") -def test_pixel_catalog_files(): - expected = [ - os.path.join("foo", "dataset", "Norder=0", "Dir=0", "Npix=5.parquet"), - os.path.join("foo", "dataset", "Norder=1", "Dir=0", "Npix=16.parquet"), - ] - result = paths.pixel_catalog_files("foo", [HealpixPixel(0, 5), HealpixPixel(1, 16)]) - assert expected == result - - -def test_pixel_catalog_files_w_query_params(): - expected = [ - "https://foo/dataset/Norder=0/Dir=0/Npix=5.parquet?columns=ID%2CRA%2CDEC%2Cr_auto&filters=r_auto%3C13" - ] - query_params = {"columns": ["ID", "RA", "DEC", "r_auto"], "filters": ["r_auto<13"]} - result = paths.pixel_catalog_files("https://foo", [HealpixPixel(0, 5)], query_params=query_params) - assert expected == result - - def test_dict_to_query_urlparams(): expected = "?columns=ID%2CRA%2CDEC%2Cr_auto&filters=r_auto%3C13" query_params = {"columns": ["ID", "RA", "DEC", "r_auto"], "filters": ["r_auto<13"]}