Skip to content

Commit

Permalink
Merge pull request #91 from LSSTDESC/u/jchiang/find_UW_star_files
Browse files Browse the repository at this point in the history
find UW star files avoiding hard-coded names
  • Loading branch information
jchiang87 authored Apr 24, 2024
2 parents 7a748c7 + 4853818 commit bec65b9
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 22 deletions.
3 changes: 2 additions & 1 deletion skycatalogs/catalog_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,8 @@ def __init__(self, parts, area_partition=None, skycatalog_root=None,
_star_db = '/global/cfs/cdirs/lsst/groups/SSim/DC2/dc2_stellar_healpixel.db'
_sn_db = '/global/cfs/cdirs/lsst/groups/SSim/DC2/cosmoDC2_v1.1.4/sne_cosmoDC2_v1.1.4_MS_DDF_healpix.db'

_star_parquet = '/global/cfs/cdirs/descssim/postDC2/UW_star_catalog'
# _star_parquet = '/global/cfs/cdirs/descssim/postDC2/UW_star_catalog'
_star_parquet = '/sdf/data/rubin/shared/ops-rehearsal-3/imSim_catalogs/UW_stars'

self._galaxy_stride = galaxy_stride
if pkg_root:
Expand Down
76 changes: 55 additions & 21 deletions skycatalogs/utils/star_parquet_input.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,66 @@
# import pyarrow as pa
import os
import glob
import pyarrow.parquet as pq
import pandas as pd
import numpy as np
import numpy.ma as ma
import healpy
from lsst.meas.algorithms.htmIndexer import HtmIndexer


def _find_star_files(dirpath, pixel):
'''
Parameters
----------
dirpath string Path to directory containing input parquet files
HTM partitioning
pixel int Healpix pixel
class UWStarFiles:
_files = {} # shared state

Returns
-------
List of parquet file paths for files which might have sources belonging
to specified pixel
'''
# For now, hard-code to 3 files which have everything needed
# for joint simulation region
def __init__(self, input_dir):
self._index_files(input_dir)
self.htm_indexer = HtmIndexer(depth=20)

def _index_files(self, input_dir):
if self._files:
# The files have already been indexed.
return
files = sorted(glob.glob(os.path.join(input_dir,
'stars_chunk_*.parquet')))
for item in files:
tokens = os.path.basename(item).split('_')
imin = int(tokens[2])
imax = int(tokens[3].split('.')[0])
self._files[(imin, imax)] = item

def find_files(self, pixel, nside=32, res_factor=16):
"""
Find the UW input files that cover a given healpixel by sampling the
interior of the healpixel and computing the htm indexes at those
locations.
fnames = ['stars_chunk_8800000000000_8900000000000.parquet',
'stars_chunk_9200000000000_9300000000000.parquet',
'stars_chunk_9700000000000_9800000000000.parquet']
Parameters
----------
pixel : int
Healpixel id.
nside : int
Resolution parameter of the healpixel map.
res_factor : int
Sampling factor of the healpix to ensure coverage of the healpixel
by the UW files.
return [os.path.join(dirpath, fname) for fname in fnames]
Returns
-------
list : List of filenames.
"""
corners = np.transpose(healpy.boundaries(nside, pixel))
subpixels = healpy.query_polygon(nside*res_factor, corners)
ra, dec = [], []
for subpix in subpixels:
coord = healpy.pix2ang(nside*res_factor, subpix, lonlat=True)
ra.append(coord[0])
dec.append(coord[1])
files = set()
indices = set(self.htm_indexer.indexPoints(ra, dec))
for index in indices:
for (imin, imax), item in self._files.items():
if imin <= index <= imax:
files.add(item)
return files


def _calculate_pixel_mask(ra, dec, pixel, nside=32):
Expand All @@ -49,7 +82,7 @@ def _calculate_pixel_mask(ra, dec, pixel, nside=32):
return mask


def _star_parquet_reader(dirpath, pixel, output_arrow_schema):
def _star_parquet_reader(dirpath, pixel, output_arrow_schema, nside=32):
# Get requisite info from parquet files for sources in pixel.
# Next do renames and calculation for magnorm
to_read = ['simobjid', 'ra', 'decl', 'mura', 'mudecl', 'vrad',
Expand All @@ -59,7 +92,8 @@ def _star_parquet_reader(dirpath, pixel, output_arrow_schema):
out_fields = ['id', 'ra', 'dec', 'mura', 'mudec', 'radial_velocity',
'parallax', 'sed_filepath', 'magnorm', 'ebv']

paths = _find_star_files(dirpath, pixel)
uw_files = UWStarFiles(dirpath)
paths = uw_files.find_files(pixel, nside)
out_dict = {}
for k in out_fields:
out_dict[k] = []
Expand Down

0 comments on commit bec65b9

Please sign in to comment.