From 26abfbe0dc8519c437710eab21d924f1d0ebd719 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Fri, 2 Feb 2024 12:45:42 -0800 Subject: [PATCH 1/5] minor longstanding bug fix in little-used routine; option to return data column as list rather than np.array --- skycatalogs/objects/base_object.py | 6 +++--- skycatalogs/readers/parquet_reader.py | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/skycatalogs/objects/base_object.py b/skycatalogs/objects/base_object.py index 6ec5b81c..0e372641 100644 --- a/skycatalogs/objects/base_object.py +++ b/skycatalogs/objects/base_object.py @@ -495,7 +495,7 @@ def subcomponents(self): return ['this_object'] return subs - def get_native_attribute(self, attribute_name): + def get_native_attribute(self, attribute_name, no_np=False): ''' Retrieve a particular attribute for a collection If we already have it, just return it. Otherwise attempt @@ -509,7 +509,7 @@ def get_native_attribute(self, attribute_name): for r in self._rdrs: if attribute_name in r.columns: d = r.read_columns([attribute_name], self._mask, - row_group=self._row_group) + row_group=self._row_group, no_np=no_np) if not d: return None val = d[attribute_name] @@ -525,7 +525,7 @@ def get_native_attributes(self, attribute_list): final_d = dict() for r in self._rdrs: to_read = remaining.intersection(r.columns) - d = r.read_columns(to_read, self._mask) + d = r.read_columns(to_read, self._mask, row_group=self._row_group) remaining.difference_update(to_read) for k in d: final_d[k] = d[k] diff --git a/skycatalogs/readers/parquet_reader.py b/skycatalogs/readers/parquet_reader.py index fbf15287..f2746f8e 100644 --- a/skycatalogs/readers/parquet_reader.py +++ b/skycatalogs/readers/parquet_reader.py @@ -46,12 +46,14 @@ def columns(self): def n_row_groups(self): return self._meta.num_row_groups - def read_columns(self, cols, mask, row_group=-1): + def read_columns(self, cols, mask, row_group=-1, no_np=False): ''' Parameters ----------- cols list of column names belonging to the file mask if not None, use it and return compressed array + no_np if true, do not return as np.array. + Returns ------- @@ -76,6 +78,9 @@ def read_columns(self, cols, mask, row_group=-1): if mask is not None: c_data = ma.array(c_data, mask=mask).compressed() - d[c] = np.array([_ for _ in c_data]) + if not no_np: + d[c] = np.array([_ for _ in c_data]) + else: + d[c] = [_ for _ in c_data] return d From e78f0d946f65aaccc1132ef41fa3c31e22d0344b Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Sat, 3 Feb 2024 17:13:20 -0800 Subject: [PATCH 2/5] add the script that does the work in GitHub --- skycatalogs/scripts/rotate.py | 252 ++++++++++++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 skycatalogs/scripts/rotate.py diff --git a/skycatalogs/scripts/rotate.py b/skycatalogs/scripts/rotate.py new file mode 100644 index 00000000..67543100 --- /dev/null +++ b/skycatalogs/scripts/rotate.py @@ -0,0 +1,252 @@ +import os +import time +import healpy +import logging # expected by make_galaxy_schema +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq +# Following imports were used in old GCR_siminterface rotate class +# from lsst.sims.utils import angularSeparation +# from lsst.sims.utils import rotationMatrixFromVectors +# from lsst.sims.utils import cartesianFromSpherical, sphericalFromCartesian + +# Possible alternatives are in rubin_schedule.utils.coordinate_transformations +# +from rubin_scheduler.utils import cartesian_from_spherical, spherical_from_cartesian +from rubin_scheduler.utils import rotation_matrix_from_vectors, angular_separation +from skycatalogs.skyCatalogs import open_catalog +from skycatalogs.utils.parquet_schema_utils import make_galaxy_schema +from skycatalogs.utils.creator_utils import make_MW_extinction_av +from skycatalogs.utils.shapes import Disk + +DC2_RA = [68, 64, 58, + 68.5, 62, 55, + 67, 62.5, 57] +DC2_DEC = [-30.0, -32.0, -31.0, + -34.5, -36, -36.5, + -40, -42, -41] +FIELD_NAMES = ['COSMOS', 'DEEP_AO', 'DESI_SV3_R1', + 'Rubin_SV_095_-25', 'Rubin_SV_125_-15', 'Rubin_SV_225__-40', + 'Rubin_SV_250_2', 'Rubin_SV_280_-48', 'Rubin_SV_300_-41'] +FIELD_RA = [150.1, 216.0, 179.6, 95.0, 125.0, 225.0, 250.0, 280.0, 300.0] +FIELD_DEC = [2.1819444444444445, -12.5, 0.0, -25.0, -15.0, -40.0, 2.0, + -48.0, -41.0] +DISK_RADIUS_DG = 1.4 + + +class FieldRotator(object): + + def __init__(self, ra0, dec0, ra1, dec1): + """ + Parameters + ---------- + ra0, dec0 are the coordinates of the original field + center in degrees + + ra1, dec1 are the coordinates of the new field center + in degrees + + The transform() method of this class operates by first + applying a rotation that carries the original field center + into the new field center. Points are then transformed into + a basis in which the unit vector defining the new field center + is the x-axis. A rotation about the x-axis is applied so that + a point that was due north of the original field center is still + due north of the field center at the new location. Finally, + points are transformed back into the original x,y,z bases. + """ + + # do we actually need to do the rotation, or is the simulation + # already in the right spot? + self._needs_to_be_rotated = True + rot_dist = angular_separation(ra0, dec0, ra1, dec1) + if rot_dist < 1.0/3600.0: + self._needs_to_be_rotated = False + return + + # find the rotation that carries the original field center + # to the new field center + xyz = cartesian_from_spherical(np.radians(ra0), np.radians(dec0)) + xyz1 = cartesian_from_spherical(np.radians(ra1), np.radians(dec1)) + if np.abs(1.0-np.dot(xyz, xyz1)) < 1.0e-10: + self._transformation = np.identity(3, dtype=float) + return + + first_rotation = rotation_matrix_from_vectors(xyz, xyz1) + + # create a basis set in which the unit vector + # defining the new field center is the x axis + xx = np.dot(first_rotation, xyz) + rng = np.random.RandomState(99) + mag = np.NaN + while np.abs(mag) < 1.0e-20 or np.isnan(mag): + random_vec = rng.random_sample(3) + comp = np.dot(random_vec, xx) + yy = random_vec - comp*xx + mag = np.sqrt((yy**2).sum()) + yy /= mag + + zz = np.cross(xx, yy) + + to_self_bases = np.array([xx, + yy, + zz]) + + out_of_self_bases = to_self_bases.transpose() + + # Take a point due north of the original field + # center. Apply first_rotation to carry it to + # the new field. Transform it to the [xx, yy, zz] + # bases and find the rotation about xx that will + # make it due north of the new field center. + # Finally, transform back to the original bases. + d_dec = 0.1 + north = cartesian_from_spherical(np.radians(ra0), + np.radians(dec0+d_dec)) + + north = np.dot(first_rotation, north) + + # print(np.degrees(sphericalFromCartesian(north))) + + north_true = cartesian_from_spherical(np.radians(ra1), + np.radians(dec1+d_dec)) + + north = np.dot(to_self_bases, north) + north_true = np.dot(to_self_bases, north_true) + north = np.array([north[1], north[2]]) + north /= np.sqrt((north**2).sum()) + north_true = np.array([north_true[1], north_true[2]]) + north_true /= np.sqrt((north_true**2).sum()) + + c = north_true[0]*north[0]+north_true[1]*north[1] + s = north[0]*north_true[1]-north[1]*north_true[0] + norm = np.sqrt(c*c+s*s) + c = c/norm + s = s/norm + + # nprime = np.array([c*north[0]-s*north[1], + # s*north[0]+c*north[1]]) + + yz_rotation = np.array([[1.0, 0.0, 0.0], + [0.0, c, -s], + [0.0, s, c]]) + + second_rotation = np.dot(out_of_self_bases, + np.dot(yz_rotation, + to_self_bases)) + + self._transformation = np.dot(second_rotation, + first_rotation) + + def transform(self, ra, dec): + """ + ra, dec are in degrees; return the RA, Dec coordinates + of the point about the new field center + """ + xyz = cartesian_from_spherical(np.radians(ra), + np.radians(dec)).transpose() + xyz = np.dot(self._transformation, xyz).transpose() + ra_out, dec_out = spherical_from_cartesian(xyz) + return np.degrees(ra_out), np.degrees(dec_out) + + +class GalaxyRotator: + def __init__(self, field_rotator, field_name, obj_list): + self._field_rotator = field_rotator + self._field_name = field_name + self._obj_list = obj_list + self._stride = 1000000 + + def output_field_pixels(self, output_dir, arrow_schema): + # First make necessary new columns and keep track of output + # healpixels + field_distinct = set() + colls = self._obj_list.get_collections() + new_ra = [] + new_dec = [] + new_av_ext = [] + hps = [] + hps_distinct = [] + # Make a dict indexed by hp number. Value is df to be written + # to file for that hp + hp_out_dict = dict() + for c in colls: + ra, dec = self._field_rotator.transform(c._ra, c._dec) + new_ra.append(ra) + new_dec.append(dec) + new_av_ext.append(make_MW_extinction_av(ra, dec)) + rot_hp = np.array(healpy.pixelfunc.ang2pix(NSIDE, ra, dec, + lonlat=True)) + hps.append(rot_hp) + distinct = {h for h in rot_hp} + hps_distinct.append(sorted(distinct)) + field_distinct = set.union(field_distinct, distinct) + + # hp_sorted = sorted(field_distinct) + native = colls[0].native_columns + + for i, c in enumerate(colls): + to_get = [n for n in native if (n != 'ra') and (n != 'dec') and (n != 'MW_av') and n != 'sed_val_bulge' and n != 'sed_val_disk'] + dat = c.get_native_attributes(to_get) + dat['ra'] = new_ra[i] + dat['dec'] = new_dec[i] + dat['MW_av'] = new_av_ext[i] + dat['hp'] = hps[i] + # special handling for for tophat SEDs + for k in ['sed_val_bulge', 'sed_val_disk', 'sed_val_knots']: + dat[k] = c.get_native_attribute(k, no_np=True) + + df = pd.DataFrame(dat) + df.sort_values('hp', inplace=True) + + # Now for hps represented in the collection, copy relevant + # rows to df for that hp + for hp in hps_distinct[i]: + sub_df = (df.loc[df['hp'] == hp]).copy() + if hp in hp_out_dict: + hp_out_dict[hp] = pd.concat([hp_out_dict[hp], sub_df]) + else: + hp_out_dict[hp] = sub_df + del df + + # Now write out healpixels for this collection (sets of healpixels + # belonging to different fields are disjoint) + # for hp in hps_distinct: + for hp in field_distinct: + outpath = os.path.join(output_dir, f'galaxy_{hp}.parquet') + writer = pq.ParquetWriter(outpath, arrow_schema) + dlen = len(hp_out_dict[hp]['galaxy_id']) + if dlen == 0: + continue + u_bnd = min(self._stride, dlen) + l_bnd = 0 + while u_bnd > l_bnd: + out_df = hp_out_dict[hp].iloc[l_bnd:u_bnd] + out_table = pa.Table.from_pandas(out_df, schema=arrow_schema) + writer.write_table(out_table) + l_bnd = u_bnd + u_bnd = min(l_bnd + self._stride, dlen) + + writer.close() + print(f'{time.asctime()} Completed pixel {hp}') + + +input_config = '/pscratch/sd/j/jrbogart/desc/skycatalogs/rehearsal/skyCatalog.yaml' +rotated_dir = '/pscratch/sd/j/jrbogart/desc/skycatalogs/rotated' +cat = open_catalog(input_config) +NSIDE = 32 # should really read from config + +for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, FIELD_RA, FIELD_DEC, FIELD_NAMES): + disk = Disk(dc2_ra, dc2_dec, DISK_RADIUS_DG * 3600) + obj_list = cat.get_object_type_by_region(disk, 'galaxy') + print('Field ', name) + print(f'# objects: {len(obj_list)}') + print(f'# collections: {obj_list.collection_count}') + + rotator = FieldRotator(dc2_ra, dc2_dec, f_ra, f_dec) + galaxy_schema = make_galaxy_schema('rotate_logger') + galaxy_rotator = GalaxyRotator(rotator, name, obj_list) + galaxy_rotator.output_field_pixels(rotated_dir, galaxy_schema) + + break ### for testing stop after 1 field From 53d47868c3505e7a4824a95f4dba10fb21bc44aa Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Tue, 6 Feb 2024 18:07:58 -0800 Subject: [PATCH 3/5] Clean up rotate script a bit and add output to better record what happened --- skycatalogs/scripts/rotate.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/skycatalogs/scripts/rotate.py b/skycatalogs/scripts/rotate.py index 67543100..0cb49deb 100644 --- a/skycatalogs/scripts/rotate.py +++ b/skycatalogs/scripts/rotate.py @@ -1,18 +1,11 @@ import os import time import healpy -import logging # expected by make_galaxy_schema import numpy as np import pandas as pd import pyarrow as pa import pyarrow.parquet as pq -# Following imports were used in old GCR_siminterface rotate class -# from lsst.sims.utils import angularSeparation -# from lsst.sims.utils import rotationMatrixFromVectors -# from lsst.sims.utils import cartesianFromSpherical, sphericalFromCartesian -# Possible alternatives are in rubin_schedule.utils.coordinate_transformations -# from rubin_scheduler.utils import cartesian_from_spherical, spherical_from_cartesian from rubin_scheduler.utils import rotation_matrix_from_vectors, angular_separation from skycatalogs.skyCatalogs import open_catalog @@ -213,6 +206,7 @@ def output_field_pixels(self, output_dir, arrow_schema): # Now write out healpixels for this collection (sets of healpixels # belonging to different fields are disjoint) # for hp in hps_distinct: + print('Generating data for pixels ', field_distinct) for hp in field_distinct: outpath = os.path.join(output_dir, f'galaxy_{hp}.parquet') writer = pq.ParquetWriter(outpath, arrow_schema) @@ -237,7 +231,11 @@ def output_field_pixels(self, output_dir, arrow_schema): cat = open_catalog(input_config) NSIDE = 32 # should really read from config -for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, FIELD_RA, FIELD_DEC, FIELD_NAMES): +# For Ops rehearsal Did first field in a separate run +# for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA[1:], DC2_DEC[1:], FIELD_RA[1:], +# FIELD_DEC[1:], FIELD_NAMES[1:]): +for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, FIELD_RA, + FIELD_DEC, FIELD_NAMES): disk = Disk(dc2_ra, dc2_dec, DISK_RADIUS_DG * 3600) obj_list = cat.get_object_type_by_region(disk, 'galaxy') print('Field ', name) @@ -248,5 +246,5 @@ def output_field_pixels(self, output_dir, arrow_schema): galaxy_schema = make_galaxy_schema('rotate_logger') galaxy_rotator = GalaxyRotator(rotator, name, obj_list) galaxy_rotator.output_field_pixels(rotated_dir, galaxy_schema) - - break ### for testing stop after 1 field + print('Completed field ', name) + # break ### for testing stop after 1 field From 2878d59eacb0bb6f0cb55dbd603c022f1f613cbd Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Tue, 14 May 2024 14:22:30 -0700 Subject: [PATCH 4/5] address reviewer comments --- skycatalogs/readers/parquet_reader.py | 4 ++++ skycatalogs/scripts/rotate.py | 20 +++++++++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/skycatalogs/readers/parquet_reader.py b/skycatalogs/readers/parquet_reader.py index f2746f8e..c4e5a74d 100644 --- a/skycatalogs/readers/parquet_reader.py +++ b/skycatalogs/readers/parquet_reader.py @@ -54,6 +54,10 @@ def read_columns(self, cols, mask, row_group=-1, no_np=False): mask if not None, use it and return compressed array no_np if true, do not return as np.array. + NOTE: In most cases returning the data as an np.array is convenient + for the caller, but if the elements of a column are themselves + each an array the resulting np.array cannot be used for certain + purposes, such as writing back out to a parquet file. Returns ------- diff --git a/skycatalogs/scripts/rotate.py b/skycatalogs/scripts/rotate.py index 0cb49deb..176f788c 100644 --- a/skycatalogs/scripts/rotate.py +++ b/skycatalogs/scripts/rotate.py @@ -32,6 +32,10 @@ class FieldRotator(object): def __init__(self, ra0, dec0, ra1, dec1): """ + The source of the code in this class can be found here: + + https://github.com/LSSTDESC/sims_GCRCatSimInterface/blob/master/python/desc/sims/GCRCatSimInterface/ProtoDC2DatabaseEmulator.py#L41 + Parameters ---------- ra0, dec0 are the coordinates of the original field @@ -118,9 +122,6 @@ def __init__(self, ra0, dec0, ra1, dec1): c = c/norm s = s/norm - # nprime = np.array([c*north[0]-s*north[1], - # s*north[0]+c*north[1]]) - yz_rotation = np.array([[1.0, 0.0, 0.0], [0.0, c, -s], [0.0, s, c]]) @@ -176,17 +177,23 @@ def output_field_pixels(self, output_dir, arrow_schema): hps_distinct.append(sorted(distinct)) field_distinct = set.union(field_distinct, distinct) - # hp_sorted = sorted(field_distinct) native = colls[0].native_columns for i, c in enumerate(colls): - to_get = [n for n in native if (n != 'ra') and (n != 'dec') and (n != 'MW_av') and n != 'sed_val_bulge' and n != 'sed_val_disk'] + to_get = [n for n in native if n not in ('ra', 'dec', 'MW_av', + 'sed_val_bulge', + 'sed_val_disk')] dat = c.get_native_attributes(to_get) dat['ra'] = new_ra[i] dat['dec'] = new_dec[i] dat['MW_av'] = new_av_ext[i] dat['hp'] = hps[i] - # special handling for for tophat SEDs + + # Special handling for for tophat SEDs. The np.array + # normally returned by get_native_attribute causes problems + # when, as in this case, the elements of the array are + # themselves arrays. The result cannot be written back + # out to a parquet file. for k in ['sed_val_bulge', 'sed_val_disk', 'sed_val_knots']: dat[k] = c.get_native_attribute(k, no_np=True) @@ -205,7 +212,6 @@ def output_field_pixels(self, output_dir, arrow_schema): # Now write out healpixels for this collection (sets of healpixels # belonging to different fields are disjoint) - # for hp in hps_distinct: print('Generating data for pixels ', field_distinct) for hp in field_distinct: outpath = os.path.join(output_dir, f'galaxy_{hp}.parquet') From 8199155475107da146a3d01c0a830d88b14504e2 Mon Sep 17 00:00:00 2001 From: Joanne Bogart Date: Tue, 14 May 2024 17:34:28 -0700 Subject: [PATCH 5/5] get ready for ops4 fields --- skycatalogs/scripts/rotate.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/skycatalogs/scripts/rotate.py b/skycatalogs/scripts/rotate.py index 176f788c..fd8ed5b0 100644 --- a/skycatalogs/scripts/rotate.py +++ b/skycatalogs/scripts/rotate.py @@ -25,6 +25,9 @@ FIELD_RA = [150.1, 216.0, 179.6, 95.0, 125.0, 225.0, 250.0, 280.0, 300.0] FIELD_DEC = [2.1819444444444445, -12.5, 0.0, -25.0, -15.0, -40.0, 2.0, -48.0, -41.0] +OPS4_NEW_NAMES = ['DEEP_B0', 'ELAIS_S1'] +OPS4_NEW_RA = [310.00, 9.45] +OPS4_NEW_DEC = [-19.0, -44.0] DISK_RADIUS_DG = 1.4 @@ -233,15 +236,17 @@ def output_field_pixels(self, output_dir, arrow_schema): input_config = '/pscratch/sd/j/jrbogart/desc/skycatalogs/rehearsal/skyCatalog.yaml' -rotated_dir = '/pscratch/sd/j/jrbogart/desc/skycatalogs/rotated' +rotated_dir = '/pscratch/sd/j/jrbogart/desc/skycatalogs/rotated_ops4' cat = open_catalog(input_config) NSIDE = 32 # should really read from config -# For Ops rehearsal Did first field in a separate run -# for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA[1:], DC2_DEC[1:], FIELD_RA[1:], -# FIELD_DEC[1:], FIELD_NAMES[1:]): -for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, FIELD_RA, - FIELD_DEC, FIELD_NAMES): +# For ops rehearsal 3 +# for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, FIELD_RA, +# FIELD_DEC, FIELD_NAMES): +# Additional fiels for ops rehearsal 4 +for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, OPS4_NEW_RA, + OPS4_NEW_DEC, OPS4_NEW_NAMES): + disk = Disk(dc2_ra, dc2_dec, DISK_RADIUS_DG * 3600) obj_list = cat.get_object_type_by_region(disk, 'galaxy') print('Field ', name)