diff --git a/skycatalogs/objects/base_object.py b/skycatalogs/objects/base_object.py index deb3eb12..0fa20f95 100644 --- a/skycatalogs/objects/base_object.py +++ b/skycatalogs/objects/base_object.py @@ -492,7 +492,7 @@ def subcomponents(self): return ['this_object'] return subs - def get_native_attribute(self, attribute_name): + def get_native_attribute(self, attribute_name, no_np=False): ''' Retrieve a particular attribute for a collection If we already have it, just return it. Otherwise attempt @@ -506,7 +506,7 @@ def get_native_attribute(self, attribute_name): for r in self._rdrs: if attribute_name in r.columns: d = r.read_columns([attribute_name], self._mask, - row_group=self._row_group) + row_group=self._row_group, no_np=no_np) if not d: return None val = d[attribute_name] @@ -522,7 +522,7 @@ def get_native_attributes(self, attribute_list): final_d = dict() for r in self._rdrs: to_read = remaining.intersection(r.columns) - d = r.read_columns(to_read, self._mask) + d = r.read_columns(to_read, self._mask, row_group=self._row_group) remaining.difference_update(to_read) for k in d: final_d[k] = d[k] diff --git a/skycatalogs/readers/parquet_reader.py b/skycatalogs/readers/parquet_reader.py index fbf15287..c4e5a74d 100644 --- a/skycatalogs/readers/parquet_reader.py +++ b/skycatalogs/readers/parquet_reader.py @@ -46,12 +46,18 @@ def columns(self): def n_row_groups(self): return self._meta.num_row_groups - def read_columns(self, cols, mask, row_group=-1): + def read_columns(self, cols, mask, row_group=-1, no_np=False): ''' Parameters ----------- cols list of column names belonging to the file mask if not None, use it and return compressed array + no_np if true, do not return as np.array. + + NOTE: In most cases returning the data as an np.array is convenient + for the caller, but if the elements of a column are themselves + each an array the resulting np.array cannot be used for certain + purposes, such as writing back out to a parquet file. Returns ------- @@ -76,6 +82,9 @@ def read_columns(self, cols, mask, row_group=-1): if mask is not None: c_data = ma.array(c_data, mask=mask).compressed() - d[c] = np.array([_ for _ in c_data]) + if not no_np: + d[c] = np.array([_ for _ in c_data]) + else: + d[c] = [_ for _ in c_data] return d diff --git a/skycatalogs/scripts/rotate.py b/skycatalogs/scripts/rotate.py new file mode 100644 index 00000000..fd8ed5b0 --- /dev/null +++ b/skycatalogs/scripts/rotate.py @@ -0,0 +1,261 @@ +import os +import time +import healpy +import numpy as np +import pandas as pd +import pyarrow as pa +import pyarrow.parquet as pq + +from rubin_scheduler.utils import cartesian_from_spherical, spherical_from_cartesian +from rubin_scheduler.utils import rotation_matrix_from_vectors, angular_separation +from skycatalogs.skyCatalogs import open_catalog +from skycatalogs.utils.parquet_schema_utils import make_galaxy_schema +from skycatalogs.utils.creator_utils import make_MW_extinction_av +from skycatalogs.utils.shapes import Disk + +DC2_RA = [68, 64, 58, + 68.5, 62, 55, + 67, 62.5, 57] +DC2_DEC = [-30.0, -32.0, -31.0, + -34.5, -36, -36.5, + -40, -42, -41] +FIELD_NAMES = ['COSMOS', 'DEEP_AO', 'DESI_SV3_R1', + 'Rubin_SV_095_-25', 'Rubin_SV_125_-15', 'Rubin_SV_225__-40', + 'Rubin_SV_250_2', 'Rubin_SV_280_-48', 'Rubin_SV_300_-41'] +FIELD_RA = [150.1, 216.0, 179.6, 95.0, 125.0, 225.0, 250.0, 280.0, 300.0] +FIELD_DEC = [2.1819444444444445, -12.5, 0.0, -25.0, -15.0, -40.0, 2.0, + -48.0, -41.0] +OPS4_NEW_NAMES = ['DEEP_B0', 'ELAIS_S1'] +OPS4_NEW_RA = [310.00, 9.45] +OPS4_NEW_DEC = [-19.0, -44.0] +DISK_RADIUS_DG = 1.4 + + +class FieldRotator(object): + + def __init__(self, ra0, dec0, ra1, dec1): + """ + The source of the code in this class can be found here: + + https://github.com/LSSTDESC/sims_GCRCatSimInterface/blob/master/python/desc/sims/GCRCatSimInterface/ProtoDC2DatabaseEmulator.py#L41 + + Parameters + ---------- + ra0, dec0 are the coordinates of the original field + center in degrees + + ra1, dec1 are the coordinates of the new field center + in degrees + + The transform() method of this class operates by first + applying a rotation that carries the original field center + into the new field center. Points are then transformed into + a basis in which the unit vector defining the new field center + is the x-axis. A rotation about the x-axis is applied so that + a point that was due north of the original field center is still + due north of the field center at the new location. Finally, + points are transformed back into the original x,y,z bases. + """ + + # do we actually need to do the rotation, or is the simulation + # already in the right spot? + self._needs_to_be_rotated = True + rot_dist = angular_separation(ra0, dec0, ra1, dec1) + if rot_dist < 1.0/3600.0: + self._needs_to_be_rotated = False + return + + # find the rotation that carries the original field center + # to the new field center + xyz = cartesian_from_spherical(np.radians(ra0), np.radians(dec0)) + xyz1 = cartesian_from_spherical(np.radians(ra1), np.radians(dec1)) + if np.abs(1.0-np.dot(xyz, xyz1)) < 1.0e-10: + self._transformation = np.identity(3, dtype=float) + return + + first_rotation = rotation_matrix_from_vectors(xyz, xyz1) + + # create a basis set in which the unit vector + # defining the new field center is the x axis + xx = np.dot(first_rotation, xyz) + rng = np.random.RandomState(99) + mag = np.NaN + while np.abs(mag) < 1.0e-20 or np.isnan(mag): + random_vec = rng.random_sample(3) + comp = np.dot(random_vec, xx) + yy = random_vec - comp*xx + mag = np.sqrt((yy**2).sum()) + yy /= mag + + zz = np.cross(xx, yy) + + to_self_bases = np.array([xx, + yy, + zz]) + + out_of_self_bases = to_self_bases.transpose() + + # Take a point due north of the original field + # center. Apply first_rotation to carry it to + # the new field. Transform it to the [xx, yy, zz] + # bases and find the rotation about xx that will + # make it due north of the new field center. + # Finally, transform back to the original bases. + d_dec = 0.1 + north = cartesian_from_spherical(np.radians(ra0), + np.radians(dec0+d_dec)) + + north = np.dot(first_rotation, north) + + # print(np.degrees(sphericalFromCartesian(north))) + + north_true = cartesian_from_spherical(np.radians(ra1), + np.radians(dec1+d_dec)) + + north = np.dot(to_self_bases, north) + north_true = np.dot(to_self_bases, north_true) + north = np.array([north[1], north[2]]) + north /= np.sqrt((north**2).sum()) + north_true = np.array([north_true[1], north_true[2]]) + north_true /= np.sqrt((north_true**2).sum()) + + c = north_true[0]*north[0]+north_true[1]*north[1] + s = north[0]*north_true[1]-north[1]*north_true[0] + norm = np.sqrt(c*c+s*s) + c = c/norm + s = s/norm + + yz_rotation = np.array([[1.0, 0.0, 0.0], + [0.0, c, -s], + [0.0, s, c]]) + + second_rotation = np.dot(out_of_self_bases, + np.dot(yz_rotation, + to_self_bases)) + + self._transformation = np.dot(second_rotation, + first_rotation) + + def transform(self, ra, dec): + """ + ra, dec are in degrees; return the RA, Dec coordinates + of the point about the new field center + """ + xyz = cartesian_from_spherical(np.radians(ra), + np.radians(dec)).transpose() + xyz = np.dot(self._transformation, xyz).transpose() + ra_out, dec_out = spherical_from_cartesian(xyz) + return np.degrees(ra_out), np.degrees(dec_out) + + +class GalaxyRotator: + def __init__(self, field_rotator, field_name, obj_list): + self._field_rotator = field_rotator + self._field_name = field_name + self._obj_list = obj_list + self._stride = 1000000 + + def output_field_pixels(self, output_dir, arrow_schema): + # First make necessary new columns and keep track of output + # healpixels + field_distinct = set() + colls = self._obj_list.get_collections() + new_ra = [] + new_dec = [] + new_av_ext = [] + hps = [] + hps_distinct = [] + # Make a dict indexed by hp number. Value is df to be written + # to file for that hp + hp_out_dict = dict() + for c in colls: + ra, dec = self._field_rotator.transform(c._ra, c._dec) + new_ra.append(ra) + new_dec.append(dec) + new_av_ext.append(make_MW_extinction_av(ra, dec)) + rot_hp = np.array(healpy.pixelfunc.ang2pix(NSIDE, ra, dec, + lonlat=True)) + hps.append(rot_hp) + distinct = {h for h in rot_hp} + hps_distinct.append(sorted(distinct)) + field_distinct = set.union(field_distinct, distinct) + + native = colls[0].native_columns + + for i, c in enumerate(colls): + to_get = [n for n in native if n not in ('ra', 'dec', 'MW_av', + 'sed_val_bulge', + 'sed_val_disk')] + dat = c.get_native_attributes(to_get) + dat['ra'] = new_ra[i] + dat['dec'] = new_dec[i] + dat['MW_av'] = new_av_ext[i] + dat['hp'] = hps[i] + + # Special handling for for tophat SEDs. The np.array + # normally returned by get_native_attribute causes problems + # when, as in this case, the elements of the array are + # themselves arrays. The result cannot be written back + # out to a parquet file. + for k in ['sed_val_bulge', 'sed_val_disk', 'sed_val_knots']: + dat[k] = c.get_native_attribute(k, no_np=True) + + df = pd.DataFrame(dat) + df.sort_values('hp', inplace=True) + + # Now for hps represented in the collection, copy relevant + # rows to df for that hp + for hp in hps_distinct[i]: + sub_df = (df.loc[df['hp'] == hp]).copy() + if hp in hp_out_dict: + hp_out_dict[hp] = pd.concat([hp_out_dict[hp], sub_df]) + else: + hp_out_dict[hp] = sub_df + del df + + # Now write out healpixels for this collection (sets of healpixels + # belonging to different fields are disjoint) + print('Generating data for pixels ', field_distinct) + for hp in field_distinct: + outpath = os.path.join(output_dir, f'galaxy_{hp}.parquet') + writer = pq.ParquetWriter(outpath, arrow_schema) + dlen = len(hp_out_dict[hp]['galaxy_id']) + if dlen == 0: + continue + u_bnd = min(self._stride, dlen) + l_bnd = 0 + while u_bnd > l_bnd: + out_df = hp_out_dict[hp].iloc[l_bnd:u_bnd] + out_table = pa.Table.from_pandas(out_df, schema=arrow_schema) + writer.write_table(out_table) + l_bnd = u_bnd + u_bnd = min(l_bnd + self._stride, dlen) + + writer.close() + print(f'{time.asctime()} Completed pixel {hp}') + + +input_config = '/pscratch/sd/j/jrbogart/desc/skycatalogs/rehearsal/skyCatalog.yaml' +rotated_dir = '/pscratch/sd/j/jrbogart/desc/skycatalogs/rotated_ops4' +cat = open_catalog(input_config) +NSIDE = 32 # should really read from config + +# For ops rehearsal 3 +# for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, FIELD_RA, +# FIELD_DEC, FIELD_NAMES): +# Additional fiels for ops rehearsal 4 +for dc2_ra, dc2_dec, f_ra, f_dec, name in zip(DC2_RA, DC2_DEC, OPS4_NEW_RA, + OPS4_NEW_DEC, OPS4_NEW_NAMES): + + disk = Disk(dc2_ra, dc2_dec, DISK_RADIUS_DG * 3600) + obj_list = cat.get_object_type_by_region(disk, 'galaxy') + print('Field ', name) + print(f'# objects: {len(obj_list)}') + print(f'# collections: {obj_list.collection_count}') + + rotator = FieldRotator(dc2_ra, dc2_dec, f_ra, f_dec) + galaxy_schema = make_galaxy_schema('rotate_logger') + galaxy_rotator = GalaxyRotator(rotator, name, obj_list) + galaxy_rotator.output_field_pixels(rotated_dir, galaxy_schema) + print('Completed field ', name) + # break ### for testing stop after 1 field