From dee2be5588c81bbc142c62ceac1dceeb1a221099 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 18 Jul 2023 11:29:40 +0100 Subject: [PATCH 1/3] add CLMM to CI --- .github/workflows/ci.yml | 45 ++++++++++++++++++++++++++++++++++++++ examples/clmm/config.yml | 12 +++++----- examples/clmm/pipeline.yml | 3 ++- txpipe/source_selector.py | 6 +++-- 4 files changed, 57 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca1c5aa36..52c2f496c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -85,6 +85,7 @@ jobs: run: | ceci examples/metacal/pipeline.yml test -f data/example/outputs/shear_xi_plus.png + jupyter nbconvert --to notebook --execute "notebooks/Reading TXPipe Outputs.ipynb" - name: Show logs if: ${{ always() }} @@ -225,6 +226,50 @@ jobs: run: | tail -n +1 data/example/logs_redmagic/* + CLMM_pipeline: + runs-on: ubuntu-latest + + needs: Download_Data + + container: + image: ghcr.io/lsstdesc/txpipe:v0.7 + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + with: + submodules: true + + - name: Cache example data + id: cache-example + uses: actions/cache@v3 + env: + cache-name: cache-example-data + with: + path: ./data/example/inputs ./data/example/rail-bpz-inputs + # update this when we change package contents and want + # to force an update + key: example-data-v1 + + - name: Download test data + if: steps.cache-example.outputs.cache-hit != 'true' + run: | + wget -O example.tar.gz "https://portal.nersc.gov/cfs/lsst/txpipe/data/example.tar.gz" + tar -zxvf example.tar.gz + + - name: Run clmm pipeline + run: | + ceci examples/clmm/pipeline.yml + test -f data/clmm/outputs/cluster_shear_catalogs.hdf5 + jupyter nbconvert --to notebook --execute "notebooks/exploring-cluster-shear-cat.ipynb" + + + - name: Show logs + if: ${{ always() }} + run: | + tail -n +1 data/clmm/logs/* + + Other_Pipeline_Dry_Runs: runs-on: ubuntu-latest diff --git a/examples/clmm/config.yml b/examples/clmm/config.yml index 81b9bb161..9bb0b05c3 100755 --- a/examples/clmm/config.yml +++ b/examples/clmm/config.yml @@ -12,8 +12,8 @@ PZPrepareEstimatorSource: nzbins: 301 columns_file: ./data/bpz_riz.columns data_path: ./data/example/rail-bpz-inputs - spectra_file: SED/CWWSB4.list - prior_band: i + spectra_file: CWWSB4.list + ref_band: i # Not sure about this prior_file: hdfn_gen p_min: 0.005 @@ -37,13 +37,13 @@ PZEstimatorSource: dz: 0.01 nzbins: 301 data_path: ./data/example/rail-bpz-inputs - band_names: [mag_r, mag_i, mag_z] - band_err_names: [mag_err_r, mag_err_i, mag_err_z] + bands: [mag_r, mag_i, mag_z] + err_bands: [mag_err_r, mag_err_i, mag_err_z] hdf5_groupname: shear/00 nondetect_val: .inf columns_file: ./data/bpz_riz.columns - spectra_file: SED/CWWSB4.list - prior_band: mag_i + spectra_file: CWWSB4.list + ref_band: mag_i prior_file: hdfn_gen p_min: 0.005 gauss_kernel: 0.0 diff --git a/examples/clmm/pipeline.yml b/examples/clmm/pipeline.yml index e81b70aec..63d959017 100755 --- a/examples/clmm/pipeline.yml +++ b/examples/clmm/pipeline.yml @@ -8,7 +8,8 @@ site: name: local max_threads: 2 -modules: txpipe rail.stages +modules: txpipe rail.stages rail.estimation.algos.bpz_lite + python_paths: - submodules/RAIL diff --git a/txpipe/source_selector.py b/txpipe/source_selector.py index 5defba739..0380c5b66 100755 --- a/txpipe/source_selector.py +++ b/txpipe/source_selector.py @@ -629,13 +629,15 @@ def data_iterator(self): bands, "mag", "mag_err", shear_catalog_type="metadetect" ) + renames = {} + # We need truth shears and/or PZ point-estimates for each shear too if self.config["input_pz"]: shear_cols += metadetect_variants("mean_z") elif self.config["true_z"]: - shear_cols += ["redshift_true"] + shear_cols += ["00/redshift_true"] + renames["00/redshift_true"] = "redshift_true" - renames = {} for prefix in ["00", "1p", "1m", "2p", "2m"]: renames[f"{prefix}/mcal_psf_T_mean"] = f"{prefix}/psf_T_mean" From 1b88edb3d5ca20143b2f93daabb8316cbeba4b16 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 18 Jul 2023 21:59:11 +0100 Subject: [PATCH 2/3] Make CombinedClusterCatalog.from_pipeline_file work again --- examples/clmm/pipeline.yml | 6 +-- .../txpipe_cluster_background_selection.ipynb | 42 +++++++++++++++---- txpipe/extensions/clmm/select.py | 41 +++++++++--------- 3 files changed, 54 insertions(+), 35 deletions(-) diff --git a/examples/clmm/pipeline.yml b/examples/clmm/pipeline.yml index 63d959017..70cc9a543 100755 --- a/examples/clmm/pipeline.yml +++ b/examples/clmm/pipeline.yml @@ -8,12 +8,10 @@ site: name: local max_threads: 2 -modules: txpipe rail.stages rail.estimation.algos.bpz_lite +modules: txpipe rail.estimation.algos.bpz_lite -python_paths: - - submodules/RAIL - +python_paths: [] stages: - name: PZPrepareEstimatorSource # Prepare the p(z) estimator diff --git a/notebooks/txpipe_cluster_background_selection.ipynb b/notebooks/txpipe_cluster_background_selection.ipynb index 63a0d5c92..9f452d2f3 100644 --- a/notebooks/txpipe_cluster_background_selection.ipynb +++ b/notebooks/txpipe_cluster_background_selection.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "35818ae3-26f3-48cf-b910-a22cae198f23", "metadata": {}, @@ -33,6 +34,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "54d52caf-ba4f-4d87-98fc-36bbd275135e", "metadata": {}, @@ -60,6 +62,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0c7fd3ce-e568-4228-9788-436c36916d36", "metadata": {}, @@ -89,6 +92,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "44a5c54a-c9f2-4301-ba88-29dff76f93a1", "metadata": {}, @@ -123,6 +127,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "9b30b542-fe35-478d-9561-f4df45d83850", "metadata": {}, @@ -158,6 +163,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "33844c37-b2c7-4400-9f6c-f0c5bb0c3ce4", "metadata": {}, @@ -316,6 +322,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7eb7d1e3-186b-42b8-ad4f-d96b59781a36", "metadata": { @@ -341,7 +348,7 @@ " shear_tomography_catalog=\"data/example/outputs_metadetect/shear_tomography_catalog.hdf5\",\n", " cluster_catalog=\"./data/example/inputs/cluster_catalog.hdf5\",\n", " cluster_shear_catalogs=\"my_cluster_shear_catalog.hdf5\",\n", - " photoz_pdfs=\"data/example/inputs/photoz_pdfs.hdf5\",\n", + " source_photoz_pdfs=\"data/example/inputs/photoz_pdfs.hdf5\",\n", ")" ] }, @@ -364,6 +371,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "40271313-fc70-46e1-b502-45af5518a1a6", "metadata": {}, @@ -401,6 +409,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2616e035-a282-4f5e-8cda-85fa426bddd4", "metadata": {}, @@ -445,6 +454,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "97925489-6fec-4257-86c8-e29ba5f6d9e8", "metadata": {}, @@ -486,6 +496,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3d7cc171-ab5a-41cf-8d5d-5d3b0e363554", "metadata": {}, @@ -527,6 +538,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "5d34bbe6-e101-4afa-8bda-cc53c08a41bf", "metadata": {}, @@ -535,6 +547,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "9733d99a-b0e0-46dd-ab86-c46bcb1e692b", "metadata": {}, @@ -558,6 +571,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "39be3193-7583-4630-a268-b875bdd6a3cf", "metadata": {}, @@ -594,6 +608,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "3ca15a17-07ed-4f08-bac3-7440e3d91442", "metadata": {}, @@ -648,6 +663,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "abc951a8-1364-465e-864f-2ef7c650b4e8", "metadata": {}, @@ -678,6 +694,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "439d6764-39a3-4419-a5a9-195a3a1a1ddd", "metadata": {}, @@ -738,6 +755,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7ec67963-f92c-4520-9aa6-7ef7b9d866b7", "metadata": {}, @@ -843,6 +861,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "5543cce4-efcb-40a7-9d04-33588e85e8f3", "metadata": {}, @@ -865,6 +884,15 @@ " raise RumtimeError(\"Please wait a bit longer for the pipeline to complete\")" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "0e4607c8", + "metadata": {}, + "source": [ + "Because we have a pipeline file for this example we can make our CombinedClusterCatalog directly from that" + ] + }, { "cell_type": "code", "execution_count": 22, @@ -872,14 +900,8 @@ "metadata": {}, "outputs": [], "source": [ - "# TODO: fix finding all these automatically from the pipeline object\n", - "ccc = txpipe.extensions.CombinedClusterCatalog(\n", - " shear_catalog=\"data/cosmodc2/20deg2/shear_catalog.hdf5\",\n", - " shear_tomography_catalog=\"data/cosmodc2/outputs-20deg2/shear_tomography_catalog.hdf5\",\n", - " cluster_catalog=\"./data/cosmodc2/20deg2/cluster_catalog.hdf5\",\n", - " cluster_shear_catalogs=\"data/cosmodc2/outputs-20deg2/cluster_shear_catalogs.hdf5\",\n", - " photoz_pdfs=\"data/cosmodc2/outputs-20deg2/source_photoz_pdfs.hdf5\",\n", - ")" + "\n", + "ccc = txpipe.extensions.CombinedClusterCatalog.from_pipeline_file(pipeline_file)" ] }, { @@ -957,6 +979,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "79f72253-e286-4a9e-bb8e-f9b64382f9c2", "metadata": {}, @@ -1009,6 +1032,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "25cb96d0-234c-4ac2-a967-825f973753d4", "metadata": {}, diff --git a/txpipe/extensions/clmm/select.py b/txpipe/extensions/clmm/select.py index 121affa62..2c30e3680 100644 --- a/txpipe/extensions/clmm/select.py +++ b/txpipe/extensions/clmm/select.py @@ -444,10 +444,10 @@ def iterate_source_catalog(self): class CombinedClusterCatalog: - def __init__(self, shear_catalog, shear_tomography_catalog, cluster_catalog, cluster_shear_catalogs, photoz_pdfs): + def __init__(self, shear_catalog, shear_tomography_catalog, cluster_catalog, cluster_shear_catalogs, source_photoz_pdfs): _, self.calibrator = Calibrator.load(shear_tomography_catalog) self.shear_cat = ShearCatalog(shear_catalog, "r") - self.pz_cat = PhotozPDFFile(photoz_pdfs,"r").file + self.pz_cat = PhotozPDFFile(source_photoz_pdfs,"r").file self.cluster_catalog = HDFFile(cluster_catalog, "r").file self.cluster_shear_catalogs = HDFFile(cluster_shear_catalogs, "r").file self.cluster_cat_cols = list(self.cluster_catalog['clusters'].keys()) @@ -461,36 +461,33 @@ def from_pipeline_file(cls, pipeline_file, run_dir='.'): dry_run=True ) - pipeline = ceci.Pipeline.create(pipe_config) - - outputs = {} - for stage in pipeline.stages: - outputs.update(stage.find_outputs(pipe_config["output_dir"])) + with ceci.prepare_for_pipeline(pipe_config): + pipeline = ceci.Pipeline.create(pipe_config) # make a list of files we need - tags = [ + + stage = pipeline["CLClusterShearCatalogs"] + + ccc_tags = [ "shear_catalog", "cluster_catalog", - "cluster_shear_catalogs", "shear_tomography_catalog", - "photoz_pdfs", + "source_photoz_pdfs", + "cluster_shear_catalogs", ] - - paths = pipeline.overall_inputs.copy() - for stage in pipeline.stages: - paths.update(stage.find_outputs(pipe_config["output_dir"])) - - files = {} - for tag in tags: - if tag not in paths: - raise ValueError(f"This pipeline did not generate or ingest {tag} needed for cluster WL") - path = paths[tag] + + paths = {} + for tag in ccc_tags: + path = pipeline.overall_inputs.get(tag) + if path is None: + path = pipeline.pipeline_files[tag] if not os.path.exists(path): raise ValueError(f"File {path} does not exist - pipeline may not have run") - files[tag] = path + paths[tag] = path - return cls(**files) + + return cls(**paths) def get_cluster_info(self, cluster_index): From f9c234149225b7f4ad628265c58a6949eb3c6440 Mon Sep 17 00:00:00 2001 From: Joe Zuntz Date: Tue, 18 Jul 2023 22:00:00 +0100 Subject: [PATCH 3/3] remove h5py file --- notebooks/exploring-cluster-shear-cat.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/notebooks/exploring-cluster-shear-cat.ipynb b/notebooks/exploring-cluster-shear-cat.ipynb index 67945285d..b5d86cb81 100644 --- a/notebooks/exploring-cluster-shear-cat.ipynb +++ b/notebooks/exploring-cluster-shear-cat.ipynb @@ -9,7 +9,6 @@ "source": [ "%matplotlib inline\n", "import numpy as np\n", - "import h5py\n", "import matplotlib.pyplot as plt\n", "\n", "# Run this notebook from the directory above\n",