From dee2be5588c81bbc142c62ceac1dceeb1a221099 Mon Sep 17 00:00:00 2001
From: Joe Zuntz <joezuntz@googlemail.com>
Date: Tue, 18 Jul 2023 11:29:40 +0100
Subject: [PATCH 1/3] add CLMM to CI

---
 .github/workflows/ci.yml   | 45 ++++++++++++++++++++++++++++++++++++++
 examples/clmm/config.yml   | 12 +++++-----
 examples/clmm/pipeline.yml |  3 ++-
 txpipe/source_selector.py  |  6 +++--
 4 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ca1c5aa36..52c2f496c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -85,6 +85,7 @@ jobs:
       run: |
         ceci examples/metacal/pipeline.yml
         test -f data/example/outputs/shear_xi_plus.png
+        jupyter nbconvert --to notebook --execute "notebooks/Reading TXPipe Outputs.ipynb"
 
     - name: Show logs
       if: ${{ always() }}
@@ -225,6 +226,50 @@ jobs:
       run: |
         tail -n +1 data/example/logs_redmagic/*
 
+  CLMM_pipeline:
+    runs-on: ubuntu-latest
+
+    needs: Download_Data
+
+    container:
+      image: ghcr.io/lsstdesc/txpipe:v0.7
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v2
+      with:
+        submodules: true
+
+    - name: Cache example data
+      id: cache-example
+      uses: actions/cache@v3
+      env:
+        cache-name: cache-example-data
+      with:
+        path: ./data/example/inputs ./data/example/rail-bpz-inputs
+        # update this when we change package contents and want
+        # to force an update
+        key: example-data-v1
+
+    - name: Download test data
+      if: steps.cache-example.outputs.cache-hit != 'true'
+      run: |
+        wget -O example.tar.gz  "https://portal.nersc.gov/cfs/lsst/txpipe/data/example.tar.gz"
+        tar -zxvf example.tar.gz
+
+    - name: Run clmm pipeline
+      run: |
+        ceci examples/clmm/pipeline.yml
+        test -f data/clmm/outputs/cluster_shear_catalogs.hdf5
+        jupyter nbconvert --to notebook --execute "notebooks/exploring-cluster-shear-cat.ipynb"
+
+
+    - name: Show logs
+      if: ${{ always() }}
+      run: |
+        tail -n +1 data/clmm/logs/*
+
+
   Other_Pipeline_Dry_Runs:
     runs-on: ubuntu-latest
 
diff --git a/examples/clmm/config.yml b/examples/clmm/config.yml
index 81b9bb161..9bb0b05c3 100755
--- a/examples/clmm/config.yml
+++ b/examples/clmm/config.yml
@@ -12,8 +12,8 @@ PZPrepareEstimatorSource:
     nzbins: 301
     columns_file: ./data/bpz_riz.columns
     data_path: ./data/example/rail-bpz-inputs
-    spectra_file: SED/CWWSB4.list
-    prior_band: i
+    spectra_file: CWWSB4.list
+    ref_band: i
     # Not sure about this
     prior_file: hdfn_gen
     p_min: 0.005
@@ -37,13 +37,13 @@ PZEstimatorSource:
     dz: 0.01
     nzbins: 301
     data_path: ./data/example/rail-bpz-inputs
-    band_names: [mag_r, mag_i, mag_z]
-    band_err_names: [mag_err_r, mag_err_i, mag_err_z]
+    bands: [mag_r, mag_i, mag_z]
+    err_bands: [mag_err_r, mag_err_i, mag_err_z]
     hdf5_groupname: shear/00
     nondetect_val: .inf
     columns_file: ./data/bpz_riz.columns
-    spectra_file: SED/CWWSB4.list
-    prior_band: mag_i
+    spectra_file: CWWSB4.list
+    ref_band: mag_i
     prior_file: hdfn_gen
     p_min: 0.005
     gauss_kernel: 0.0
diff --git a/examples/clmm/pipeline.yml b/examples/clmm/pipeline.yml
index e81b70aec..63d959017 100755
--- a/examples/clmm/pipeline.yml
+++ b/examples/clmm/pipeline.yml
@@ -8,7 +8,8 @@ site:
     name: local
     max_threads: 2
 
-modules: txpipe  rail.stages
+modules: txpipe  rail.stages     rail.estimation.algos.bpz_lite
+
 
 python_paths:
     - submodules/RAIL
diff --git a/txpipe/source_selector.py b/txpipe/source_selector.py
index 5defba739..0380c5b66 100755
--- a/txpipe/source_selector.py
+++ b/txpipe/source_selector.py
@@ -629,13 +629,15 @@ def data_iterator(self):
             bands, "mag", "mag_err", shear_catalog_type="metadetect"
         )
 
+        renames = {}
+
         # We need truth shears and/or PZ point-estimates for each shear too
         if self.config["input_pz"]:
             shear_cols += metadetect_variants("mean_z")
         elif self.config["true_z"]:
-            shear_cols += ["redshift_true"]
+            shear_cols += ["00/redshift_true"]
+            renames["00/redshift_true"] = "redshift_true"
 
-        renames = {}
         for prefix in ["00", "1p", "1m", "2p", "2m"]:
             renames[f"{prefix}/mcal_psf_T_mean"] = f"{prefix}/psf_T_mean"
 

From 1b88edb3d5ca20143b2f93daabb8316cbeba4b16 Mon Sep 17 00:00:00 2001
From: Joe Zuntz <joezuntz@googlemail.com>
Date: Tue, 18 Jul 2023 21:59:11 +0100
Subject: [PATCH 2/3] Make CombinedClusterCatalog.from_pipeline_file work again

---
 examples/clmm/pipeline.yml                    |  6 +--
 .../txpipe_cluster_background_selection.ipynb | 42 +++++++++++++++----
 txpipe/extensions/clmm/select.py              | 41 +++++++++---------
 3 files changed, 54 insertions(+), 35 deletions(-)

diff --git a/examples/clmm/pipeline.yml b/examples/clmm/pipeline.yml
index 63d959017..70cc9a543 100755
--- a/examples/clmm/pipeline.yml
+++ b/examples/clmm/pipeline.yml
@@ -8,12 +8,10 @@ site:
     name: local
     max_threads: 2
 
-modules: txpipe  rail.stages     rail.estimation.algos.bpz_lite
+modules: txpipe  rail.estimation.algos.bpz_lite
 
 
-python_paths:
-    - submodules/RAIL
-
+python_paths: []
 
 stages:
     - name: PZPrepareEstimatorSource   # Prepare the p(z) estimator
diff --git a/notebooks/txpipe_cluster_background_selection.ipynb b/notebooks/txpipe_cluster_background_selection.ipynb
index 63a0d5c92..9f452d2f3 100644
--- a/notebooks/txpipe_cluster_background_selection.ipynb
+++ b/notebooks/txpipe_cluster_background_selection.ipynb
@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "35818ae3-26f3-48cf-b910-a22cae198f23",
    "metadata": {},
@@ -33,6 +34,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "54d52caf-ba4f-4d87-98fc-36bbd275135e",
    "metadata": {},
@@ -60,6 +62,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "0c7fd3ce-e568-4228-9788-436c36916d36",
    "metadata": {},
@@ -89,6 +92,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "44a5c54a-c9f2-4301-ba88-29dff76f93a1",
    "metadata": {},
@@ -123,6 +127,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9b30b542-fe35-478d-9561-f4df45d83850",
    "metadata": {},
@@ -158,6 +163,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "33844c37-b2c7-4400-9f6c-f0c5bb0c3ce4",
    "metadata": {},
@@ -316,6 +322,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "7eb7d1e3-186b-42b8-ad4f-d96b59781a36",
    "metadata": {
@@ -341,7 +348,7 @@
     "    shear_tomography_catalog=\"data/example/outputs_metadetect/shear_tomography_catalog.hdf5\",\n",
     "    cluster_catalog=\"./data/example/inputs/cluster_catalog.hdf5\",\n",
     "    cluster_shear_catalogs=\"my_cluster_shear_catalog.hdf5\",\n",
-    "    photoz_pdfs=\"data/example/inputs/photoz_pdfs.hdf5\",\n",
+    "    source_photoz_pdfs=\"data/example/inputs/photoz_pdfs.hdf5\",\n",
     ")"
    ]
   },
@@ -364,6 +371,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "40271313-fc70-46e1-b502-45af5518a1a6",
    "metadata": {},
@@ -401,6 +409,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "2616e035-a282-4f5e-8cda-85fa426bddd4",
    "metadata": {},
@@ -445,6 +454,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "97925489-6fec-4257-86c8-e29ba5f6d9e8",
    "metadata": {},
@@ -486,6 +496,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "3d7cc171-ab5a-41cf-8d5d-5d3b0e363554",
    "metadata": {},
@@ -527,6 +538,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "5d34bbe6-e101-4afa-8bda-cc53c08a41bf",
    "metadata": {},
@@ -535,6 +547,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "9733d99a-b0e0-46dd-ab86-c46bcb1e692b",
    "metadata": {},
@@ -558,6 +571,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "39be3193-7583-4630-a268-b875bdd6a3cf",
    "metadata": {},
@@ -594,6 +608,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "3ca15a17-07ed-4f08-bac3-7440e3d91442",
    "metadata": {},
@@ -648,6 +663,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "abc951a8-1364-465e-864f-2ef7c650b4e8",
    "metadata": {},
@@ -678,6 +694,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "439d6764-39a3-4419-a5a9-195a3a1a1ddd",
    "metadata": {},
@@ -738,6 +755,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "7ec67963-f92c-4520-9aa6-7ef7b9d866b7",
    "metadata": {},
@@ -843,6 +861,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "5543cce4-efcb-40a7-9d04-33588e85e8f3",
    "metadata": {},
@@ -865,6 +884,15 @@
     "    raise RumtimeError(\"Please wait a bit longer for the pipeline to complete\")"
    ]
   },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "0e4607c8",
+   "metadata": {},
+   "source": [
+    "Because we have a pipeline file for this example, we can build our CombinedClusterCatalog directly from it."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 22,
@@ -872,14 +900,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# TODO: fix finding all these automatically from the pipeline object\n",
-    "ccc = txpipe.extensions.CombinedClusterCatalog(\n",
-    "    shear_catalog=\"data/cosmodc2/20deg2/shear_catalog.hdf5\",\n",
-    "    shear_tomography_catalog=\"data/cosmodc2/outputs-20deg2/shear_tomography_catalog.hdf5\",\n",
-    "    cluster_catalog=\"./data/cosmodc2/20deg2/cluster_catalog.hdf5\",\n",
-    "    cluster_shear_catalogs=\"data/cosmodc2/outputs-20deg2/cluster_shear_catalogs.hdf5\",\n",
-    "    photoz_pdfs=\"data/cosmodc2/outputs-20deg2/source_photoz_pdfs.hdf5\",\n",
-    ")"
+    "\n",
+    "ccc = txpipe.extensions.CombinedClusterCatalog.from_pipeline_file(pipeline_file)"
    ]
   },
   {
@@ -957,6 +979,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "79f72253-e286-4a9e-bb8e-f9b64382f9c2",
    "metadata": {},
@@ -1009,6 +1032,7 @@
    ]
   },
   {
+   "attachments": {},
    "cell_type": "markdown",
    "id": "25cb96d0-234c-4ac2-a967-825f973753d4",
    "metadata": {},
diff --git a/txpipe/extensions/clmm/select.py b/txpipe/extensions/clmm/select.py
index 121affa62..2c30e3680 100644
--- a/txpipe/extensions/clmm/select.py
+++ b/txpipe/extensions/clmm/select.py
@@ -444,10 +444,10 @@ def iterate_source_catalog(self):
 
 
 class CombinedClusterCatalog:
-    def __init__(self, shear_catalog, shear_tomography_catalog, cluster_catalog, cluster_shear_catalogs, photoz_pdfs):
+    def __init__(self, shear_catalog, shear_tomography_catalog, cluster_catalog, cluster_shear_catalogs, source_photoz_pdfs):
         _, self.calibrator = Calibrator.load(shear_tomography_catalog)
         self.shear_cat = ShearCatalog(shear_catalog, "r")
-        self.pz_cat = PhotozPDFFile(photoz_pdfs,"r").file
+        self.pz_cat = PhotozPDFFile(source_photoz_pdfs,"r").file
         self.cluster_catalog = HDFFile(cluster_catalog, "r").file
         self.cluster_shear_catalogs = HDFFile(cluster_shear_catalogs, "r").file
         self.cluster_cat_cols = list(self.cluster_catalog['clusters'].keys())
@@ -461,36 +461,33 @@ def from_pipeline_file(cls, pipeline_file, run_dir='.'):
             dry_run=True
         )
 
-        pipeline = ceci.Pipeline.create(pipe_config)
-
-        outputs = {}
-        for stage in pipeline.stages:
-            outputs.update(stage.find_outputs(pipe_config["output_dir"]))
+        with ceci.prepare_for_pipeline(pipe_config):
+            pipeline = ceci.Pipeline.create(pipe_config)
 
 
         # make a list of files we need
-        tags = [
+
+        stage = pipeline["CLClusterShearCatalogs"]
+
+        ccc_tags = [
             "shear_catalog",
             "cluster_catalog",
-            "cluster_shear_catalogs",
             "shear_tomography_catalog",
-            "photoz_pdfs",
+            "source_photoz_pdfs",
+            "cluster_shear_catalogs",
         ]
-
-        paths = pipeline.overall_inputs.copy()
-        for stage in pipeline.stages:
-            paths.update(stage.find_outputs(pipe_config["output_dir"]))
-
-        files = {}
-        for tag in tags:
-            if tag not in paths:
-                raise ValueError(f"This pipeline did not generate or ingest {tag} needed for cluster WL")
-            path = paths[tag]
+        
+        paths = {}
+        for tag in ccc_tags:
+            path = pipeline.overall_inputs.get(tag)
+            if path is None:
+                path = pipeline.pipeline_files[tag]
             if not os.path.exists(path):
                 raise ValueError(f"File {path} does not exist - pipeline may not have run")
-            files[tag] = path
+            paths[tag] = path
 
-        return cls(**files)
+    
+        return cls(**paths)
 
 
     def get_cluster_info(self, cluster_index):

From f9c234149225b7f4ad628265c58a6949eb3c6440 Mon Sep 17 00:00:00 2001
From: Joe Zuntz <joezuntz@googlemail.com>
Date: Tue, 18 Jul 2023 22:00:00 +0100
Subject: [PATCH 3/3] remove h5py import

---
 notebooks/exploring-cluster-shear-cat.ipynb | 1 -
 1 file changed, 1 deletion(-)

diff --git a/notebooks/exploring-cluster-shear-cat.ipynb b/notebooks/exploring-cluster-shear-cat.ipynb
index 67945285d..b5d86cb81 100644
--- a/notebooks/exploring-cluster-shear-cat.ipynb
+++ b/notebooks/exploring-cluster-shear-cat.ipynb
@@ -9,7 +9,6 @@
    "source": [
     "%matplotlib inline\n",
     "import numpy as np\n",
-    "import h5py\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "# Run this notebook from the directory above\n",