From fdf201e6d3370626b9c7d2a8955929069671f202 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 3 Jun 2024 13:27:47 +0200 Subject: [PATCH 01/15] actually fix sf err calc --- src/pygama/pargen/AoE_cal.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index f166f83d9..49fa4d38c 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -719,8 +719,10 @@ def get_survival_fraction( pc_n = ct_n + surv_n - sf = (surv_n / pc_n) * 100 - err = sf * np.sqrt((ct_err / pc_n**2) ** 2 + (surv_err / pc_n**2) ** 2) + sf = surv_n / pc_n + err = 100 * sf * (1 - sf) * np.sqrt((ct_err / ct_n) ** 2 + (surv_err / surv_n) ** 2) + sf *= 100 + return sf, err, cut_pars, surv_pars @@ -835,8 +837,9 @@ def compton_sf(cut_param, low_cut_val, high_cut_val=None, mode="greater", dt_mas pc_n = ct_n + surv_n - sf = (surv_n / pc_n) * 100 - err = sf * np.sqrt((ct_err / pc_n**2) ** 2 + (surv_err / pc_n**2) ** 2) + sf = surv_n / pc_n + err = 100 * sf * (1 - sf) * np.sqrt((ct_err / ct_n) ** 2 + (surv_err / surv_n) ** 2) + sf *= 100 return { "low_cut": low_cut_val, From 9c0ed845c67900771d22e7e342b07e1c7211ef1f Mon Sep 17 00:00:00 2001 From: George Marshall Date: Fri, 6 Sep 2024 15:43:44 +0100 Subject: [PATCH 02/15] bound sigma --- src/pygama/pargen/energy_cal.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 7f89ed8a6..92697e927 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -1964,7 +1964,7 @@ def get_hpge_energy_peak_par_guess( bl=bg - step / 2, method="interpolate", )[0] - if sigma <= 0 or abs(sigma / sigma_guess) > 5: + if sigma <= 0 or abs(sigma / sigma_guess) > 5 or sigma > (fit_range[1] - fit_range[0]) / 4: raise ValueError except ValueError: try: @@ -1980,11 +1980,11 @@ def get_hpge_energy_peak_par_guess( except RuntimeError: sigma = -1 if sigma <= 0 or sigma > 1000 or abs(sigma / sigma_guess) > 5: - if sigma_guess is not None and sigma_guess > 0 and sigma_guess < 1000: + if sigma_guess is not None and sigma_guess > 0 and sigma_guess < (fit_range[1] - fit_range[0]) / 4: sigma = sigma_guess else: (_, sigma, _) = pgh.get_gaussian_guess(hist, bins) - if sigma is not None and sigma_guess > 0 and sigma_guess < 1000: + if sigma is not None and sigma_guess > 0 and sigma_guess < (fit_range[1] - fit_range[0]) / 4: pass else: log.info( @@ -2076,7 +2076,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, None), + "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), "n_bkg": (0, None), "hstep": (-1, 1), "x_lo": (None, None), @@ -2087,7 +2087,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, None), + "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), "htail": (0, 0.5), "tau": (0.1 * parguess["sigma"], 5 * parguess["sigma"]), "n_bkg": (0, None), @@ -2100,7 +2100,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, None), + "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), "n_bkg": (0, None), "x_lo": (None, None), "x_hi": (None, None), @@ -2109,7 +2109,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, None), + "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), "n_bkg": (0, None), "m": (None, None), "b": (None, None), From 7ba46afe2e54794cc41877bd93ca37d066cf3a81 Mon Sep 17 00:00:00 2001 From: George Marshall Date: Fri, 6 Sep 2024 15:45:53 +0100 Subject: [PATCH 03/15] pc fixes --- src/pygama/pargen/energy_cal.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 92697e927..6719b1dd5 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -1964,7 +1964,11 @@ def get_hpge_energy_peak_par_guess( bl=bg - step / 2, method="interpolate", )[0] - if sigma <= 0 or abs(sigma / sigma_guess) > 5 or sigma > (fit_range[1] - fit_range[0]) / 4: + if ( + sigma <= 0 + or abs(sigma / sigma_guess) > 5 + or sigma > (fit_range[1] - fit_range[0]) / 4 + ): raise ValueError except ValueError: try: @@ -1980,11 +1984,19 @@ def get_hpge_energy_peak_par_guess( except RuntimeError: sigma = -1 if sigma <= 0 or sigma > 1000 or abs(sigma / sigma_guess) > 5: - if sigma_guess is not None and sigma_guess > 0 and sigma_guess < (fit_range[1] - fit_range[0]) / 4: + if ( + sigma_guess is not None + and sigma_guess > 0 + and sigma_guess < (fit_range[1] - fit_range[0]) / 4 + ): sigma = sigma_guess else: (_, sigma, _) = pgh.get_gaussian_guess(hist, bins) - if sigma is not None and sigma_guess > 0 and sigma_guess < (fit_range[1] - fit_range[0]) / 4: + if ( + sigma is not None + and sigma_guess > 0 + and sigma_guess < (fit_range[1] - fit_range[0]) / 4 + ): pass else: log.info( @@ -2076,7 +2088,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), "n_bkg": (0, None), "hstep": (-1, 1), "x_lo": (None, None), @@ -2087,7 +2099,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), "htail": (0, 0.5), "tau": (0.1 * parguess["sigma"], 5 * parguess["sigma"]), "n_bkg": (0, None), @@ -2100,7 +2112,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), "n_bkg": (0, None), "x_lo": (None, None), "x_hi": (None, None), @@ -2109,7 +2121,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"]-parguess["x_lo"])/4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), "n_bkg": (0, None), "m": (None, None), "b": (None, None), From bf4e72fb3b9bcc34e3349d565824001fe9c308df Mon Sep 17 00:00:00 2001 From: George Marshall Date: Sat, 7 Sep 2024 16:43:26 +0100 Subject: [PATCH 04/15] increase limit on sigma to give fit more room --- src/pygama/pargen/energy_cal.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 6719b1dd5..a9c4b888c 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -1967,7 +1967,7 @@ def get_hpge_energy_peak_par_guess( if ( sigma <= 0 or abs(sigma / sigma_guess) > 5 - or sigma > (fit_range[1] - fit_range[0]) / 4 + or sigma > (fit_range[1] - fit_range[0]) / 2 ): raise ValueError except ValueError: @@ -1983,11 +1983,15 @@ def get_hpge_energy_peak_par_guess( )[0] except RuntimeError: sigma = -1 - if sigma <= 0 or sigma > 1000 or abs(sigma / sigma_guess) > 5: + if ( + sigma <= 0 + or sigma > (fit_range[1] - fit_range[0]) / 2 + or abs(sigma / sigma_guess) > 5 + ): if ( sigma_guess is not None and sigma_guess > 0 - and sigma_guess < (fit_range[1] - fit_range[0]) / 4 + and sigma_guess < (fit_range[1] - fit_range[0]) / 2 ): sigma = sigma_guess else: @@ -1995,7 +1999,7 @@ def get_hpge_energy_peak_par_guess( if ( sigma is not None and sigma_guess > 0 - and sigma_guess < (fit_range[1] - fit_range[0]) / 4 + and sigma_guess < (fit_range[1] - fit_range[0]) / 2 ): pass else: @@ -2088,7 +2092,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 2), "n_bkg": (0, None), "hstep": (-1, 1), "x_lo": (None, None), @@ -2099,7 +2103,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 2), "htail": (0, 0.5), "tau": (0.1 * parguess["sigma"], 5 * parguess["sigma"]), "n_bkg": (0, None), @@ -2112,7 +2116,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 2), "n_bkg": (0, None), "x_lo": (None, None), "x_hi": (None, None), @@ -2121,7 +2125,7 @@ def get_hpge_energy_bounds(func, parguess): return { "n_sig": (0, None), "mu": (parguess["x_lo"], parguess["x_hi"]), - "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 4), + "sigma": (0, (parguess["x_hi"] - parguess["x_lo"]) / 2), "n_bkg": (0, None), "m": (None, None), "b": (None, None), From b826460be75a7dfd1d176dcb7ab84e948e590c41 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Wed, 9 Oct 2024 15:21:55 +0200 Subject: [PATCH 05/15] first version aoe sf joint fitter --- src/pygama/pargen/AoE_cal.py | 244 +++++++++++++++++++++++++++-------- 1 file changed, 191 insertions(+), 53 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 49fa4d38c..af66db883 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -4,6 +4,7 @@ from __future__ import annotations +import copy import logging import re from datetime import datetime @@ -607,8 +608,92 @@ def get_peak_label(peak: float) -> str: return "Tl FEP @" +def pass_pdf_hpge( + x, + x_lo, + x_hi, + n_sig1, + n_sig2, + mu, + sigma, + htail, + tau, + n_bkg1, + n_bkg2, + hstep1, + hstep2, + hstep3, +): + return hpge_peak.pdf_ext( + x, x_lo, x_hi, n_sig1, mu, sigma, htail, tau, n_bkg1, hstep1 + ) + + +def fail_pdf_hpge( + x, + x_lo, + x_hi, + n_sig1, + n_sig2, + mu, + sigma, + htail, + tau, + n_bkg1, + n_bkg2, + hstep1, + hstep2, + hstep3, +): + return hpge_peak.pdf_ext( + x, x_lo, x_hi, n_sig2, mu, sigma, htail, tau, n_bkg2, hstep2 + ) + + +def tot_pdf_hpge( + x, + x_lo, + x_hi, + n_sig1, + n_sig2, + mu, + sigma, + htail, + tau, + n_bkg1, + n_bkg2, + hstep1, + hstep2, + hstep3, +): + return hpge_peak.pdf_ext( + x, x_lo, x_hi, n_sig1 + n_sig2, mu, sigma, htail, tau, n_bkg1 + n_bkg2, hstep3 + ) + + +def pass_pdf_gos( + x, x_lo, x_hi, n_sig1, n_sig2, mu, sigma, n_bkg1, n_bkg2, hstep1, hstep2, hstep3 +): + return gauss_on_step.pdf_ext(x, x_lo, x_hi, n_sig1, mu, sigma, n_bkg1, hstep1) + + +def fail_pdf_gos( + x, x_lo, x_hi, n_sig1, n_sig2, mu, sigma, n_bkg1, n_bkg2, hstep1, hstep2, hstep3 +): + return gauss_on_step.pdf_ext(x, x_lo, x_hi, n_sig2, mu, sigma, n_bkg2, hstep2) + + +def tot_pdf_gos( + x, x_lo, x_hi, n_sig1, n_sig2, mu, sigma, n_bkg1, n_bkg2, hstep1, hstep2, hstep3 +): + return gauss_on_step.pdf_ext( + x, x_lo, x_hi, n_sig1 + n_sig2, mu, sigma, n_bkg1 + n_bkg2, hstep3 + ) + + def update_guess(func, parguess, energies): - if func == gauss_on_step: + if func == gauss_on_step or func == hpge_peak: + total_events = len(energies) parguess["n_sig"] = len( energies[ @@ -616,15 +701,16 @@ def update_guess(func, parguess, energies): & (energies < parguess["mu"] + 2 * parguess["sigma"]) ] ) - parguess["n_bkg"] = total_events - parguess["n_sig"] - return parguess - - if func == hpge_peak: - total_events = len(energies) - parguess["n_sig"] = len( + parguess["n_sig"] -= len( energies[ - (energies > parguess["mu"] - 2 * parguess["sigma"]) - & (energies < parguess["mu"] + 2 * parguess["sigma"]) + (energies > parguess["x_lo"]) + & (energies < parguess["x_lo"] + 2 * parguess["sigma"]) + ] + ) + parguess["n_sig"] -= len( + energies[ + (energies > parguess["x_hi"] - 2 * parguess["sigma"]) + & (energies < parguess["x_hi"]) ] ) parguess["n_bkg"] = total_events - parguess["n_sig"] @@ -643,11 +729,11 @@ def get_survival_fraction( eres_pars, fit_range=None, high_cut=None, - guess_pars_cut=None, - guess_pars_surv=None, + pars=None, dt_mask=None, mode="greater", func=hpge_peak, + fix_step=False, display=0, ): if dt_mask is None: @@ -672,8 +758,8 @@ def get_survival_fraction( else: raise ValueError("mode not recognised") - if guess_pars_cut is None or guess_pars_surv is None: - (pars, errs, cov, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( + if pars is None: + (pars, _, _, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( energy, func, guess_func=energy_guess, @@ -682,40 +768,74 @@ def get_survival_fraction( fit_range=fit_range, ) - guess_pars_cut = pars - guess_pars_surv = pars + guess_pars_cut = copy.deepcopy(pars) + guess_pars_surv = copy.deepcopy(pars) + # add update guess here for n_sig and n_bkg guess_pars_cut = update_guess(func, guess_pars_cut, energy[(~nan_idxs) & (~idxs)]) - (cut_pars, cut_errs, cut_cov, _, _, _, _, _) = pgc.unbinned_staged_energy_fit( - energy[(~nan_idxs) & (~idxs)], - func, - guess=guess_pars_cut, - guess_func=energy_guess, - bounds_func=get_bounds, - fixed_func=fix_all_but_nevents, - guess_kwargs={"peak": peak, "eres": eres_pars}, - lock_guess=True, - allow_tail_drop=False, - fit_range=fit_range, - ) - guess_pars_surv = update_guess(func, guess_pars_cut, energy[(~nan_idxs) & (idxs)]) - (surv_pars, surv_errs, surv_cov, _, _, _, _, _) = pgc.unbinned_staged_energy_fit( - energy[(~nan_idxs) & (idxs)], - func, - guess=guess_pars_surv, - guess_func=energy_guess, - bounds_func=get_bounds, - fixed_func=fix_all_but_nevents, - guess_kwargs={"peak": peak, "eres": eres_pars}, - lock_guess=True, - allow_tail_drop=False, - fit_range=fit_range, - ) + guess_pars_surv = update_guess(func, guess_pars_surv, energy[(~nan_idxs) & (idxs)]) + + parguess = { + "x_lo": pars["x_lo"], + "x_hi": pars["x_hi"], + "mu": pars["mu"], + "sigma": pars["sigma"], + "n_sig1": guess_pars_surv["n_sig"], + "n_bkg1": guess_pars_surv["n_bkg"], + "n_sig2": guess_pars_cut["n_sig"], + "n_bkg2": guess_pars_cut["n_bkg"], + "hstep1": pars["hstep"], + "hstep2": pars["hstep"], + "hstep3": pars["hstep"], + } + + bounds = { + "n_sig1": (0, pars["n_sig"] + pars["n_bkg"]), + "n_sig2": (0, pars["n_sig"] + pars["n_bkg"]), + "n_bkg1": (0, pars["n_bkg"] + pars["n_sig"]), + "n_bkg2": (0, pars["n_bkg"] + pars["n_sig"]), + "hstep1": (-1, 1), + "hstep2": (-1, 1), + "hstep3": (-1, 1), + } + + if func == hpge_peak: + parguess.update({"htail": pars["htail"], "tau": pars["tau"]}) + + if func == hpge_peak: + lh = ( + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (idxs)], pass_pdf_hpge) + + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_hpge) + + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs)], tot_pdf_hpge) + ) + elif func == gauss_on_step: + lh = ( + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (idxs)], pass_pdf_gos) + + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_gos) + + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs)], tot_pdf_gos) + ) - ct_n = cut_pars["n_sig"] - ct_err = cut_errs["n_sig"] - surv_n = surv_pars["n_sig"] - surv_err = surv_errs["n_sig"] + else: + raise ValueError("Unknown func") + + m = Minuit(lh, **parguess) + fixed = ["x_lo", "x_hi", "mu", "sigma"] + if func == hpge_peak: + fixed += ["tau", "htail"] + if fix_step is True: + fixed += ["hstep1", "hstep2", "hstep3"] + + m.fixed[fixed] = True + for arg, val in bounds.items(): + m.limits[arg] = val + + m.simplex().migrad() + m.hesse() + + ct_n = m.values["n_sig2"] + ct_err = m.errors["n_sig2"] + surv_n = m.values["n_sig1"] + surv_err = m.errors["n_sig1"] pc_n = ct_n + surv_n @@ -723,7 +843,29 @@ def get_survival_fraction( err = 100 * sf * (1 - sf) * np.sqrt((ct_err / ct_n) ** 2 + (surv_err / surv_n) ** 2) sf *= 100 - return sf, err, cut_pars, surv_pars + if display > 1: + fig, (ax1, ax2, ax3) = plt.subplots(1, 3) + bins = np.arange(1552, 1612, 1) + ax1.hist(energy[(~nan_idxs) & (idxs)], bins=bins, histtype="step") + + ax2.hist(energy[(~nan_idxs) & (~idxs)], bins=bins, histtype="step") + + ax3.hist(energy[(~nan_idxs)], bins=bins, histtype="step") + + if func == hpge_peak: + ax1.plot(bins, pass_pdf_hpge(bins, **m.values.to_dict())[1]) + ax2.plot(bins, fail_pdf_hpge(bins, **m.values.to_dict())[1]) + + ax3.plot(bins, tot_pdf_hpge(bins, **m.values.to_dict())[1]) + elif func == gauss_on_step: + ax1.plot(bins, pass_pdf_gos(bins, **m.values.to_dict())[1]) + ax2.plot(bins, fail_pdf_gos(bins, **m.values.to_dict())[1]) + + ax3.plot(bins, tot_pdf_gos(bins, **m.values.to_dict())[1]) + + plt.show() + + return sf, err, m.values, m.errors def get_sf_sweep( @@ -754,7 +896,7 @@ def get_sf_sweep( cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) out_df = pd.DataFrame() - (pars, _, _, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( + (pars, errs, _, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( energy, hpge_peak, guess_func=energy_guess, @@ -762,8 +904,6 @@ def get_sf_sweep( guess_kwargs={"peak": peak, "eres": eres_pars}, fit_range=fit_range, ) - guess_pars_cut = pars - guess_pars_surv = pars for cut_val in cut_vals: try: @@ -776,8 +916,7 @@ def get_sf_sweep( fit_range=fit_range, dt_mask=dt_mask, mode=mode, - guess_pars_cut=guess_pars_cut, - guess_pars_surv=guess_pars_surv, + pars=pars, func=func, ) out_df = pd.concat( @@ -790,7 +929,7 @@ def get_sf_sweep( raise (e) out_df.set_index("cut_val", inplace=True) if final_cut_value is not None: - sf, sf_err, cut_pars, surv_pars = get_survival_fraction( + sf, sf_err, _, _ = get_survival_fraction( energy, cut_param, final_cut_value, @@ -799,8 +938,7 @@ def get_sf_sweep( fit_range=fit_range, dt_mask=dt_mask, mode=mode, - guess_pars_cut=guess_pars_cut, - guess_pars_surv=guess_pars_surv, + pars=pars, func=func, ) else: From 45bc09c2d85c8ca5f39eec3395004a4167644c25 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 10 Oct 2024 16:09:17 +0200 Subject: [PATCH 06/15] update peak definitions to make efficiency a parameter of fits --- src/pygama/pargen/AoE_cal.py | 165 +++++++++++++++-------------------- 1 file changed, 71 insertions(+), 94 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index af66db883..ec5ce2b5b 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -608,86 +608,84 @@ def get_peak_label(peak: float) -> str: return "Tl FEP @" -def pass_pdf_hpge( - x, - x_lo, - x_hi, - n_sig1, - n_sig2, - mu, - sigma, - htail, - tau, - n_bkg1, - n_bkg2, - hstep1, - hstep2, - hstep3, +def pass_pdf_gos( + x, x_lo, x_hi, n_sig, epsilon_sig, n_bkg, epsilon_bkg, mu, sigma, hstep1, hstep2 ): - return hpge_peak.pdf_ext( - x, x_lo, x_hi, n_sig1, mu, sigma, htail, tau, n_bkg1, hstep1 + return gauss_on_step.pdf_ext( + x, x_lo, x_hi, n_sig * epsilon_sig, mu, sigma, n_bkg * epsilon_bkg, hstep1 ) -def fail_pdf_hpge( +def fail_pdf_gos( + x, x_lo, x_hi, n_sig, epsilon_sig, n_bkg, epsilon_bkg, mu, sigma, hstep1, hstep2 +): + return gauss_on_step.pdf_ext( + x, + x_lo, + x_hi, + n_sig * (1 - epsilon_sig), + mu, + sigma, + n_bkg * (1 - epsilon_bkg), + hstep2, + ) + + +def pass_pdf_hpge( x, x_lo, x_hi, - n_sig1, - n_sig2, + n_sig, + epsilon_sig, + n_bkg, + epsilon_bkg, mu, sigma, htail, tau, - n_bkg1, - n_bkg2, hstep1, hstep2, - hstep3, ): return hpge_peak.pdf_ext( - x, x_lo, x_hi, n_sig2, mu, sigma, htail, tau, n_bkg2, hstep2 + x, + x_lo, + x_hi, + n_sig * epsilon_sig, + mu, + sigma, + htail, + tau, + n_bkg * epsilon_bkg, + hstep1, ) -def tot_pdf_hpge( +def fail_pdf_hpge( x, x_lo, x_hi, - n_sig1, - n_sig2, + n_sig, + epsilon_sig, + n_bkg, + epsilon_bkg, mu, sigma, htail, tau, - n_bkg1, - n_bkg2, hstep1, hstep2, - hstep3, ): return hpge_peak.pdf_ext( - x, x_lo, x_hi, n_sig1 + n_sig2, mu, sigma, htail, tau, n_bkg1 + n_bkg2, hstep3 - ) - - -def pass_pdf_gos( - x, x_lo, x_hi, n_sig1, n_sig2, mu, sigma, n_bkg1, n_bkg2, hstep1, hstep2, hstep3 -): - return gauss_on_step.pdf_ext(x, x_lo, x_hi, n_sig1, mu, sigma, n_bkg1, hstep1) - - -def fail_pdf_gos( - x, x_lo, x_hi, n_sig1, n_sig2, mu, sigma, n_bkg1, n_bkg2, hstep1, hstep2, hstep3 -): - return gauss_on_step.pdf_ext(x, x_lo, x_hi, n_sig2, mu, sigma, n_bkg2, hstep2) - - -def tot_pdf_gos( - x, x_lo, x_hi, n_sig1, n_sig2, mu, sigma, n_bkg1, n_bkg2, hstep1, hstep2, hstep3 -): - return gauss_on_step.pdf_ext( - x, x_lo, x_hi, n_sig1 + n_sig2, mu, sigma, n_bkg1 + n_bkg2, hstep3 + x, + x_lo, + x_hi, + n_sig * (1 - epsilon_sig), + mu, + sigma, + htail, + tau, + n_bkg * (1 - epsilon_bkg), + hstep2, ) @@ -730,6 +728,7 @@ def get_survival_fraction( fit_range=None, high_cut=None, pars=None, + errs=None, dt_mask=None, mode="greater", func=hpge_peak, @@ -758,8 +757,8 @@ def get_survival_fraction( else: raise ValueError("mode not recognised") - if pars is None: - (pars, _, _, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( + if pars is None or errs is None: + (pars, errs, cov, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( energy, func, guess_func=energy_guess, @@ -768,11 +767,9 @@ def get_survival_fraction( fit_range=fit_range, ) - guess_pars_cut = copy.deepcopy(pars) guess_pars_surv = copy.deepcopy(pars) # add update guess here for n_sig and n_bkg - guess_pars_cut = update_guess(func, guess_pars_cut, energy[(~nan_idxs) & (~idxs)]) guess_pars_surv = update_guess(func, guess_pars_surv, energy[(~nan_idxs) & (idxs)]) parguess = { @@ -780,50 +777,44 @@ def get_survival_fraction( "x_hi": pars["x_hi"], "mu": pars["mu"], "sigma": pars["sigma"], - "n_sig1": guess_pars_surv["n_sig"], - "n_bkg1": guess_pars_surv["n_bkg"], - "n_sig2": guess_pars_cut["n_sig"], - "n_bkg2": guess_pars_cut["n_bkg"], "hstep1": pars["hstep"], "hstep2": pars["hstep"], - "hstep3": pars["hstep"], + "n_sig": pars["n_sig"], + "n_bkg": pars["n_bkg"], + "epsilon_sig": guess_pars_surv["n_sig"] / pars["n_sig"], + "epsilon_bkg": guess_pars_surv["n_bkg"] / pars["n_bkg"], } bounds = { - "n_sig1": (0, pars["n_sig"] + pars["n_bkg"]), - "n_sig2": (0, pars["n_sig"] + pars["n_bkg"]), - "n_bkg1": (0, pars["n_bkg"] + pars["n_sig"]), - "n_bkg2": (0, pars["n_bkg"] + pars["n_sig"]), + "n_sig": (0, pars["n_sig"] + pars["n_bkg"]), + "epsilon_sig": (0, 1), + "n_bkg": (0, pars["n_bkg"] + pars["n_sig"]), + "epsilon_bkg": (0, 1), "hstep1": (-1, 1), "hstep2": (-1, 1), - "hstep3": (-1, 1), } if func == hpge_peak: parguess.update({"htail": pars["htail"], "tau": pars["tau"]}) if func == hpge_peak: - lh = ( - cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (idxs)], pass_pdf_hpge) - + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_hpge) - + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs)], tot_pdf_hpge) - ) + lh = cost.ExtendedUnbinnedNLL( + energy[(~nan_idxs) & (idxs)], pass_pdf_hpge + ) + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_hpge) elif func == gauss_on_step: - lh = ( - cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (idxs)], pass_pdf_gos) - + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_gos) - + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs)], tot_pdf_gos) - ) + lh = cost.ExtendedUnbinnedNLL( + energy[(~nan_idxs) & (idxs)], pass_pdf_gos + ) + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_gos) else: raise ValueError("Unknown func") m = Minuit(lh, **parguess) - fixed = ["x_lo", "x_hi", "mu", "sigma"] + fixed = ["x_lo", "x_hi", "n_sig", "n_bkg", "mu", "sigma"] # "hstep" if func == hpge_peak: fixed += ["tau", "htail"] if fix_step is True: - fixed += ["hstep1", "hstep2", "hstep3"] + fixed += ["hstep1", "hstep2"] m.fixed[fixed] = True for arg, val in bounds.items(): @@ -832,37 +823,23 @@ def get_survival_fraction( m.simplex().migrad() m.hesse() - ct_n = m.values["n_sig2"] - ct_err = m.errors["n_sig2"] - surv_n = m.values["n_sig1"] - surv_err = m.errors["n_sig1"] - - pc_n = ct_n + surv_n - - sf = surv_n / pc_n - err = 100 * sf * (1 - sf) * np.sqrt((ct_err / ct_n) ** 2 + (surv_err / surv_n) ** 2) - sf *= 100 + sf = m.values["epsilon_sig"] * 100 + err = m.errors["epsilon_sig"] * 100 if display > 1: - fig, (ax1, ax2, ax3) = plt.subplots(1, 3) + fig, (ax1, ax2) = plt.subplots(1, 2) bins = np.arange(1552, 1612, 1) ax1.hist(energy[(~nan_idxs) & (idxs)], bins=bins, histtype="step") ax2.hist(energy[(~nan_idxs) & (~idxs)], bins=bins, histtype="step") - ax3.hist(energy[(~nan_idxs)], bins=bins, histtype="step") - if func == hpge_peak: ax1.plot(bins, pass_pdf_hpge(bins, **m.values.to_dict())[1]) ax2.plot(bins, fail_pdf_hpge(bins, **m.values.to_dict())[1]) - - ax3.plot(bins, tot_pdf_hpge(bins, **m.values.to_dict())[1]) elif func == gauss_on_step: ax1.plot(bins, pass_pdf_gos(bins, **m.values.to_dict())[1]) ax2.plot(bins, fail_pdf_gos(bins, **m.values.to_dict())[1]) - ax3.plot(bins, tot_pdf_gos(bins, **m.values.to_dict())[1]) - plt.show() return sf, err, m.values, m.errors From 9f6ddd82ba3dc3cf9eefcf945a0d47c1820d8414 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 24 Oct 2024 17:40:12 +0200 Subject: [PATCH 07/15] split survival fractions into own script --- src/pygama/pargen/lq_cal.py | 7 +- src/pygama/pargen/survival_fractions.py | 638 ++++++++++++++++++++++++ 2 files changed, 641 insertions(+), 4 deletions(-) create mode 100644 src/pygama/pargen/survival_fractions.py diff --git a/src/pygama/pargen/lq_cal.py b/src/pygama/pargen/lq_cal.py index 669330b2b..c6c31dc2b 100644 --- a/src/pygama/pargen/lq_cal.py +++ b/src/pygama/pargen/lq_cal.py @@ -16,6 +16,7 @@ import pygama.math.histogram as pgh import pygama.pargen.AoE_cal as AoE from pygama.math.distributions import gaussian +from pygama.pargen.survival_fractions import compton_sf_sweep, get_sf_sweep log = logging.getLogger(__name__) @@ -564,12 +565,10 @@ def calibrate(self, df, initial_lq_param): f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" ) - cut_df, sf, sf_err = AoE.compton_sf_sweep( + cut_df, sf, sf_err = compton_sf_sweep( peak_df[self.cal_energy_param].to_numpy(), peak_df[final_lq_param].to_numpy(), self.cut_val, - peak, - fwhm, cut_range=(0, 5), n_samples=30, mode="less", @@ -587,7 +586,7 @@ def calibrate(self, df, initial_lq_param): peak_df = select_df.query( f"({self.cal_energy_param}>{fit_range[0]})&({self.cal_energy_param}<{fit_range[1]})" ) - cut_df, sf, sf_err = AoE.get_sf_sweep( + cut_df, sf, sf_err = get_sf_sweep( peak_df[self.cal_energy_param].to_numpy(), peak_df[final_lq_param].to_numpy(), self.cut_val, diff --git a/src/pygama/pargen/survival_fractions.py b/src/pygama/pargen/survival_fractions.py new file mode 100644 index 000000000..89a6762d1 --- /dev/null +++ b/src/pygama/pargen/survival_fractions.py @@ -0,0 +1,638 @@ +""" +This module provides functions for calculating survival fractions for a cut. +""" + +from __future__ import annotations + +import copy +import logging + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from iminuit import Minuit, cost +from iminuit.util import ValueView + +import pygama.pargen.energy_cal as pgc +from pygama.math.distributions import gauss_on_step, hpge_peak + +log = logging.getLogger(__name__) + + +def energy_guess(energy, func_i, fit_range=None, bin_width=1, peak=None, eres=None): + """ + Simple guess for peak fitting + """ + if fit_range is None: + fit_range = (np.nanmin(energy), np.nanmax(energy)) + if func_i == hpge_peak or func_i == gauss_on_step: + parguess = pgc.get_hpge_energy_peak_par_guess( + energy, func_i, fit_range=fit_range + ) + + if peak is not None: + parguess["mu"] = peak + + if eres is not None: + parguess["sigma"] = eres / 2.355 + + for i, guess in enumerate(parguess): + if np.isnan(guess): + parguess[i] = 0 + + else: + log.error(f"energy_guess not implemented for {func_i}") + return None + return parguess + + +def fix_all_but_nevents(func): + """ + Fixes all parameters except the number of signal and background events + and their efficiencies + Returns: Sequence list of fixed indexes for fitting and mask for parameters + """ + + if func == gauss_on_step: + # pars are: n_sig, mu, sigma, n_bkg, hstep, lower, upper, components + fixed = ["x_lo", "x_hi", "mu", "sigma", "hstep"] + + elif func == hpge_peak: + # pars are: , components + fixed = ["x_lo", "x_hi", "mu", "sigma", "htail", "tau", "hstep"] + + else: + log.error(f"get_hpge_E_fixed not implemented for {func}") + return None, None + mask = ~np.in1d(func.required_args(), fixed) + return fixed, mask + + +def get_bounds(func, parguess): + """ + Gets bounds for the fit parameters + """ + if func == hpge_peak or func == gauss_on_step: + bounds = pgc.get_hpge_energy_bounds(func, parguess) + + bounds["mu"] = (parguess["mu"] - 1, parguess["mu"] + 1) + bounds["n_sig"] = (0, 2 * (parguess["n_sig"] + parguess["n_bkg"])) + bounds["n_bkg"] = (0, 2 * (parguess["n_sig"] + parguess["n_bkg"])) + + else: + log.error(f"get_bounds not implemented for {func}") + return None + return bounds + + +def pass_pdf_gos( + x, x_lo, x_hi, n_sig, epsilon_sig, n_bkg, epsilon_bkg, mu, sigma, hstep1, hstep2 +): + """ + pdf for gauss on step fit reparamerised to calculate the efficiency of the cut + this is the passing pdf + """ + return gauss_on_step.pdf_ext( + x, x_lo, x_hi, n_sig * epsilon_sig, mu, sigma, n_bkg * epsilon_bkg, hstep1 + ) + + +def fail_pdf_gos( + x, x_lo, x_hi, n_sig, epsilon_sig, n_bkg, epsilon_bkg, mu, sigma, hstep1, hstep2 +): + """ + pdf for gauss on step fit reparamerised to calculate the efficiency of the cut + this is the cut pdf + """ + return gauss_on_step.pdf_ext( + x, + x_lo, + x_hi, + n_sig * (1 - epsilon_sig), + mu, + sigma, + n_bkg * (1 - epsilon_bkg), + hstep2, + ) + + +def pass_pdf_hpge( + x, + x_lo, + x_hi, + n_sig, + epsilon_sig, + n_bkg, + epsilon_bkg, + mu, + sigma, + htail, + tau, + hstep1, + hstep2, +): + """ + pdf for hpge peak fit reparamerised to calculate the efficiency of the cut. + this is the passing pdf + """ + return hpge_peak.pdf_ext( + x, + x_lo, + x_hi, + n_sig * epsilon_sig, + mu, + sigma, + htail, + tau, + n_bkg * epsilon_bkg, + hstep1, + ) + + +def fail_pdf_hpge( + x, + x_lo, + x_hi, + n_sig, + epsilon_sig, + n_bkg, + epsilon_bkg, + mu, + sigma, + htail, + tau, + hstep1, + hstep2, +): + """ + pdf for hpge peak fit reparamerised to calculate the efficiency of the cut + this is the cut pdf + """ + return hpge_peak.pdf_ext( + x, + x_lo, + x_hi, + n_sig * (1 - epsilon_sig), + mu, + sigma, + htail, + tau, + n_bkg * (1 - epsilon_bkg), + hstep2, + ) + + +def update_guess(func, parguess, energies): + """ + Updates guess for the number of signal and background events + """ + if func == gauss_on_step or func == hpge_peak: + + total_events = len(energies) + parguess["n_sig"] = len( + energies[ + (energies > parguess["mu"] - 2 * parguess["sigma"]) + & (energies < parguess["mu"] + 2 * parguess["sigma"]) + ] + ) + parguess["n_sig"] -= len( + energies[ + (energies > parguess["x_lo"]) + & (energies < parguess["x_lo"] + 2 * parguess["sigma"]) + ] + ) + parguess["n_sig"] -= len( + energies[ + (energies > parguess["x_hi"] - 2 * parguess["sigma"]) + & (energies < parguess["x_hi"]) + ] + ) + parguess["n_bkg"] = total_events - parguess["n_sig"] + return parguess + + else: + log.error(f"update_guess not implemented for {func}") + return parguess + + +def get_survival_fraction( + energy: np.ndarray, + cut_param: np.ndarray, + cut_val: float, + peak: float, + eres_pars: float, + fit_range: tuple = None, + high_cut: float = None, + pars: ValueView = None, + data_mask: np.ndarray = None, + mode: str = "greater", + func=hpge_peak, + fix_step=False, + display=0, +): + """ + Function for calculating the survival fraction of a cut + using a fit to the surviving and failing energy distributions. + + Parameters + ---------- + + energy: array + array of energies + cut_param: array + array of the cut parameter for the survival fraction calculation, should have the same length as energy + cut_val: float + value of the cut parameter to be used for the survival fraction calculation + peak: float + energy of the peak to be fitted + eres_pars: float + energy resolution parameter for the peak + fit_range: tuple + range of the fit in keV + high_cut: float + upper value for the cut parameter to have a range in the cut value + pars: iMinuit ValueView + initial parameters for the fit + data_mask: array + mask for the data to be used in the fit + mode: str + mode of the cut, either "greater" or "less" + func: function + function to be used in the fit + fix_step: bool + option to fix the step parameters in the fit + display: int + option to display the fit if greater than 1 + + Returns + ------- + + sf: float + survival fraction + err: float + error on the survival fraction + values: iMinuit ValueView + values of the parameters of the fit + errors: iMinuit ValueView + errors on the parameters of the fit + """ + if data_mask is None: + data_mask = np.full(len(cut_param), True, dtype=bool) + + if not isinstance(energy, np.ndarray): + energy = np.array(energy) + if not isinstance(cut_param, np.ndarray): + cut_param = np.array(cut_param) + + if fit_range is None: + fit_range = (np.nanmin(energy), np.nanmax(energy)) + + nan_idxs = np.isnan(cut_param) + if high_cut is not None: + idxs = (cut_param > cut_val) & (cut_param < high_cut) & data_mask + else: + if mode == "greater": + idxs = (cut_param > cut_val) & data_mask + elif mode == "less": + idxs = (cut_param < cut_val) & data_mask + else: + raise ValueError("mode not recognised") + + if pars is None: + (pars, errs, cov, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( + energy, + func, + guess_func=energy_guess, + bounds_func=get_bounds, + guess_kwargs={"peak": peak, "eres": eres_pars}, + fit_range=fit_range, + ) + + guess_pars_surv = copy.deepcopy(pars) + + # add update guess here for n_sig and n_bkg + guess_pars_surv = update_guess(func, guess_pars_surv, energy[(~nan_idxs) & (idxs)]) + + parguess = { + "x_lo": pars["x_lo"], + "x_hi": pars["x_hi"], + "mu": pars["mu"], + "sigma": pars["sigma"], + "hstep1": pars["hstep"], + "hstep2": pars["hstep"], + "n_sig": pars["n_sig"], + "n_bkg": pars["n_bkg"], + "epsilon_sig": guess_pars_surv["n_sig"] / pars["n_sig"], + "epsilon_bkg": guess_pars_surv["n_bkg"] / pars["n_bkg"], + } + + bounds = { + "n_sig": (0, pars["n_sig"] + pars["n_bkg"]), + "epsilon_sig": (0, 1), + "n_bkg": (0, pars["n_bkg"] + pars["n_sig"]), + "epsilon_bkg": (0, 1), + "hstep1": (-1, 1), + "hstep2": (-1, 1), + } + + if func == hpge_peak: + parguess.update({"htail": pars["htail"], "tau": pars["tau"]}) + + if func == hpge_peak: + lh = cost.ExtendedUnbinnedNLL( + energy[(~nan_idxs) & (idxs)], pass_pdf_hpge + ) + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_hpge) + elif func == gauss_on_step: + lh = cost.ExtendedUnbinnedNLL( + energy[(~nan_idxs) & (idxs)], pass_pdf_gos + ) + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_gos) + + else: + raise ValueError("Unknown func") + + m = Minuit(lh, **parguess) + fixed = ["x_lo", "x_hi", "n_sig", "n_bkg", "mu", "sigma"] # "hstep" + if func == hpge_peak: + fixed += ["tau", "htail"] + if fix_step is True: + fixed += ["hstep1", "hstep2"] + + m.fixed[fixed] = True + for arg, val in bounds.items(): + m.limits[arg] = val + + m.simplex().migrad() + m.hesse() + + sf = m.values["epsilon_sig"] * 100 + err = m.errors["epsilon_sig"] * 100 + + if display > 1: + fig, (ax1, ax2) = plt.subplots(1, 2) + bins = np.arange(1552, 1612, 1) + ax1.hist(energy[(~nan_idxs) & (idxs)], bins=bins, histtype="step") + + ax2.hist(energy[(~nan_idxs) & (~idxs)], bins=bins, histtype="step") + + if func == hpge_peak: + ax1.plot(bins, pass_pdf_hpge(bins, **m.values.to_dict())[1]) + ax2.plot(bins, fail_pdf_hpge(bins, **m.values.to_dict())[1]) + elif func == gauss_on_step: + ax1.plot(bins, pass_pdf_gos(bins, **m.values.to_dict())[1]) + ax2.plot(bins, fail_pdf_gos(bins, **m.values.to_dict())[1]) + + plt.show() + + return sf, err, m.values, m.errors + + +def get_sf_sweep( + energy: np.array, + cut_param: np.array, + final_cut_value: float = None, + peak: float = 1592.5, + eres_pars: list = None, + data_mask=None, + cut_range=(-5, 5), + n_samples=26, + mode="greater", + fit_range=None, + debug_mode=False, +) -> tuple(pd.DataFrame, float, float): + """ + Function sweeping through cut values and calculating the survival fraction for each value + using a fit to the surviving and failing enegry distributions. + + Parameters + ---------- + + energy: array + array of energies + cut_param: array + array of the cut parameter for the survival fraction calculation, should have the same length as energy + final_cut_val: float + value of the final cut parameter to be used for the survival fraction calculation + peak: float + energy of the peak to be fitted + eres_pars: float + energy resolution parameter for the peak + data_mask: array + mask for the data to be used in the fit + cut_range: tuple + range of the cut values to be swept through + n_samples: int + number of samples to be taken in the cut range + mode: str + mode of the cut, either "greater" or "less" + fit_range: tuple + range of the fit in keV + debug_mode: bool + option to raise an error if there is an issue with + + Returns + ------- + + out_df: Dataframe + Dataframe of cut values with the survival fraction and error + sf: float + survival fraction + err: float + error on the survival fraction + + """ + + if data_mask is None: + data_mask = np.full(len(cut_param), True, dtype=bool) + + if not isinstance(energy, np.ndarray): + energy = np.array(energy) + if not isinstance(cut_param, np.ndarray): + cut_param = np.array(cut_param) + + cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) + out_df = pd.DataFrame() + + (pars, errs, _, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( + energy, + hpge_peak, + guess_func=energy_guess, + bounds_func=get_bounds, + guess_kwargs={"peak": peak, "eres": eres_pars}, + fit_range=fit_range, + ) + + for cut_val in cut_vals: + try: + sf, err, _, _ = get_survival_fraction( + energy, + cut_param, + cut_val, + peak, + eres_pars, + fit_range=fit_range, + data_mask=data_mask, + mode=mode, + pars=pars, + func=func, + ) + out_df = pd.concat( + [out_df, pd.DataFrame([{"cut_val": cut_val, "sf": sf, "sf_err": err}])] + ) + except BaseException as e: + if e == KeyboardInterrupt: + raise (e) + elif debug_mode: + raise (e) + out_df.set_index("cut_val", inplace=True) + if final_cut_value is not None: + sf, sf_err, _, _ = get_survival_fraction( + energy, + cut_param, + final_cut_value, + peak, + eres_pars, + fit_range=fit_range, + data_mask=data_mask, + mode=mode, + pars=pars, + func=func, + ) + else: + sf = None + sf_err = None + return ( + out_df, + sf, + sf_err, + ) + + +def compton_sf( + cut_param, low_cut_val, high_cut_val=None, mode="greater", data_mask=None +) -> dict: + """ + Function for calculating the survival fraction of a cut in a compton region with + a simple counting analysis. + + Parameters + ---------- + + cut_param: array + array of the cut parameter for the survival fraction calculation, should have the same length as energy + low_cut_val: float + value of the cut parameter to be used for the survival fraction calculation + high_cut_val: float + upper value for the cut parameter to have a range in the cut value + mode: str + mode of the cut, either "greater" or "less" + data_mask: array + mask for the data to be used in the fit + + Returns + ------- + + sf : dict + dictionary containing the low cut value, survival fraction and error on the survival fraction + """ + if data_mask is None: + data_mask = np.full(len(cut_param), True, dtype=bool) + + if not isinstance(cut_param, np.ndarray): + cut_param = np.array(cut_param) + + if high_cut_val is not None: + mask = (cut_param > low_cut_val) & (cut_param < high_cut_val) & data_mask + else: + if mode == "greater": + mask = (cut_param > low_cut_val) & data_mask + elif mode == "less": + mask = (cut_param < low_cut_val) & data_mask + else: + raise ValueError("mode not recognised") + + ct_n = len(cut_param[~mask]) + ct_err = np.sqrt(len(cut_param[~mask])) + surv_n = len(cut_param[mask]) + surv_err = np.sqrt(len(cut_param[mask])) + + pc_n = ct_n + surv_n + + sf = surv_n / pc_n + err = 100 * sf * (1 - sf) * np.sqrt((ct_err / ct_n) ** 2 + (surv_err / surv_n) ** 2) + sf *= 100 + + return { + "low_cut": low_cut_val, + "sf": sf, + "sf_err": err, + "high_cut": high_cut_val, + } + + +def compton_sf_sweep( + energy: np.array, + cut_param: np.array, + final_cut_value: float, + data_mask: np.array = None, + cut_range=(-5, 5), + n_samples=51, + mode="greater", +) -> tuple(pd.DataFrame, float, float): + """ + Function sweeping through cut values and calculating the survival fraction for each value + using a simple counting analysis. + + Parameters + ---------- + + energy: array + array of energies + cut_param: array + array of the cut parameter for the survival fraction calculation, should have the same length as energy + final_cut_val: float + value of the final cut parameter to be used for the survival fraction calculation + data_mask: array + mask for the data to be used in the fit + cut_range: tuple + range of the cut values to be swept through + n_samples: int + number of samples to be taken in the cut range + mode: str + mode of the cut, either "greater" or "less" + + Returns + ------- + + out_df: Dataframe + Dataframe of cut values with the survival fraction and error + sf: float + survival fraction + err: float + error on the survival fraction + + """ + if not isinstance(energy, np.ndarray): + energy = np.array(energy) + if not isinstance(cut_param, np.ndarray): + cut_param = np.array(cut_param) + + cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) + out_df = pd.DataFrame() + + for cut_val in cut_vals: + ct_dict = compton_sf(cut_param, cut_val, mode=mode, data_mask=data_mask) + df = pd.DataFrame( + [ + { + "cut_val": ct_dict["low_cut"], + "sf": ct_dict["sf"], + "sf_err": ct_dict["sf_err"], + } + ] + ) + out_df = pd.concat([out_df, df]) + out_df.set_index("cut_val", inplace=True) + + sf_dict = compton_sf(cut_param, final_cut_value, mode=mode, data_mask=data_mask) + + return out_df, sf_dict["sf"], sf_dict["sf_err"] From 9c4a1ca572bad1be356a3a6aca866d5d13165534 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 24 Oct 2024 17:40:28 +0200 Subject: [PATCH 08/15] docstrings!!! wowowowowow --- src/pygama/pargen/AoE_cal.py | 792 ++++++++++++----------------------- 1 file changed, 274 insertions(+), 518 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index ec5ce2b5b..8b58a468e 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -4,7 +4,6 @@ from __future__ import annotations -import copy import logging import re from datetime import datetime @@ -21,17 +20,16 @@ import pygama.math.binned_fitting as pgf import pygama.math.histogram as pgh import pygama.pargen.energy_cal as pgc -from pygama.math.distributions import ( - exgauss, - gauss_on_exgauss, - gauss_on_step, - gaussian, - hpge_peak, - nb_erfc, -) +from pygama.math.distributions import exgauss, gauss_on_exgauss, gaussian, nb_erfc from pygama.math.functions.gauss import nb_gauss_amp from pygama.math.functions.hpge_peak import hpge_get_fwfm, hpge_get_fwhm, hpge_get_mode from pygama.math.functions.sum_dists import SumDists +from pygama.pargen.survival_fractions import ( + compton_sf, + compton_sf_sweep, + get_sf_sweep, + get_survival_fraction, +) from pygama.pargen.utils import convert_to_minuit, return_nans log = logging.getLogger(__name__) @@ -153,7 +151,7 @@ def aoe_peak_bounds(func, guess, **kwargs): "x_hi": (None, None), "n_sig": (0, None), "mu": (guess["x_lo"], guess["x_hi"]), - "sigma": (0, None), + "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), "n_bkg": (0, None), "tau": (0, None), } @@ -163,8 +161,8 @@ def aoe_peak_bounds(func, guess, **kwargs): "x_hi": (None, None), "n_sig": (0, None), "mu": (guess["x_lo"], guess["x_hi"]), - "sigma": (0, None), - "htail": (0, 1), + "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), + "htail": (0, 0.5), "tau_sig": (None, 0), "n_bkg": (0, None), "tau": (0, None), @@ -175,7 +173,7 @@ def aoe_peak_bounds(func, guess, **kwargs): "x_hi": (None, None), "area": (0, None), "mu": (guess["x_lo"], guess["x_hi"]), - "sigma": (0, None), + "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), "tau": (0, None), } elif func == gaussian: @@ -184,7 +182,7 @@ def aoe_peak_bounds(func, guess, **kwargs): "x_hi": (None, None), "area": (0, None), "mu": (guess["x_lo"], guess["x_hi"]), - "sigma": (0, None), + "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), } for item, value in kwargs.items(): @@ -474,8 +472,8 @@ def fit_time_means(tstamps, means, sigmas): """ Fit the time dependence of the means of the A/E distribution - Args: - + Parameters + ---------- tstamps: np.array Timestamps of the data means: np.array @@ -534,478 +532,14 @@ def fit_time_means(tstamps, means, sigmas): return out_dict -def energy_guess(energy, func_i, fit_range=None, bin_width=1, peak=None, eres=None): - """ - Simple guess for peak fitting - """ - if fit_range is None: - fit_range = (np.nanmin(energy), np.nanmax(energy)) - if func_i == hpge_peak or func_i == gauss_on_step: - parguess = pgc.get_hpge_energy_peak_par_guess( - energy, func_i, fit_range=fit_range - ) - - if peak is not None: - parguess["mu"] = peak - - if eres is not None: - parguess["sigma"] = eres / 2.355 - - for i, guess in enumerate(parguess): - if np.isnan(guess): - parguess[i] = 0 - - else: - log.error(f"energy_guess not implemented for {func_i}") - return None - return parguess - - -def fix_all_but_nevents(func): - """ - Returns: Sequence list of fixed indexes for fitting and mask for parameters - """ - - if func == gauss_on_step: - # pars are: n_sig, mu, sigma, n_bkg, hstep, lower, upper, components - fixed = ["x_lo", "x_hi", "mu", "sigma", "hstep"] - - elif func == hpge_peak: - # pars are: , components - fixed = ["x_lo", "x_hi", "mu", "sigma", "htail", "tau", "hstep"] - - else: - log.error(f"get_hpge_E_fixed not implemented for {func}") - return None, None - mask = ~np.in1d(func.required_args(), fixed) - return fixed, mask - - -def get_bounds(func, parguess): - if func == hpge_peak or func == gauss_on_step: - bounds = pgc.get_hpge_energy_bounds(func, parguess) - - bounds["mu"] = (parguess["mu"] - 1, parguess["mu"] + 1) - bounds["n_sig"] = (0, 2 * (parguess["n_sig"] + parguess["n_bkg"])) - bounds["n_bkg"] = (0, 2 * (parguess["n_sig"] + parguess["n_bkg"])) - - else: - log.error(f"get_bounds not implemented for {func}") - return None - return bounds - - -def get_peak_label(peak: float) -> str: - if peak == 2039: - return "CC @" - elif peak == 1592.5: - return "Tl DEP @" - elif peak == 1620.5: - return "Bi FEP @" - elif peak == 2103.53: - return "Tl SEP @" - elif peak == 2614.5: - return "Tl FEP @" - - -def pass_pdf_gos( - x, x_lo, x_hi, n_sig, epsilon_sig, n_bkg, epsilon_bkg, mu, sigma, hstep1, hstep2 -): - return gauss_on_step.pdf_ext( - x, x_lo, x_hi, n_sig * epsilon_sig, mu, sigma, n_bkg * epsilon_bkg, hstep1 - ) - - -def fail_pdf_gos( - x, x_lo, x_hi, n_sig, epsilon_sig, n_bkg, epsilon_bkg, mu, sigma, hstep1, hstep2 -): - return gauss_on_step.pdf_ext( - x, - x_lo, - x_hi, - n_sig * (1 - epsilon_sig), - mu, - sigma, - n_bkg * (1 - epsilon_bkg), - hstep2, - ) - - -def pass_pdf_hpge( - x, - x_lo, - x_hi, - n_sig, - epsilon_sig, - n_bkg, - epsilon_bkg, - mu, - sigma, - htail, - tau, - hstep1, - hstep2, -): - return hpge_peak.pdf_ext( - x, - x_lo, - x_hi, - n_sig * epsilon_sig, - mu, - sigma, - htail, - tau, - n_bkg * epsilon_bkg, - hstep1, - ) - - -def fail_pdf_hpge( - x, - x_lo, - x_hi, - n_sig, - epsilon_sig, - n_bkg, - epsilon_bkg, - mu, - sigma, - htail, - tau, - hstep1, - hstep2, -): - return hpge_peak.pdf_ext( - x, - x_lo, - x_hi, - n_sig * (1 - epsilon_sig), - mu, - sigma, - htail, - tau, - n_bkg * (1 - epsilon_bkg), - hstep2, - ) - - -def update_guess(func, parguess, energies): - if func == gauss_on_step or func == hpge_peak: - - total_events = len(energies) - parguess["n_sig"] = len( - energies[ - (energies > parguess["mu"] - 2 * parguess["sigma"]) - & (energies < parguess["mu"] + 2 * parguess["sigma"]) - ] - ) - parguess["n_sig"] -= len( - energies[ - (energies > parguess["x_lo"]) - & (energies < parguess["x_lo"] + 2 * parguess["sigma"]) - ] - ) - parguess["n_sig"] -= len( - energies[ - (energies > parguess["x_hi"] - 2 * parguess["sigma"]) - & (energies < parguess["x_hi"]) - ] - ) - parguess["n_bkg"] = total_events - parguess["n_sig"] - return parguess - - else: - log.error(f"update_guess not implemented for {func}") - return parguess - - -def get_survival_fraction( - energy, - cut_param, - cut_val, - peak, - eres_pars, - fit_range=None, - high_cut=None, - pars=None, - errs=None, - dt_mask=None, - mode="greater", - func=hpge_peak, - fix_step=False, - display=0, -): - if dt_mask is None: - dt_mask = np.full(len(cut_param), True, dtype=bool) - - if not isinstance(energy, np.ndarray): - energy = np.array(energy) - if not isinstance(cut_param, np.ndarray): - cut_param = np.array(cut_param) - - if fit_range is None: - fit_range = (np.nanmin(energy), np.nanmax(energy)) - - nan_idxs = np.isnan(cut_param) - if high_cut is not None: - idxs = (cut_param > cut_val) & (cut_param < high_cut) & dt_mask - else: - if mode == "greater": - idxs = (cut_param > cut_val) & dt_mask - elif mode == "less": - idxs = (cut_param < cut_val) & dt_mask - else: - raise ValueError("mode not recognised") - - if pars is None or errs is None: - (pars, errs, cov, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( - energy, - func, - guess_func=energy_guess, - bounds_func=get_bounds, - guess_kwargs={"peak": peak, "eres": eres_pars}, - fit_range=fit_range, - ) - - guess_pars_surv = copy.deepcopy(pars) - - # add update guess here for n_sig and n_bkg - guess_pars_surv = update_guess(func, guess_pars_surv, energy[(~nan_idxs) & (idxs)]) - - parguess = { - "x_lo": pars["x_lo"], - "x_hi": pars["x_hi"], - "mu": pars["mu"], - "sigma": pars["sigma"], - "hstep1": pars["hstep"], - "hstep2": pars["hstep"], - "n_sig": pars["n_sig"], - "n_bkg": pars["n_bkg"], - "epsilon_sig": guess_pars_surv["n_sig"] / pars["n_sig"], - "epsilon_bkg": guess_pars_surv["n_bkg"] / pars["n_bkg"], - } - - bounds = { - "n_sig": (0, pars["n_sig"] + pars["n_bkg"]), - "epsilon_sig": (0, 1), - "n_bkg": (0, pars["n_bkg"] + pars["n_sig"]), - "epsilon_bkg": (0, 1), - "hstep1": (-1, 1), - "hstep2": (-1, 1), - } - - if func == hpge_peak: - parguess.update({"htail": pars["htail"], "tau": pars["tau"]}) - - if func == hpge_peak: - lh = cost.ExtendedUnbinnedNLL( - energy[(~nan_idxs) & (idxs)], pass_pdf_hpge - ) + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_hpge) - elif func == gauss_on_step: - lh = cost.ExtendedUnbinnedNLL( - energy[(~nan_idxs) & (idxs)], pass_pdf_gos - ) + cost.ExtendedUnbinnedNLL(energy[(~nan_idxs) & (~idxs)], fail_pdf_gos) - - else: - raise ValueError("Unknown func") - - m = Minuit(lh, **parguess) - fixed = ["x_lo", "x_hi", "n_sig", "n_bkg", "mu", "sigma"] # "hstep" - if func == hpge_peak: - fixed += ["tau", "htail"] - if fix_step is True: - fixed += ["hstep1", "hstep2"] - - m.fixed[fixed] = True - for arg, val in bounds.items(): - m.limits[arg] = val - - m.simplex().migrad() - m.hesse() - - sf = m.values["epsilon_sig"] * 100 - err = m.errors["epsilon_sig"] * 100 - - if display > 1: - fig, (ax1, ax2) = plt.subplots(1, 2) - bins = np.arange(1552, 1612, 1) - ax1.hist(energy[(~nan_idxs) & (idxs)], bins=bins, histtype="step") - - ax2.hist(energy[(~nan_idxs) & (~idxs)], bins=bins, histtype="step") - - if func == hpge_peak: - ax1.plot(bins, pass_pdf_hpge(bins, **m.values.to_dict())[1]) - ax2.plot(bins, fail_pdf_hpge(bins, **m.values.to_dict())[1]) - elif func == gauss_on_step: - ax1.plot(bins, pass_pdf_gos(bins, **m.values.to_dict())[1]) - ax2.plot(bins, fail_pdf_gos(bins, **m.values.to_dict())[1]) - - plt.show() - - return sf, err, m.values, m.errors - - -def get_sf_sweep( - energy: np.array, - cut_param: np.array, - final_cut_value: float = None, - peak: float = 1592.5, - eres_pars: list = None, - dt_mask=None, - cut_range=(-5, 5), - n_samples=26, - mode="greater", - fit_range=None, - debug_mode=False, -) -> tuple(pd.DataFrame, float, float): - """ - Calculates survival fraction for gamma lines using fitting method as in cut determination - """ - - if dt_mask is None: - dt_mask = np.full(len(cut_param), True, dtype=bool) - - if not isinstance(energy, np.ndarray): - energy = np.array(energy) - if not isinstance(cut_param, np.ndarray): - cut_param = np.array(cut_param) - - cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) - out_df = pd.DataFrame() - - (pars, errs, _, _, func, _, _, _) = pgc.unbinned_staged_energy_fit( - energy, - hpge_peak, - guess_func=energy_guess, - bounds_func=get_bounds, - guess_kwargs={"peak": peak, "eres": eres_pars}, - fit_range=fit_range, - ) - - for cut_val in cut_vals: - try: - sf, err, _, _ = get_survival_fraction( - energy, - cut_param, - cut_val, - peak, - eres_pars, - fit_range=fit_range, - dt_mask=dt_mask, - mode=mode, - pars=pars, - func=func, - ) - out_df = pd.concat( - [out_df, pd.DataFrame([{"cut_val": cut_val, "sf": sf, "sf_err": err}])] - ) - except BaseException as e: - if e == KeyboardInterrupt: - raise (e) - elif debug_mode: - raise (e) - out_df.set_index("cut_val", inplace=True) - if final_cut_value is not None: - sf, sf_err, _, _ = get_survival_fraction( - energy, - cut_param, - final_cut_value, - peak, - eres_pars, - fit_range=fit_range, - dt_mask=dt_mask, - mode=mode, - pars=pars, - func=func, - ) - else: - sf = None - sf_err = None - return ( - out_df, - sf, - sf_err, - ) - - -def compton_sf(cut_param, low_cut_val, high_cut_val=None, mode="greater", dt_mask=None): - if dt_mask is None: - dt_mask = np.full(len(cut_param), True, dtype=bool) - - if not isinstance(cut_param, np.ndarray): - cut_param = np.array(cut_param) - - if high_cut_val is not None: - mask = (cut_param > low_cut_val) & (cut_param < high_cut_val) & dt_mask - else: - if mode == "greater": - mask = (cut_param > low_cut_val) & dt_mask - elif mode == "less": - mask = (cut_param < low_cut_val) & dt_mask - else: - raise ValueError("mode not recognised") - - ct_n = len(cut_param[~mask]) - ct_err = np.sqrt(len(cut_param[~mask])) - surv_n = len(cut_param[mask]) - surv_err = np.sqrt(len(cut_param[mask])) - - pc_n = ct_n + surv_n - - sf = surv_n / pc_n - err = 100 * sf * (1 - sf) * np.sqrt((ct_err / ct_n) ** 2 + (surv_err / surv_n) ** 2) - sf *= 100 - - return { - "low_cut": low_cut_val, - "sf": sf, - "sf_err": err, - "high_cut": high_cut_val, - } - - -def compton_sf_sweep( - energy: np.array, - cut_param: np.array, - final_cut_value: float, - peak: float, - eres: list[float, float] = None, - dt_mask: np.array = None, - cut_range=(-5, 5), - n_samples=51, - mode="greater", -) -> tuple(float, np.array, list): +class CalAoE: """ - Determines survival fraction for compton continuum by basic counting + Class for calibrating the A/E, + this follow 5 steps: + the time correction, drift time correction, energy correction, + the cut level calculation and the survival fraction calculation. """ - if not isinstance(energy, np.ndarray): - energy = np.array(energy) - if not isinstance(cut_param, np.ndarray): - cut_param = np.array(cut_param) - - cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) - out_df = pd.DataFrame() - for cut_val in cut_vals: - ct_dict = compton_sf(cut_param, cut_val, mode=mode, dt_mask=dt_mask) - df = pd.DataFrame( - [ - { - "cut_val": ct_dict["low_cut"], - "sf": ct_dict["sf"], - "sf_err": ct_dict["sf_err"], - } - ] - ) - out_df = pd.concat([out_df, df]) - out_df.set_index("cut_val", inplace=True) - - sf_dict = compton_sf(cut_param, final_cut_value, mode=mode, dt_mask=dt_mask) - - return out_df, sf_dict["sf"], sf_dict["sf_err"] - - -class CalAoE: def __init__( self, cal_dicts: dict = None, @@ -1017,12 +551,50 @@ def __init__( dep_correct: bool = False, dt_cut: dict = None, dt_param: str = "dt_eff", - high_cut_val: int = 3, + high_cut_val: float = 3, mean_func: Callable = Pol1, sigma_func: Callable = SigmaFit, - compt_bands_width: int = 20, + compt_bands_width: float = 20, debug_mode: bool = False, ): + """ + Parameters + ---------- + + cal_dicts: dict + Dictionary of calibration parameters can either be empty/None, for a single run or for multiple runs + keyed by timestamp in the format YYYYMMDDTHHMMSSZ + cal_energy_param: str + Calibrated energy parameter to use for A/E calibrations and for determining peak events + eres_func: callable + Function to determine the energy resolution should take in a single variable the calibrated energy + pdf: PDF + PDF to fit to the A/E distribution + selection_string: str + Selection string for the data that will be passed as a query to the data dataframe + dt_corr: bool + Whether to correct the drift time + dep_correct: bool + Whether to correct the double escape peak into the single site band before cut determination + dt_cut: dict + Dictionary of the drift time cut parameters in the form: + {"out_param": "dt_cut", "hard": False} + where the out_param is the name of the parameter to cut on in the dataframe (should have been precalculated) + and "hard" is whether to remove these events completely for survival fraction calculations + or whether they they should only be removed in the cut determination/ A/E calibration steps + but the survival fractions should be calculated with them included + high_cut_val: float + Value to cut the A/E distribution at on the high side + mean_func: Callable + Function to fit the energy dependence of the A/E mean, should be in the form of a class as above for Pol1 + sigma_func: Callable + Function to fit the energy dependence of the A/E sigma, should be in the form of a class as above for Sigma_Fit + compt_bands_width: float + Width of the compton bands to use for the energy correction + debug_mode: bool + If True will raise errors if the A/E calibration fails otherwise just return NaN values + + """ self.cal_dicts = cal_dicts if cal_dicts is not None else {} self.cal_energy_param = cal_energy_param self.eres_func = eres_func @@ -1047,6 +619,11 @@ def __init__( self.debug_mode = debug_mode def update_cal_dicts(self, update_dict): + """ + Util function for updating the calibration dictionaries + checks if the dictionary is in the format of a single run or multiple runs + and then updates the dictionary accordingly + """ if len(self.cal_dicts) > 0 and re.match( r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0] ): @@ -1059,8 +636,40 @@ def update_cal_dicts(self, update_dict): self.cal_dicts.update(update_dict) def time_correction( - self, df, aoe_param, mode="full", output_name="AoE_Timecorr", display=0 + self, + df: pd.DataFrame, + aoe_param: str, + mode: str = "full", + output_name: str = "AoE_Timecorr", + display: int = 0, ): + """ + Calculates the time correction for the A/E parameter by fitting the 1-1.3 MeV + Compton continuum and extracting the centroid. If only a single run is passed will + just perform a shift of the A/E parameter to 1 using the centroid otherwise for multiple + runs the shift will be determined on the mode given in. + + Parameters + ---------- + + df: pd.DataFrame + Dataframe containing the data + aoe_param: str + Name of the A/E parameter to use + mode: str + Mode to use for the time correction, can be "full", "partial" or "none": + + none: just use the mean of the a/e centroids to shift all the data + partial: iterate through the centroids if vary by less than 0.4 sigma + then group and take mean otherwise when a run higher than 0.4 sigma + is found if it is a single run set to nan otherwise start a new block + full : each run will be corrected individually + output_name: str + Name of the output parameter for the time corrected A/E in the dataframe and to + be added to the calibration dictionary + display: int + plot level + """ log.info("Starting A/E time correction") self.timecorr_df = pd.DataFrame() try: @@ -1177,7 +786,7 @@ def time_correction( log.info("A/E time correction finished") else: try: - pars, errs, cov = unbinned_aoe_fit( + pars, errs, _ = unbinned_aoe_fit( df.query( f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300" )[aoe_param], @@ -1264,7 +873,23 @@ def drift_time_correction( display: int = 0, ): """ - Calculates the correction needed to align the two drift time regions for ICPC detectors + Calculates the correction needed to align the two drift time regions for ICPC detectors. + This is done by fitting the two drift time peaks in the drift time spectrum and then + fitting the A/E peaks in each of these regions. A simple linear correction is then applied + to align these regions. + + Parameters + ---------- + + data: pd.DataFrame + Dataframe containing the data + aoe_param: str + Name of the A/E parameter to use as starting point + output_name: str + Name of the output parameter for the drift time corrected A/E in the dataframe and to + be added to the calibration dictionary + display: int + plot level """ log.info("Starting A/E drift time correction") self.dt_res_dict = {} @@ -1388,13 +1013,31 @@ def energy_correction( self, data: pd.DataFrame, aoe_param: str, - corrected_param="AoE_Corrected", - classifier_param="AoE_Classifier", + corrected_param: str = "AoE_Corrected", + classifier_param: str = "AoE_Classifier", display: int = 0, ): """ Calculates the corrections needed for the energy dependence of the A/E. - Does this by fitting the compton continuum in slices and then applies fits to the centroid and variance. + Does this by fitting the compton continuum in slices and then applies fits + to the centroid and variance. + + Parameters + ---------- + + data: pd.DataFrame + Dataframe containing the data + aoe_param: str + Name of the A/E parameter to use as starting point + corrected_param: str + Name of the output parameter for the energy mean corrected A/E to + be added in the dataframe and to the calibration dictionary + classifier_param: str + Name of the output parameter for the full mean and sigma energy corrected A/E classifier + to be added in the dataframe and to the calibration dictionary + display: int + plot level + """ log.info("Starting A/E energy correction") @@ -1644,8 +1287,12 @@ def energy_correction( "expression": f"{aoe_param}/({self.mean_func.string_func(self.cal_energy_param)})", "parameters": mu_pars.to_dict(), }, + f"_{classifier_param}_intermediate": { + "expression": f"({aoe_param})-({self.mean_func.string_func(self.cal_energy_param)})", + "parameters": mu_pars.to_dict(), + }, classifier_param: { - "expression": f"({corrected_param}-1)/({self.sigma_func.string_func(self.cal_energy_param)})", + "expression": f"(_{classifier_param}_intermediate)/({self.sigma_func.string_func(self.cal_energy_param)})", "parameters": sig_pars.to_dict(), }, } @@ -1657,13 +1304,34 @@ def get_aoe_cut_fit( aoe_param: str, peak: float, ranges: tuple, - dep_acc: float, + dep_acc: float = 0.9, output_cut_param: str = "AoE_Low_Cut", display: int = 1, ): """ - Determines A/E cut by sweeping through values and for each one fitting the DEP to determine how many events survive. - Then interpolates to get cut value at desired DEP survival fraction (typically 90%) + Determines A/E cut by sweeping through values and for each one fitting the + DEP to determine how many events survive. + Fits the resulting distribution and + interpolates to get cut value at desired DEP survival fraction (typically 90%) + + Parameters + ---------- + + data: pd.DataFrame + Dataframe containing the data + aoe_param: str + Name of the A/E parameter to use as starting point + peak : float + Energy of the peak to use for the cut determination e.g. 1592.5 + ranges: tuple + Tuple of the range in keV below and above the peak to use for the cut determination e.g. (20,40) + dep_acc: float + Desired DEP survival fraction for final cut + output_cut_param: str + Name of the output parameter for the events passing A/E in the dataframe and to + be added to the calibration dictionary + display: int + plot level """ log.info("Starting A/E low cut determination") @@ -1688,7 +1356,7 @@ def get_aoe_cut_fit( peak, self.eres_func(peak), fit_range=erange, - dt_mask=None, + data_mask=None, cut_range=(-8, 0), n_samples=40, mode="greater", @@ -1750,14 +1418,37 @@ def get_aoe_cut_fit( def calculate_survival_fractions_sweep( self, - data, - aoe_param, - peaks, - fit_widths, + data: pd.DataFrame, + aoe_param: str, + peaks: list, + fit_widths: list[tuple], n_samples=26, cut_range=(-5, 5), mode="greater", ): + """ + Calculate survival fractions for the A/E cut for a list of peaks by sweeping through values + of the A/E cut to show how this varies + + Parameters + ---------- + + data: pd.DataFrame + Dataframe containing the data + aoe_param: str + Name of the parameter in the dataframe for the final A/E classifier + peaks: list + List of peaks to calculate the survival fractions for + fit_widths: list + List of tuples of the energy range to fit the peak in + n_samples: int + Number of samples to take in the sweep + cut_range: tuple + Range of the cut to sweep through + mode: str + mode to use for the cut determination, can be "greater" or "less" i.e. do we want to + keep events with A/E greater or less than the cut value + """ sfs = pd.DataFrame() peak_dfs = {} @@ -1778,12 +1469,10 @@ def calculate_survival_fractions_sweep( peak_df[self.cal_energy_param].to_numpy(), peak_df[aoe_param].to_numpy(), self.low_cut_val, - peak, - fwhm, cut_range=cut_range, n_samples=n_samples, mode=mode, - dt_mask=( + data_mask=( peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None @@ -1812,7 +1501,7 @@ def calculate_survival_fractions_sweep( cut_range=cut_range, n_samples=n_samples, mode=mode, - dt_mask=( + data_mask=( peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None @@ -1850,8 +1539,32 @@ def calculate_survival_fractions_sweep( return sfs, peak_dfs def calculate_survival_fractions( - self, data, aoe_param, peaks, fit_widths, mode="greater" + self, + data: pd.DataFrame, + aoe_param: str, + peaks: list, + fit_widths: list[tuple], + mode: str = "greater", ): + """ + Calculate survival fractions for the A/E cut for a list of peaks for the final + A/E cut value + + Parameters + ---------- + + data: pd.DataFrame + Dataframe containing the data + aoe_param: str + Name of the parameter in the dataframe for the final A/E classifier + peaks: list + List of peaks to calculate the survival fractions for + fit_widths: list + List of tuples of the energy range to fit the peak in + mode: str + mode to use for the cut determination, can be "greater" or "less" i.e. do we want to + keep events with A/E greater or less than the cut value + """ sfs = pd.DataFrame() for i, peak in enumerate(peaks): fwhm = self.eres_func(peak) @@ -1868,7 +1581,7 @@ def calculate_survival_fractions( self.low_cut_val, self.high_cut_val, mode=mode, - dt_mask=( + data_mask=( peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None @@ -1897,7 +1610,7 @@ def calculate_survival_fractions( fit_range=fit_range, mode=mode, high_cut=self.high_cut_val, - dt_mask=( + data_mask=( peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None @@ -1928,16 +1641,43 @@ def calculate_survival_fractions( def calibrate( self, - df, - initial_aoe_param, - peaks_of_interest=None, - fit_widths=None, - cut_peak_idx=0, - dep_acc=0.9, - sf_nsamples=11, - sf_cut_range=(-5, 5), - timecorr_mode="full", + df: pd.DataFrame, + initial_aoe_param: str, + peaks_of_interest: list = None, + fit_widths: list[tuple] = None, + cut_peak_idx: int = 0, + dep_acc: float = 0.9, + sf_nsamples: int = 11, + sf_cut_range: tuple = (-5, 5), + timecorr_mode: str = "full", ): + """ + Main function to run a full A/E calibration with all steps i.e. time correction, drift time correction, + energy correction, A/E cut determination and survival fraction calculation + + Parameters + ---------- + + df: pd.DataFrame + Dataframe containing the data + initial_aoe_param: str + Name of the A/E parameter in dataframe to use as starting point + peaks_of_interest: list + List of peaks to calculate the survival fractions for + fit_widths: list + List of tuples of the energy range to fit the peak in for survival fraction determination + cut_peak_idx: int + Index of the peak in peaks of interest to use for the cut determination + dep_acc: float + Desired survival fraction in the peak for final cut value + sf_nsamples: int + Number of samples to take in the survival fraction sweep + sf_cut_range: tuple + Range to use for the survival fraction sweep + timecorr_mode: str + Mode to use for the time correction, see time_correction function for details + + """ if peaks_of_interest is None: peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] if fit_widths is None: @@ -2035,6 +1775,22 @@ def calibrate( self.two_side_sfs_by_run = None +def get_peak_label(peak: float) -> str: + if peak == 2039: + return "CC @" + elif peak == 1592.5: + return "Tl DEP @" + elif peak == 1620.5: + return "Bi FEP @" + elif peak == 2103.53: + return "Tl SEP @" + elif peak == 2614.5: + return "Tl FEP @" + + +# below are a few plotting functions that can be used to plot the results of the A/E calibration + + def plot_aoe_mean_time( aoe_class, data, time_param="AoE_Timecorr", figsize=(12, 8), fontsize=12 ): From e0cff0ff9b9e58d5a0326af32b8a2fb6e55c7e88 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 18 Nov 2024 13:15:26 +0100 Subject: [PATCH 09/15] fix step as default --- src/pygama/pargen/survival_fractions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/survival_fractions.py b/src/pygama/pargen/survival_fractions.py index 89a6762d1..d72227736 100644 --- a/src/pygama/pargen/survival_fractions.py +++ b/src/pygama/pargen/survival_fractions.py @@ -227,7 +227,7 @@ def get_survival_fraction( data_mask: np.ndarray = None, mode: str = "greater", func=hpge_peak, - fix_step=False, + fix_step=True, display=0, ): """ From e82937b164b6eaf1e73ae250012e3758b58cafef Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 18 Nov 2024 13:16:05 +0100 Subject: [PATCH 10/15] aoe fitting improvements, change range of gaussian fit, improved bounds, improved guessing --- src/pygama/pargen/AoE_cal.py | 153 +++++++++++++++++++---------------- 1 file changed, 83 insertions(+), 70 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 8b58a468e..a47dc9760 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -76,11 +76,16 @@ def aoe_peak_guess(func, hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] - try: - _, sigma, _ = pgh.get_gaussian_guess(hist, bins) - except Exception: - pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mu, n_bins=20) - _, sigma, _ = pars + pars, _ = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mu, n_bins=5) + bin_centres = pgh.get_bin_centers(bins) + if pars is None: + i_0 = np.argmax(hist) + mu = bin_centres[i_0] + (_, sigma, _) = pgh.get_gaussian_guess(hist, bins) + else: + mu = pars[0] + sigma = pars[1] + ls_guess = 2 * np.sum(hist[(bin_centers > mu) & (bin_centers < (mu + 2.5 * sigma))]) if func == aoe_peak: @@ -150,22 +155,22 @@ def aoe_peak_bounds(func, guess, **kwargs): "x_lo": (None, None), "x_hi": (None, None), "n_sig": (0, None), - "mu": (guess["x_lo"], guess["x_hi"]), + "mu": ((guess["x_lo"] + guess["x_hi"]) / 2, guess["x_hi"]), "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), "n_bkg": (0, None), - "tau": (0, None), + "tau": (0, 1), } elif func == aoe_peak_with_high_tail: bounds_dict = { "x_lo": (None, None), "x_hi": (None, None), "n_sig": (0, None), - "mu": (guess["x_lo"], guess["x_hi"]), + "mu": ((guess["x_lo"] + guess["x_hi"]) / 2, guess["x_hi"]), "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), "htail": (0, 0.5), "tau_sig": (None, 0), "n_bkg": (0, None), - "tau": (0, None), + "tau": (0, 1), } elif func == exgauss: bounds_dict = { @@ -174,7 +179,7 @@ def aoe_peak_bounds(func, guess, **kwargs): "area": (0, None), "mu": (guess["x_lo"], guess["x_hi"]), "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), - "tau": (0, None), + "tau": (0, 1), } elif func == gaussian: bounds_dict = { @@ -196,7 +201,7 @@ def aoe_peak_fixed(func, **kwargs): elif func == aoe_peak_with_high_tail: fixed = ["x_lo", "x_hi"] elif func == exgauss: - fixed = ["x_lo", "x_hi"] + fixed = ["x_lo", "x_hi", "mu", "sigma"] elif func == gaussian: fixed = ["x_lo", "x_hi"] mask = ~np.in1d(func.required_args(), fixed) @@ -261,8 +266,8 @@ def unbinned_aoe_fit( Returns ------- - tuple(np.array, np.array) - Tuple of fit values and errors + tuple(np.array, np.array, np.ndarray, tuple) + Tuple of fit values, errors, covariance matrix and full fit info """ if not isinstance(aoe, np.ndarray): aoe = np.array(aoe) @@ -276,31 +281,31 @@ def unbinned_aoe_fit( hist, bins, var = pgh.get_hist(aoe, bins=500) gpars = aoe_peak_guess(gaussian, hist, bins, var) - c1_min = gpars["mu"] - 2 * gpars["sigma"] + c1_min = gpars["mu"] - 0.5 * gpars["sigma"] c1_max = gpars["mu"] + 3 * gpars["sigma"] - + gpars["x_lo"] = c1_min + gpars["x_hi"] = c1_max # Initial fit just using Gaussian - c1 = cost.ExtendedUnbinnedNLL( + csig = cost.ExtendedUnbinnedNLL( aoe[(aoe < c1_max) & (aoe > c1_min)], gaussian.pdf_ext ) - m1 = Minuit(c1, *gpars) + msig = Minuit(csig, *gpars) bounds = aoe_peak_bounds(gaussian, gpars) for arg, val in bounds.items(): - m1.limits[arg] = val + msig.limits[arg] = val for fix in aoe_peak_fixed(gaussian)[0]: - m1.fixed[fix] = True - m1.migrad() + msig.fixed[fix] = True + msig.migrad() # Range to fit over, below this tail behaviour more exponential, few events above - fmin = m1.values["mu"] - 15 * m1.values["sigma"] + fmin = msig.values["mu"] - 15 * msig.values["sigma"] if fmin < np.nanmin(aoe): fmin = np.nanmin(aoe) - fmax_bkg = m1.values["mu"] - 5 * m1.values["sigma"] - fmax = m1.values["mu"] + 5 * m1.values["sigma"] - - n_bkg_guess = len(aoe[(aoe < fmax) & (aoe > fmin)]) - m1.values["area"] + fmax_bkg = msig.values["mu"] - 5 * msig.values["sigma"] + fmax = msig.values["mu"] + 5 * msig.values["sigma"] + n_bkg_guess = len(aoe[(aoe < fmax) & (aoe > fmin)]) - msig.values["area"] bkg_guess = aoe_peak_guess( exgauss, @@ -308,34 +313,47 @@ def unbinned_aoe_fit( bins, var, area=n_bkg_guess, - mu=m1.values["mu"], - sigma=m1.values["sigma"], + mu=msig.values["mu"], + sigma=msig.values["sigma"] * 0.9, x_lo=fmin, x_hi=fmax_bkg, ) - c2 = cost.ExtendedUnbinnedNLL(aoe[(aoe < fmax_bkg) & (aoe > fmin)], exgauss.pdf_ext) - m2 = Minuit(c2, *bkg_guess) + cbkg = cost.ExtendedUnbinnedNLL( + aoe[(aoe < fmax_bkg) & (aoe > fmin)], exgauss.pdf_ext + ) + mbkg = Minuit(cbkg, *bkg_guess) bounds = aoe_peak_bounds(exgauss, bkg_guess) for arg, val in bounds.items(): - m2.limits[arg] = val + mbkg.limits[arg] = val for fix in aoe_peak_fixed(exgauss)[0]: - m2.fixed[fix] = True - m2.simplex().migrad() - m2.hesse() + mbkg.fixed[fix] = True + mbkg.simplex().migrad() + mbkg.hesse() x0 = aoe_peak_guess( pdf, hist, bins, var, - n_sig=m1.values["area"], - mu=m1.values["mu"], - sigma=m1.values["sigma"], - n_bkg=m2.values["area"], - tau=m2.values["tau"], + n_sig=( + msig.values["area"] + - np.diff( + exgauss.cdf_ext( + np.array([msig.values["mu"] - 2 * msig.values["sigma"], fmax]), + mbkg.values["area"], + msig.values["mu"], + msig.values["sigma"], + mbkg.values["tau"], + ) + )[0] + ), + mu=msig.values["mu"], + sigma=msig.values["sigma"], + n_bkg=mbkg.values["area"], + tau=mbkg.values["tau"], x_lo=fmin, x_hi=fmax, ) @@ -416,7 +434,7 @@ def unbinned_aoe_fit( elif cs2[0] * 1.05 < cs[0]: fit = fit2 - elif frac_errors1 < frac_errors2: + elif frac_errors1 <= frac_errors2: fit = fit1 elif frac_errors1 > frac_errors2: @@ -426,27 +444,23 @@ def unbinned_aoe_fit( if display > 1: aoe = aoe[(aoe < fmax) & (aoe > fmin)] - bin_width = ( - 2 - * (np.nanpercentile(aoe, 75) - np.nanpercentile(aoe, 25)) - * len(aoe) ** (-1 / 3) - ) - nbins = int(np.ceil((np.nanmax(aoe) - np.nanmin(aoe)) / bin_width)) # *5 + nbins = 100 plt.figure() xs = np.linspace(fmin, fmax, 1000) counts, bins, bars = plt.hist(aoe, bins=nbins, histtype="step", label="Data") dx = np.diff(bins) - plt.plot(xs, pdf.get_pdf(xs, *fit[0]) * dx[0], label="Full fit") + plt.plot(xs, pdf.get_pdf(xs, *x0) * dx[0], label="Guess") + plt.plot(xs, pdf.get_pdf(xs, *fit[0]) * dx[0], label="Full Fit") pdf.components = True sig, bkg = pdf.get_pdf(xs, *fit[0]) pdf.components = False plt.plot(xs, sig * dx[0], label="Signal") plt.plot(xs, bkg * dx[0], label="Background") plt.plot( - xs, gaussian.pdf_ext(xs, *m1.values)[1] * dx[0], label="Initial Gaussian" + xs, gaussian.pdf_ext(xs, *msig.values)[1] * dx[0], label="Initial Gaussian" ) - plt.plot(xs, exgauss.pdf_ext(xs, *m2.values)[1] * dx[0], label="Bkg guess") + plt.plot(xs, exgauss.pdf_ext(xs, *mbkg.values)[1] * dx[0], label="Bkg guess") plt.xlabel("A/E") plt.ylabel("Counts") plt.legend(loc="upper left") @@ -462,10 +476,9 @@ def unbinned_aoe_fit( ) plt.legend(loc="upper left") plt.show() - return fit[0], fit[1], fit[2] - + return fit[0], fit[1], fit[2], fit else: - return fit[0], fit[1], fit[2] + return fit[0], fit[1], fit[2], fit def fit_time_means(tstamps, means, sigmas): @@ -676,7 +689,7 @@ def time_correction( if "run_timestamp" in df: for tstamp, time_df in df.groupby("run_timestamp", sort=True): try: - pars, errs, cov = unbinned_aoe_fit( + pars, errs, _, _ = unbinned_aoe_fit( time_df.query( f"{self.fit_selection} & ({self.cal_energy_param}>1000) & ({self.cal_energy_param}<1300)" )[aoe_param], @@ -786,7 +799,7 @@ def time_correction( log.info("A/E time correction finished") else: try: - pars, errs, _ = unbinned_aoe_fit( + pars, errs, _, _ = unbinned_aoe_fit( df.query( f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300" )[aoe_param], @@ -964,7 +977,7 @@ def drift_time_correction( f"{self.dt_param}>{mus[1] - 2 * sigmas[1]} & {self.dt_param}<{mus[1] + 2 * sigmas[1]}" ) - aoe_pars, aoe_errs, _ = unbinned_aoe_fit( + aoe_pars, aoe_errs, _, _ = unbinned_aoe_fit( final_df.query(aoe_grp1)[aoe_param], pdf=self.pdf, display=display ) @@ -973,7 +986,7 @@ def drift_time_correction( "errs": aoe_errs.to_dict(), } - aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit( + aoe_pars2, aoe_errs2, _, _ = unbinned_aoe_fit( final_df.query(aoe_grp2)[aoe_param], pdf=self.pdf, display=display ) @@ -1076,7 +1089,7 @@ def energy_correction( # Fit each compton band for band in compt_bands: try: - pars, errs, cov = unbinned_aoe_fit( + pars, errs, cov, _ = unbinned_aoe_fit( select_df.query( f"{self.cal_energy_param}>{band}&{self.cal_energy_param}< {self.compt_bands_width+band}" )[aoe_param], @@ -1216,7 +1229,7 @@ def energy_correction( emin = peak - n_sigma * sigma emax = peak + n_sigma * sigma try: - dep_pars, dep_err, _ = unbinned_aoe_fit( + dep_pars, dep_err, _, _ = unbinned_aoe_fit( select_df.query( f"{self.cal_energy_param}>{emin}&{self.cal_energy_param}<{emax}" )[aoe_param], @@ -1775,19 +1788,6 @@ def calibrate( self.two_side_sfs_by_run = None -def get_peak_label(peak: float) -> str: - if peak == 2039: - return "CC @" - elif peak == 1592.5: - return "Tl DEP @" - elif peak == 1620.5: - return "Bi FEP @" - elif peak == 2103.53: - return "Tl SEP @" - elif peak == 2614.5: - return "Tl FEP @" - - # below are a few plotting functions that can be used to plot the results of the A/E calibration @@ -2283,6 +2283,19 @@ def plot_cut_fit( return fig +def get_peak_label(peak: float) -> str: + if peak == 2039: + return "CC @" + elif peak == 1592.5: + return "Tl DEP @" + elif peak == 1620.5: + return "Bi FEP @" + elif peak == 2103.53: + return "Tl SEP @" + elif peak == 2614.5: + return "Tl FEP @" + + def plot_survival_fraction_curves( aoe_class, data, figsize=(12, 8), fontsize=12 ) -> plt.figure: From c7b6d7da87af1ac6da93642c219b69b932e3d5a8 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 18 Nov 2024 19:05:32 +0100 Subject: [PATCH 11/15] add averaging consecutive and interpolation between consecutive samples --- src/pygama/pargen/AoE_cal.py | 161 ++++++++++++++++++++++++++++++++--- 1 file changed, 151 insertions(+), 10 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index a47dc9760..d40890143 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -421,8 +421,18 @@ def unbinned_aoe_fit( fit2 = (m2.values, m2.errors, m2.covariance, cs2, pdf, mask, valid2, m2) - frac_errors1 = np.sum(np.abs(np.array(m.errors)[mask] / np.array(m.values)[mask])) - frac_errors2 = np.sum(np.abs(np.array(m2.errors)[mask] / np.array(m2.values)[mask])) + frac_errors1 = np.sum( + np.abs( + np.array(m.errors)[mask & (np.array(m.values) > 0)] + / np.array(m.values)[mask & (np.array(m.values) > 0)] + ) + ) + frac_errors2 = np.sum( + np.abs( + np.array(m2.errors)[mask & (np.array(m2.values) > 0)] + / np.array(m2.values)[mask & (np.array(m2.values) > 0)] + ) + ) if valid2 is False: fit = fit1 @@ -545,6 +555,73 @@ def fit_time_means(tstamps, means, sigmas): return out_dict +def average_consecutive(tstamps, means): + """ + Fit the time dependence of the means of the A/E distribution by average consecutive entries + + Parameters + ---------- + tstamps: np.array + Timestamps of the data + means: np.array + Means of the A/E distribution + sigmas: np.array + Sigmas of the A/E distribution + + Returns: dict + Dictionary of the time dependence of the means + """ + out_dict = {} + for i, tstamp in enumerate(tstamps): + if i + 1 == len(means): + out_dict[tstamp] = means[i] + else: + out_dict[tstamp] = np.mean([means[i], means[i + 1]]) + return out_dict + + +def interpolate_consecutive(tstamps, means, times, aoe_param, output_name): + """ + Fit the time dependence of the means of the A/E distribution by average consecutive entries + + Parameters + ---------- + tstamps: np.array + Timestamps of the data + means: np.array + Means of the A/E distribution + sigmas: np.array + Sigmas of the A/E distribution + times: np.array + Times of the mean samples in unix time format + + Returns: dict + Dictionary of the time dependence of the means + """ + out_dict = {} + for i, tstamp in enumerate(tstamps): + if i + 1 == len(means): + out_dict[tstamp] = { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": means[i]}, + } + } + else: + out_dict[tstamp] = { + output_name: { + "expression": f"{aoe_param} / ((timestamp - a ) * b + c) ", + "parameters": { + "a": times[i], + "b": (means[i + 1] - means[i]) / (times[i + 1] - times[i]), + "c": means[i], + }, + } + } + + return out_dict + + class CalAoE: """ Class for calibrating the A/E, @@ -677,6 +754,8 @@ def time_correction( then group and take mean otherwise when a run higher than 0.4 sigma is found if it is a single run set to nan otherwise start a new block full : each run will be corrected individually + average_consecutive: average the consecutive centroids + interpolate_consecutive: interpolate between the consecutive centroids output_name: str Name of the output parameter for the time corrected A/E in the dataframe and to be added to the calibration dictionary @@ -749,6 +828,15 @@ def time_correction( np.array(self.timecorr_df["mean"]), np.array(self.timecorr_df["sigma"]), ) + final_time_dict = { + tstamp: { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": t_dict}, + } + } + for tstamp, t_dict in time_dict.items() + } elif mode == "full": time_dict = { @@ -758,21 +846,37 @@ def time_correction( np.array(self.timecorr_df["mean"]), ) } + final_time_dict = { + tstamp: { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": t_dict}, + } + } + for tstamp, t_dict in time_dict.items() + } elif mode == "none": time_dict = { time: np.nanmean(self.timecorr_df["mean"]) for time in np.array(self.timecorr_df.index) } + final_time_dict = { + tstamp: { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": t_dict}, + } + } + for tstamp, t_dict in time_dict.items() + } - else: - raise ValueError("unknown mode") - - df[output_name] = df[aoe_param] / np.array( - [time_dict[tstamp] for tstamp in df["run_timestamp"]] - ) - self.update_cal_dicts( - { + elif mode == "average_consecutive": + time_dict = average_consecutive( + np.array(self.timecorr_df.index), + np.array(self.timecorr_df["mean"]), + ) + final_time_dict = { tstamp: { output_name: { "expression": f"{aoe_param}/a", @@ -781,7 +885,44 @@ def time_correction( } for tstamp, t_dict in time_dict.items() } + + elif mode == "interpolate_consecutive": + if "timestamp" in df: + times = [] + for tstamp in np.array(self.timecorr_df.index): + times.append( + np.nanmedian( + df.query(f"run_timestamp=='{tstamp}'")[ + "timestamp" + ] + ) + ) + final_time_dict = interpolate_consecutive( + np.array(self.timecorr_df.index), + np.array(self.timecorr_df["mean"]), + times, + aoe_param, + output_name, + ) + time_dict = { + time: mean + for time, mean in zip( + np.array(self.timecorr_df.index), + np.array(self.timecorr_df["mean"]), + ) + } + else: + raise ValueError( + "need timestamp column in dataframe for interpolation" + ) + + else: + raise ValueError("unknown mode") + + df[output_name] = df[aoe_param] / np.array( + [time_dict[tstamp] for tstamp in df["run_timestamp"]] ) + self.update_cal_dicts(final_time_dict) else: df[output_name] = ( df[aoe_param] / np.array(self.timecorr_df["mean"])[0] From 13ec928367e0b5114cd37648272bdbc152af9281 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 18 Nov 2024 22:25:46 +0100 Subject: [PATCH 12/15] guess improvements for tests --- src/pygama/pargen/AoE_cal.py | 50 ++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index d40890143..f148619cd 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -78,7 +78,12 @@ def aoe_peak_guess(func, hist, bins, var, **kwargs): mu = bin_centers[np.argmax(hist)] pars, _ = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mu, n_bins=5) bin_centres = pgh.get_bin_centers(bins) - if pars is None: + if pars is None or ( + pars[0] > bins[-1] + or pars[0] < bins[0] + or pars[-1] < (0.5 * np.nanmax(hist)) + or pars[-1] > (2 * np.nanmax(hist)) + ): i_0 = np.argmax(hist) mu = bin_centres[i_0] (_, sigma, _) = pgh.get_gaussian_guess(hist, bins) @@ -156,7 +161,10 @@ def aoe_peak_bounds(func, guess, **kwargs): "x_hi": (None, None), "n_sig": (0, None), "mu": ((guess["x_lo"] + guess["x_hi"]) / 2, guess["x_hi"]), - "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), + "sigma": ( + (guess["x_hi"] - guess["x_lo"]) / 50, + (guess["x_hi"] - guess["x_lo"]) / 4, + ), "n_bkg": (0, None), "tau": (0, 1), } @@ -166,7 +174,10 @@ def aoe_peak_bounds(func, guess, **kwargs): "x_hi": (None, None), "n_sig": (0, None), "mu": ((guess["x_lo"] + guess["x_hi"]) / 2, guess["x_hi"]), - "sigma": (0, (guess["x_hi"] - guess["x_lo"]) / 4), + "sigma": ( + (guess["x_hi"] - guess["x_lo"]) / 50, + (guess["x_hi"] - guess["x_lo"]) / 4, + ), "htail": (0, 0.5), "tau_sig": (None, 0), "n_bkg": (0, None), @@ -278,7 +289,10 @@ def unbinned_aoe_fit( * len(aoe) ** (-1 / 3) ) nbins = int(np.ceil((np.nanmax(aoe) - np.nanmin(aoe)) / bin_width)) - hist, bins, var = pgh.get_hist(aoe, bins=500) + hist, bins, var = pgh.get_hist( + aoe[(aoe < np.nanpercentile(aoe, 99)) & (aoe > np.nanpercentile(aoe, 1))], + bins=500, + ) gpars = aoe_peak_guess(gaussian, hist, bins, var) c1_min = gpars["mu"] - 0.5 * gpars["sigma"] @@ -333,23 +347,27 @@ def unbinned_aoe_fit( mbkg.simplex().migrad() mbkg.hesse() + nsig_guess = ( + msig.values["area"] + - np.diff( + exgauss.cdf_ext( + np.array([msig.values["mu"] - 2 * msig.values["sigma"], fmax]), + mbkg.values["area"], + msig.values["mu"], + msig.values["sigma"], + mbkg.values["tau"], + ) + )[0] + ) + if nsig_guess < 0: + nsig_guess = 0 + x0 = aoe_peak_guess( pdf, hist, bins, var, - n_sig=( - msig.values["area"] - - np.diff( - exgauss.cdf_ext( - np.array([msig.values["mu"] - 2 * msig.values["sigma"], fmax]), - mbkg.values["area"], - msig.values["mu"], - msig.values["sigma"], - mbkg.values["tau"], - ) - )[0] - ), + n_sig=nsig_guess, mu=msig.values["mu"], sigma=msig.values["sigma"], n_bkg=mbkg.values["area"], From 3f69abb688a4bff7345b7bc4da1c995b21ec8675 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 18 Nov 2024 23:27:10 +0100 Subject: [PATCH 13/15] another round of tweaks to pass tests --- src/pygama/pargen/AoE_cal.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index f148619cd..5370848b2 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -78,15 +78,19 @@ def aoe_peak_guess(func, hist, bins, var, **kwargs): mu = bin_centers[np.argmax(hist)] pars, _ = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mu, n_bins=5) bin_centres = pgh.get_bin_centers(bins) + _, fallback_sigma, _ = pgh.get_gaussian_guess(hist, bins) + if pars is None or ( pars[0] > bins[-1] or pars[0] < bins[0] or pars[-1] < (0.5 * np.nanmax(hist)) or pars[-1] > (2 * np.nanmax(hist)) + or pars[1] > 2 * fallback_sigma + or pars[1] < 0.5 * fallback_sigma ): i_0 = np.argmax(hist) mu = bin_centres[i_0] - (_, sigma, _) = pgh.get_gaussian_guess(hist, bins) + sigma = fallback_sigma else: mu = pars[0] sigma = pars[1] @@ -236,7 +240,11 @@ def guess(bands, means, mean_errs): class SigmaFit: @staticmethod def func(x, a, b, c): - return np.sqrt(a + (b / (x + 10**-99)) ** c) + return np.where( + (x > 0) & ((a + (b / (x + 10**-99)) ** c) > 0), + np.sqrt(a + (b / (x + 10**-99)) ** c), + np.nan, + ) @staticmethod def string_func(input_param): @@ -283,14 +291,14 @@ def unbinned_aoe_fit( if not isinstance(aoe, np.ndarray): aoe = np.array(aoe) - bin_width = ( - 2 - * (np.nanpercentile(aoe, 75) - np.nanpercentile(aoe, 25)) - * len(aoe) ** (-1 / 3) - ) - nbins = int(np.ceil((np.nanmax(aoe) - np.nanmin(aoe)) / bin_width)) + # bin_width = ( + # 2 + # * (np.nanpercentile(aoe, 75) - np.nanpercentile(aoe, 25)) + # * len(aoe) ** (-1 / 3) + # ) + # nbins = int(np.ceil((np.nanmax(aoe) - np.nanmin(aoe)) / bin_width)) hist, bins, var = pgh.get_hist( - aoe[(aoe < np.nanpercentile(aoe, 99)) & (aoe > np.nanpercentile(aoe, 1))], + aoe[(aoe < np.nanpercentile(aoe, 99)) & (aoe > np.nanpercentile(aoe, 10))], bins=500, ) From cf8b1812cd45063a98c91375efb7b10d18d9457b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 2 Dec 2024 18:23:51 +0100 Subject: [PATCH 14/15] adjust error to sampling rate and add units --- src/pygama/pargen/extract_tau.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 281c92b46..e6764acdf 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -54,6 +54,7 @@ def get_decay_constant( sampling_rate = wfs["dt"].nda[0] units = wfs["dt"].attrs["units"] tau = f"{tau*sampling_rate}*{units}" + err = f"{err*sampling_rate}*{units}" if "pz" in self.output_dict: self.output_dict["pz"].update({"tau": tau, "tau_err": err}) From a310ca5f4e90e828863bea0d5d569127f94e9cd3 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 13 Jan 2025 17:10:40 +0100 Subject: [PATCH 15/15] fix docs --- src/pygama/pargen/AoE_cal.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 5370848b2..6906c0fc6 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -693,8 +693,10 @@ def __init__( dep_correct: bool Whether to correct the double escape peak into the single site band before cut determination dt_cut: dict - Dictionary of the drift time cut parameters in the form: - {"out_param": "dt_cut", "hard": False} + Dictionary of the drift time cut parameters in the form:: + + {"out_param": "dt_cut", "hard": False} + where the out_param is the name of the parameter to cut on in the dataframe (should have been precalculated) and "hard" is whether to remove these events completely for survival fraction calculations or whether they they should only be removed in the cut determination/ A/E calibration steps @@ -777,8 +779,8 @@ def time_correction( none: just use the mean of the a/e centroids to shift all the data partial: iterate through the centroids if vary by less than 0.4 sigma - then group and take mean otherwise when a run higher than 0.4 sigma - is found if it is a single run set to nan otherwise start a new block + then group and take mean otherwise when a run higher than 0.4 sigma + is found if it is a single run set to nan otherwise start a new block full : each run will be corrected individually average_consecutive: average the consecutive centroids interpolate_consecutive: interpolate between the consecutive centroids @@ -787,6 +789,7 @@ def time_correction( be added to the calibration dictionary display: int plot level + """ log.info("Starting A/E time correction") self.timecorr_df = pd.DataFrame() @@ -1070,6 +1073,7 @@ def drift_time_correction( be added to the calibration dictionary display: int plot level + """ log.info("Starting A/E drift time correction") self.dt_res_dict = {} @@ -1512,6 +1516,7 @@ def get_aoe_cut_fit( be added to the calibration dictionary display: int plot level + """ log.info("Starting A/E low cut determination") @@ -1628,6 +1633,7 @@ def calculate_survival_fractions_sweep( mode: str mode to use for the cut determination, can be "greater" or "less" i.e. do we want to keep events with A/E greater or less than the cut value + """ sfs = pd.DataFrame() peak_dfs = {} @@ -1744,6 +1750,7 @@ def calculate_survival_fractions( mode: str mode to use for the cut determination, can be "greater" or "less" i.e. do we want to keep events with A/E greater or less than the cut value + """ sfs = pd.DataFrame() for i, peak in enumerate(peaks):