diff --git a/CombineHarvester/CombinePdfs/test/MorphingMSSM.cpp b/CombineHarvester/CombinePdfs/test/MorphingMSSM.cpp
index 8552204c..3887d48e 100644
--- a/CombineHarvester/CombinePdfs/test/MorphingMSSM.cpp
+++ b/CombineHarvester/CombinePdfs/test/MorphingMSSM.cpp
@@ -227,6 +227,10 @@ int main() {
                   signal_types["ggH"], {mt_cats[0]}, true);
   cb.AddProcesses(masses, {"htt"}, {"8TeV"}, {"mt"},
                   signal_types["bbH"], {mt_cats[1]}, true);
+  cb.AddProcesses(masses, {"htt"}, {"8TeV"}, {"mt"},
+                  signal_types["bbH"], {mt_cats[0]}, true);
+  cb.AddProcesses(masses, {"htt"}, {"8TeV"}, {"mt"},
+                  signal_types["ggH"], {mt_cats[1]}, true);
   cout << " done\n";
   cout << "Adding systematic uncertainties...";
@@ -251,18 +255,15 @@ int main() {
   cout << " done\n";
   cout << "Scaling signal process rates for acceptance...\n";
-  map<string, TGraph> xs;
   for (string e : {"8TeV"}) {
     for (string p : {"ggH", "bbH"}) {
-      ch::ParseTable(&xs, "input/xsecs_brs/mssm_" + p + "_" + e + "_accept.txt",
-                     {p + "_" + e});
-    }
-  }
-  for (string const& e : {"8TeV"}) {
-    for (string const& p : {"ggH", "bbH"}) {
       cout << "Scaling for process " << p << " and era " << e << "\n";
+      auto gr = ch::TGraphFromTable(
+          "input/xsecs_brs/mssm_" + p + "_" + e + "_accept.txt", "mPhi",
+          "accept");
       cb.cp().process(signal_types[p]).era({e}).ForEachProc([&](ch::Process *proc) {
-        ch::ScaleProcessRate(proc, &xs, p+"_"+e, "");
+        double m = boost::lexical_cast<double>(proc->mass());
+        proc->set_rate(proc->rate() * gr.Eval(m));
       });
     }
   }
@@ -336,6 +337,11 @@ int main() {
   // bbA.Write("bbA");
   // bbX.Write("bbX");
   cb.cp().mass({"*"}).WriteDatacard(folder + "/htt_mt_mssm.txt", output);
+  auto bins = cb.bin_set();
+  for (auto b : bins) {
+    cb.cp().bin({b}).mass({"*"}).WriteDatacard(
+        folder + "/" + b + ".txt", output);
+  }
   output.Close();
 }
diff --git a/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp b/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp
index 47ac6a6e..d8cac68f 100644
--- a/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp
+++ b/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp
@@ -82,18 +82,15 @@ int main() {
   std::cout << " done\n";
   std::cout << "Scaling signal process rates for acceptance...\n";
-  map<string, TGraph> xs;
-  for (std::string const& e : {"7TeV", "8TeV"}) {
-    for (std::string const& p : {"ggH", "bbH"}) {
-      ch::ParseTable(&xs, "data/xsecs_brs/mssm_" + p + "_" + e + "_accept.txt",
-                     {p + "_" + e});
-    }
-  }
-  for (std::string const& e : {"7TeV", "8TeV"}) {
-    for (std::string const& p : {"ggH", "bbH"}) {
+  for (string e : {"8TeV"}) {
+    for (string p : {"ggH", "bbH"}) {
       std::cout << "Scaling for process " << p << " and era " << e << "\n";
+      auto gr = ch::TGraphFromTable(
+          "input/xsecs_brs/mssm_" + p + "_" + e + "_accept.txt", "mPhi",
+          "accept");
       cb.cp().process({p}).era({e}).ForEachProc([&](ch::Process *proc) {
-        ch::ScaleProcessRate(proc, &xs, p+"_"+e, "");
+        double m = boost::lexical_cast<double>(proc->mass());
+        proc->set_rate(proc->rate() * gr.Eval(m));
       });
     }
   }
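Note: both files above replace the old ParseTable/ScaleProcessRate pair with ch::TGraphFromTable, which reads a two-column text table (here "mPhi" and "accept") into a TGraph; the per-mass scale factor then comes from TGraph's linear interpolation. A minimal PyROOT sketch of the same idea, with made-up table values standing in for the real input/xsecs_brs files:

    import ROOT
    # Hypothetical (mPhi, accept) rows from an acceptance table
    table = [(90., 0.42), (100., 0.45), (120., 0.51), (140., 0.56)]
    gr = ROOT.TGraph(len(table))
    for i, (m, acc) in enumerate(table):
        gr.SetPoint(i, m, acc)
    rate = 1.0               # stands in for proc->rate()
    m = 110.                 # a mass point between table entries
    print rate * gr.Eval(m)  # TGraph::Eval interpolates linearly -> 0.48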
diff --git a/CombineHarvester/CombinePdfs/test/Test.cpp b/CombineHarvester/CombinePdfs/test/SMLegacyMorphing.cpp
similarity index 57%
rename from CombineHarvester/CombinePdfs/test/Test.cpp
rename to CombineHarvester/CombinePdfs/test/SMLegacyMorphing.cpp
index a6c6584b..fc32473c 100644
--- a/CombineHarvester/CombinePdfs/test/Test.cpp
+++ b/CombineHarvester/CombinePdfs/test/SMLegacyMorphing.cpp
@@ -10,12 +10,12 @@
 #include "CombineTools/interface/Utilities.h"
 #include "CombineTools/interface/HttSystematics.h"
 #include "CombinePdfs/interface/MorphFunctions.h"
+#include "CombineTools/interface/BinByBin.h"
 using namespace std;
 int main() {
   ch::CombineHarvester cb;
-  // cb.SetVerbosity(1);
   typedef vector<pair<int, string>> Categories;
@@ -103,7 +103,7 @@ int main() {
       {0, "tauTau_1jet_high_mediumhiggs"},
       {1, "tauTau_1jet_high_highhiggs"},
       {2, "tauTau_vbf"}};
-  vector<string> masses = ch::MassesFromRange("110-145:5");
+  vector<string> masses = ch::ValsFromRange("110:145|5");
   cout << ">> Creating processes and observations...\n";
   for (string era : {"7TeV", "8TeV"}) {
@@ -145,117 +145,67 @@ int main() {
   cout << ">> Scaling signal process rates...\n";
   map<string, TGraph> xs;
   // Get the table of H->tau tau BRs vs mass
-  ch::ParseTable(&xs, "input/xsecs_brs/htt_YR3.txt", {"htt"});
+  xs["htt"] = ch::TGraphFromTable("input/xsecs_brs/htt_YR3.txt", "mH", "br");
   for (string const& e : {"7TeV", "8TeV"}) {
     for (string const& p : sig_procs) {
       // Get the table of xsecs vs mass for process "p" and era "e":
-      ch::ParseTable(&xs, "data/xsecs_brs/"+p+"_"+e+"_YR3.txt", {p+"_"+e});
+      xs[p+"_"+e] = ch::TGraphFromTable("input/xsecs_brs/"+p+"_"+e+"_YR3.txt", "mH", "xsec");
       cout << ">>>> Scaling for process " << p << " and era " << e << "\n";
       cb.cp().process({p}).era({e}).ForEachProc([&](ch::Process *proc) {
-        ch::ScaleProcessRate(proc, &xs, p+"_"+e, "htt");
+        double m = boost::lexical_cast<double>(proc->mass());
+        proc->set_rate(proc->rate() * xs[p+"_"+e].Eval(m) * xs["htt"].Eval(m));
       });
     }
   }
-  ch::ParseTable(&xs, "input/xsecs_brs/hww_over_htt.txt", {"hww_over_htt"});
+  xs["hww_over_htt"] = ch::TGraphFromTable("input/xsecs_brs/hww_over_htt.txt", "mH", "ratio");
   for (string const& e : {"7TeV", "8TeV"}) {
     for (string const& p : {"ggH", "qqH"}) {
       cb.cp().channel({"em"}).process({p+"_hww125"}).era({e})
         .ForEachProc([&](ch::Process *proc) {
-          ch::ScaleProcessRate(proc, &xs, p+"_"+e, "htt", "125");
-          ch::ScaleProcessRate(proc, &xs, "hww_over_htt", "", "125");
+          proc->set_rate(proc->rate() * xs[p+"_"+e].Eval(125.) * xs["htt"].Eval(125.));
+          proc->set_rate(proc->rate() * xs["hww_over_htt"].Eval(125.));
       });
     }
   }
-  cout << ">> Merging bin errors...\n";
-  ch::CombineHarvester cb_et = move(cb.cp().channel({"et"}));
-  for (string era : {"7TeV", "8TeV"}) {
-    cb_et.cp().era({era}).bin_id({1, 2}).process({"ZL", "ZJ", "QCD", "W"})
-        .MergeBinErrors(0.1, 0.5);
-    cb_et.cp().era({era}).bin_id({3, 5}).process({"W"})
-        .MergeBinErrors(0.1, 0.5);
-  }
-  cb_et.cp().era({"7TeV"}).bin_id({6}).process({"ZL", "ZJ", "W", "ZTT"})
-      .MergeBinErrors(0.1, 0.5);
-  cb_et.cp().era({"8TeV"}).bin_id({7}).process({"ZL", "ZJ", "W", "ZTT"})
-      .MergeBinErrors(0.1, 0.5);
-  cb_et.cp().era({"8TeV"}).bin_id({6}).process({"ZL", "ZJ", "W"})
-      .MergeBinErrors(0.1, 0.5);
-  ch::CombineHarvester cb_mt = move(cb.cp().channel({"mt"}));
-  for (string era : {"7TeV", "8TeV"}) {
-    cb_mt.cp().era({era}).bin_id({1, 2, 3, 4}).process({"W", "QCD"})
-        .MergeBinErrors(0.1, 0.5);
-  }
-  cb_mt.cp().era({"7TeV"}).bin_id({5}).process({"W"})
-      .MergeBinErrors(0.1, 0.5);
-  cb_mt.cp().era({"7TeV"}).bin_id({6}).process({"W", "ZTT"})
-      .MergeBinErrors(0.1, 0.5);
-  cb_mt.cp().era({"8TeV"}).bin_id({5, 6}).process({"W"})
-      .MergeBinErrors(0.1, 0.5);
-  cb_mt.cp().era({"8TeV"}).bin_id({7}).process({"W", "ZTT"})
-      .MergeBinErrors(0.1, 0.5);
-  ch::CombineHarvester cb_em = move(cb.cp().channel({"em"}));
-  for (string era : {"7TeV", "8TeV"}) {
-    cb_em.cp().era({era}).bin_id({1, 3}).process({"Fakes"})
-        .MergeBinErrors(0.1, 0.5);
-  }
-  cb_em.cp().era({"7TeV"}).bin_id({4}).process({"Fakes", "EWK", "Ztt"})
-      .MergeBinErrors(0.1, 0.5);
-  cb_em.cp().era({"8TeV"}).bin_id({5}).process({"Fakes", "EWK", "Ztt"})
-      .MergeBinErrors(0.1, 0.5);
-  cb_em.cp().era({"8TeV"}).bin_id({4}).process({"Fakes", "EWK"})
-      .MergeBinErrors(0.1, 0.5);
-  ch::CombineHarvester cb_ee_mm = move(cb.cp().channel({"ee", "mm"}));
-  for (string era : {"7TeV", "8TeV"}) {
-    cb_ee_mm.cp().era({era}).bin_id({1, 3, 4})
-        .process({"ZTT", "ZEE", "ZMM", "TTJ"})
-        .MergeBinErrors(0.0, 0.5);
-  }
-  ch::CombineHarvester cb_tt = move(cb.cp().channel({"tt"}));
-  cb_tt.cp().bin_id({0, 1, 2}).era({"8TeV"}).process({"ZTT", "QCD"})
-      .MergeBinErrors(0.1, 0.5);
-  cout << ">> Generating bbb uncertainties...\n";
-  cb_mt.cp().bin_id({0, 1, 2, 3, 4}).process({"W", "QCD"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_mt.cp().era({"7TeV"}).bin_id({5}).process({"W"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_mt.cp().era({"7TeV"}).bin_id({6}).process({"W", "ZTT"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_mt.cp().era({"8TeV"}).bin_id({5, 6}).process({"W"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_mt.cp().era({"8TeV"}).bin_id({7}).process({"W", "ZTT"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_et.cp().bin_id({1, 2}).process({"ZL", "ZJ", "QCD", "W"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_et.cp().bin_id({3, 5}).process({"W"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_et.cp().era({"7TeV"}).bin_id({6}).process({"ZL", "ZJ", "W", "ZTT"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_et.cp().era({"8TeV"}).bin_id({7}).process({"ZL", "ZJ", "W", "ZTT"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_et.cp().era({"8TeV"}).bin_id({6}).process({"ZL", "ZJ", "W"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_em.cp().bin_id({1, 3}).process({"Fakes"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_em.cp().era({"7TeV"}).bin_id({4}).process({"Fakes", "EWK", "Ztt"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_em.cp().era({"8TeV"}).bin_id({5}).process({"Fakes", "EWK", "Ztt"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_em.cp().era({"8TeV"}).bin_id({4}).process({"Fakes", "EWK"})
-      .AddBinByBin(0.1, true, &cb);
-  cb_ee_mm.cp().bin_id({1, 3, 4}).process({"ZTT", "ZEE", "ZMM", "TTJ"})
-      .AddBinByBin(0.0, true, &cb);
-  cb_tt.cp().bin_id({0, 1, 2}).era({"8TeV"}).process({"QCD", "ZTT"})
-      .AddBinByBin(0.1, true, &cb);
+  cout << ">> Merging bin errors and generating bbb uncertainties...\n";
+
+  auto bbb = ch::BinByBinFactory()
+      .SetAddThreshold(0.1)
+      .SetMergeThreshold(0.5)
+      .SetFixNorm(true);
+
+  ch::CombineHarvester cb_et = cb.cp().channel({"et"});
+  bbb.MergeAndAdd(cb_et.cp().era({"7TeV"}).bin_id({1, 2}).process({"ZL", "ZJ", "QCD", "W"}), cb);
+  bbb.MergeAndAdd(cb_et.cp().era({"7TeV"}).bin_id({3, 5}).process({"W"}), cb);
+  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({1, 2}).process({"ZL", "ZJ", "QCD", "W"}), cb);
+  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({3, 5}).process({"W"}), cb);
+  bbb.MergeAndAdd(cb_et.cp().era({"7TeV"}).bin_id({6}).process({"ZL", "ZJ", "W", "ZTT"}), cb);
+  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({6}).process({"ZL", "ZJ", "W"}), cb);
+  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({7}).process({"ZL", "ZJ", "W", "ZTT"}), cb);
+
+  ch::CombineHarvester cb_mt = cb.cp().channel({"mt"});
+  bbb.MergeAndAdd(cb_mt.cp().era({"7TeV"}).bin_id({1, 2, 3, 4}).process({"W", "QCD"}), cb);
+  bbb.MergeAndAdd(cb_mt.cp().era({"8TeV"}).bin_id({1, 2, 3, 4}).process({"W", "QCD"}), cb);
+  bbb.MergeAndAdd(cb_mt.cp().era({"7TeV"}).bin_id({5}).process({"W"}), cb);
+  bbb.MergeAndAdd(cb_mt.cp().era({"7TeV"}).bin_id({6}).process({"W", "ZTT"}), cb);
+  bbb.MergeAndAdd(cb_mt.cp().era({"8TeV"}).bin_id({5, 6}).process({"W"}), cb);
+  bbb.MergeAndAdd(cb_mt.cp().era({"8TeV"}).bin_id({7}).process({"W", "ZTT"}), cb);
+
+  ch::CombineHarvester cb_em = cb.cp().channel({"em"});
+  bbb.MergeAndAdd(cb_em.cp().era({"7TeV"}).bin_id({1, 3}).process({"Fakes"}), cb);
+  bbb.MergeAndAdd(cb_em.cp().era({"8TeV"}).bin_id({1, 3}).process({"Fakes"}), cb);
+  bbb.MergeAndAdd(cb_em.cp().era({"7TeV"}).bin_id({4}).process({"Fakes", "EWK", "Ztt"}), cb);
+  bbb.MergeAndAdd(cb_em.cp().era({"8TeV"}).bin_id({5}).process({"Fakes", "EWK", "Ztt"}), cb);
+  bbb.MergeAndAdd(cb_em.cp().era({"8TeV"}).bin_id({4}).process({"Fakes", "EWK"}), cb);
+
+  ch::CombineHarvester cb_tt = cb.cp().channel({"tt"});
+  bbb.MergeAndAdd(cb_tt.cp().era({"8TeV"}).bin_id({0, 1, 2}).process({"ZTT", "QCD"}), cb);
+
+  bbb.SetAddThreshold(0.);  // ee and mm use a different threshold
+  ch::CombineHarvester cb_ll = cb.cp().channel({"ee", "mm"});
+  bbb.MergeAndAdd(cb_ll.cp().era({"7TeV"}).bin_id({1, 3, 4}).process({"ZTT", "ZEE", "ZMM", "TTJ"}), cb);
+  bbb.MergeAndAdd(cb_ll.cp().era({"8TeV"}).bin_id({1, 3, 4}).process({"ZTT", "ZEE", "ZMM", "TTJ"}), cb);
   cout << ">> Setting standardised bin names...\n";
   ch::SetStandardBinNames(cb);
@@ -303,30 +253,30 @@ int main() {
   string folder = "output/sm_cards_morphed";
   boost::filesystem::create_directories(folder);
+  TFile output((folder + "/htt.input.root").c_str(),
+               "RECREATE");
   for (string chn : chns) {
-    TFile output((folder + "/htt_" + chn + ".input.root").c_str(),
-                 "RECREATE");
-    // auto bins = cb.cp().channel({chn}).bin_set();
-    // for (auto b : bins) {
-    //   for (auto m : masses) {
-    //     cout << ">> Writing datacard for bin: " << b << " and mass: " << m
-    //          << "\r" << flush;
-    //     cb.cp().channel({chn}).bin({b}).mass({m, "*"}).WriteDatacard(
-    //         folder+"/"+b + "_" + m + ".txt", output);
-    //   }
-    // }
-    cb.cp().channel({chn}).mass({"125", "*"}).WriteDatacard(
-        folder+"/htt_" + chn + "_125.txt", output);
-    output.Close();
+    boost::filesystem::create_directories(folder+"/"+chn);
+    // Use CH to create a combined card for each channel
+    cb.cp().channel({chn}).mass({"*"}).WriteDatacard(
+        folder + "/" + chn + "/combinedCard.txt", output);
+    auto bins = cb.cp().channel({chn}).bin_set();
+    for (auto b : bins) {
+      cout << ">> Writing datacard for bin: " << b << "\r" << flush;
+      // Also write an individual datacard for each category of each channel
+      cb.cp().channel({chn}).bin({b}).mass({"*"}).WriteDatacard(
+          folder + "/" + chn + "/" + b + ".txt", output);
+      // ... and a copy of each in the combined directory
+      boost::filesystem::create_directories(folder+"/cmb");
+      cb.cp().channel({chn}).bin({b}).mass({"*"}).WriteDatacard(
+          folder + "/cmb/"+ b + ".txt", output);
+    }
   }
-  TFile output((folder + "/htt_combined.input.root").c_str(),
-               "RECREATE");
-  cb.cp().mass({"125", "*"}).WriteDatacard(
-      folder+"/htt_combined_125.txt", output);
+  // Use CH to create a combined card for the full combination
+  cb.cp().mass({"*"}).WriteDatacard(
+      folder + "/cmb/combinedCard.txt", output);
   output.Close();
   cout << "\n>> Done!\n";
 }
-
-
-}
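Note: the BinByBinFactory above folds the old two-step MergeBinErrors/AddBinByBin sequence into a single MergeAndAdd call per selection, with the thresholds set once on the factory. As a rough sketch of what the add threshold means (an illustration of the selection logic only, not the actual CH implementation), a bin only gets its own "bbb" nuisance parameter when its relative statistical error is large enough:

    # Hypothetical bin contents and errors for one background template
    contents = [12.0, 3.0, 0.4]
    errors   = [0.6,  0.9, 0.3]
    add_threshold = 0.1
    for i, (n, e) in enumerate(zip(contents, errors)):
        if n > 0. and e/n > add_threshold:
            print 'bin %i: rel. error %.2f -> add bbb nuisance' % (i, e/n)
        else:
            print 'bin %i: rel. error %.2f -> below threshold, skip' % (i, e/n)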
diff --git a/CombineHarvester/CombineTools/Rules.mk b/CombineHarvester/CombineTools/Rules.mk
index 4f97db72..f8295152 100644
--- a/CombineHarvester/CombineTools/Rules.mk
+++ b/CombineHarvester/CombineTools/Rules.mk
@@ -8,6 +8,8 @@ $(d)/interface/GitVersion.h: $(TOP)/../.git/logs/HEAD
 	@echo -e "Updating $@"
 	@echo -e "namespace ch { inline std::string GitVersion() { return \""$(shell git describe --dirty)"\"; } }\n" > $@
+$(d)/src/CombineHarvester_Datacards.cc : $(d)/interface/GitVersion.h
+
 clean_$(d)/interface/GitVersion.h :
 	rm -f $(subst clean_,,$@)
diff --git a/CombineHarvester/CombineTools/interface/TFileIO.h b/CombineHarvester/CombineTools/interface/TFileIO.h
index e6633e91..69ee5747 100644
--- a/CombineHarvester/CombineTools/interface/TFileIO.h
+++ b/CombineHarvester/CombineTools/interface/TFileIO.h
@@ -42,7 +42,7 @@ void ch::WriteToTFile(T const* ptr, TFile* file, std::string const& path) {
     }
     gDirectory->cd(as_vec[i].c_str());
   }
-  if (!gDirectory->Get(as_vec.back().c_str())) {
+  if (!gDirectory->FindKey(as_vec.back().c_str())) {
     gDirectory->WriteTObject(ptr, as_vec.back().c_str());
   }
   gDirectory->cd("/");
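Note on the Get -> FindKey change: TDirectory::FindKey only consults the key list of the current directory, so it tests for an existing entry without reading the object back, whereas Get would retrieve it. A small PyROOT illustration of the behaviour being relied on (hypothetical file and histogram names):

    import ROOT
    f = ROOT.TFile('demo.root', 'RECREATE')
    h = ROOT.TH1F('h', '', 10, 0., 1.)
    f.WriteTObject(h, 'h')
    # FindKey checks the key list only; no object is read back
    print bool(f.FindKey('h'))        # True
    print bool(f.FindKey('missing'))  # False
    f.Close()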
diff --git a/CombineHarvester/CombineTools/scripts/injectBBH.py b/CombineHarvester/CombineTools/scripts/injectBBH.py
new file mode 100755
index 00000000..e4159e1f
--- /dev/null
+++ b/CombineHarvester/CombineTools/scripts/injectBBH.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+
+import combineharvester as ch
+import ROOT as R
+import glob
+
+SCALES = {
+    ('mt', 1) : 1.12, #0jet-low
+    ('mt', 2) : 1.13, #0jet-high
+    ('mt', 3) : 0.39, #1jet-low
+    ('mt', 4) : 0.39, #1jet-high (low boost)
+    ('mt', 5) : 0.08, #1jet-high (med boost)
+    ('mt', 6) : 0.23, #vbf loose
+    ('mt', 7) : 0.23, #vbf tight
+    ('et', 1) : 1.12, #0jet-low
+    ('et', 2) : 1.13, #0jet-high
+    ('et', 3) : 0.39, #1jet-low
+    ('et', 4) : 0.39, #1jet-high (low boost)
+    ('et', 5) : 0.08, #1jet-high (med boost)
+    ('et', 6) : 0.23, #vbf loose
+    ('et', 7) : 0.23, #vbf tight
+    ('em', 0) : 1.12, #0jet-low
+    ('em', 1) : 1.13, #0jet-high
+    ('em', 2) : 0.39, #1jet-low
+    ('em', 3) : 0.31, #1jet-high
+    ('em', 4) : 0.23, #vbf loose
+    ('em', 5) : 0.23, #vbf tight
+    ('ee', 0) : 1.12, #0jet-low
+    ('ee', 1) : 1.13, #0jet-high
+    ('ee', 2) : 0.39, #1jet-low
+    ('ee', 3) : 0.31, #1jet-high
+    ('ee', 4) : 0.23, #vbf
+    ('mm', 0) : 1.12, #0jet-low
+    ('mm', 1) : 1.13, #0jet-high
+    ('mm', 2) : 0.39, #1jet-low
+    ('mm', 3) : 0.31, #1jet-high
+    ('mm', 4) : 0.23, #vbf
+    ('tt', 0) : 0.08, #1jet-high (med boost)
+    ('tt', 1) : 0.08, #1jet-high (high boost)
+    ('tt', 2) : 0.23  #vbf
+}
+
+def CustomizeProcs(p):
+    p.set_process('bbH')
+    if p.era() == '8TeV':
+        p.set_rate(p.rate() * (0.2030/18.94) * SCALES[(p.channel(), p.bin_id())]) # ratio of YR3 bbH/ggH @ 125.1
+    elif p.era() == '7TeV':
+        p.set_rate(p.rate() * (0.1555/15.11) * SCALES[(p.channel(), p.bin_id())]) # ratio of YR3 bbH/ggH @ 125.1
+    else:
+        print "Don't know how to scale for " + p.era()
+
+def CustomizeSysts(p):
+    p.set_process('bbH')
+
+cmb = ch.CombineHarvester()
+
+for card in glob.glob('output/htt-YR3-hpt/*/htt*.txt'):
+    cmb.QuickParseDatacard(card, "$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt")
+
+for card in glob.glob('output/htt-YR3-hpt/*/vhtt*.txt'):
+    cmb.QuickParseDatacard(card, "$MASS/$ANALYSIS_$BINID_$ERA.txt")
+
+ch.CloneProcs(cmb.cp().process(['ggH']), cmb, lambda p : CustomizeProcs(p))
+ch.CloneSysts(cmb.cp().process(['ggH']), cmb, lambda p : CustomizeSysts(p))
+
+# Now we drop QCDscale and pdf:
+sys_init = set(cmb.cp().process(['bbH']).syst_name_set())
+cmb.FilterSysts(lambda sys :
+    sys.process() == 'bbH' and
+    (
+        sys.name().startswith('QCDscale') or
+        sys.name().startswith('pdf')
+    )
+  )
+sys_final = set(cmb.cp().process(['bbH']).syst_name_set())
+print sys_init - sys_final
+
+# https://twiki.cern.ch/twiki/bin/view/LHCPhysics/CERNYellowReportPageAt8TeV#bbH_Process
+# +7.2 -7.8 +7.5 -6.9 (ggH 8TeV: scale, pdf)
+# +10.3 -14.8 +6.2 -6.2 (bbH 8TeV, scale, pdf)
+# +7.1 -7.8 +7.6 -7.1 (ggH 7TeV, scale, pdf)
+# +10.4 -14.6 +6.1 -6.1 (bbH 7TeV, scale, pdf)
+
+## Add new YR3 inclusive-only systematics
+cmb.cp().process(['bbH']).AddSyst(
+    cmb, 'QCDscale_bbH', 'lnN', ch.SystMap('era')
+    (['7TeV'], (0.854, 1.104))
+    (['8TeV'], (0.852, 1.103)))
+
+cmb.cp().process(['bbH']).AddSyst(
+    cmb, 'pdf_gg', 'lnN', ch.SystMap('era')
+    (['7TeV'], 1.061)
+    (['8TeV'], 1.062))
+
+def ModUEPS(sys):
+    if sys.name() != 'UEPS': return
+    if 'ggH' in sys.process() or 'bbH' in sys.process():
+        sys.set_name('UEPS_ggH')
+    elif 'qqH' in sys.process():
+        sys.set_name('UEPS_qqH')
+    elif 'WH' in sys.process() or 'ZH' in sys.process():
+        sys.set_name('UEPS_VH')
+    else:
+        raise Exception('process %s not recognised' % (sys.process()))
+    if ((sys.channel() in ['ee', 'mm', 'em'] and sys.bin_id() >= 4) or
+        (sys.channel() in ['et', 'mt'] and sys.bin_id() >= 6) or
+        (sys.channel() in ['tt'] and sys.bin_id() >= 2)) :
+        sys.set_name(sys.name() + '_vbf')
+
+cmb.ForEachSyst(ModUEPS)
+
+cmb.cp().syst_name(['QCDscale_ggH2in']).ForEachSyst(lambda sys: sys.set_name('QCDscale_ggH2in_vbf'))
+
+def FixMe(sys):
+    if sys.process().startswith('ggH_hww') and sys.name() == 'pdf_qqbar':
+        print sys
+        sys.set_process(sys.process().replace('ggH','qqH'))
+        print sys
+
+cmb.ForEachSyst(FixMe)
+
+writer_htt = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt',
+                           '$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA_lhchcg.root')
+writer_htt.SetVerbosity(1)
+writer_htt.WriteCards('output/htt-YR3-hpt-bbH', cmb.cp().analysis(['htt']))
+
+writer_vhtt = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$BINID_$ERA.txt',
+                            '$TAG/common/$ANALYSIS.input_$ERA_lhchcg.root')
+writer_vhtt.SetVerbosity(1)
+writer_vhtt.WriteCards('output/htt-YR3-hpt-bbH', cmb.cp().analysis(['vhtt']))
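Note: the (kappa_down, kappa_up) pairs passed to AddSyst above follow directly from the YR3 percentages quoted in the comment block: for bbH at 7TeV the scale uncertainty is +10.4/-14.6%, i.e. kappa_down = 1 - 0.146 = 0.854 and kappa_up = 1 + 0.104 = 1.104. A quick check of that arithmetic:

    for era, (up_pct, down_pct) in {'7TeV': (10.4, 14.6), '8TeV': (10.3, 14.8)}.items():
        print era, (round(1. - down_pct/100., 3), round(1. + up_pct/100., 3))
    # 7TeV (0.854, 1.104)
    # 8TeV (0.852, 1.103)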
diff --git a/CombineHarvester/CombineTools/scripts/scaleGGH.py b/CombineHarvester/CombineTools/scripts/scaleGGH.py
index 00267e5e..32c65046 100755
--- a/CombineHarvester/CombineTools/scripts/scaleGGH.py
+++ b/CombineHarvester/CombineTools/scripts/scaleGGH.py
@@ -4,40 +4,108 @@
 import ROOT as R
 import glob
-SCALES = {
-    ('mt', 1) : 1.029022621, #0jet-low
-    ('mt', 2) : 1.037426484, #0jet-high
-    ('mt', 3) : 1.005208333, #1jet-low
-    ('mt', 4) : 1.014984227, #1jet-high (low boost)
-    ('mt', 5) : 0.873225152, #1jet-high (med boost)
-    ('mt', 6) : 0.948453608, #vbf loose
-    ('mt', 7) : 0.896551724, #vbf tight
-    ('et', 1) : 1.029022621, #0jet-low
-    ('et', 2) : 1.037426484, #0jet-high
-    ('et', 3) : 1.005208333, #1jet-low
-    ('et', 4) : 1.014984227, #1jet-high (low boost)
-    ('et', 5) : 0.873225152, #1jet-high (med boost)
-    ('et', 6) : 0.948453608, #vbf loose
-    ('et', 7) : 0.896551724, #vbf tight
-    ('em', 0) : 1.029022621, #0jet-low
-    ('em', 1) : 1.037426484, #0jet-high
-    ('em', 2) : 1.005208333, #1jet-low
-    ('em', 3) : 0.975298126, #1jet-high
-    ('em', 4) : 0.948453608, #vbf loose
-    ('em', 5) : 0.896551724, #vbf tight
-    ('ee', 0) : 1.029022621, #0jet-low
-    ('ee', 1) : 1.037426484, #0jet-high
-    ('ee', 2) : 1.005208333, #1jet-low
-    ('ee', 3) : 0.975298126, #1jet-high
-    ('ee', 4) : 0.948453608, #vbf
-    ('mm', 0) : 1.029022621, #0jet-low
-    ('mm', 1) : 1.037426484, #0jet-high
-    ('mm', 2) : 1.005208333, #1jet-low
-    ('mm', 3) : 0.975298126, #1jet-high
-    ('mm', 4) : 0.948453608, #vbf
-    ('tt', 0) : 0.873225152, #1jet-high (med boost)
-    ('tt', 1) : 0.873225152, #1jet-high (high boost)
-    ('tt', 2) : 0.948453608  #vbf
+# OLD inclusive distribution
+# SCALES = {
+#     ('mt', 1) : 1.029022621, #0jet-low
+#     ('mt', 2) : 1.037426484, #0jet-high
+#     ('mt', 3) : 1.005208333, #1jet-low
+#     ('mt', 4) : 1.014984227, #1jet-high (low boost)
+#     ('mt', 5) : 0.873225152, #1jet-high (med boost)
+#     ('mt', 6) : 0.948453608, #vbf loose
+#     ('mt', 7) : 0.896551724, #vbf tight
+#     ('et', 1) : 1.029022621, #0jet-low
+#     ('et', 2) : 1.037426484, #0jet-high
+#     ('et', 3) : 1.005208333, #1jet-low
+#     ('et', 4) : 1.014984227, #1jet-high (low boost)
+#     ('et', 5) : 0.873225152, #1jet-high (med boost)
+#     ('et', 6) : 0.948453608, #vbf loose
+#     ('et', 7) : 0.896551724, #vbf tight
+#     ('em', 0) : 1.029022621, #0jet-low
+#     ('em', 1) : 1.037426484, #0jet-high
+#     ('em', 2) : 1.005208333, #1jet-low
+#     ('em', 3) : 0.975298126, #1jet-high
+#     ('em', 4) : 0.948453608, #vbf loose
+#     ('em', 5) : 0.896551724, #vbf tight
+#     ('ee', 0) : 1.029022621, #0jet-low
+#     ('ee', 1) : 1.037426484, #0jet-high
+#     ('ee', 2) : 1.005208333, #1jet-low
+#     ('ee', 3) : 0.975298126, #1jet-high
+#     ('ee', 4) : 0.948453608, #vbf
+#     ('mm', 0) : 1.029022621, #0jet-low
+#     ('mm', 1) : 1.037426484, #0jet-high
+#     ('mm', 2) : 1.005208333, #1jet-low
+#     ('mm', 3) : 0.975298126, #1jet-high
+#     ('mm', 4) : 0.948453608, #vbf
+#     ('tt', 0) : 0.873225152, #1jet-high (med boost)
+#     ('tt', 1) : 0.873225152, #1jet-high (high boost)
+#     ('tt', 2) : 0.948453608  #vbf
+# }
+
+# 2D reweighting
+SCALES_7TEV = {
+    ('mt', 1) : 1.027, #0jet-low
+    ('mt', 2) : 1.044, #0jet-high
+    ('mt', 3) : 0.924, #1jet-low
+    ('mt', 4) : 0.925, #1jet-high (low boost)
+    ('mt', 5) : 0.782, #1jet-high (med boost)
+    ('mt', 6) : 0.962, #vbf
+    ('et', 1) : 1.027, #0jet-low
+    ('et', 2) : 1.044, #0jet-high
+    ('et', 3) : 0.924, #1jet-low
+    ('et', 4) : 0.925, #1jet-high (low boost)
+    ('et', 5) : 0.782, #1jet-high (med boost)
+    ('et', 6) : 0.962, #vbf
+    ('em', 0) : 1.027, #0jet-low
+    ('em', 1) : 1.044, #0jet-high
+    ('em', 2) : 0.924, #1jet-low
+    ('em', 3) : 0.889, #1jet-high
+    ('em', 4) : 0.962, #vbf
+    ('ee', 0) : 1.027, #0jet-low
+    ('ee', 1) : 1.044, #0jet-high
+    ('ee', 2) : 0.924, #1jet-low
+    ('ee', 3) : 0.889, #1jet-high
+    ('ee', 4) : 0.962, #vbf
+    ('mm', 0) : 1.027, #0jet-low
+    ('mm', 1) : 1.044, #0jet-high
+    ('mm', 2) : 0.924, #1jet-low
+    ('mm', 3) : 0.889, #1jet-high
+    ('mm', 4) : 0.962, #vbf
+}
+
+SCALES_8TEV = {
+    ('mt', 1) : 1.051, #0jet-low
+    ('mt', 2) : 1.061, #0jet-high
+    ('mt', 3) : 0.955, #1jet-low
+    ('mt', 4) : 0.961, #1jet-high (low boost)
+    ('mt', 5) : 0.800, #1jet-high (med boost)
+    ('mt', 6) : 1.103, #vbf loose
+    ('mt', 7) : 1.069, #vbf tight
+    ('et', 1) : 1.051, #0jet-low
+    ('et', 2) : 1.061, #0jet-high
+    ('et', 3) : 0.955, #1jet-low
+    ('et', 4) : 0.961, #1jet-high (low boost)
+    ('et', 5) : 0.800, #1jet-high (med boost)
+    ('et', 6) : 1.103, #vbf loose
+    ('et', 7) : 1.069, #vbf tight
+    ('em', 0) : 1.051, #0jet-low
+    ('em', 1) : 1.061, #0jet-high
+    ('em', 2) : 0.955, #1jet-low
+    ('em', 3) : 0.915, #1jet-high
+    ('em', 4) : 1.103, #vbf loose
+    ('em', 5) : 1.069, #vbf tight
+    ('ee', 0) : 1.051, #0jet-low
+    ('ee', 1) : 1.061, #0jet-high
+    ('ee', 2) : 0.955, #1jet-low
+    ('ee', 3) : 0.915, #1jet-high
+    ('ee', 4) : 1.087, #vbf
+    ('mm', 0) : 1.051, #0jet-low
+    ('mm', 1) : 1.061, #0jet-high
+    ('mm', 2) : 0.955, #1jet-low
+    ('mm', 3) : 0.915, #1jet-high
+    ('mm', 4) : 1.087, #vbf
+    ('tt', 0) : 0.800, #1jet-high (med boost)
+    ('tt', 1) : 0.798, #1jet-high (high boost)
+    ('tt', 2) : 1.087  #vbf
 }
 def DoScales(cmb, scales):
@@ -56,7 +124,8 @@
 cmb_for_scaling = cmb.cp().analysis(['vhtt'], False)
-DoScales(cmb_for_scaling, SCALES)
+DoScales(cmb_for_scaling.cp().era(['7TeV']), SCALES_7TEV)
+DoScales(cmb_for_scaling.cp().era(['8TeV']), SCALES_8TEV)
 writer_htt = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt',
                            '$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA.root')
diff --git a/CombineHarvester/CombineTools/scripts/scaleYR2ToYR3.py b/CombineHarvester/CombineTools/scripts/scaleYR2ToYR3.py
index c3ba5eeb..ffc7002d 100755
--- a/CombineHarvester/CombineTools/scripts/scaleYR2ToYR3.py
+++ b/CombineHarvester/CombineTools/scripts/scaleYR2ToYR3.py
@@ -43,12 +43,12 @@ def DoScales(cmb, scales):
 DoScales(cmb_for_scaling.cp().era(['8TeV']), SCALES_XSEC_8TEV)
 DoScales(cmb_for_scaling, SCALES_BR)
-writer_htt = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA_lhchcg.txt',
-                           '$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA_lhchcg.root')
+writer_htt = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt',
+                           '$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA.root')
 writer_htt.SetVerbosity(1)
 writer_htt.WriteCards('output/htt-YR3', cmb.cp().analysis(['htt']))
-writer_vhtt = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$BINID_$ERA_lhchcg.txt',
-                            '$TAG/common/$ANALYSIS.input_$ERA_lhchcg.root')
+writer_vhtt = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$BINID_$ERA.txt',
+                            '$TAG/common/$ANALYSIS.input_$ERA.root')
 writer_vhtt.SetVerbosity(1)
 writer_vhtt.WriteCards('output/htt-YR3', cmb.cp().analysis(['vhtt']))
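Note: both scripts above rely on ch.CardWriter to lay out the output tree. The $TAG, $MASS, $ANALYSIS, $CHANNEL, $BINID and $ERA tokens in the two pattern strings are substituted per datacard (first pattern) and per shape file (second pattern), with $TAG taken from the first argument of WriteCards, so a card would land at e.g. output/htt-YR3/125/htt_mt_6_8TeV.txt. A minimal standalone check of the substitution idea (illustration only, not the CardWriter implementation):

    pattern = '$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt'
    tokens = {'$TAG': 'output/htt-YR3', '$MASS': '125', '$ANALYSIS': 'htt',
              '$CHANNEL': 'mt', '$BINID': '6', '$ERA': '8TeV'}
    for k, v in tokens.items():
        pattern = pattern.replace(k, v)
    print pattern  # output/htt-YR3/125/htt_mt_6_8TeV.txt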
diff --git a/CombineHarvester/CombineTools/src/BinByBin.cc b/CombineHarvester/CombineTools/src/BinByBin.cc
index b50f4bb3..67a5908c 100644
--- a/CombineHarvester/CombineTools/src/BinByBin.cc
+++ b/CombineHarvester/CombineTools/src/BinByBin.cc
@@ -27,8 +27,14 @@ void BinByBinFactory::MergeBinErrors(CombineHarvester &cb) {
     unsigned bbb_removed = 0;
     CombineHarvester tmp = std::move(cb.cp().bin({bin}).histograms());
     std::vector<Process *> procs;
-    tmp.ForEachProc([&](Process *p) {
-      procs.push_back(p);
+    tmp.ForEachProc([&](Process *p) {
+      if (p->shape()->GetSumw2N() == 0) {
+        std::cout << "Process " << p->process()
+                  << " does not contain the weights information needed for "
+                     "valid errors, skipping\n";
+      } else {
+        procs.push_back(p);
+      }
     });
     if (procs.size() == 0) continue;
@@ -88,6 +94,12 @@ void BinByBinFactory::AddBinByBin(CombineHarvester &src, CombineHarvester &dest)
   for (unsigned i = 0; i < procs.size(); ++i) {
     if (!procs[i]->shape()) continue;
    TH1 const* h = procs[i]->shape();
+    if (h->GetSumw2N() == 0) {
+      std::cout << "Process " << procs[i]->process()
+                << " does not contain the weights information needed for "
+                   "valid errors, skipping\n";
+      continue;
+    }
     unsigned n_pop_bins = 0;
     for (int j = 1; j <= h->GetNbinsX(); ++j) {
       if (h->GetBinContent(j) > 0.0) ++n_pop_bins;
diff --git a/CombineHarvester/CombineTools/src/CombineHarvester.cc b/CombineHarvester/CombineTools/src/CombineHarvester.cc
index 09a1aae9..576b6862 100644
--- a/CombineHarvester/CombineTools/src/CombineHarvester.cc
+++ b/CombineHarvester/CombineTools/src/CombineHarvester.cc
@@ -604,8 +604,12 @@ std::shared_ptr<RooWorkspace> CombineHarvester::SetupWorkspace(
   // 2) No: Ok will clone it in. Is the ws name already in use?
   if (!name_in_use) {
     // - No: clone with same name and return
-    wspaces_[std::string(ws.GetName())] = std::shared_ptr<RooWorkspace>(
-        reinterpret_cast<RooWorkspace *>(ws.Clone()));
+    // IMPORTANT: Don't use RooWorkspace::Clone(), it seems to introduce
+    // bugs
+    // wspaces_[std::string(ws.GetName())] = std::shared_ptr<RooWorkspace>(
+    //     reinterpret_cast<RooWorkspace *>(ws.Clone()));
+    wspaces_[std::string(ws.GetName())] =
+        std::make_shared<RooWorkspace>(RooWorkspace(ws));
     return wspaces_.at(ws.GetName());
   }
@@ -637,8 +641,12 @@ std::shared_ptr<RooWorkspace> CombineHarvester::SetupWorkspace(
           << " already defined, renaming to " << new_name << "\n";
-  wspaces_[new_name] = std::shared_ptr<RooWorkspace>(
-      reinterpret_cast<RooWorkspace *>(ws.Clone(new_name.c_str())));
+  // wspaces_[new_name] = std::shared_ptr<RooWorkspace>(
+  //     reinterpret_cast<RooWorkspace *>(ws.Clone(new_name.c_str())));
+  std::shared_ptr<RooWorkspace> new_wsp =
+      std::make_shared<RooWorkspace>(RooWorkspace(ws));
+  new_wsp->SetName(new_name.c_str());
+  wspaces_[new_name] = new_wsp;
   return wspaces_.at(new_name);
 }
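Note: the workaround above side-steps RooWorkspace::Clone() by copy-constructing a fresh workspace and renaming it afterwards. The same pattern is available from PyROOT, sketched here for illustration with a toy workspace:

    import ROOT
    ws = ROOT.RooWorkspace('htt', 'htt')
    ws.factory('Gaussian::pdf(x[0,10], mu[5], sigma[1])')
    # Copy-construct instead of calling ws.Clone(): gives a new,
    # independently owned workspace
    ws_copy = ROOT.RooWorkspace(ws)
    ws_copy.SetName('htt_2')
    print ws_copy.GetName(), ws_copy.pdf('pdf')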
diff --git a/CombineHarvester/CombineTools/test/MSSMExample.cpp b/CombineHarvester/CombineTools/test/MSSMExample.cpp
new file mode 100644
index 00000000..a7133ce4
--- /dev/null
+++ b/CombineHarvester/CombineTools/test/MSSMExample.cpp
@@ -0,0 +1,117 @@
+#include <string>
+#include <map>
+#include <set>
+#include <iostream>
+#include <vector>
+#include <utility>
+#include <cstdlib>
+#include "boost/filesystem.hpp"
+#include "CombineTools/interface/CombineHarvester.h"
+#include "CombineTools/interface/Utilities.h"
+#include "CombineTools/interface/HttSystematics.h"
+#include "CombineTools/interface/CardWriter.h"
+#include "CombineTools/interface/CopyTools.h"
+#include "CombineTools/interface/BinByBin.h"
+
+using namespace std;
+
+int main() {
+  ch::CombineHarvester cb;
+
+  typedef vector<pair<int, string>> Categories;
+  typedef vector<string> VString;
+
+  string auxiliaries = string(getenv("CMSSW_BASE")) + "/src/auxiliaries/";
+  string aux_shapes = auxiliaries +"shapes/";
+  string aux_pruning = auxiliaries +"pruning/";
+
+  // So far only mutau is added for MSSM; the systematics for the other
+  // channels still need to be copied over
+  VString chns =
+      {"mt"};
+
+  map<string, string> input_folders = {
+      {"mt", "Imperial"}
+  };
+
+  map<string, VString> bkg_procs;
+  bkg_procs["mt"] = {"ZTT", "W", "QCD", "ZL", "ZJ", "TT", "VV"};
+
+  VString sig_procs = {"ggH", "bbH"};
+
+  map<string, Categories> cats;
+  cats["mt_8TeV"] = {
+      {8, "muTau_nobtag"}, {9, "muTau_btag"}};
+
+  auto masses = ch::MassesFromRange(
+      "90,100,120-140:10,140-200:20,200-500:50,600-1000:100");
+
+  for (auto chn : chns) {
+    cb.AddObservations(
+        {"*"}, {"htt"}, {"8TeV"}, {chn}, cats[chn+"_8TeV"]);
+    cb.AddProcesses(
+        {"*"}, {"htt"}, {"8TeV"}, {chn}, bkg_procs[chn], cats[chn+"_8TeV"], false);
+    cb.AddProcesses(
+        masses, {"htt"}, {"8TeV"}, {chn}, sig_procs, cats[chn+"_8TeV"], true);
+  }
+
+  ch::AddMSSMSystematics(cb);
+
+  cout << ">> Extracting histograms from input root files...\n";
+  for (string chn : chns) {
+    string file = aux_shapes + input_folders[chn] + "/htt_" + chn +
+                  ".inputs-mssm-" + "8TeV" + "-0.root";
+    cb.cp().channel({chn}).era({"8TeV"}).backgrounds().ExtractShapes(
+        file, "$BIN/$PROCESS", "$BIN/$PROCESS_$SYSTEMATIC");
+    cb.cp().channel({chn}).era({"8TeV"}).signals().ExtractShapes(
+        file, "$BIN/$PROCESS$MASS", "$BIN/$PROCESS$MASS_$SYSTEMATIC");
+  }
+
+  map<string, VString> signal_types = {
+    //{"ggH", {"ggh", "ggH", "ggA"}},
+    //{"bbH", {"bbh", "bbH", "bbA"}}
+    {"ggH", {"ggH"}},
+    {"bbH", {"bbH"}}
+  };
+  cout << "Scaling signal process rates for acceptance...\n";
+  for (string e : {"8TeV"}) {
+    for (string p : sig_procs) {
+      cout << "Scaling for process " << p << " and era " << e << "\n";
+      auto gr = ch::TGraphFromTable(
+          "input/xsecs_brs/mssm_" + p + "_" + e + "_accept.txt", "mPhi",
+          "accept");
+      cb.cp().process(signal_types[p]).era({e}).ForEachProc([&](ch::Process *proc) {
+        double m = boost::lexical_cast<double>(proc->mass());
+        proc->set_rate(proc->rate() * gr.Eval(m));
+      });
+    }
+  }
+
+  cout << ">> Setting standardised bin names...\n";
+  ch::SetStandardBinNames(cb);
+
+  string folder = "output/mssm_cards/LIMITS";
+  boost::filesystem::create_directories(folder);
+  boost::filesystem::create_directories(folder + "/common");
+  for (auto m : masses) {
+    boost::filesystem::create_directories(folder + "/" + m);
+  }
+
+  for (string chn : chns) {
+    TFile output((folder + "/common/htt_" + chn + ".input.root").c_str(),
+                 "RECREATE");
+    auto bins = cb.cp().channel({chn}).bin_set();
+    for (auto b : bins) {
+      for (auto m : masses) {
+        cout << ">> Writing datacard for bin: " << b << " and mass: " << m
+             << "\r" << flush;
+        cb.cp().channel({chn}).bin({b}).mass({m, "*"}).WriteDatacard(
+            folder + "/" + m + "/" + b + ".txt", output);
+      }
+    }
+    output.Close();
+  }
+
+  cout << "\n>> Done!\n";
+}
diff --git a/CombineHarvester/Rules.mk b/CombineHarvester/Rules.mk
index 4d260f66..8f827671 100644
--- a/CombineHarvester/Rules.mk
+++ b/CombineHarvester/Rules.mk
@@ -1,3 +1,3 @@
-SUBDIRS := CombineTools
+SUBDIRS := CombineTools CombinePdfs
 LIB_DEPS :=
 LIB_EXTRA :=
diff --git a/CombineHarvester/mk/header.mk b/CombineHarvester/mk/header.mk
index 2f36ca19..91777cf4 100644
--- a/CombineHarvester/mk/header.mk
+++ b/CombineHarvester/mk/header.mk
@@ -10,4 +10,5 @@ SRC_EXT := cc
 PY_EXT := pycc
 INC_DIR := interface
 PKG_FLAGS :=
+PY_MODULES :=
 endef
diff --git a/data/New_CH_protocols.txt b/data/New_CH_protocols.txt
new file mode 100644
index 00000000..6bd39bcb
--- /dev/null
+++ b/data/New_CH_protocols.txt
@@ -0,0 +1,49 @@
+# This file is a first set of instructions for reproducing the Run 1 results
+# using the new CombineHarvester code.
+
+################### SM Legacy Results ########################
+
+# Old datacard format:
+cd HiggsAnalysis/HiggsToTauTau/CombineHarvester/CombineTools
+./bin/SMLegacyExample
+# OR use the existing python version:
+python scripts/SMLegacyExample.py
+
+# This produces datacards in the usual format. The old scripts
+# limit.py --asymptotic or limit.py --max-likelihood can then be used to
+# calculate the asymptotic limits or run the maximum-likelihood fit.
+
+# New datacard format using morphing:
+cd HiggsAnalysis/HiggsToTauTau/CombineHarvester/CombinePdfs
+./bin/SMLegacyMorphing
+cd output/sm_cards_morphed/cmb/
+text2workspace.py -b combinedCard.txt -o combinedCard.root --default-morphing shape2
+combine -M MaxLikelihoodFit --freezeNuisances MH -m 125 combinedCard.root
+# or use combineTool.py, which also supports ranges of masses to test, e.g.:
+combineTool.py -M MaxLikelihoodFit -m "110:145:5" --freezeNuisances MH --opts='vanilla' combinedCard.root
+
+################### MSSM Legacy Results ######################
+
+# So far this is only set up for the mutau channel, and no bbb uncertainties
+# are added
+
+# Old datacard format:
+cd HiggsAnalysis/HiggsToTauTau/CombineHarvester/CombineTools
+./bin/MSSMExample
+
+# This produces datacards in the usual format. The old scripts
+# lxb-xsec2tanb.py and limit.py --tanb+ can then be run to produce limits
+# scaled by tanb (see the old mssm_protocol.txt)
+
+# New datacard format using morphing:
+cd HiggsAnalysis/HiggsToTauTau/CombineHarvester/CombinePdfs
+./bin/MorphingMSSM
+cd output/mssm_test
+text2workspace.py -b htt_mt_mssm.txt -o combinedCard.root --default-morphing shape2
+# for e.g. mA=160, tanb=9:
+combine -M Asymptotic -n .tanb9.00.mA160 --freezeNuisances tanb,mA --setPhysicsModelParameters mA=160,tanb=9 -m 0 combinedCard.root
+# Or:
+combineTool.py -M Asymptotic -m 0 --freezeNuisances tanb,mA --setPhysicsModelParameters mA=160,tanb=9 --opts='vanilla' combinedCard.root
+# The -n option ensures the tanb and mA values are stored in the name of the
+# output file from combine. The -m option is required by combine and sticks an
+# mH value on the end of the filename as well, so we just set it to 0.
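Note: the quoted mass argument above ("110:145:5") is expanded by combineTool.py into individual mass points before combine is invoked once per point; comma-separated values and mixed forms are also accepted. The expansion is handled by the split_vals helper in the script below, e.g.:

    >>> split_vals('110:145:5')
    ['110', '115', '120', '125', '130', '135', '140', '145']
    >>> split_vals('90,100:120:10')
    ['90', '100', '110', '120']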
diff --git a/scripts/combineTool.py b/scripts/combineTool.py
new file mode 100755
index 00000000..6478799e
--- /dev/null
+++ b/scripts/combineTool.py
@@ -0,0 +1,507 @@
+#!/usr/bin/env python
+
+import argparse
+import os
+import re
+import sys
+import json
+import math
+import itertools
+import stat
+
+OPTS = {
+    'vanilla' : '--minimizerStrategy 0 --minimizerTolerance 0.1 --cminOldRobustMinimize 0',
+    'prefitAsimovSToy' : '-M GenerateOnly --expectSignal 1 -t -1 --saveToys --saveWorkspace --noMCbonly 1',
+    'prefitAsimovBToy' : '-M GenerateOnly --expectSignal 0 -t -1 --saveToys --saveWorkspace --noMCbonly 1',
+    'robust' : '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreScan --cminPreFit 1 --X-rtd FITTER_DYN_STEP --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0',
+    'robustL' : '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreScan --cminPreFit 1 --X-rtd FITTER_DYN_STEP --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 --minimizerToleranceForMinos 0.001',
+    'robustLNoScan' : '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreFit 1 --X-rtd FITTER_DYN_STEP --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 --minimizerToleranceForMinos 0.001',
+    'robustNew' : '--robustFit 1 --minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --minimizerAlgoForMinos Minuit2 --minimizerStrategyForMinos 0 --cminPreScan --cminPreFit 1 --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 --X-rtd FITTER_NEW_CROSSING_ALGO --X-rtd FITTER_NEVER_GIVE_UP --X-rtd FITTER_BOUND --minimizerToleranceForMinos 0.1',
+    'MLHesse': '--minimizerTolerance 0.1 --minimizerAlgo Minuit2 --minimizerStrategy 0 --cminFallbackAlgo "Minuit2,0:0.1" --cminFallbackAlgo "Minuit2,Minimize,0:0.1" --cminOldRobustMinimize 0 --out ./ --minos none --skipBOnlyFit --noMCbonly 1 --cminPreScan'
+}
+
+DRY_RUN = False
+
+JOB_PREFIX = """#!/bin/sh
+ulimit -s unlimited
+cd %(CMSSW_BASE)s/src
+export SCRAM_ARCH=%(SCRAM_ARCH)s
+eval `scramv1 runtime -sh`
+cd %(PWD)s
+""" % ({
+    'CMSSW_BASE' : os.environ['CMSSW_BASE'],
+    'SCRAM_ARCH' : os.environ['SCRAM_ARCH'],
+    'PWD' : os.environ['PWD']
+})
+
+def run(command):
+    print command
+    if not DRY_RUN: return os.system(command)
+
+def split_vals(vals):
+    res = set()
+    first = vals.split(',')
+    for f in first:
+        second = re.split('[:|]', f)
+        if len(second) == 1: res.add(second[0])
+        if len(second) == 3:
+            x1 = float(second[0])
+            ndigs = '0'
+            split_step = second[2].split('.')
+            if len(split_step) == 2:
+                ndigs = len(split_step[1])
+            fmt = '%.'+str(ndigs)+'f'
+            while x1 < float(second[1]) + 0.001:
+                res.add(fmt % x1)
+                x1 += float(second[2])
+    return sorted([x for x in res], key = lambda x : float(x))
+
+def list_from_workspace(file, workspace, set):
+    res = []
+    wsFile = ROOT.TFile(file)
+    argSet = wsFile.Get(workspace).set(set)
+    it = argSet.createIterator()
+    var = it.Next()
+    while var:
+        #var.Print()
+        res.append(var.GetName())
+        var = it.Next()
+    return res
+
+def prefit_from_workspace(file, workspace, params):
+    res = { }
+    wsFile = ROOT.TFile(file)
+    ws = wsFile.Get(workspace)
+    for p in params:
+        var = ws.var(p)
+        res[p] = [var.getVal()+var.getErrorLo(), var.getVal(), var.getVal()+var.getErrorHi()]
+    return res
+
+def get_singles_results(file, scanned, columns):
+    res = { }
+    f = ROOT.TFile(file)
+    if f is None or f.IsZombie(): return None
+    t = f.Get("limit")
+    for i, param in enumerate(scanned):
+        res[param] = { }
+        for col in columns:
+            allvals = [getattr(evt, col) for evt in t]
+            res[param][col] = [allvals[i*2+2], allvals[0], allvals[i*2+1]]
+    return res
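A note on the indexing in get_singles_results above: with --algo singles the "limit" tree holds one entry for the nominal best fit followed by a pair of entries per scanned parameter, so for the i-th parameter the stored triplet [allvals[i*2+2], allvals[0], allvals[i*2+1]] reads as [lo, best, hi] (this ordering is deduced from how PrintSingles below consumes it). For example, assuming a scan of two POIs:

    # Hypothetical "limit" tree contents for one column, in tree order:
    # [best, r_ggH_hi, r_ggH_lo, r_qqH_hi, r_qqH_lo]
    allvals = [1.00, 1.35, 0.62, 1.80, 0.25]
    for i, param in enumerate(['r_ggH', 'r_qqH']):
        print param, [allvals[i*2+2], allvals[0], allvals[i*2+1]]
    # r_ggH [0.62, 1.0, 1.35]
    # r_qqH [0.25, 1.0, 1.8]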
+class BasicCombine:
+    description = 'Just passes options through to combine with special treatment for a few args'
+    requires_root = False
+    def __init__(self):
+        pass
+    def attach_intercept_args(self, group):
+        group.add_argument('-m', '--mass')
+        group.add_argument('--points')
+        group.add_argument('--name', '-n', default='Test')
+    def attach_args(self, group):
+        group.add_argument('--opts', nargs='+', default=[], help='Add preset combine option groups')
+        group.add_argument('--coalesce', '-c', type=int, default=1, help='combine this many jobs')
+        group.add_argument('--split-points', type=int, default=0, help='If > 0 splits --algo grid jobs')
+    def set_args(self, known, unknown):
+        self.args = known
+        self.passthru = unknown
+        if hasattr(args, 'opts'):
+            for opt in args.opts:
+                self.passthru.append(OPTS[opt])
+    def run(self, command, name=None):
+        print command
+        if self.args.gen_job:
+            assert name is not None
+            fname = 'job'+name+'.sh'
+            with open(fname, "w") as text_file:
+                text_file.write(JOB_PREFIX + 'eval ' + command)
+            st = os.stat(fname)
+            os.chmod(fname, st.st_mode | stat.S_IEXEC)
+            #print JOB_PREFIX + command
+        else:
+            if not DRY_RUN: return os.system(command)
+    def run_method(self):
+        ## Put the method back in because we always take it out
+        if hasattr(self.args, 'method'):
+            self.passthru = ['-M', self.args.method] + self.passthru
+            del self.args.method
+        if self.args.points is not None: self.passthru.extend(['--points', self.args.points])
+        taskqueue = []
+        namequeue = []
+        ## Now split the mass values
+        if self.args.mass is not None:
+            mass_vals = split_vals(self.args.mass)
+            for m in mass_vals:
+                taskqueue.append('combine %s -m %s' % (' '.join(self.passthru), m))
+                namequeue.append(args.name)
+        else:
+            taskqueue.append('combine %s' % (' '.join(self.passthru)))
+            namequeue.append(args.name)
+        if (self.args.split_points is not None and
+                self.args.split_points > 0 and
+                self.args.points is not None):
+            points = int(args.points)
+            split = self.args.split_points
+            start = 0
+            ranges = [ ]
+            while (start + (split-1)) <= points:
+                ranges.append((start, start + (split-1)))
+                start += split
+            if start < points:
+                ranges.append((start, points - 1))
+            newqueue = [ ]
+            newnamequeue = [ ]
+            for name, task in itertools.izip(namequeue, taskqueue):
+                for r in ranges:
+                    newqueue.append(task + ' --firstPoint %i --lastPoint %i' % (r[0], r[1]))
+                    newnamequeue.append(name + '_POINTS_%i_%i' % (r[0], r[1]))
+            taskqueue = newqueue
+            namequeue = newnamequeue
+        # add the name back
+        for name, task in itertools.izip(namequeue, taskqueue):
+            task += ' -n %s' % name
+            self.run(task, name)
+
+class SpecialCombine(BasicCombine):
+    def __init__(self):
+        BasicCombine.__init__(self)
+    def attach_intercept_args(self, group):
+        pass
+    def attach_args(self, group):
+        BasicCombine.attach_args(self, group)
+    def run_method(self):
+        pass
+
+class PrintWorkspace(SpecialCombine):
+    description = 'Load a Workspace and call Print()'
+    requires_root = True
+    def __init__(self):
+        SpecialCombine.__init__(self)
+    def attach_args(self, group):
+        group.add_argument('input', help='The input specified as FILE:WORKSPACE')
+    def run_method(self):
+        ws_in = args.input.split(':')
+        f = ROOT.TFile(ws_in[0])
+        ws = f.Get(ws_in[1])
+        ws.Print()
+
+class PrintSingles(SpecialCombine):
+    description = 'Print the output of MultiDimFit --algo singles'
+    requires_root = True
+    def __init__(self):
+        SpecialCombine.__init__(self)
+    def attach_args(self, group):
+        group.add_argument('input', help='The input file')
+        group.add_argument('-P', '--POIs', help='The params that were scanned (in scan order)')
+    def run_method(self):
+        POIs = args.POIs.split(',')
+        res = get_singles_results(args.input, POIs, POIs)
+        for p in POIs:
+            val = res[p][p]
+            print '%s = %.3f -%.3f/+%.3f' % (p, val[1], val[1] - val[0], val[2] - val[1])
+
+class RenameDataSet(SpecialCombine):
+    description = 'Change the name of a dataset in an existing workspace'
+    requires_root = True
+    def __init__(self):
+        SpecialCombine.__init__(self)
+    def attach_args(self, group):
+        group.add_argument('input', help='The input specified as FILE:WORKSPACE:DATASET or FILE:WORKSPACE')
+        group.add_argument('output', help='The output specified as FILE:WORKSPACE:DATASET or FILE:WORKSPACE')
+        group.add_argument('-d', '--data', help='Source data from other file, either FILE:WORKSPACE:DATA or FILE:DATA')
+    def run_method(self):
+        ws_in = args.input.split(':')
+        print '>> Input:  ' + str(ws_in)
+        ws_out = args.output.split(':')
+        print '>> Output: ' + str(ws_out)
+        f = ROOT.TFile(ws_in[0])
+        ws = f.Get(ws_in[1])
+        if len(ws_in) == 3:
+            data = ws.data(ws_in[2])
+        else:
+            ws_d = args.data.split(':')
+            print '>> Data: ' + str(ws_d)
+            f_d = ROOT.TFile(ws_d[0])
+            if len(ws_d) == 2:
+                data = f_d.Get(ws_d[1])
+            else:
+                data = f_d.Get(ws_d[1]).data(ws_d[2])
+        getattr(ws, 'import')(data)
+        ws.SetName(ws_out[1])
+        if len(ws_out) == 3:
+            data.SetName(ws_out[2])
+        ws.writeToFile(ws_out[0])
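One Python-specific detail in RenameDataSet above: RooWorkspace::import() cannot be spelled ws.import(...) from Python because "import" is a reserved keyword, hence the getattr(ws, 'import')(data) form. A toy illustration:

    import ROOT
    ws = ROOT.RooWorkspace('w', 'w')
    x = ROOT.RooRealVar('x', 'x', 0., 10.)
    data = ROOT.RooDataSet('obs', 'obs', ROOT.RooArgSet(x))
    # ws.import(data) would be a SyntaxError in Python:
    getattr(ws, 'import')(data)
    print ws.data('obs')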
+class CovMatrix(SpecialCombine):
+    description = 'Build a fit covariance matrix from scan results'
+    requires_root = True
+    def __init__(self):
+        SpecialCombine.__init__(self)
+    def attach_args(self, group):
+        group.add_argument('-i', '--input', nargs='+', default=[], help='The input file containing the MultiDimFit singles mode output')
+        group.add_argument('-o', '--output', help='The output name in the format file:prefix')
+        group.add_argument('-P', '--POIs', help='The params that were scanned (in scan order)')
+        group.add_argument('--POIs-from-set', help='Extract from file:workspace:set instead')
+        group.add_argument('--compare', help='Compare to RooFitResult')
+    def run_method(self):
+        POIs = []
+        if args.POIs is not None:
+            POIs = args.POIs.split(',')
+        if args.POIs_from_set is not None:
+            ws_in = args.POIs_from_set.split(':')
+            print ws_in
+            POIs = list_from_workspace(ws_in[0], ws_in[1], ws_in[2])
+        res = { }
+        if len(args.input) == 1:
+            res.update(get_singles_results(args.input[0], POIs, POIs))
+        elif len(args.input) > 1:
+            assert len(args.input) == len(POIs)
+            for i in range(len(POIs)):
+                res.update(get_singles_results(args.input[i], [POIs[i]], POIs))
+        for p in POIs:
+            val = res[p][p]
+            print '%s = %.3f -%.3f/+%.3f' % (p, val[1], val[1] - val[0], val[2] - val[1])
+        print res
+        cor = ROOT.TMatrixDSym(len(POIs))
+        cov = ROOT.TMatrixDSym(len(POIs))
+        for i, p in enumerate(POIs):
+            cor[i][i] = ROOT.Double(1.)  # diagonal correlation is 1
+            cov[i][i] = ROOT.Double(pow((res[p][p][2] - res[p][p][0])/2., 2.))  # symmetrized error
+        for i, ip in enumerate(POIs):
+            for j, jp in enumerate(POIs):
+                if i == j: continue
+                val_i = ((res[ip][jp][2] - res[ip][jp][0])/2.)/math.sqrt(cov[j][j])
+                val_j = ((res[jp][ip][2] - res[jp][ip][0])/2.)/math.sqrt(cov[i][i])
+                correlation = (val_i+val_j)/2.  # take the average correlation?
+                #correlation = min(val_i, val_j, key=abs)  # or the smallest in magnitude?
+                cor[i][j] = correlation
+                cor[j][i] = correlation
+                covariance = correlation * math.sqrt(cov[i][i]) * math.sqrt(cov[j][j])
+                cov[i][j] = covariance
+                cov[j][i] = covariance
+        compare = args.compare is not None
+        if compare:
+            f_in = args.compare.split(':')
+            f = ROOT.TFile(f_in[0])
+            fitres = f.Get(f_in[1])
+            fitres_cov = ROOT.TMatrixDSym(len(POIs))
+            fitres_cov_src = fitres.covarianceMatrix()
+            fitres_cor = ROOT.TMatrixDSym(len(POIs))
+            fitres_cor_src = fitres.correlationMatrix()
+            ipos = []
+            for p in POIs:
+                ipos.append(fitres.floatParsFinal().index(p))
+            for i, ip in enumerate(POIs):
+                for j, jp in enumerate(POIs):
+                    fitres_cor[i][j] = ROOT.Double(fitres_cor_src[ipos[i]][ipos[j]])
+                    fitres_cov[i][j] = ROOT.Double(fitres_cov_src[ipos[i]][ipos[j]])
+        print 'My correlation matrix:'
+        cor.Print()
+        if compare:
+            print 'RooFitResult correlation matrix:'
+            fitres_cor.Print()
+        print 'My covariance matrix:'
+        cov.Print()
+        if compare:
+            print 'RooFitResult covariance matrix:'
+            fitres_cov.Print()
+        if args.output is not None:
+            out = args.output.split(':')
+            fout = ROOT.TFile(out[0], 'RECREATE')
+            prefix = out[1]
+            fout.WriteTObject(cor, prefix+'_cor')
+            h_cor = self.fix_TH2(ROOT.TH2D(cor), POIs)
+            fout.WriteTObject(h_cor, prefix+'_h_cor')
+            fout.WriteTObject(cov, prefix+'_cov')
+            h_cov = self.fix_TH2(ROOT.TH2D(cov), POIs)
+            fout.WriteTObject(h_cov, prefix+'_h_cov')
+            if compare:
+                fout.WriteTObject(fitres_cor, prefix+'_comp_cor')
+                h_cor_compare = self.fix_TH2(ROOT.TH2D(fitres_cor), POIs)
+                fout.WriteTObject(h_cor_compare, prefix+'_comp_h_cor')
+                fout.WriteTObject(fitres_cov, prefix+'_comp_cov')
+                h_cov_compare = self.fix_TH2(ROOT.TH2D(fitres_cov), POIs)
+                fout.WriteTObject(h_cov_compare, prefix+'_comp_h_cov')
+    def fix_TH2(self, h, labels):
+        h_fix = h.Clone()
+        for y in range(1, h.GetNbinsY()+1):
+            for x in range(1, h.GetNbinsX()+1):
+                h_fix.SetBinContent(x, y, h.GetBinContent(x, h.GetNbinsY() + 1 - y))
+        for x in range(1, h_fix.GetNbinsX()+1):
+            h_fix.GetXaxis().SetBinLabel(x, labels[x-1])
+        for y in range(1, h_fix.GetNbinsY()+1):
+            h_fix.GetYaxis().SetBinLabel(y, labels[-y])
+        return h_fix
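The correlation extraction in CovMatrix rests on a standard Gaussian identity: when parameter j is profiled while parameter i is scanned, the induced one-sigma shift in j is rho_ij * sigma_j, so dividing the symmetrized cross-parameter shift by that parameter's own uncertainty recovers the correlation; the code does this once in each direction and averages. A worked check with made-up numbers:

    import math
    sigma_i, sigma_j = 0.20, 0.50
    rho = 0.30
    # shift of j seen in the scan of i (and vice versa), as the code computes it:
    val_i = (rho * sigma_j) / math.sqrt(sigma_j**2)   # = rho = 0.3
    val_j = (rho * sigma_i) / math.sqrt(sigma_i**2)   # = rho = 0.3
    print (val_i + val_j) / 2.                        # 0.3, so cov = 0.3*0.2*0.5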
+class Impacts(SpecialCombine):
+    description = 'Calculate nuisance parameter impacts'
+    requires_root = True
+    def __init__(self):
+        SpecialCombine.__init__(self)
+    def attach_intercept_args(self, group):
+        group.add_argument('-m', '--mass', required=True)
+        group.add_argument('-d', '--datacard', required=True)
+        group.add_argument('--redefineSignalPOIs')
+        group.add_argument('--name', '-n', default='Test')
+    def attach_args(self, group):
+        SpecialCombine.attach_args(self, group)
+        group.add_argument('--offset', default=0, type=int,
+            help='Start the loop over parameters with this offset (default: %(default)s)')
+        group.add_argument('--advance', default=1, type=int,
+            help='Advance this many parameters each step in the loop (default: %(default)s)')
+        group.add_argument('--named', metavar='PARAM1,PARAM2,...',
+            help=('By default the list of nuisance parameters will be loaded from the input workspace. '
+                  'Use this option to specify a different list'))
+        group.add_argument('--doInitialFit', action='store_true',
+            help=('Find the crossings of all the POIs. Must have the output from this before running with --doFits'))
+        group.add_argument('--splitInitial', action='store_true',
+            help=('In the initial fits generate separate jobs for each POI'))
+        group.add_argument('--doFits', action='store_true',
+            help=('Actually run the fits for the nuisance parameter impacts, otherwise just look for the results'))
+        group.add_argument('--output', '-o',
+            help=('Write output json to a file'))
+    def run_method(self):
+        offset = self.args.offset
+        advance = self.args.advance
+        passthru = self.passthru
+        mh = self.args.mass
+        ws = self.args.datacard
+        name = self.args.name if self.args.name is not None else ''
+        named = []
+        if args.named is not None:
+            named = args.named.split(',')
+        # Put intercepted args back
+        passthru.extend(['-m', mh])
+        passthru.extend(['-d', ws])
+        pass_str = ' '.join(passthru)
+        paramList = []
+        if args.redefineSignalPOIs is not None:
+            poiList = args.redefineSignalPOIs.split(',')
+        else:
+            poiList = list_from_workspace(ws, 'w', 'ModelConfig_POI')
+        #print 'Have nuisance parameters: ' + str(paramList)
+        print 'Have POIs: ' + str(poiList)
+        poistr = ','.join(poiList)
+        if args.doInitialFit:
+            if args.splitInitial:
+                for poi in poiList:
+                    self.run('combine -M MultiDimFit -n _initialFit_%(name)s_POI_%(poi)s --algo singles --redefineSignalPOIs %(poistr)s --floatOtherPOIs 1 --saveInactivePOI 1 -P %(poi)s %(pass_str)s --altCommit' % vars(), '_initialFit_%(name)s_POI_%(poi)s' % vars())
+            else:
+                self.run('combine -M MultiDimFit -n _initialFit_%(name)s --algo singles --redefineSignalPOIs %(poistr)s %(pass_str)s --altCommit' % vars(), '_initialFit_%(name)s')
+            sys.exit(0)
+        initialRes = get_singles_results('higgsCombine_initialFit_%(name)s.MultiDimFit.mH%(mh)s.root' % vars(), poiList, poiList)
+        if len(named) > 0:
+            paramList = named
+        else:
+            paramList = list_from_workspace(ws, 'w', 'ModelConfig_NuisParams')
+        print 'Have nuisance parameters: ' + str(len(paramList))
+        prefit = prefit_from_workspace(ws, 'w', paramList)
+        res = { }
+        res["POIs"] = []
+        res["params"] = []
+        for poi in poiList:
+            res["POIs"].append({"name" : poi, "fit" : initialRes[poi][poi]})
+        counter = offset
+        missing = [ ]
+        while counter < len(paramList):
+            pres = { }
+            param = paramList[counter]
+            print 'Doing param ' + str(counter) + ': ' + param
+            if args.doFits:
+                self.run('combine -M MultiDimFit -n _paramFit_%(name)s_%(param)s --algo singles --redefineSignalPOIs %(param)s,%(poistr)s -P %(param)s --floatOtherPOIs 1 --saveInactivePOI 1 %(pass_str)s --altCommit' % vars(), '_paramFit_%(param)s' % vars())
+            if not args.dry_run:
+                paramScanRes = get_singles_results('higgsCombine_paramFit_%(name)s_%(param)s.MultiDimFit.mH%(mh)s.root' % vars(), [param], poiList + [param])
+                if paramScanRes is None:
+                    missing.append((counter, param))
+                    counter = counter + advance
+                    continue
+                pres.update({"name" : param, "fit" : paramScanRes[param][param], "prefit" : prefit[param]})
+                for p in poiList:
+                    pres.update({p : paramScanRes[param][p], 'impact_'+p : (paramScanRes[param][p][2] - paramScanRes[param][p][0])/2.})
+                res['params'].append(pres)
+            counter = counter + advance
+
+        jsondata = json.dumps(res, sort_keys=True, indent=2, separators=(',', ': '))
+        print jsondata
+        if args.output is not None:
+            with open(args.output, 'w') as out_file:
+                out_file.write(jsondata)
+        if len(missing) > 0:
+            print 'Missing inputs: ' + str(missing)
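The JSON written by Impacts has two top-level lists, so downstream plotting only needs the [lo, best, hi] triplets: "POIs" with the initial fit of each POI, and "params" with, per nuisance parameter, its prefit range, its own fit, the refitted POI values and the derived impact (half the POI excursion). Schematically, with invented numbers:

    {
      "POIs": [ {"name": "r", "fit": [0.62, 1.00, 1.35]} ],
      "params": [
        {
          "name": "CMS_eff_t",
          "prefit": [-1.0, 0.0, 1.0],
          "fit": [-0.8, 0.2, 1.1],
          "r": [0.95, 1.00, 1.08],
          "impact_r": 0.065
        }
      ]
    }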
+def register_method(parser, method_dict, method_class):
+    class_name = method_class.__name__
+    parser.description += '  %-20s : %s\n' % (class_name, method_class.description)
+    method_dict[class_name] = method_class
+
+parser = argparse.ArgumentParser(
+    prog='combineTool.py',
+    add_help=False,
+    formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+
+parser.description = 'Available methods:\n\n'
+methods = {}
+register_method(parser, methods, BasicCombine)
+register_method(parser, methods, PrintWorkspace)
+register_method(parser, methods, RenameDataSet)
+register_method(parser, methods, Impacts)
+register_method(parser, methods, CovMatrix)
+register_method(parser, methods, PrintSingles)
+
+parser.add_argument('-M', '--method')
+parser.add_argument('--dry-run', action='store_true', help='Commands are echoed to the screen but not run')
+parser.add_argument('--gen-job', action='store_true', help='Generate an executable job script for each command instead of running it directly')
+
+(args, unknown) = parser.parse_known_args()
+
+if args.method is None:
+    parser.print_help()
+    sys.exit(0)
+
+DRY_RUN = args.dry_run
+
+method = methods[args.method]() if args.method in methods else BasicCombine()
+
+# Importing ROOT is quite slow, so only import it if the chosen method requires it
+if method.__class__.requires_root:
+    #print 'Importing ROOT'
+    import ROOT
+    ROOT.PyConfig.IgnoreCommandLineOptions = True
+    ROOT.gSystem.Load('libHiggsAnalysisCombinedLimit')
+
+# One group of options that are specific to the chosen method
+tool_group = parser.add_argument_group('%s options' % method.__class__.__name__, 'options specific to this method')
+# And another group for combine options that will be intercepted
+intercept_group = parser.add_argument_group('combine options', 'standard combine options that will be re-interpreted')
+
+# Let the chosen method create the arguments in both groups
+method.attach_intercept_args(intercept_group)
+method.attach_args(tool_group)
+
+# Now we can add the normal help option
+parser.add_argument('-h', '--help', action='help')
+
+(args, unknown) = parser.parse_known_args()
+
+# Print these for debugging
+#print args
+#print unknown
+
+method.set_args(args, unknown)
+method.run_method()