Skip to content

Commit

Permalink
First work on statcomm
Browse files Browse the repository at this point in the history
  • Loading branch information
Charlie Kapsiak committed Sep 24, 2024
1 parent e50c409 commit a252419
Show file tree
Hide file tree
Showing 5 changed files with 992 additions and 46 deletions.
2 changes: 1 addition & 1 deletion common/previous_talks.tex
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
\prevtalk{2023-08-26}{
\href{https://indico.cern.ch/event/1324384/\#2-rpv-single-top-squark-analys}{2023-10-06: Introduction Talk}}

\prevtalk{2024-05-01}{Background Estimation}
\prevtalk{2024-05-01}{\href{https://indico.cern.ch/event/1423342/#6-rpv-single-stop-search-multi}{2024-06-14: Background Estimation}}
4 changes: 2 additions & 2 deletions gaussian_process_fit/gaussian_process_fit.tex
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
\usepackage{amsthm, amssymb}
\usepackage{subcaption}

\DeclareRobustCommand{\bbone}{\text{\usefont{U}{bbold}{m}{n}1}}
\DeclareMathOperator{\Ex}{\mathbb{E}}% expected value
\def\bbone{\text{\usefont{U}{bbold}{m}{n}1}}
\def\Ex{\mathbb{E}}% expected value

\newcommand{\gp}{\mathcal{GP}}
\newcommand{\Normal}[2]{\mathcal{N}\left( #1 , #2 \right)}
Expand Down
Binary file added gaussian_process_presentation/figures/higgs.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
135 changes: 92 additions & 43 deletions gaussian_process_presentation/gaussian_process_presentation.tex
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,30 @@
\usepackage{tikz-feynman}
\pgfkeys{/tikzfeynman/warn luatex=false}
\usepgfplotslibrary{groupplots}
%\includeonlyframes{current}
% \includeonlyframes{current}
\usepackage{listings}

\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
\lstdefinestyle{mystyle}{
backgroundcolor=\color{backcolour},
commentstyle=\color{codegreen},
keywordstyle=\color{magenta},
numberstyle=\tiny\color{codegray},
stringstyle=\color{codepurple},
basicstyle=\ttfamily\footnotesize,
breakatwhitespace=false,
breaklines=true,
captionpos=b,
keepspaces=true,
numbers=left,
numbersep=5pt,
showspaces=false,
showstringspaces=false,
showtabs=false,
tabsize=2
backgroundcolor=\color{backcolour},
commentstyle=\color{codegreen},
keywordstyle=\color{magenta},
numberstyle=\tiny\color{codegray},
stringstyle=\color{codepurple},
basicstyle=\ttfamily\footnotesize,
breakatwhitespace=false,
breaklines=true,
captionpos=b,
keepspaces=true,
numbers=left,
numbersep=5pt,
showspaces=false,
showstringspaces=false,
showtabs=false,
tabsize=2
}
\lstset{style=mystyle}

Expand Down Expand Up @@ -64,7 +64,7 @@

\title[Single Stop Update]{RPV Single Stop Search Update}
\subtitle{Non-parametric 2D Background Estimation Using Gaussian Processes}
\date{2024-05-30}
\date{2024-06-14}

\begin{document}

Expand Down Expand Up @@ -122,7 +122,8 @@
\end{itemize}

\begin{center}
\graphiccite{figures/xsec.png}{0.5}{rasmussen_gaussian_2006}\hspace{1em}
%\graphiccite{figures/xsec.png}{0.5}{rasmussen_gaussian_2006}\hspace{1em}
\includegraphics[width=0.5\textwidth]{figures/xsec.png}
\scalebox{0.7}{\includestandalone{\commonfiles{general/single_stop}}}
\end{center}

Expand Down Expand Up @@ -284,7 +285,12 @@
\end{col}
\begin{col}
\begin{center}
\graphiccite{figures/fit_table}{1}{zisopoulos_parametric_2023}
\begin{onlyenv}<1>
\includegraphics[width=\textwidth]{figures/higgs}
\end{onlyenv}
\begin{onlyenv}<2>
\graphiccite{figures/fit_table}{1}{zisopoulos_parametric_2023}
\end{onlyenv}
\end{center}
\end{col}
\end{splitcol}
Expand Down Expand Up @@ -416,7 +422,7 @@
% \end{center}
% \end{frame}

\begin{frame}{Kernels and Scales}
\begin{frame}[label=kernelscales]{Kernels and Scales}
\begin{itemize}
\item The choice of kernel is the most important aspect of Gaussian processes.
\item The choice of $k(x,y)$ reflects our understanding of how the points should be correlated, how smooth the functions should be, etc.
Expand Down Expand Up @@ -630,8 +636,6 @@

\section[Statistical Considerations]{Preliminary Statistical Strategy}



\begin{frame}{Strategy Overview}
\begin{block}{}
We see that GPR can provide a quality estimate of the background over a wide range of blinding windows. How can we use this to extract signal and set limits?
Expand Down Expand Up @@ -686,18 +690,18 @@
\end{onlyenv}
\begin{onlyenv}<2>
\begin{itemize}
\item We diagonalize the MVN to produce ``eigen-variation'', compatible with combine's statistical model.
\item We diagonalize the MVN to produce ``eigen-variations'', compatible with combine's statistical model.
\item The MVN mean is used as the nominal background estimate.
\item Key transformation used is this:
\begin{equation}
z \sim \mathcal{N} \left( \mu,\Sigma \right) \implies A z + b \sim \mathcal{N} \left( A \mu + b , A \Sigma A^{T} \right)
\end{equation}
%\item Since $\Sigma$ is PSD, we can write $\Sigma = Q \Lambda Q^{T} = Q \Lambda^{1/2} \Lambda^{1/2} Q^{T}$
% \item Since $\Sigma$ is PSD, we can write $\Sigma = Q \Lambda Q^{T} = Q \Lambda^{1/2} \Lambda^{1/2} Q^{T}$

%\item Suppose that our posterior MVN is given by $\mathcal{N} \left( \mu,\Sigma \right)$ of dimension N. Then if $z_{n} \sim \mathcal{N} \left( 0,1 \right) $ it follows that
% \begin{equation}
% Q \Lambda^{1/2} z + \mu \sim \mathcal{N} \left( \mu , \Sigma \right)
% \end{equation}
% \item Suppose that our posterior MVN is given by $\mathcal{N} \left( \mu,\Sigma \right)$ of dimension N. Then if $z_{n} \sim \mathcal{N} \left( 0,1 \right) $ it follows that
% \begin{equation}
% Q \Lambda^{1/2} z + \mu \sim \mathcal{N} \left( \mu , \Sigma \right)
% \end{equation}
\end{itemize}

\end{onlyenv}
Expand All @@ -707,7 +711,7 @@
\item We generate a datacard using these variations.
\end{itemize}

\begin{lstlisting}
\begin{lstlisting}
bin SignalRegion SignalRegion
process Signal BackgroundEstimate
process 0 1
Expand All @@ -717,25 +721,25 @@
EVAR_1 shape - 1
EVAR_2 shape - 1
EVAR_3 shape - 1
\end{lstlisting}
\end{lstlisting}

\end{onlyenv}
\begin{onlyenv}<4>
\begin{itemize}
\item Combine can be run as usual to produce significance estimates.
\end{itemize}
\begin{lstlisting}
\begin{lstlisting}
$ combine -M Significance datacard.txt -t -1 --expectSignal=0
-- Significance --
Significance: 0
Done in 0.04 min (cpu), 0.04 min (real)
\end{lstlisting}
\begin{lstlisting}
\end{lstlisting}
\begin{lstlisting}
$ combine -M Significance datacard.txt -t -1 --expectSignal=1
-- Significance --
Significance: 3.38566
Done in 0.02 min (cpu), 0.02 min (real)
\end{lstlisting}
\end{lstlisting}

\begin{center}
\small Examples of running combine with $m_{\stopq} = 1500 \text{GeV}$ $m_{\chargino} = 600 \text{GeV}$
Expand Down Expand Up @@ -808,6 +812,21 @@ \section{Appendix}
\end{center}
\end{frame}

% Appendix slide: formal statement that a GP induces a distribution over functions.
\begin{frame}{Gaussian Processes as Function Distributions}
Gaussian processes allow us to define distributions over the space of functions. Given a Gaussian process $\mathcal{GP} \left( m(x) , k(x,x') \right)$ and some function $h$, then
\begin{equation}
\mathbf{h} \sim \mathcal{N}(m(X) , k(X,X))
\end{equation}
Given $n$ points in $\mathbb{R}^{k}$, the Gaussian process defines an $n$-dimensional multivariate Gaussian $\mathcal{N}$. If a function $h(x)$ has values $h_1,h_2,\dots,h_{n}$ at those points, then
\begin{equation}
p \left( h \right) \sim \mathcal{N}(h_{1}, \dots, h_{n})
\end{equation}

\begin{center}
\graphiccite{figures/prior_and_conditioning}{0.7}{rasmussen_gaussian_2006}
\end{center}
\end{frame}

\begin{frame}{Model Selection}
\begin{itemize}
\item The choice of kernel, both in terms of structure and hyperparameters, is referred to as model selection.
Expand All @@ -819,21 +838,51 @@ \section{Appendix}
\end{itemize}
\end{frame}

\begin{frame}{Gaussian Processes as Function Distributions}
Gaussian process allow us to define distributions over the space of functions. Given a gaussian process $\mathcal{GP} \left( m(x) , k(x,x') \right)$, and some function $h$, then
\begin{equation}
\mathbf{h} \sim N(m(X) , k(X,X))
\end{equation}
Given $n$ points in $\mathbb{R}^{k}$, the gaussian process defined a $n$ dimensional multivariate gaussian $\mathcal{N}$. If a function $h(x)$ has has values $h_1,h_2,...,h_{n}$ at those points, then

% Appendix slide: the fully Bayesian hierarchy behind model selection,
% and why we fall back to type II MLE in practice.
\begin{frame}{Model Selection in Detail}
\begin{itemize}
\item A fully Bayesian approach involves constructing a joint probability distribution over the observed quantities, parameters, model hyperparameters, and the models themselves.
\end{itemize}
\begin{equation}
\label{eq:1}
p \left( \bm{w} | \bm{y}, \bm{X}, \bm{\theta}, \mathcal{H} \right)
= \frac{p \left( \bm{y} | \bm{w}, \bm{X}, \mathcal{H} \right) p \left( \bm{w} | \bm{\theta}, \mathcal{H} \right) }
{p \left( \bm{y} | \bm{\theta},\bm{X}, \mathcal{H} \right) }
\end{equation}
\begin{itemize}
\item Repeated applications of Bayes' rule yield, in turn, the distributions over the model hyperparameters and finally the models themselves.
\item However, it is almost always computationally infeasible to actually execute this complete model.
\item Instead, we perform a type II MLE, as described on slide~\ref{kernelscales}.
\end{itemize}
\end{frame}

% Appendix slide: sketch of the statistical model as a Pyro program.
% [fragile] is required because the frame contains a verbatim-like lstlisting.
\begin{frame}[fragile]{Probabilistic Programming With Pyro/Numpyro}
\begin{itemize}
\item Probabilistic programming languages are a powerful technique for statistical modeling.
\item Allowing the model to be described in code allows for much greater flexibility and interpretability than a Bayesian network or a static tool like Combine.
\end{itemize}

\begin{center}
\begin{lstlisting}[language=Python,
basicstyle=\ttfamily\scriptsize,
]
def statModel(bkg_mean, bkg_transform, signal_dist, observed=None):
    r = pyro.sample("rate", dist.Uniform(-20, 20))
    with pyro.plate("background_variations", bkg_transform.shape[1]):
        b = pyro.sample("raw_variations", dist.Normal(0, 1))
    background = bkg_mean + bkg_transform @ b
    obs_hist = (r * signal_dist) + background
    with pyro.plate("bins", bkg_mean.shape[0]):
        return pyro.sample("observed", dist.Poisson(torch.clamp(obs_hist, 1)), obs=observed)
\end{lstlisting}
\end{center}
\end{frame}

% Appendix slide: single full-width figure with the Combine likelihood equations.
\begin{frame}{Combine Model}
\centering
\includegraphics[width=\textwidth]{figures/CombineLikelihoodEqns.png}
\end{frame}


\end{document}

Expand Down
Loading

0 comments on commit a252419

Please sign in to comment.