From 941b1ea46c1bbe83721f5415c5a3777fa24e12e2 Mon Sep 17 00:00:00 2001
From: amirDahari1
Date: Fri, 26 Jul 2024 19:37:47 +0100
Subject: [PATCH 1/6] updating fig_model_error to incorporate the classical
 predictions

---
 paper_figures/fig_model_error.py     | 65 +++++++++++-------------
 paper_figures/prediction interval.py | 74 ++++++++++++++++++----------
 representativity/prediction_error.py | 36 ++++++--------
 representativity/validation.py       |  1 +
 4 files changed, 95 insertions(+), 81 deletions(-)

diff --git a/paper_figures/fig_model_error.py b/paper_figures/fig_model_error.py
index 4de8a1c..dd435c6 100755
--- a/paper_figures/fig_model_error.py
+++ b/paper_figures/fig_model_error.py
@@ -4,16 +4,18 @@
 import math
 import numpy as np
 
-fill_color = [1, 0.49803922, 0.05490196, 0.2]
+# fill_color = [1, 0.49803922, 0.05490196, 0.2]
 sigma_color = [0.4, 0.9, 0.0, 1]
-fill_color_dark = fill_color.copy()
-fill_color_dark[-1] = 0.5
+# fill_color_dark = fill_color.copy()
+# fill_color_dark[-1] = 0.5
 
 def scatter_plot(ax, res, title, xlabel, ylabel):
-    pred_data, fit_data = res
+    pred_data, fit_data, oi_data = res
+
+    ax.scatter(fit_data, oi_data, s=0.2, label='Classical predictions')
+    ax.scatter(fit_data, pred_data, s=0.2, c="orange", label='Model predictions')
 
-    ax.scatter(fit_data, pred_data, s=0.2, label='Predictions')
     ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
@@ -21,20 +23,18 @@ def scatter_plot(ax, res, title, xlabel, ylabel):
 
     errs = (fit_data-pred_data)/pred_data
     max_val = (np.max([np.max(fit_data), np.max(pred_data)]))
-    print(max_val)
     x = np.linspace(0,max_val,100)
     ax.plot(x, x, label = 'Ideal predictions', color='black')
 
     errs = np.sort(errs)
     std = np.std(errs)
-    z = stats.norm.interval(0.9)[1]
-    err = std*z
-    print(f'std = {std}')
-    print(f'mean = {np.mean(errs)}')
-    ax.plot(x ,x/(1+err), c=fill_color_dark, ls='--', linewidth=1)
-    fill_1 = ax.fill_between(x, np.ones(x.shape[0])*(max_val),x/(1+err), alpha=0.2, label = f'95% confidence range', color=fill_color)
+    # z = stats.norm.interval(0.9)[1]
+    # err = std*z
+    # print(f'std = {std}')
+    # print(f'mean = {np.mean(errs)}')
+    # ax.plot(x ,x/(1+err), c=fill_color_dark, ls='--', linewidth=1)
+    # fill_1 = ax.fill_between(x, np.ones(x.shape[0])*(max_val),x/(1+err), alpha=0.2, label = f'95% confidence range', color=fill_color)
     ax.set_aspect('equal', adjustable='box')
-    return errs, err
 
 with_cls = False
 fig, axs = plt.subplots(2,3+with_cls)
@@ -42,17 +42,17 @@ def scatter_plot(ax, res, title, xlabel, ylabel):
 dims = ['2D', '3D']
 edge_length = ['1536', '448']
 for i, dim in enumerate(dims):
     dim_data, micro_names = prediction_error.data_micros(dim)
-    pred_cls_all, fit_cls_all, _, vfs = prediction_error.pred_vs_fit_all_data(dim, edge_length[i], num_runs=9, std_not_cls=False)
-    cls_results = [pred_cls_all, fit_cls_all]
+    pred_cls_all, fit_cls_all, oi_cls_all, _, vfs = prediction_error.pred_vs_fit_all_data(dim, edge_length[i], num_runs=9, std_not_cls=False)
+    cls_results = [pred_cls_all, fit_cls_all, oi_cls_all]
     # calculate the standard deviation instead of the cls:
     vfs = np.array(vfs)
     vfs_one_minus_vfs = vfs*(1-vfs)
     dim_int = int(dim[0])
     cur_edge_length = int(edge_length[i])
-    pred_std_all = ((pred_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5
-    fit_std_all = ((fit_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5
-    std_results = [pred_std_all, fit_std_all]
+    std_results = [((cls_res/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5 for cls_res in cls_results]
+    # pred_std_all = ((pred_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5
+    # fit_std_all = ((fit_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5
+    # std_results = [pred_std_all, fit_std_all]
     dim_str = dim[0]
     x_labels = [f'True CLS $a_{int(dim[0])}$', f'True Phase Fraction std $\sigma_{int(dim[0])}$']
     cls_math = r'\tilde{a}_{2}' if dim == '2D' else r'\tilde{a}_{3}'
     std_math = r'\tilde{\sigma}_{2}' if dim == '2D' else r'\tilde{\sigma}_{3}'
     y_labels = ['Predicted CLS $%s$' %cls_math, 'Predicted Phase Fraction std $%s$' %std_math]
     title_suffix = r'img size $%s^%s$' %(edge_length[i], dim_str)
     titles = [f'{dim} CLS comparison, '+title_suffix, f'{dim} std comparison, '+title_suffix]
     # sa_results = [err_exp_sa[pred_err_sa!=math.isnan], pred_err_sa[pred_err_sa!=math.nan]]
@@ -77,12 +77,13 @@ def scatter_plot(ax, res, title, xlabel, ylabel):
         ax_ylabel = y_labels[j]
         ax_title = titles[j]
 
-        _ = scatter_plot(ax, res, ax_title, ax_xlabel, ax_ylabel)
+        scatter_plot(ax, res, ax_title, ax_xlabel, ax_ylabel)
 
         if (j == 0 or not with_cls) and dim == '2D':
             ax.legend(loc='upper left')
 
     # Fit a normal distribution to the data:
+    pred_std_all, fit_std_all = std_results[:2]
     errs = (fit_std_all-pred_std_all)/pred_std_all*100
     mu, std = stats.norm.fit(errs)
@@ -95,43 +96,37 @@ def scatter_plot(ax, res, title, xlabel, ylabel):
     max_val = np.max([np.max(errs), -np.min(errs)])
     y, x, _ = ax2.hist(errs, range=[-max_val, max_val], bins=50, alpha=0.6, density=True)
-    print(f'Is the hist normal: {stats.normaltest(errs)}')
-    print(f'mean = {errs.mean()}')
-    print(f'median = {np.median(errs)}')
 
     # Plot the PDF.
     xmin, xmax = x.min(), x.max()
-    print(xmin, xmax)
     max_abs = max(np.abs(np.array([xmin, xmax])))
     x = np.linspace(xmin, xmax, 100)
     p = stats.norm.pdf(x, mu, std)
-
-    print(fill_color)
-
-    ax2.plot(x, p, linewidth=2, color=fill_color_dark, label=f'Fitted normal distribution')
+
+    ax2.plot(x, p, linewidth=2, label=f'Fitted normal distribution')
     title = f'{dim} std PE distribution, {title_suffix}'
     ax2.set_title(title)
     ax2.set_xlabel(r'Prediction Percentage Error ($\frac{\sigma_{%s}-\tilde{\sigma_{%s}}}{\tilde{\sigma_{%s}}}\cdot100$)' %(dim_str, dim_str, dim_str))
-    err = std*stats.norm.interval(0.9)[1]
+    # err = std*stats.norm.interval(0.9)[1]
     ax2.vlines(0, ymin=0, ymax=y.max(), color='black')
     ax2.vlines(std, ymin=0, ymax=y.max(), ls='--', color=sigma_color, label = r'$\sigma_{mod}$')
-    ax2.vlines(err, ymin=0, ymax=y.max(), ls='--', color=fill_color_dark, label = r'$\sigma_{mod}\cdot Z_{90\%}$')
-    trunc_x = x[x
 
     alpha_mid_std_dist_end = np.where(cum_sum_mid_std_dist > alpha)[0][0]
     alpha_mid_std_dist_beginning = np.where(cum_sum_mid_std_dist > 1-alpha)[0][0]
-    plt.plot(pf_x_1d, mid_std_dist, c='blue', label = 'middle normal dist')
-    plt.vlines(pf_x_1d[alpha_mid_std_dist_end], 0, np.max(mid_std_dist), color='blue', label = r'95% confidence bounds')
-    plt.vlines(pf_x_1d[alpha_mid_std_dist_beginning], 0, np.max(mid_std_dist), color='blue')
-    plt.plot(pf_x_1d, sum_dist_norm, c='orange', label = 'summed mixture dist')
+    ax[1,2].plot(pf_x_1d, mid_std_dist, c='orange', label = 'middle normal dist')
+    ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_end], 0, np.max(mid_std_dist), color='orange', label = r'95% confidence bounds')
+    ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_beginning], 0, np.max(mid_std_dist), color='orange')
+    ax[1,2].plot(pf_x_1d, sum_dist_norm, c='blue', label = 'summed mixture dist')
     # new_std_dist = normal_dist(pf_x_1d, mean=image_pf, std=0.02775)
-    # plt.plot(pf_x_1d, new_std_dist, c='green', label = 'new std dist')
-    plt.vlines(pf_x_1d[alpha_sum_dist_norm_end], 0, np.max(sum_dist_norm), color='orange', label = r'95% confidence bounds')
-    plt.vlines(pf_x_1d[alpha_sum_dist_norm_beginning], 0, np.max(sum_dist_norm), color='orange')
-
-    plt.legend()
-    plt.show()
+    # ax[1,2].plot(pf_x_1d, new_std_dist, c='green', label = 'new std dist')
+    ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_end], 0, np.max(sum_dist_norm), color='blue', label = r'95% confidence bounds')
+    ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_beginning], 0, np.max(sum_dist_norm), color='blue')
+    ax[1,2].vlines(image_pf, 0, np.max(sum_dist_norm), color='g', label = 'Observed $\Phi(\omega)$')
+    ax[1,2].set_title('Likelihood of $\phi$ (sum over rows)')
+    ax[1,2].legend()
     print(np.trapz(sum_dist_norm, pf_x_1d))
     print(np.trapz(mid_std_dist, pf_x_1d))
@@ -83,9 +105,9 @@ def plot_likelihood_of_phi(image_pf, pred_std, std_dist_std):
 
 if __name__ == "__main__":
-    image_pf = 0.3
-    pred_std = image_pf/100*2/3
-    std_dist_std = pred_std/4
+    image_pf = 0.1013
+    pred_std = 0.00121
+    std_dist_std = pred_std*0.25
     time_bf = time.time()
     plot_likelihood_of_phi(image_pf, pred_std, std_dist_std)
     print(f'time taken = {time.time()-time_bf}')
diff --git a/representativity/prediction_error.py b/representativity/prediction_error.py
index 7502b35..d7bb8ba 100755
--- a/representativity/prediction_error.py
+++ b/representativity/prediction_error.py
@@ -20,7 +20,7 @@ def error_by_size_estimation(dim, run_number=0, std_not_cls=True):
     edge_lengths_pred = data_dim['edge_lengths_pred']
     stds = []
     for edge_length in edge_lengths_pred:
-        _, _, err, std, pfs = comparison_results(data_dim, micro_names, dim,
+        _, _, _, err, std, pfs = comparison_results(data_dim, micro_names, dim,
                                                  str(edge_length), run_number, std_not_cls=std_not_cls)
         stds.append(std)
     return stds
@@ -40,26 +40,20 @@ def data_micros(dim):
 def comparison_results(data_dim, micro_names, dim, edge_length, run_number, std_not_cls=False):
     micros_data = [data_dim[n] for n in micro_names]
     pfs = np.array([m_data['vf'] for m_data in micros_data])
-    fit_data_all = np.array([m_data['fit_ir_vf'] for m_data in micros_data])
-    fit_err_pf = np.array([m_data['fit_err_vf'][edge_length] for m_data in micros_data])
-    pred_data_all = np.array([m_data[f'run_{run_number}']['pred_ir_vf'][edge_length] for m_data in micros_data])
-    # pred_ir_oi_pf = np.array([m_data[f'run_{run_number}']['pred_ir_one_im_fit_vf'][edge_length] for m_data in micros_data])
+    fit_cls_all = np.array([m_data['fit_ir_vf'] for m_data in micros_data])
+    pred_cls_all = np.array([m_data[f'run_{run_number}']['pred_ir_vf'][edge_length] for m_data in micros_data])
+    pred_cls_oi_all = np.array([m_data[f'run_{run_number}']['pred_ir_one_im_fit_vf'][edge_length] for m_data in micros_data])
 
-    pred_err_pf = np.array([m_data[f'run_{run_number}']['pred_err_vf'][edge_length] for m_data in micros_data])
     if std_not_cls:
         pfs = np.array(pfs)
         pfs_one_minus_pfs = pfs*(1-pfs)
         dim_int = int(dim[0])
         edge_length = int(edge_length)
-        pred_data_all = ((pred_data_all/edge_length)**dim_int*pfs_one_minus_pfs)**0.5
-        fit_data_all = ((fit_data_all/edge_length)**dim_int*pfs_one_minus_pfs)**0.5
-    # ir_results = [fit_ir_pf, pred_ir_pf, pred_ir_oi_pf]
-    ir_results = [fit_data_all, pred_data_all]
-    err_results = [fit_err_pf, pred_err_pf]
-    pred_data = ir_results[1]
-    fit_data = ir_results[0]
-
-    errs = (pred_data-fit_data)/pred_data # percentage error of the prediction
+        pred_cls_all = ((pred_cls_all/edge_length)**dim_int*pfs_one_minus_pfs)**0.5
+        fit_cls_all = ((fit_cls_all/edge_length)**dim_int*pfs_one_minus_pfs)**0.5
+    # ir_results = [fit_data_all, pred_data_all, pred_ir_oi_pf]
+
+    errs = (pred_cls_all-fit_cls_all)/pred_cls_all # percentage error of the prediction
     errs = np.sort(errs) # easier to see the distribution of errors
     std = np.std(errs)
     z = norm.interval(0.9)[1]
@@ -68,7 +62,7 @@ def comparison_results(data_dim, micro_names, dim, edge_length, run_number, std_
     # print(f'mean = {np.mean(errs)}')
     # print(f'mape = {np.mean(np.abs(errs))}')
     # print(f'error = {err}')
-    return pred_data, fit_data, err, std, pfs
+    return pred_cls_all, fit_cls_all, pred_cls_oi_all, err, std, pfs
 
 def pred_vs_fit_all_data(dim, edge_length, num_runs=5, std_not_cls=True):
     pred_data_all = []
     pred_data_oi_all = []
     fit_data_all = []
     stds = []
     pfs_all = []
     for i in range(num_runs):
         data_micro = data_micros(dim)
-        pred_data, fit_data, _, std, pfs = comparison_results(*data_micro, dim, edge_length, i, std_not_cls=std_not_cls)
+        pred_data, fit_data, pred_data_oi, _, std, pfs = comparison_results(*data_micro, dim, edge_length, i, std_not_cls=std_not_cls)
         pred_data_all.append(pred_data)
         # pred_data_oi_all.append(pred_oi_data)
         fit_data_all.append(fit_data)
+        pred_data_oi_all.append(pred_data_oi)
         stds.append(std)
         pfs_all.append(pfs)
     pred_data_all = np.concatenate(pred_data_all)
     # pred_data_oi_all = np.concatenate(pred_data_oi_all)
     fit_data_all = np.concatenate(fit_data_all)
+    pred_data_oi_all = np.concatenate(pred_data_oi_all)
     pfs_all = np.concatenate(pfs_all)
     std = np.array(stds).sum(axis=0)/num_runs
-    return pred_data_all, fit_data_all, std, pfs_all
+    return pred_data_all, fit_data_all, pred_data_oi_all, std, pfs_all
 
 def plot_pred_vs_fit(dim, edge_length, num_runs=5, std_not_cls=True):
-    pred_data_all, fit_data_all, _, pfs = pred_vs_fit_all_data(dim, edge_length, num_runs, std_not_cls=std_not_cls)
+    pred_data_all, fit_data_all, _, _, pfs = pred_vs_fit_all_data(dim, edge_length, num_runs, std_not_cls=std_not_cls)
     pred_data_all, fit_data_all = np.array(pred_data_all), np.array(fit_data_all)
     errs = (fit_data_all-pred_data_all)/pred_data_all # percentage error of the prediction
@@ -165,7 +161,7 @@ def optimal_slopes(dim, num_runs=5):
     slopes = []
     stds = []
     for edge_length in edge_lengths_pred:
-        pred_data, fit_data, std, pfs = pred_vs_fit_all_data(dim, str(edge_length), num_runs)
+        pred_data, fit_data, _, std, pfs = pred_vs_fit_all_data(dim, str(edge_length), num_runs)
         stds.append(std)
         optimal_slope = find_optimal_slope(pred_data, fit_data)
         slopes.append(optimal_slope)
diff --git a/representativity/validation.py b/representativity/validation.py
index 898fb6c..4d3b8db 100755
--- a/representativity/validation.py
+++ b/representativity/validation.py
@@ -137,6 +137,7 @@ def ps_error_prediction(dim, data, confidence, error_target):
             start_idx = [np.random.randint(0, large_shape[i]-edge_length) for i in range(int(dim[0]))]
             end_idx = [start_idx[i]+edge_length for i in range(int(dim[0]))]
             small_im = large_im_stack[0][start_idx[0]:end_idx[0], start_idx[1]:end_idx[1]]
+            # np.save(f'./small_im_{gen_name}_{args}_{edge_length}.npy', small_im)
             small_im_pf = torch.mean(small_im)
             one_im_stat_analysis_cls = util.one_img_stat_analysis_error(small_im, small_im.mean())
             print(f'One image stat analysis cls: {one_im_stat_analysis_cls}')
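[Reviewer's note on PATCH 1] The conversion used above between a characteristic length scale (CLS) and the phase-fraction standard deviation is std = ((cls/L)**d * pf*(1-pf))**0.5. A minimal, self-contained sketch of that relation; the helper name cls_to_std and the example values are illustrative, not from the repo:

    def cls_to_std(cls, edge_length, pf, dim):
        # (cls/edge_length)**dim is the reciprocal of the number of
        # independent characteristic volumes in the image; each volume
        # contributes a Bernoulli-like variance pf*(1-pf).
        return ((cls / edge_length) ** dim * pf * (1 - pf)) ** 0.5

    # e.g. the 2D case above uses edge length 1536:
    sigma_2 = cls_to_std(cls=12.0, edge_length=1536, pf=0.4, dim=2)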
From 1eeec8ddb53c1f15e2293f2405827289ee8d5cea Mon Sep 17 00:00:00 2001
From: amirDahari1
Date: Fri, 26 Jul 2024 21:38:26 +0100
Subject: [PATCH 2/6] continuing fig err

---
 paper_figures/fig_model_error.py | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/paper_figures/fig_model_error.py b/paper_figures/fig_model_error.py
index dd435c6..5994845 100755
--- a/paper_figures/fig_model_error.py
+++ b/paper_figures/fig_model_error.py
@@ -6,27 +6,35 @@
 
 # fill_color = [1, 0.49803922, 0.05490196, 0.2]
 sigma_color = [0.4, 0.9, 0.0, 1]
+model_color = "tab:cyan"
+oi_color = "tab:orange"
 # fill_color_dark = fill_color.copy()
 # fill_color_dark[-1] = 0.5
 
 def scatter_plot(ax, res, title, xlabel, ylabel):
     pred_data, fit_data, oi_data = res
+    max_val = (np.max([np.max(fit_data), np.max(pred_data)]))
+    x = np.linspace(0,max_val,100)
+    ax.plot(x, x, label = 'Ideal predictions', color='black')
+
+    ax.scatter(fit_data, oi_data, alpha=0.7, s=0.3, c=oi_color, label='Classical predictions')
+    oi_errs = (fit_data-oi_data)/oi_data
+    oi_mean = np.mean(oi_errs)
+    ax.plot(x, x*(1-oi_mean), color=oi_color, linestyle='--', dashes=[2.5, 5], label="Mean")
 
-    ax.scatter(fit_data, oi_data, s=0.2, label='Classical predictions')
-    ax.scatter(fit_data, pred_data, s=0.2, c="orange", label='Model predictions')
-
+    ax.scatter(fit_data, pred_data, alpha=0.5, s=0.3, c=model_color, label="Our model's predictions")
+    model_errs = (fit_data-pred_data)/pred_data
+    model_mean = np.mean(model_errs)
+    ax.plot(x, x*(1-model_mean), color=model_color, linestyle='--', dashes=[2.5, 5], label="Mean")
+
     ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
     ax.set_title(title)
 
-    errs = (fit_data-pred_data)/pred_data
-    max_val = (np.max([np.max(fit_data), np.max(pred_data)]))
-    x = np.linspace(0,max_val,100)
-    ax.plot(x, x, label = 'Ideal predictions', color='black')
-    errs = np.sort(errs)
-    std = np.std(errs)
+    model_errs = np.sort(model_errs)
+    std = np.std(model_errs)
 
     # z = stats.norm.interval(0.9)[1]
@@ -79,7 +87,7 @@ def scatter_plot(ax, res, title, xlabel, ylabel):
 
         scatter_plot(ax, res, ax_title, ax_xlabel, ax_ylabel)
 
-        if (j == 0 or not with_cls) and dim == '2D':
+        if (j == 0 or not with_cls) :
             ax.legend(loc='upper left')
 
     # Fit a normal distribution to the data:
@@ -94,7 +102,7 @@ def scatter_plot(ax, res, title, xlabel, ylabel):
 
     counts, bins = np.histogram(errs)
     max_val = np.max([np.max(errs), -np.min(errs)])
-    y, x, _ = ax2.hist(errs, range=[-max_val, max_val], bins=50, alpha=0.6, density=True)
+    y, x, _ = ax2.hist(errs, range=[-max_val, max_val], bins=50, alpha=0.6, color=model_color, density=True)
 
     # Plot the PDF.
     xmin, xmax = x.min(), x.max()
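[Reviewer's note on PATCH 2] The dashed trend lines added here follow from the relative-error definition: with errs = (fit - pred)/pred, the average prediction for a true value x sits near y = x*(1 - mean(errs)). A small sketch of that reasoning, with illustrative data (not from the repo):

    import numpy as np

    fit = np.array([1.0, 2.0, 3.0])    # "true" fitted values, illustrative
    pred = np.array([1.1, 1.9, 3.3])   # model predictions, illustrative
    errs = (fit - pred) / pred         # relative errors, as in scatter_plot
    mean_err = errs.mean()
    # for small errors, pred ~= fit * (1 - mean_err), i.e. the dashed
    # line y = x * (1 - mean) drawn through each scatter above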
From 4178f968cf6609573bb29699398a58e4cd4b1eeb Mon Sep 17 00:00:00 2001
From: amirDahari1
Date: Sun, 28 Jul 2024 11:26:29 +0100
Subject: [PATCH 3/6] finished fig total error

---
 paper_figures/fig_model_error.py     | 61 +++++++++++-----------
 paper_figures/prediction interval.py | 58 +++++++++++++++-----------
 2 files changed, 58 insertions(+), 61 deletions(-)

diff --git a/paper_figures/fig_model_error.py b/paper_figures/fig_model_error.py
index 5994845..83aa583 100755
--- a/paper_figures/fig_model_error.py
+++ b/paper_figures/fig_model_error.py
@@ -4,14 +4,12 @@
 import math
 import numpy as np
 
-# fill_color = [1, 0.49803922, 0.05490196, 0.2]
-sigma_color = [0.4, 0.9, 0.0, 1]
+sigma_color = "tab:pink"
 model_color = "tab:cyan"
 oi_color = "tab:orange"
-# fill_color_dark = fill_color.copy()
-# fill_color_dark[-1] = 0.5
+dashes = [2.5, 5]
 
-def scatter_plot(ax, res, title, xlabel, ylabel):
+def scatter_plot(ax, res, xlabel, ylabel):
     pred_data, fit_data, oi_data = res
     max_val = (np.max([np.max(fit_data), np.max(pred_data)]))
@@ -21,27 +19,19 @@ def scatter_plot(ax, res, xlabel, ylabel):
 
     ax.scatter(fit_data, oi_data, alpha=0.7, s=0.3, c=oi_color, label='Classical predictions')
     oi_errs = (fit_data-oi_data)/oi_data
     oi_mean = np.mean(oi_errs)
-    ax.plot(x, x*(1-oi_mean), color=oi_color, linestyle='--', dashes=[2.5, 5], label="Mean")
+    ax.plot(x, x*(1-oi_mean), color=oi_color, linestyle='--', dashes=[2.5, 5], label="Mean predictions")
 
     ax.scatter(fit_data, pred_data, alpha=0.5, s=0.3, c=model_color, label="Our model's predictions")
     model_errs = (fit_data-pred_data)/pred_data
     model_mean = np.mean(model_errs)
-    ax.plot(x, x*(1-model_mean), color=model_color, linestyle='--', dashes=[2.5, 5], label="Mean")
+    ax.plot(x, x*(1-model_mean), color=model_color, linestyle='--', dashes=[2.5, 5], label="Mean predictions")
 
-    ax.set_xlabel(xlabel)
     ax.set_ylabel(ylabel)
-    ax.set_title(title)
 
     model_errs = np.sort(model_errs)
     std = np.std(model_errs)
 
-    # z = stats.norm.interval(0.9)[1]
-    # err = std*z
-    # print(f'std = {std}')
-    # print(f'mean = {np.mean(errs)}')
-    # ax.plot(x ,x/(1+err), c=fill_color_dark, ls='--', linewidth=1)
-    # fill_1 = ax.fill_between(x, np.ones(x.shape[0])*(max_val),x/(1+err), alpha=0.2, label = f'95% confidence range', color=fill_color)
     ax.set_aspect('equal', adjustable='box')
 
 with_cls = False
@@ -58,17 +48,13 @@ for i, dim in enumerate(dims):
     dim_int = int(dim[0])
     cur_edge_length = int(edge_length[i])
     std_results = [((cls_res/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5 for cls_res in cls_results]
-    # pred_std_all = ((pred_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5
-    # fit_std_all = ((fit_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5
-    # std_results = [pred_std_all, fit_std_all]
     dim_str = dim[0]
     x_labels = [f'True CLS $a_{int(dim[0])}$', f'True Phase Fraction std $\sigma_{int(dim[0])}$']
     cls_math = r'\tilde{a}_{2}' if dim == '2D' else r'\tilde{a}_{3}'
     std_math = r'\tilde{\sigma}_{2}' if dim == '2D' else r'\tilde{\sigma}_{3}'
     y_labels = ['Predicted CLS $%s$' %cls_math, 'Predicted Phase Fraction std $%s$' %std_math]
-    title_suffix = r'img size $%s^%s$' %(edge_length[i], dim_str)
-    titles = [f'{dim} CLS comparison, '+title_suffix, f'{dim} std comparison, '+title_suffix]
-    # sa_results = [err_exp_sa[pred_err_sa!=math.isnan], pred_err_sa[pred_err_sa!=math.nan]]
+    # title_suffix = r'Image size $%s^%s$' %(edge_length[i], dim_str)
+    # titles = [f'{dim} CLS comparison, '+title_suffix, f'{dim} std comparison, '+title_suffix]
 
     for j, res in enumerate([cls_results, std_results]):
         ax_idx = j
@@ -80,14 +66,12 @@ for i, dim in enumerate(dims):
 
         ax = axs[i, ax_idx]
 
-        # print(f'slope = {slope} and intercept = {intercept}')
         ax_xlabel = x_labels[j]
         ax_ylabel = y_labels[j]
-        ax_title = titles[j]
 
-        scatter_plot(ax, res, ax_title, ax_xlabel, ax_ylabel)
+        scatter_plot(ax, res, ax_xlabel, ax_ylabel)
 
-        if (j == 0 or not with_cls) :
+        if ((j == 0 or not with_cls) and i == 0):
             ax.legend(loc='upper left')
 
     # Fit a normal distribution to the data:
@@ -102,7 +86,8 @@ for i, dim in enumerate(dims):
 
     counts, bins = np.histogram(errs)
     max_val = np.max([np.max(errs), -np.min(errs)])
-    y, x, _ = ax2.hist(errs, range=[-max_val, max_val], bins=50, alpha=0.6, color=model_color, density=True)
+    y, x, _ = ax2.hist(errs, range=[-max_val, max_val], bins=50, alpha=0.6, color=model_color, density=True, label="Our model's predictions")
+    mean_percentage_error = np.mean(errs)
 
     # Plot the PDF.
     xmin, xmax = x.min(), x.max()
@@ -110,18 +95,14 @@ for i, dim in enumerate(dims):
     x = np.linspace(xmin, xmax, 100)
     p = stats.norm.pdf(x, mu, std)
 
-    ax2.plot(x, p, linewidth=2, label=f'Fitted normal distribution')
-    title = f'{dim} std PE distribution, {title_suffix}'
-    ax2.set_title(title)
     ax2.set_xlabel(r'Prediction Percentage Error ($\frac{\sigma_{%s}-\tilde{\sigma_{%s}}}{\tilde{\sigma_{%s}}}\cdot100$)' %(dim_str, dim_str, dim_str))
-    # err = std*stats.norm.interval(0.9)[1]
     ax2.vlines(0, ymin=0, ymax=y.max(), color='black')
     ax2.vlines(std, ymin=0, ymax=y.max(), ls='--', color=sigma_color, label = r'$\sigma_{mod}$')
diff --git a/paper_figures/prediction interval.py b/paper_figures/prediction interval.py
--- a/paper_figures/prediction interval.py
+++ b/paper_figures/prediction interval.py
@@ -94,7 +100,7 @@ def plot_likelihood_of_phi(image_pf, pred_std, std_dist_std):
 
     alpha_mid_std_dist_end = np.where(cum_sum_mid_std_dist > alpha)[0][0]
     alpha_mid_std_dist_beginning = np.where(cum_sum_mid_std_dist > 1-alpha)[0][0]
-    ax[1,2].plot(pf_x_1d, mid_std_dist, c='orange', label = 'middle normal dist')
-    ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_end], 0, np.max(mid_std_dist), color='orange', label = r'95% confidence bounds')
-    ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_beginning], 0, np.max(mid_std_dist), color='orange')
-    ax[1,2].plot(pf_x_1d, sum_dist_norm, c='blue', label = 'summed mixture dist')
+
+    ax[1,2].plot(pf_x_1d, sum_dist_norm, c='blue', label = "Likelihood of true phase fraction\nusing integration of (e)'s rows")
     # new_std_dist = normal_dist(pf_x_1d, mean=image_pf, std=0.02775)
     # ax[1,2].plot(pf_x_1d, new_std_dist, c='green', label = 'new std dist')
-    ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_end], 0, np.max(sum_dist_norm), color='blue', label = r'95% confidence bounds')
-    ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_beginning], 0, np.max(sum_dist_norm), color='blue')
-    ax[1,2].vlines(image_pf, 0, np.max(sum_dist_norm), color='g', label = 'Observed $\Phi(\omega)$')
-    ax[1,2].set_title('Likelihood of $\phi$ (sum over rows)')
+    ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_end], 0, np.max(sum_dist_norm), linestyle="--", color='blue', label = r'95% confidence bounds')
+    ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_beginning], 0, np.max(sum_dist_norm), linestyle="--", color='blue')
+    ax[1,2].vlines(image_pf, 0, np.max(sum_dist_norm), linestyle="--", color='g', label = 'Observed phase fraction $\Phi(\omega)$')
+    ax[1,2].plot(pf_x_1d, mid_std_dist, c='orange', label = '(c)')
+    ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_beginning], 0, np.max(sum_dist_norm), linestyle="--", color='orange')
+    ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_end], 0, np.max(sum_dist_norm), linestyle="--", color='orange', label = r'(c) 95% confidence interval')
+    ax[1,2].set_xlabel('Phase fraction')
+    ax[1,2].set_title('(f)')
+    ax[1,2].set_yticks([0,300])
     ax[1,2].legend()
     print(np.trapz(sum_dist_norm, pf_x_1d))
     print(np.trapz(mid_std_dist, pf_x_1d))
@@ -104,10 +113,11 @@ def plot_likelihood_of_phi(image_pf, pred_std, std_dist_std):
 
 
 
+
 if __name__ == "__main__":
     image_pf = 0.1013
     pred_std = 0.00121
-    std_dist_std = pred_std*0.25
+    std_dist_std = pred_std*0.27
     time_bf = time.time()
     plot_likelihood_of_phi(image_pf, pred_std, std_dist_std)
     print(f'time taken = {time.time()-time_bf}')
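[Reviewer's note on PATCH 3] The model-error panel fits a normal distribution to the percentage errors and marks sigma_mod as a dashed vline. A minimal sketch of that fit, using the same scipy calls as the figure code; the synthetic errors are illustrative, not repo data:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(0)
    errs = rng.normal(0.0, 10.0, 1000)   # stand-in percentage errors
    mu, std = stats.norm.fit(errs)       # same call as in fig_model_error.py
    sigma_mod = std                      # the sigma_mod vline in the histogram
    z90 = stats.norm.interval(0.9)[1]    # ~1.645, as used elsewhere in the series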
From 80960489dbb06ce1cf1ac798fce8a96022c2c09f Mon Sep 17 00:00:00 2001
From: amirDahari1
Date: Mon, 29 Jul 2024 18:56:26 +0100
Subject: [PATCH 4/6] starting to re-do fig2

---
 paper_figures/fig2.py                | 167 +++++++++++++++------------
 paper_figures/prediction interval.py |  21 ++--
 2 files changed, 106 insertions(+), 82 deletions(-)

diff --git a/paper_figures/fig2.py b/paper_figures/fig2.py
index f606f0a..18f4de2 100755
--- a/paper_figures/fig2.py
+++ b/paper_figures/fig2.py
@@ -7,97 +7,114 @@
 from scipy.optimize import curve_fit
 from scipy import stats
 import torch
-from representativity import util
+from representativity import util, microlib_statistics
 from torch.nn.functional import interpolate
 
-with open("data_gen2.json", "r") as fp:
-    data = json.load(fp)["generated_data"]
+# with open("data_gen2.json", "r") as fp:
+#     data = json.load(fp)["generated_data"]
 
-l=len(list(data.keys()))
+# l=len(list(data.keys()))
 # l=3
 c=[(0,0,0), (0.5,0.5,0.5)]
 # plotting = [f'microstructure{f}' for f in [235, 209,205,177]]
-plotting = [f'microstructure{f}' for f in [235, 228, 205,177]]
+plotting_ims = [f'microstructure{f}' for f in [235, 228, 205,177]]
 # plotting = [k for k in data.keys()]
-l = len(plotting)
+l = len(plotting_ims)
 fig, axs = plt.subplots(l, 3)
 fig.set_size_inches(12, l*4)
 preds = [[],[]]
 irs = [[],[]]
-sas =[]
-i=0
-for n in list(data.keys()):
-    if n not in plotting:
+
+all_data, micros, netG, v_names, run_v_names = microlib_statistics.json_preprocessing()
+lens_for_fit = list(range(500, 1000, 20)) # lengths of images for fitting L_characteristic
+plotting_ims = [micro for micro in micros if micro.split('/')[-1] in plotting_ims]
+# run the statistical analysis on the microlib dataset
+for i, p in enumerate(plotting_ims):
+
+    try:
+        netG.load_state_dict(torch.load(p + "_Gen.pt"))
+    except: # if the image is grayscale it raises an exception, because there's only 1 channel
         continue
-    img = tifffile.imread(f'/home/amir/microlibDataset/{n}/{n}.tif')
-    d = data[n]
-    d1 = data[n]
-    
+    imsize = 1600
+    lf = imsize//32 + 2 # the size of G's input
+    many_images = util.generate_image(netG, lf=lf, threed=False, reps=50)
+    pf = many_images.mean().cpu().numpy()
+    small_imsize = 512
+    img = many_images[0].detach().cpu().numpy()[:small_imsize, :small_imsize]
 
     csets = [['black', 'black'], ['gray', 'gray']]
-    for j, met in enumerate(['vf', 'sa']):
-        cs = csets[j]
-        img_dims = [np.array([int(im_len)]*2) for im_len in d['ls']]
-        ns = util.ns_from_dims(img_dims, d[f'ir_{met}'])
-        berns_vf = util.bernouli(d[f'{met}'], ns)
-        axs[i, 1].scatter(d['ls'], d[f'err_exp_{met}'], c=cs[0], s=8, marker = 'x', label = f'{met} errors from sampling')
-        axs[i, 1].plot(d['ls'], berns_vf, c=cs[0], label = f'{met} errors from fitted IR')
-        # axs[i, 1].plot(d[f'ls'], d[f'err_model_{met}'], c=cs[0], label = f'{met} errors from bernouli')
-        y = d[f'tpc_{met}'][0] # TODO write that in sa tpc, only the first direction is shown, or do something else, maybe normalise the tpc? We can do sum, because that's how we calculate the ir!
-        x = d[f'tpc_{met}_dist']
-        y = np.array(y)
+    conf = 0.95
+    errs = util.real_image_stats(many_images, lens_for_fit, pf, conf=conf)
+    stds = errs / stats.norm.interval(conf)[1] * pf / 100
+    # print(stds)
+    vars = stds**2
+    # from variances to L_characteristic using image size and phase fraction
+    clss = (np.array(lens_for_fit)**2*vars/pf/(1-pf))**0.5
+    print(clss)
+
+    # cs = csets[j]
+    # img_dims = [np.array([int(im_len)]*2) for im_len in d['ls']]
+    # ns = util.ns_from_dims(img_dims, d[f'ir_{met}'])
+    # berns_vf = util.bernouli(d[f'{met}'], ns)
+    # axs[i, 1].scatter(d['ls'], d[f'err_exp_{met}'], c=cs[0], s=8, marker = 'x', label = f'{met} errors from sampling')
+    axs[i, 1].plot(lens_for_fit, clss, label = f'Characteristic length scale')
+    axs[i, 1].set_ylim(0, 28)
+    # # axs[i, 1].plot(d[f'ls'], d[f'err_model_{met}'], c=cs[0], label = f'{met} errors from bernouli')
+    # y = d[f'tpc_{met}'][0] # TODO write that in sa tpc, only the first direction is shown, or do something else, maybe normalise the tpc? We can do sum, because that's how we calculate the ir!
+    # x = d[f'tpc_{met}_dist']
+    # y = np.array(y)
 
-        # TODO erase this afterwards:
-        if met=='vf':
-            ir = np.round(d[f'ir_vf'], 1)
-            axs[i, 2].plot(x, y, c=cs[1], label=f'Volume fraction 2PC')
-            axs[i, 2].axhline(d['vf']**2, linestyle='dashed', label='$p^2$')
-            axs[i, 2].plot([0,ir],[d['vf']**2-0.02, d['vf']**2-0.02], c='green', linewidth=3, label=r'$\tilde{a}_2$')
-            ticks = [0, int(ir), 20, 40, 60, 80, 100]
-            ticklabels = map(str, ticks)
-            axs[i, 2].set_xticks(ticks)
-            axs[i, 2].set_xticklabels(ticklabels)
-            axs[i,2].fill_between(x, d['vf']**2, y, alpha=0.5, label=f'Integrated part')
-            axs[i,2].legend()
+    # # TODO erase this afterwards:
+    # if met=='vf':
+    #     ir = np.round(d[f'ir_vf'], 1)
+    #     axs[i, 2].plot(x, y, c=cs[1], label=f'Volume fraction 2PC')
+    #     axs[i, 2].axhline(d['vf']**2, linestyle='dashed', label='$p^2$')
+    #     axs[i, 2].plot([0,ir],[d['vf']**2-0.02, d['vf']**2-0.02], c='green', linewidth=3, label=r'$\tilde{a}_2$')
+    #     ticks = [0, int(ir), 20, 40, 60, 80, 100]
+    #     ticklabels = map(str, ticks)
+    #     axs[i, 2].set_xticks(ticks)
+    #     axs[i, 2].set_xticklabels(ticklabels)
+    #     axs[i,2].fill_between(x, d['vf']**2, y, alpha=0.5, label=f'Integrated part')
+    #     axs[i,2].legend()
 
-        # axs[i, 2].scatter(x[knee], y_data[knee]/y.max(), c =cs[1], marker = 'x', label=f'{met} ir from tpc', s=100)
-        ir = d[f'ir_{met}']
-        pred_ir = util.tpc_to_ir(d[f'tpc_{met}_dist'], d[f'tpc_{met}'])
-        pred_ir = pred_ir * 1.61
-        # axs[i, 2].scatter(x[round(pred_ir)], y[round(pred_ir)], c =cs[1], marker = 'x', label=f'{met} predicted tpc IR', s=100)
-        # axs[i, 2].scatter(x[round(ir)], y[round(ir)], facecolors='none', edgecolors = cs[1], label=f'{met} fitted IR', s=100)
-
-        irs[j].append(ir)
-    if i ==0:
-        axs[i,1].legend()
-        axs[i,2].legend()
-    axs[i,1].set_xlabel('Image length size [pixels]')
-    axs[i,1].set_ylabel('Volume fraction percentage error [%]')
-    axs[i,2].set_ylabel('2-point correlation function')
-    ir = np.round(d[f'ir_vf'], 2)
-    sas.append(d['sa'])
-    im = img[0]*255
-    si_size, nirs = 160, 5
-    sicrop = int(ir*nirs)
-    print(ir, sicrop)
-    subim=torch.tensor(im[-sicrop:,-sicrop:]).unsqueeze(0).unsqueeze(0).float()
-    subim = interpolate(subim, size=(si_size,si_size), mode='nearest')[0,0]
-    subim = np.stack([subim]*3, axis=-1)
+    # # axs[i, 2].scatter(x[knee], y_data[knee]/y.max(), c =cs[1], marker = 'x', label=f'{met} ir from tpc', s=100)
+    # ir = d[f'ir_{met}']
+    # pred_ir = util.tpc_to_ir(d[f'tpc_{met}_dist'], d[f'tpc_{met}'])
+    # pred_ir = pred_ir * 1.61
+    # # axs[i, 2].scatter(x[round(pred_ir)], y[round(pred_ir)], c =cs[1], marker = 'x', label=f'{met} predicted tpc IR', s=100)
+    # # axs[i, 2].scatter(x[round(ir)], y[round(ir)], facecolors='none', edgecolors = cs[1], label=f'{met} fitted IR', s=100)
+
+    # irs[j].append(ir)
+    # if i ==0:
+    #     axs[i,1].legend()
+    #     axs[i,2].legend()
+    # axs[i,1].set_xlabel('Image length size [pixels]')
+    # axs[i,1].set_ylabel('Volume fraction percentage error [%]')
+    # axs[i,2].set_ylabel('2-point correlation function')
+    # ir = np.round(d[f'ir_vf'], 2)
+    # im = img[0]*255
+    # si_size, nirs = 160, 5
+    # sicrop = int(ir*nirs)
+    # print(ir, sicrop)
+    # subim=torch.tensor(im[-sicrop:,-sicrop:]).unsqueeze(0).unsqueeze(0).float()
+    # subim = interpolate(subim, size=(si_size,si_size), mode='nearest')[0,0]
+    # subim = np.stack([subim]*3, axis=-1)
 
-    subim[:5,:,:] = 125
-    subim[:,:5,:] = 125
-    # subim[5:20, 5:50, :]
-    subim[10:15, 10:10+si_size//nirs, :] = 0
-    subim[10:15, 10:10+si_size//nirs, 1:-1] = 125
-
-    im = np.stack([im]*3, axis=-1)
-    im[-si_size:,-si_size:] = subim
-    axs[i, 0].imshow(im)
-    axs[i, 0].set_xticks([])
-    axs[i, 0].set_yticks([])
-    axs[i, 0].set_ylabel(f'M{n[1:]}')
-    axs[i, 0].set_xlabel(f'Volume fraction '+ r'$\tilde{a}_2$: '+ f'{ir} Inset mag: x{np.round(si_size/sicrop, 2)}')
+    # subim[:5,:,:] = 125
+    # subim[:,:5,:] = 125
+    # # subim[5:20, 5:50, :]
+    # subim[10:15, 10:10+si_size//nirs, :] = 0
+    # subim[10:15, 10:10+si_size//nirs, 1:-1] = 125
+
+    # im = np.stack([im]*3, axis=-1)
+    # im[-si_size:,-si_size:] = subim
+    # axs[i, 0].imshow(im)
+    # axs[i, 0].set_xticks([])
+    # axs[i, 0].set_yticks([])
+    # axs[i, 0].set_ylabel(f'M{n[1:]}')
+    # axs[i, 0].set_xlabel(f'Volume fraction '+ r'$\tilde{a}_2$: '+ f'{ir} Inset mag: x{np.round(si_size/sicrop, 2)}')
 
     i+=1
diff --git a/paper_figures/prediction interval.py b/paper_figures/prediction interval.py
index cc49d4b..f6e4a20 100644
--- a/paper_figures/prediction interval.py
+++ b/paper_figures/prediction interval.py
@@ -12,8 +12,8 @@ def plot_likelihood_of_phi(image_pf, pred_std, std_dist_std):
     plt.subplots_adjust(wspace=0.4, hspace=0.4)
     im = np.load('im_for_prediction_interval_fig.npy')
     ax[0,0].imshow(im[:200,:200], cmap='gray')
-    ax[0,0].set_xticks([0,150])
-    ax[0,0].set_yticks([0,150])
+    ax[0,0].set_xticks([])
+    ax[0,0].set_yticks([])
     ax[0,0].set_title('(a)')
     # ax[0,0].set_ylabel('Sample image')
     # ax[0,0].axis('off')
@@ -42,28 +42,34 @@ def plot_likelihood_of_phi(image_pf, pred_std, std_dist_std):
     # before normalising by weight:
     pf_dist_before_norm = util.normal_dist(pf_mesh, mean=pf_locs, std=std_mesh)
     mid_std_dist = pf_dist_before_norm[std_dist_divisions//2,:]
-    ax[0,2].plot(pf_x_1d, mid_std_dist, c='orange', label='Likelihood of true phase \nfraction, completely trusting\nstd prediction as true std')
+    ax[0,2].plot(pf_x_1d, mid_std_dist, c='orange', label='Likelihood of the material\nphase fraction, trusting\nstd prediction as true std')
     ax[0,2].vlines(image_pf, 0, np.max(mid_std_dist), linestyle="--", color='g', label = 'Observed phase fraction $\Phi(\omega)$')
     ax[0,2].set_title('(c)')
     ax[0,2].set_xlabel('Phase fraction')
     ax[0,2].set_yticks([0,300])
     ax[0,2].legend()
 
-    ax[1,0].contourf(pf_mesh, std_mesh, pf_dist_before_norm, levels=100, cmap = 'plasma')
+    c1 = ax[1,0].contourf(pf_mesh, std_mesh, pf_dist_before_norm, levels=100, cmap = 'plasma')
+    for c in c1.collections:
+        c.set_edgecolor("face")
     # ax[1,0].vlines(image_pf, x_std_dist_bounds[0], x_std_dist_bounds[1], colors='g', label = 'Observed $\Phi(\omega)$')
-    ax[1,0].hlines(pred_std, pf_x_bounds[0], pf_x_bounds[1], colors='orange', label = "(c) - Likelihood of true phase\nfraction using predicted std")
+    ax[1,0].hlines(pred_std, pf_x_bounds[0], pf_x_bounds[1], colors='orange', label = "(c) - Likelihood of the material\nphase fraction using predicted\nstd")
     ax[1,0].set_title('(d)')
     ax[1,0].set_xlabel('Phase fraction')
     ax[1,0].set_ylabel('Standard deviation')
+    ax[1,0].set_yticks(ax[1,0].get_yticks()[1:-1:2])
     ax[1,0].legend()
 
     # normalise by weight:
     pf_dist = (pf_dist_before_norm.T * std_dist).T
-    contour = ax[1,1].contourf(pf_mesh, std_mesh, pf_dist, levels=100, cmap = 'plasma')
+    c2 = ax[1,1].contourf(pf_mesh, std_mesh, pf_dist, levels=100, cmap = 'plasma')
+    for c in c2.collections:
+        c.set_edgecolor("face")
     ax[1,1].set_title('(e)')
     ax[1,1].set_xlabel('Phase fraction')
     ax[1,1].set_ylabel('Standard deviation')
     ax[1,1].vlines(image_pf, x_std_dist_bounds[0], x_std_dist_bounds[1], colors='g', label = 'Multiplying each row by std\nlikelihood weights of (b)')
     ax[1,1].hlines(pred_std, pf_x_bounds[0], pf_x_bounds[1], colors='orange', label = '(c) after multiplied by the\ncenter weight of (b)')
+    ax[1,1].set_yticks(ax[1,1].get_yticks()[1:-1:2])
     ax[1,1].legend()
     pf_dist_integral_col = np.trapz(pf_dist, x_std_dist, axis=0)
     pf_dist_integral = np.trapz(pf_dist_integral_col, pf_x_1d)
@@ -94,7 +100,7 @@ def plot_likelihood_of_phi(image_pf, pred_std, std_dist_std):
     alpha_mid_std_dist_end = np.where(cum_sum_mid_std_dist > alpha)[0][0]
     alpha_mid_std_dist_beginning = np.where(cum_sum_mid_std_dist > 1-alpha)[0][0]
 
-    ax[1,2].plot(pf_x_1d, sum_dist_norm, c='blue', label = "Likelihood of true phase fraction\nusing integration of (e)'s rows")
+    ax[1,2].plot(pf_x_1d, sum_dist_norm, c='blue', label = "Likelihood of the material\nphase fraction by integrating\nthe rows of (e)")
     # new_std_dist = normal_dist(pf_x_1d, mean=image_pf, std=0.02775)
     # ax[1,2].plot(pf_x_1d, new_std_dist, c='green', label = 'new std dist')
     ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_end], 0, np.max(sum_dist_norm), linestyle="--", color='blue', label = r'95% confidence bounds')
@@ -109,6 +115,7 @@ def plot_likelihood_of_phi(image_pf, pred_std, std_dist_std):
     ax[1,2].legend()
     print(np.trapz(sum_dist_norm, pf_x_1d))
     print(np.trapz(mid_std_dist, pf_x_1d))
+    plt.savefig('prediction_interval.pdf', format='pdf')
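[Reviewer's note on PATCH 4] Panel (f) integrates a mixture of normals over candidate standard deviations to get the likelihood of the true phase fraction. A self-contained sketch of that integration, using the parameter values from the script's __main__ block; the grid ranges and the helper name normal_pdf are illustrative, not from the repo:

    import numpy as np

    def normal_pdf(x, mean, std):
        return np.exp(-0.5 * ((x - mean) / std) ** 2) / (std * (2 * np.pi) ** 0.5)

    image_pf, pred_std = 0.1013, 0.00121
    std_dist_std = pred_std * 0.25
    # weight each candidate std by its own likelihood, then integrate over
    # std to obtain the "summed mixture dist" drawn in panel (f)
    stds = np.linspace(pred_std * 0.5, pred_std * 1.5, 201)
    weights = normal_pdf(stds, pred_std, std_dist_std)
    pfs = np.linspace(image_pf - 6 * pred_std, image_pf + 6 * pred_std, 401)
    mixture = np.trapz(weights[:, None] * normal_pdf(pfs[None, :], image_pf, stds[:, None]), stds, axis=0)
    mixture /= np.trapz(mixture, pfs)   # normalise, like sum_dist_norm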
From 11127594a564395be3f251da7bdc63664168b42f Mon Sep 17 00:00:00 2001
From: amirDahari1
Date: Tue, 30 Jul 2024 11:10:03 +0100
Subject: [PATCH 5/6] remaking fig2 with fft and radial tpc

---
 paper_figures/fig2.py | 62 +++++++++++++++++++------------------------
 1 file changed, 28 insertions(+), 34 deletions(-)

diff --git a/paper_figures/fig2.py b/paper_figures/fig2.py
index 18f4de2..0b9656d 100755
--- a/paper_figures/fig2.py
+++ b/paper_figures/fig2.py
@@ -9,6 +9,7 @@
 import torch
 from representativity import util, microlib_statistics
 from torch.nn.functional import interpolate
+from mpl_toolkits.axes_grid1 import make_axes_locatable
 
 # with open("data_gen2.json", "r") as fp:
 #     data = json.load(fp)["generated_data"]
@@ -22,9 +23,10 @@
 # plotting = [k for k in data.keys()]
 l = len(plotting_ims)
 fig, axs = plt.subplots(l, 3)
-fig.set_size_inches(12, l*4)
+fig.set_size_inches(12, l*3.5)
 preds = [[],[]]
 irs = [[],[]]
+colors = {"cls": 'tab:orange', "stds": 'tab:green'}
 
 all_data, micros, netG, v_names, run_v_names = microlib_statistics.json_preprocessing()
 lens_for_fit = list(range(500, 1000, 20)) # lengths of images for fitting L_characteristic
@@ -38,7 +40,7 @@
     continue
     imsize = 1600
     lf = imsize//32 + 2 # the size of G's input
-    many_images = util.generate_image(netG, lf=lf, threed=False, reps=50)
+    many_images = util.generate_image(netG, lf=lf, threed=False, reps=10)
     pf = many_images.mean().cpu().numpy()
     small_imsize = 512
     img = many_images[0].detach().cpu().numpy()[:small_imsize, :small_imsize]
@@ -46,49 +48,41 @@
     csets = [['black', 'black'], ['gray', 'gray']]
     conf = 0.95
     errs = util.real_image_stats(many_images, lens_for_fit, pf, conf=conf)
+    sizes_for_fit = [[lf,lf] for lf in lens_for_fit]
+    real_cls = util.fit_cls(errs, sizes_for_fit, pf)
     stds = errs / stats.norm.interval(conf)[1] * pf / 100
+    std_fit = (real_cls**2/(np.array(lens_for_fit)**2)*pf*(1-pf))**0.5
     # print(stds)
     vars = stds**2
     # from variances to L_characteristic using image size and phase fraction
     clss = (np.array(lens_for_fit)**2*vars/pf/(1-pf))**0.5
     print(clss)
 
-    axs[i, 1].plot(lens_for_fit, clss, label = f'Characteristic length scale')
+    axs_twin = axs[i, 1].twinx()
+    stds_scatter = axs_twin.scatter(lens_for_fit, stds, s=8, color=colors["stds"], label = f'Standard deviations')
+    twin_fit = axs_twin.plot(lens_for_fit, std_fit, color=colors["stds"], label = f'Standard deviations fit')
+    axs_twin.tick_params(axis='y', labelcolor=colors["stds"])
+    axs_twin.set_ylim(0, 0.025)
+    cls_scatter = axs[i, 1].scatter(lens_for_fit, clss, s=8, color=colors["cls"], label = f'Characteristic length scales')
+    cls_fit = axs[i, 1].hlines(real_cls, lens_for_fit[0], lens_for_fit[-1], color=colors["cls"], label = f'Characteristic length scales fit')
+    axs[i, 1].tick_params(axis='y', labelcolor=colors["cls"])
     axs[i, 1].set_ylim(0, 28)
 
-    # # axs[i, 1].plot(d[f'ls'], d[f'err_model_{met}'], c=cs[0], label = f'{met} errors from bernouli')
-    # y = d[f'tpc_{met}'][0] # TODO write that in sa tpc, only the first direction is shown, or do something else, maybe normalise the tpc? We can do sum, because that's how we calculate the ir!
-    # x = d[f'tpc_{met}_dist']
-    # y = np.array(y)
 
-    # # TODO erase this afterwards:
-    # if met=='vf':
-    #     ir = np.round(d[f'ir_vf'], 1)
-    #     axs[i, 2].plot(x, y, c=cs[1], label=f'Volume fraction 2PC')
-    #     axs[i, 2].axhline(d['vf']**2, linestyle='dashed', label='$p^2$')
-    #     axs[i, 2].plot([0,ir],[d['vf']**2-0.02, d['vf']**2-0.02], c='green', linewidth=3, label=r'$\tilde{a}_2$')
-    #     ticks = [0, int(ir), 20, 40, 60, 80, 100]
-    #     ticklabels = map(str, ticks)
-    #     axs[i, 2].set_xticks(ticks)
-    #     axs[i, 2].set_xticklabels(ticklabels)
-    #     axs[i,2].fill_between(x, d['vf']**2, y, alpha=0.5, label=f'Integrated part')
-    #     axs[i,2].legend()
+    dims = len(img.shape)
+    # print(f'starting tpc radial')
+    tpc = util.tpc_radial(img, threed=False)
+    cls = util.tpc_to_cls(tpc, img, img.shape)
+
+    contour = axs[i, 2].contourf(tpc, cmap='plasma', levels = 200)
+    divider = make_axes_locatable(axs[i, 2])
+    cax = divider.append_axes('right', size='5%', pad=0.05)
+    fig.colorbar(contour, cax=cax, orientation='vertical')
+    if i == 0:
+        plots = [stds_scatter, twin_fit[0], cls_scatter, cls_fit]
+        label_plots = [plot.get_label() for plot in plots]
+        axs[i,1].legend(plots, label_plots)
 
-    # # axs[i, 2].scatter(x[knee], y_data[knee]/y.max(), c =cs[1], marker = 'x', label=f'{met} ir from tpc', s=100)
-    # ir = d[f'ir_{met}']
-    # pred_ir = util.tpc_to_ir(d[f'tpc_{met}_dist'], d[f'tpc_{met}'])
-    # pred_ir = pred_ir * 1.61
-    # # axs[i, 2].scatter(x[round(pred_ir)], y[round(pred_ir)], c =cs[1], marker = 'x', label=f'{met} predicted tpc IR', s=100)
-    # # axs[i, 2].scatter(x[round(ir)], y[round(ir)], facecolors='none', edgecolors = cs[1], label=f'{met} fitted IR', s=100)
-
-    # irs[j].append(ir)
-    # if i ==0:
-    #     axs[i,1].legend()
     # axs[i,2].legend()
     # axs[i,1].set_xlabel('Image length size [pixels]')
     # axs[i,1].set_ylabel('Volume fraction percentage error [%]')
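[Reviewer's note on PATCH 5] The clss line above inverts the std/CLS relation for each image length in lens_for_fit. A minimal sketch of that inversion; the helper name cls_from_stds is illustrative, not from the repo:

    import numpy as np

    def cls_from_stds(lens, stds, pf):
        # invert std = ((cls/L)**2 * pf*(1-pf))**0.5 for each 2D image length L,
        # matching clss = (np.array(lens_for_fit)**2*vars/pf/(1-pf))**0.5 above
        return (np.array(lens) ** 2 * np.array(stds) ** 2 / (pf * (1 - pf))) ** 0.5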
From 4d704cddbf84096d8cfb1e37f5629bcfe72b0d0b Mon Sep 17 00:00:00 2001
From: amirDahari1
Date: Tue, 30 Jul 2024 15:24:44 +0100
Subject: [PATCH 6/6] merging branches

---
 representativity/prediction_error.py     |  4 ++
 representativity/tpc_expectation_test.py | 49 ------------------------
 representativity/validation.py           |  3 ++
 3 files changed, 7 insertions(+), 49 deletions(-)

diff --git a/representativity/prediction_error.py b/representativity/prediction_error.py
index d7bb8ba..ef410e8 100755
--- a/representativity/prediction_error.py
+++ b/representativity/prediction_error.py
@@ -8,6 +8,10 @@
 from scipy.optimize import curve_fit
 from functools import partial
 
+import os
+
+print(os.getcwd())
+
 '''
 File: prediction_error.py
diff --git a/representativity/tpc_expectation_test.py b/representativity/tpc_expectation_test.py
index 0e27d37..1e79589 100755
--- a/representativity/tpc_expectation_test.py
+++ b/representativity/tpc_expectation_test.py
@@ -9,18 +9,6 @@
 import time
 from scipy.stats import norm
 
-def generate_sg_tpc(netG, mode, imsize):
-    '''
-    This function is used to predict the integral range of the microstructure.
-    '''
-    lf = imsize//32 + 2 # the size of G's input
-    single_img = util.generate_image(netG, lf=lf, threed=mode=='3D', reps=1)
-    if single_img.any():
-        single_img = single_img.cpu()[0]
-        dims = len(single_img.shape)
-        tpc = util.tpc_radial(single_img, threed=dims == 3)
-        return tpc
-
 def tpc_by_radius(tpc):
     tpc = np.array(tpc)
     middle_idx = np.array(tpc.shape)//2
@@ -47,36 +35,6 @@ def tpc_by_radius(tpc):
         tpc_res.append(np.sum(tpc_vec[dist_bool])/np.sum(vec_arr[dist_bool]))
     return pf, pf**2, tpc_res, dist_indices
 
-def tpc_check():
-    all_data, micros, netG, v_names, run_v_names = ms.json_preprocessing()
-
-    edge_lengths_pred = all_data[f'data_gen_2D']['edge_lengths_pred']
-    for j, p in enumerate(micros):
-
-        try:
-            netG.load_state_dict(torch.load(p + "_Gen.pt"))
-        except: # if the image is greayscale it's excepting because there's only 1 channel
-            continue
-        n = p.split('/')[-1]
-        args = (edge_lengths_pred[10], netG, '2D')
-        tpc_results, pfs, pf_squares = tpcs_radius(generate_sg_tpc, args)
-        # print(f'{len(pf_squares)} / {test_runs} done for {n}')
-        mean_tpc_results = np.mean(np.stack(tpc_results,axis=0), axis=0)
-        tpc_fig.plot(mean_tpc_results, label='mean tpc')
-        real_pf_squared = np.mean(pfs)**2
-        # print(f'real pf squared = {np.round(real_pf_squared, 6)}')
-        # print(f'pf squared = {np.round(np.mean(pf_squares), 6)}')
-        tpc_fig.plot([real_pf_squared]*len(mean_tpc_results), label='real pf squared')
-        tpc_fig.plot([np.mean(pf_squares)]*len(mean_tpc_results), label='pf squared')
-        tpc_fig.xlabel('Growing Radius')
-        tpc_fig.ylabel('TPC')
-        tpc_fig.legend()
-        tpc_fig.savefig(f'tpc_results/{n}_tpc.png')
-        tpc_fig.close()
-        # print(f'end tpc = {np.round(np.mean(end_tpc_results), 6)}')
-        # print(f'end tpc std = {np.round(np.std(end_tpc_results), 6)}\n')
-        print(f'{p} done\n')
-
 def tpcs_radius(gen_func, test_runs, args):
     tpc_results = []
     tpcs = []
@@ -133,7 +91,6 @@ def fill_img_with_circles(img, circle_radius, circle_centers):
     return img
 
 if __name__ == '__main__':
-    # tpc_check()
     pfs =[]
     imsize = 100
     circle_size = 20
@@ -160,12 +117,6 @@ def fill_img_with_circles(img, circle_radius, circle_centers):
         axs[f'circle{i}'].set_xlabel(f'$\omega_{i}$')
         axs[f'circle{i}'].set_xticks([])
         axs[f'circle{i}'].set_yticks([])
-
-
-
-    # im0_tpc = make_tpc(circle_ims[0])
-    # im0_tpc_by_radius = tpc_by_radius(im0_tpc)[2]
-    # tpc_fig.plot(im0_tpc_by_radius, label='TPC of $\omega_0$')
 
     tpc_results, pfs, pf_squares, dist_len = tpcs_radius(make_circles_tpc, run_tests, args=args)
     bins = 30
diff --git a/representativity/validation.py b/representativity/validation.py
index 4d3b8db..fc3f370 100755
--- a/representativity/validation.py
+++ b/representativity/validation.py
@@ -118,6 +118,7 @@ def ps_error_prediction(dim, data, confidence, error_target):
     large_im_repeats = 1
     in_the_bounds_w_model = []
     in_the_bounds_wo_model = []
+    iters = 0
     for generator, params in ps_generators.items():
         for value_comb in product(*params.values()):
             args = {key: value for key, value in zip(params.keys(), value_comb)}
@@ -143,6 +144,8 @@ def ps_error_prediction(dim, data, confidence, error_target):
             for i in range(2):
+                print(f'Iteration {iters}')
+                iters += 1
                 with_model = i == 0
                 im_err, l_for_err_target, cls = util.make_error_prediction(small_im, conf=confidence, err_targ=error_target, model_error=with_model, n_divisions=301)
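[Reviewer's note on PATCH 6] The validation loop counts how often the true phase fraction falls inside the predicted confidence bounds (in_the_bounds_w_model / in_the_bounds_wo_model). A toy sketch of that coverage check, with illustrative values and a known-std interval standing in for util.make_error_prediction:

    import numpy as np

    rng = np.random.default_rng(0)
    true_pf, pred_std, confidence = 0.3, 0.01, 0.95
    in_bounds, trials = 0, 1000
    for _ in range(trials):
        observed = rng.normal(true_pf, pred_std)   # stand-in for a sampled image's pf
        half_width = 1.96 * pred_std               # 95% interval for a known std
        in_bounds += (observed - half_width <= true_pf <= observed + half_width)
    print(in_bounds / trials)   # should land close to `confidence` if calibrated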