From a7cf40c3bcba676258b885129e47c4cac887e6f3 Mon Sep 17 00:00:00 2001 From: rmdocherty Date: Tue, 30 Jul 2024 11:45:06 +0100 Subject: [PATCH] copied files from dev branch to here --- paper_figures/fig1.py | 104 ++++--- paper_figures/fig2.py | 207 +++++++------ paper_figures/fig3.py | 4 +- paper_figures/fig4.py | 0 paper_figures/fig_model_error.py | 254 ++++++++++------ paper_figures/prediction interval.py | 154 +++++----- .../correction_fitting/microlib_statistics.py | 193 ++++++------ .../correction_fitting/model_params.py | 53 ++-- .../correction_fitting/prediction_error.py | 46 ++- representativity/validation.py | 189 +++++------- tests/tpc_expectation_test.py | 281 +++++++----------- 11 files changed, 698 insertions(+), 787 deletions(-) mode change 100755 => 100644 paper_figures/fig1.py mode change 100755 => 100644 paper_figures/fig2.py mode change 100755 => 100644 paper_figures/fig3.py mode change 100755 => 100644 paper_figures/fig4.py mode change 100755 => 100644 paper_figures/fig_model_error.py mode change 100755 => 100644 representativity/correction_fitting/microlib_statistics.py mode change 100755 => 100644 representativity/correction_fitting/model_params.py mode change 100755 => 100644 representativity/correction_fitting/prediction_error.py mode change 100755 => 100644 representativity/validation.py mode change 100755 => 100644 tests/tpc_expectation_test.py diff --git a/paper_figures/fig1.py b/paper_figures/fig1.py old mode 100755 new mode 100644 index 12d8a76..337811b --- a/paper_figures/fig1.py +++ b/paper_figures/fig1.py @@ -4,37 +4,34 @@ import json import numpy as np import torch -from representativity.old import util +from representativity import util from torch.nn.functional import interpolate - def sample_error(img, ls, vf=0.5): err = [] dims = len(img.shape) - 1 for l in ls: if dims == 1: - vfs = torch.mean(img[:, : l * l], dim=(1)) - elif dims == 2: + vfs = torch.mean(img[:, :l*l], dim=(1)) + elif dims == 2: vfs = torch.mean(img[:, :l, :l], dim=(1, 2)) else: # 3D - new_l = int(l ** (2 / 3)) + new_l = int(l**(2/3)) vfs = torch.mean(img[:, :new_l, :new_l, :new_l], dim=(1, 2, 3)) std = torch.std(vfs.cpu()) - err.append(100 * ((1.96 * std) / vf)) # percentage of error from 0.5 + err.append(100*((1.96*std)/vf)) # percentage of error from 0.5 return err - errs = [] berns = [] - def generate_microlib_data(): - mode = "2D" + mode = '2D' # Dataset path and list of subfolders # with open("micro_names.json", "r") as fp: # TODO change this later - # micro_names = json.load(fp) - plotting = [f"microstructure{f}" for f in [228, 235, 205, 177]] - projects = [f"/home/amir/microlibDataset/{p}/{p}" for p in plotting] + # micro_names = json.load(fp) + plotting = [f'microstructure{f}' for f in [228, 235, 205, 177]] + projects = [f'/home/amir/microlibDataset/{p}/{p}' for p in plotting] # Load generator network netG = util.load_generator(projects[0]) # Edge lengths to test @@ -46,74 +43,73 @@ def generate_microlib_data(): num_projects = len(projects) projects = projects[:num_projects] - reps = 1000 if mode == "2D" else 400 + reps = 1000 if mode=='2D' else 400 for j, proj in enumerate(projects): # Make an image of micro netG.load_state_dict(torch.load(proj + "_Gen.pt")) - img = util.generate_image(netG, lf=l, reps=reps, threed=mode == "3D") + img = util.generate_image(netG,lf=l,reps=reps, threed=mode=='3D') print(img.size()) if img.any(): microstructure_name = proj.split("/")[-1] vf = torch.mean(img).cpu().item() img = [img] - print(f"starting tpc") - img_dims = [np.array((l,) * 
(len(img.shape) - 1)) for l in edge_lengths] + print(f'starting tpc') + img_dims = [np.array((l,)*(len(img.shape)-1)) for l in edge_lengths] err_exp = util.real_image_stats(img, edge_lengths, vf) return data # with open("data_gen.json", "r") as fp: -# data = json.load(fp)['generated_data'] +# data = json.load(fp)['generated_data'] -n = "microstructure205" -img3 = tifffile.imread(f"/home/amir/microlibDataset/{n}/{n}.tif") +n = 'microstructure205' +img3 = tifffile.imread(f'/home/amir/microlibDataset/{n}/{n}.tif') d = generate_microlib_data() -ls = torch.arange(300, 800, 20) +ls = torch.arange(300,800, 20) img_dims = [np.array((l, l)) for l in ls] ns = util.ns_from_dims(img_dims, 1) -img1 = torch.rand((1000, 1, ls[-1], ls[-1]), device=torch.device("cuda:0")).float() -img1[img1 <= d["vf"]] = 0 -img1[img1 > d["vf"]] = 1 -img1 = 1 - img1 -errs.append(sample_error(img1[:, 0], ls, d["vf"])) -berns.append(util.bernouli(d["vf"], ns)) - -img2 = interpolate( - img1[:, :, : ls[-1] // 2, : ls[-1] // 2], scale_factor=(2, 2), mode="nearest" -) -errs.append(sample_error(img2[:, 0], ls, d["vf"])) +img1 = torch.rand((1000, 1, ls[-1],ls[-1]), device = torch.device('cuda:0')).float() +img1[img1<=d['vf']] = 0 +img1[img1>d['vf']] = 1 +img1 = 1-img1 +errs.append(sample_error(img1[:,0], ls, d['vf'])) +berns.append(util.bernouli(d['vf'], ns)) + +img2 = interpolate(img1[:,:,:ls[-1]//2, :ls[-1]//2], scale_factor=(2,2), mode='nearest') +errs.append(sample_error(img2[:,0], ls, d['vf'])) ns = util.ns_from_dims(img_dims, 2) -berns.append(util.bernouli(d["vf"], ns)) +berns.append(util.bernouli(d['vf'], ns)) -errs.append(d[f"err_exp_vf"][::4]) -ns = util.ns_from_dims(img_dims, d["fac_vf"]) -berns.append(util.bernouli(d["vf"], ns)) +errs.append(d[f'err_exp_vf'][::4]) +ns = util.ns_from_dims(img_dims, d['fac_vf']) +berns.append(util.bernouli(d['vf'], ns)) -fig, axs = plt.subplots(2, 2) -fig.set_size_inches(8, 8) -l = 150 -axs[0, 0].imshow(img1[0, 0, :l, :l].cpu(), cmap="gray") -axs[0, 1].imshow(img2[0, 0, :l, :l].cpu(), cmap="gray") -axs[1, 0].imshow(img3[0, :150, :150], cmap="gray") +fig, axs = plt.subplots(2,2) +fig.set_size_inches(8,8) +l=150 +axs[0,0].imshow(img1[0,0, :l, :l].cpu(), cmap='gray') +axs[0,1].imshow(img2[0,0, :l, :l].cpu(), cmap='gray') +axs[1,0].imshow(img3[0, :150, :150], cmap='gray') -cs = ["r", "b", "g"] -labs = ["a) random 1x1", "b) random 2x2", "c) micrograph 205"] +cs = ['r', 'b', 'g'] +labs = ['a) random 1x1', 'b) random 2x2', 'c) micrograph 205'] for l, err, bern, c in zip(labs, errs, berns, cs): - axs[1, 1].scatter(ls, err, c=c, s=8, marker="x") - axs[1, 1].plot(ls, bern, lw=1, c=c, label=l) -axs[1, 1].legend() -axs[0, 0].set_title("a) Random 1x1 (vf = 0.845, $a_2$ = 1)") -axs[0, 1].set_title("b) Random 2x2 (vf = 0.845, $a_2$ = 2)") -axs[1, 0].set_title("c) Micro. 205 (vf = 0.845, $a_2$ = 7.526)") -axs[1, 1].set_title("d) Experimental integral range fit") -axs[1, 1].set_xlabel("Image length size [pixels]") -axs[1, 1].set_ylabel("Volume fraction percentage error [%]") -axs[1, 1].set_ylim(bottom=0) + axs[1,1].scatter(ls, err, c=c, s=8,marker = 'x') + axs[1,1].plot(ls, bern, lw=1, c=c, label=l) +axs[1,1].legend() +axs[0,0].set_title('a) Random 1x1 (vf = 0.845, $a_2$ = 1)') +axs[0,1].set_title('b) Random 2x2 (vf = 0.845, $a_2$ = 2)') +axs[1,0].set_title('c) Micro. 
205 (vf = 0.845, $a_2$ = 7.526)') +axs[1,1].set_title('d) Experimental integral range fit') +axs[1,1].set_xlabel('Image length size [pixels]') +axs[1,1].set_ylabel('Volume fraction percentage error [%]') +axs[1,1].set_ylim(bottom=0) plt.tight_layout() for ax in axs.ravel()[:3]: ax.set_xticks([]) ax.set_yticks([]) -plt.savefig("fig1.pdf", format="pdf") +plt.savefig('fig1.pdf', format='pdf') + diff --git a/paper_figures/fig2.py b/paper_figures/fig2.py old mode 100755 new mode 100644 index 187af41..0b9656d --- a/paper_figures/fig2.py +++ b/paper_figures/fig2.py @@ -3,128 +3,118 @@ import tifffile import json import numpy as np - # import kneed from scipy.optimize import curve_fit from scipy import stats import torch -from representativity.old import util +from representativity import util, microlib_statistics from torch.nn.functional import interpolate +from mpl_toolkits.axes_grid1 import make_axes_locatable -with open("data_gen2.json", "r") as fp: - data = json.load(fp)["generated_data"] +# with open("data_gen2.json", "r") as fp: +# data = json.load(fp)["generated_data"] -l = len(list(data.keys())) +# l=len(list(data.keys())) # l=3 -c = [(0, 0, 0), (0.5, 0.5, 0.5)] +c=[(0,0,0), (0.5,0.5,0.5)] # plotting = [f'microstructure{f}' for f in [235, 209,205,177]] -plotting = [f"microstructure{f}" for f in [235, 228, 205, 177]] +plotting_ims = [f'microstructure{f}' for f in [235, 228, 205,177]] # plotting = [k for k in data.keys()] -l = len(plotting) +l = len(plotting_ims) fig, axs = plt.subplots(l, 3) -fig.set_size_inches(12, l * 4) -preds = [[], []] -irs = [[], []] -sas = [] -i = 0 -for n in list(data.keys()): - if n not in plotting: +fig.set_size_inches(12, l*3.5) +preds = [[],[]] +irs = [[],[]] +colors = {"cls": 'tab:orange', "stds": 'tab:green'} + +all_data, micros, netG, v_names, run_v_names = microlib_statistics.json_preprocessing() +lens_for_fit = list(range(500, 1000, 20)) # lengths of images for fitting L_characteristic +plotting_ims = [micro for micro in micros if micro.split('/')[-1] in plotting_ims] +# run the statistical analysis on the microlib dataset +for i, p in enumerate(plotting_ims): + + try: + netG.load_state_dict(torch.load(p + "_Gen.pt")) + except: # if the image is greayscale it's excepting because there's only 1 channel continue - img = tifffile.imread(f"/home/amir/microlibDataset/{n}/{n}.tif") - d = data[n] - d1 = data[n] - - csets = [["black", "black"], ["gray", "gray"]] - for j, met in enumerate(["vf", "sa"]): - cs = csets[j] - img_dims = [np.array([int(im_len)] * 2) for im_len in d["ls"]] - ns = util.ns_from_dims(img_dims, d[f"ir_{met}"]) - berns_vf = util.bernouli(d[f"{met}"], ns) - axs[i, 1].scatter( - d["ls"], - d[f"err_exp_{met}"], - c=cs[0], - s=8, - marker="x", - label=f"{met} errors from sampling", - ) - axs[i, 1].plot(d["ls"], berns_vf, c=cs[0], label=f"{met} errors from fitted IR") - # axs[i, 1].plot(d[f'ls'], d[f'err_model_{met}'], c=cs[0], label = f'{met} errors from bernouli') - y = d[f"tpc_{met}"][ - 0 - ] # TODO write that in sa tpc, only the first direction is shown, or do something else, maybe normalise the tpc? We can do sum, because that's how we calculate the ir! 
- x = d[f"tpc_{met}_dist"] - y = np.array(y) - - # TODO erase this afterwards: - if met == "vf": - ir = np.round(d[f"ir_vf"], 1) - axs[i, 2].plot(x, y, c=cs[1], label=f"Volume fraction 2PC") - axs[i, 2].axhline(d["vf"] ** 2, linestyle="dashed", label="$p^2$") - axs[i, 2].plot( - [0, ir], - [d["vf"] ** 2 - 0.02, d["vf"] ** 2 - 0.02], - c="green", - linewidth=3, - label=r"$\tilde{a}_2$", - ) - ticks = [0, int(ir), 20, 40, 60, 80, 100] - ticklabels = map(str, ticks) - axs[i, 2].set_xticks(ticks) - axs[i, 2].set_xticklabels(ticklabels) - axs[i, 2].fill_between( - x, d["vf"] ** 2, y, alpha=0.5, label=f"Integrated part" - ) - axs[i, 2].legend() - - # axs[i, 2].scatter(x[knee], y_data[knee]/y.max(), c =cs[1], marker = 'x', label=f'{met} ir from tpc', s=100) - ir = d[f"ir_{met}"] - pred_ir = util.tpc_to_ir(d[f"tpc_{met}_dist"], d[f"tpc_{met}"]) - pred_ir = pred_ir * 1.61 - # axs[i, 2].scatter(x[round(pred_ir)], y[round(pred_ir)], c =cs[1], marker = 'x', label=f'{met} predicted tpc IR', s=100) - # axs[i, 2].scatter(x[round(ir)], y[round(ir)], facecolors='none', edgecolors = cs[1], label=f'{met} fitted IR', s=100) - - irs[j].append(ir) - if i == 0: - axs[i, 1].legend() - axs[i, 2].legend() - axs[i, 1].set_xlabel("Image length size [pixels]") - axs[i, 1].set_ylabel("Volume fraction percentage error [%]") - axs[i, 2].set_ylabel("2-point correlation function") - ir = np.round(d[f"ir_vf"], 2) - sas.append(d["sa"]) - im = img[0] * 255 - si_size, nirs = 160, 5 - sicrop = int(ir * nirs) - print(ir, sicrop) - subim = torch.tensor(im[-sicrop:, -sicrop:]).unsqueeze(0).unsqueeze(0).float() - subim = interpolate(subim, size=(si_size, si_size), mode="nearest")[0, 0] - subim = np.stack([subim] * 3, axis=-1) - - subim[:5, :, :] = 125 - subim[:, :5, :] = 125 - # subim[5:20, 5:50, :] - subim[10:15, 10 : 10 + si_size // nirs, :] = 0 - subim[10:15, 10 : 10 + si_size // nirs, 1:-1] = 125 - - im = np.stack([im] * 3, axis=-1) - im[-si_size:, -si_size:] = subim - axs[i, 0].imshow(im) - axs[i, 0].set_xticks([]) - axs[i, 0].set_yticks([]) - axs[i, 0].set_ylabel(f"M{n[1:]}") - axs[i, 0].set_xlabel( - f"Volume fraction " - + r"$\tilde{a}_2$: " - + f"{ir} Inset mag: x{np.round(si_size/sicrop, 2)}" - ) - - i += 1 + imsize = 1600 + lf = imsize//32 + 2 # the size of G's input + many_images = util.generate_image(netG, lf=lf, threed=False, reps=10) + pf = many_images.mean().cpu().numpy() + small_imsize = 512 + img = many_images[0].detach().cpu().numpy()[:small_imsize, :small_imsize] + + csets = [['black', 'black'], ['gray', 'gray']] + conf = 0.95 + errs = util.real_image_stats(many_images, lens_for_fit, pf, conf=conf) + sizes_for_fit = [[lf,lf] for lf in lens_for_fit] + real_cls = util.fit_cls(errs, sizes_for_fit, pf) + stds = errs / stats.norm.interval(conf)[1] * pf / 100 + std_fit = (real_cls**2/(np.array(lens_for_fit)**2)*pf*(1-pf))**0.5 + # print(stds) + vars = stds**2 + # from variations to L_characteristic using image size and phase fraction + clss = (np.array(lens_for_fit)**2*vars/pf/(1-pf))**0.5 + print(clss) + + axs_twin = axs[i, 1].twinx() + stds_scatter = axs_twin.scatter(lens_for_fit, stds, s=8, color=colors["stds"], label = f'Standard deviations') + twin_fit = axs_twin.plot(lens_for_fit, std_fit, color=colors["stds"], label = f'Standard deviations fit') + axs_twin.tick_params(axis='y', labelcolor=colors["stds"]) + axs_twin.set_ylim(0, 0.025) + cls_scatter = axs[i, 1].scatter(lens_for_fit, clss, s=8, color=colors["cls"], label = f'Characteristic length scales') + cls_fit = axs[i, 1].hlines(real_cls, lens_for_fit[0], 
lens_for_fit[-1], color=colors["cls"], label = f'Characteristic length scales fit') + axs[i, 1].tick_params(axis='y', labelcolor=colors["cls"]) + axs[i, 1].set_ylim(0, 28) + + dims = len(img.shape) + # print(f'starting tpc radial') + tpc = util.tpc_radial(img, threed=False) + cls = util.tpc_to_cls(tpc, img, img.shape) + + contour = axs[i, 2].contourf(tpc, cmap='plasma', levels = 200) + divider = make_axes_locatable(axs[i, 2]) + cax = divider.append_axes('right', size='5%', pad=0.05) + fig.colorbar(contour, cax=cax, orientation='vertical') + + if i == 0: + plots = [stds_scatter, twin_fit[0], cls_scatter, cls_fit] + label_plots = [plot.get_label() for plot in plots] + axs[i,1].legend(plots, label_plots) + + # axs[i,2].legend() + # axs[i,1].set_xlabel('Image length size [pixels]') + # axs[i,1].set_ylabel('Volume fraction percentage error [%]') + # axs[i,2].set_ylabel('2-point correlation function') + # ir = np.round(d[f'ir_vf'], 2) + # im = img[0]*255 + # si_size, nirs = 160, 5 + # sicrop = int(ir*nirs) + # print(ir, sicrop) + # subim=torch.tensor(im[-sicrop:,-sicrop:]).unsqueeze(0).unsqueeze(0).float() + # subim = interpolate(subim, size=(si_size,si_size), mode='nearest')[0,0] + # subim = np.stack([subim]*3, axis=-1) + + # subim[:5,:,:] = 125 + # subim[:,:5,:] = 125 + # # subim[5:20, 5:50, :] + # subim[10:15, 10:10+si_size//nirs, :] = 0 + # subim[10:15, 10:10+si_size//nirs, 1:-1] = 125 + + # im = np.stack([im]*3, axis=-1) + # im[-si_size:,-si_size:] = subim + # axs[i, 0].imshow(im) + # axs[i, 0].set_xticks([]) + # axs[i, 0].set_yticks([]) + # axs[i, 0].set_ylabel(f'M{n[1:]}') + # axs[i, 0].set_xlabel(f'Volume fraction '+ r'$\tilde{a}_2$: '+ f'{ir} Inset mag: x{np.round(si_size/sicrop, 2)}') + + i+=1 plt.tight_layout() -plt.savefig("fig2.pdf", format="pdf") +plt.savefig('fig2.pdf', format='pdf') # fig, axs = plt.subplots(1,2) # fig.set_size_inches(10,5) @@ -154,3 +144,8 @@ # # res = 100*(np.mean((y-targ)**2/targ))**0.5 # print(res,res2) + + + + + diff --git a/paper_figures/fig3.py b/paper_figures/fig3.py old mode 100755 new mode 100644 index 23cadd8..151e8d9 --- a/paper_figures/fig3.py +++ b/paper_figures/fig3.py @@ -1,9 +1,9 @@ -simport json +import json import numpy as np from scipy.optimize import curve_fit import sys sys.path.append('../') -from representativity.old import util +from representativity import util import matplotlib.pyplot as plt from matplotlib import animation from scipy.optimize import minimize diff --git a/paper_figures/fig4.py b/paper_figures/fig4.py old mode 100755 new mode 100644 diff --git a/paper_figures/fig_model_error.py b/paper_figures/fig_model_error.py old mode 100755 new mode 100644 index 4de8a1c..96c71a2 --- a/paper_figures/fig_model_error.py +++ b/paper_figures/fig_model_error.py @@ -1,67 +1,99 @@ -from representativity import prediction_error +from representativity.correction_fitting import prediction_error import matplotlib.pyplot as plt -from scipy import stats -import math +from scipy import stats + import numpy as np -fill_color = [1, 0.49803922, 0.05490196, 0.2] -sigma_color = [0.4, 0.9, 0.0, 1] -fill_color_dark = fill_color.copy() -fill_color_dark[-1] = 0.5 - -def scatter_plot(ax, res, title, xlabel, ylabel): - - pred_data, fit_data = res - - ax.scatter(fit_data, pred_data, s=0.2, label='Predictions') - +sigma_color = "tab:pink" +model_color = "tab:cyan" +oi_color = "tab:orange" +dashes = [2.5, 5] + + +def scatter_plot(ax, res, xlabel, ylabel): + + pred_data, fit_data, oi_data = res + max_val = np.max([np.max(fit_data), np.max(pred_data)]) + x = 
np.linspace(0, max_val, 100) + ax.plot(x, x, label="Ideal predictions", color="black") + + ax.scatter( + fit_data, oi_data, alpha=0.7, s=0.3, c=oi_color, label="Classical predictions" + ) + oi_errs = (fit_data - oi_data) / oi_data + oi_mean = np.mean(oi_errs) + ax.plot( + x, + x * (1 - oi_mean), + color=oi_color, + linestyle="--", + dashes=[2.5, 5], + label="Mean predictions", + ) + + ax.scatter( + fit_data, + pred_data, + alpha=0.5, + s=0.3, + c=model_color, + label="Our model's predictions", + ) + model_errs = (fit_data - pred_data) / pred_data + model_mean = np.mean(model_errs) + ax.plot( + x, + x * (1 - model_mean), + color=model_color, + linestyle="--", + dashes=[2.5, 5], + label="Mean predictions", + ) + ax.set_xlabel(xlabel) ax.set_ylabel(ylabel) - ax.set_title(title) - - errs = (fit_data-pred_data)/pred_data - max_val = (np.max([np.max(fit_data), np.max(pred_data)])) - print(max_val) - x = np.linspace(0,max_val,100) - ax.plot(x, x, label = 'Ideal predictions', color='black') - errs = np.sort(errs) - std = np.std(errs) - - z = stats.norm.interval(0.9)[1] - err = std*z - print(f'std = {std}') - print(f'mean = {np.mean(errs)}') - ax.plot(x ,x/(1+err), c=fill_color_dark, ls='--', linewidth=1) - fill_1 = ax.fill_between(x, np.ones(x.shape[0])*(max_val),x/(1+err), alpha=0.2, label = f'95% confidence range', color=fill_color) - ax.set_aspect('equal', adjustable='box') - return errs, err + + model_errs = np.sort(model_errs) + std = np.std(model_errs) + + ax.set_aspect("equal", adjustable="box") + with_cls = False -fig, axs = plt.subplots(2,3+with_cls) -fig.set_size_inches(4*(3+with_cls),8) -dims = ['2D', '3D'] -edge_length = ['1536', '448'] +fig, axs = plt.subplots(2, 3 + with_cls) +fig.set_size_inches(4 * (3 + with_cls), 8) +dims = ["2D", "3D"] +edge_length = ["1536", "448"] for i, dim in enumerate(dims): - dim_data, micro_names = prediction_error.data_micros(dim) - pred_cls_all, fit_cls_all, _, vfs = prediction_error.pred_vs_fit_all_data(dim, edge_length[i], num_runs=9, std_not_cls=False) - cls_results = [pred_cls_all, fit_cls_all] + pred_cls_all, fit_cls_all, oi_cls_all, _, vfs = ( + prediction_error.pred_vs_fit_all_data( + dim, edge_length[i], num_runs=9, std_not_cls=False + ) + ) + cls_results = [pred_cls_all, fit_cls_all, oi_cls_all] # calculate the standard deviation instead of the cls: vfs = np.array(vfs) - vfs_one_minus_vfs = vfs*(1-vfs) + vfs_one_minus_vfs = vfs * (1 - vfs) dim_int = int(dim[0]) cur_edge_length = int(edge_length[i]) - pred_std_all = ((pred_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5 - fit_std_all = ((fit_cls_all/cur_edge_length)**dim_int*vfs_one_minus_vfs)**0.5 - std_results = [pred_std_all, fit_std_all] + std_results = [ + ((cls_res / cur_edge_length) ** dim_int * vfs_one_minus_vfs) ** 0.5 + for cls_res in cls_results + ] dim_str = dim[0] - x_labels = [f'True CLS $a_{int(dim[0])}$', f'True Phase Fraction std $\sigma_{int(dim[0])}$'] - cls_math = r'\tilde{a}_{2}' if dim == '2D' else r'\tilde{a}_{3}' - std_math = r'\tilde{\sigma}_{2}' if dim == '2D' else r'\tilde{\sigma}_{3}' - y_labels = ['Predicted CLS $%s$' %cls_math, 'Predicted Phase Fraction std $%s$' %std_math] - title_suffix = r'img size $%s^%s$' %(edge_length[i], dim_str) - titles = [f'{dim} CLS comparison, '+title_suffix, f'{dim} std comparison, '+title_suffix] - # sa_results = [err_exp_sa[pred_err_sa!=math.isnan], pred_err_sa[pred_err_sa!=math.nan]] - + x_labels = [ + f"True CLS $a_{int(dim[0])}$", + f"True Phase Fraction std $\sigma_{int(dim[0])}$", + ] + cls_math = r"\tilde{a}_{2}" if dim 
== "2D" else r"\tilde{a}_{3}" + std_math = r"\tilde{\sigma}_{2}" if dim == "2D" else r"\tilde{\sigma}_{3}" + y_labels = [ + "Predicted CLS $%s$" % cls_math, + "Predicted Phase Fraction std $%s$" % std_math, + ] + # title_suffix = r'Image size $%s^%s$' %(edge_length[i], dim_str) + # titles = [f'{dim} CLS comparison, '+title_suffix, f'{dim} std comparison, '+title_suffix] + for j, res in enumerate([cls_results, std_results]): ax_idx = j if not with_cls: @@ -69,79 +101,105 @@ def scatter_plot(ax, res, title, xlabel, ylabel): continue else: ax_idx = 0 - + ax = axs[i, ax_idx] - - # print(f'slope = {slope} and intercept = {intercept}') + ax_xlabel = x_labels[j] ax_ylabel = y_labels[j] - ax_title = titles[j] - - _ = scatter_plot(ax, res, ax_title, ax_xlabel, ax_ylabel) - - if (j == 0 or not with_cls) and dim == '2D': - ax.legend(loc='upper left') + + scatter_plot(ax, res, ax_xlabel, ax_ylabel) + + if j == 0 or not with_cls and i == 0: + ax.legend(loc="upper left") # Fit a normal distribution to the data: - errs = (fit_std_all-pred_std_all)/pred_std_all*100 + pred_std_all, fit_std_all = std_results[:2] + errs = (fit_std_all - pred_std_all) / pred_std_all * 100 mu, std = stats.norm.fit(errs) ax_idx = 1 + with_cls ax2 = axs[i, ax_idx] - + # Plot the histogram. counts, bins = np.histogram(errs) - + max_val = np.max([np.max(errs), -np.min(errs)]) - y, x, _ = ax2.hist(errs, range=[-max_val, max_val], bins=50, alpha=0.6, density=True) + y, x, _ = ax2.hist( + errs, + range=[-max_val, max_val], + bins=50, + alpha=0.6, + color=model_color, + density=True, + label="Our model's predictions", + ) + mean_percentage_error = np.mean(errs) - print(f'Is the hist normal: {stats.normaltest(errs)}') - print(f'mean = {errs.mean()}') - print(f'median = {np.median(errs)}') # Plot the PDF. 
xmin, xmax = x.min(), x.max() - print(xmin, xmax) max_abs = max(np.abs(np.array([xmin, xmax]))) x = np.linspace(xmin, xmax, 100) p = stats.norm.pdf(x, mu, std) - - print(fill_color) - - ax2.plot(x, p, linewidth=2, color=fill_color_dark, label=f'Fitted normal distribution') - title = f'{dim} std PE distribution, {title_suffix}' - ax2.set_title(title) - ax2.set_xlabel(r'Prediction Percentage Error ($\frac{\sigma_{%s}-\tilde{\sigma_{%s}}}{\tilde{\sigma_{%s}}}\cdot100$)' %(dim_str, dim_str, dim_str)) - err = std*stats.norm.interval(0.9)[1] - ax2.vlines(0, ymin=0, ymax=y.max(), color='black') - ax2.vlines(std, ymin=0, ymax=y.max(), ls='--', color=sigma_color, label = r'$\sigma_{mod}$') - ax2.vlines(err, ymin=0, ymax=y.max(), ls='--', color=fill_color_dark, label = r'$\sigma_{mod}\cdot Z_{90\%}$') - trunc_x = x[x alpha)[0][0] - alpha_sum_dist_norm_beginning = np.where(cum_sum_sum_dist_norm > 1 - alpha)[0][0] - cum_sum_mid_std_dist = np.cumsum(mid_std_dist * np.diff(pf_x_1d)[0]) + alpha_sum_dist_norm_beginning = np.where(cum_sum_sum_dist_norm > 1-alpha)[0][0] + cum_sum_mid_std_dist = np.cumsum(mid_std_dist*np.diff(pf_x_1d)[0]) # cum_sum_mid_std_dist /= np.trapz(mid_std_dist, pf_x_1d) alpha_mid_std_dist_end = np.where(cum_sum_mid_std_dist > alpha)[0][0] - alpha_mid_std_dist_beginning = np.where(cum_sum_mid_std_dist > 1 - alpha)[0][0] - plt.plot(pf_x_1d, mid_std_dist, c="blue", label="middle normal dist") - plt.vlines( - pf_x_1d[alpha_mid_std_dist_end], - 0, - np.max(mid_std_dist), - color="blue", - label=r"95% confidence bounds", - ) - plt.vlines( - pf_x_1d[alpha_mid_std_dist_beginning], 0, np.max(mid_std_dist), color="blue" - ) - plt.plot(pf_x_1d, sum_dist_norm, c="orange", label="summed mixture dist") + alpha_mid_std_dist_beginning = np.where(cum_sum_mid_std_dist > 1-alpha)[0][0] + + ax[1,2].plot(pf_x_1d, sum_dist_norm, c='blue', label = "Likelihood of the material\nphase fraction by integrating\nthe rows of (e)") # new_std_dist = normal_dist(pf_x_1d, mean=image_pf, std=0.02775) - # plt.plot(pf_x_1d, new_std_dist, c='green', label = 'new std dist') - plt.vlines( - pf_x_1d[alpha_sum_dist_norm_end], - 0, - np.max(sum_dist_norm), - color="orange", - label=r"95% confidence bounds", - ) - plt.vlines( - pf_x_1d[alpha_sum_dist_norm_beginning], 0, np.max(sum_dist_norm), color="orange" - ) - - plt.legend() - plt.show() + # ax[1,2].plot(pf_x_1d, new_std_dist, c='green', label = 'new std dist') + ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_end], 0, np.max(sum_dist_norm), linestyle="--", color='blue', label = r'95% confidence bounds') + ax[1,2].vlines(pf_x_1d[alpha_sum_dist_norm_beginning], 0, np.max(sum_dist_norm), linestyle="--", color='blue') + ax[1,2].vlines(image_pf, 0, np.max(sum_dist_norm), linestyle="--",color='g', label = 'Observed phase frcation $\Phi(\omega)$') + ax[1,2].plot(pf_x_1d, mid_std_dist, c='orange', label = '(c)') + ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_beginning], 0, np.max(sum_dist_norm), linestyle="--", color='orange') + ax[1,2].vlines(pf_x_1d[alpha_mid_std_dist_end], 0, np.max(sum_dist_norm), linestyle="--", color='orange', label = r'(c) 95% confidence interval') + ax[1,2].set_xlabel('Phase fraction') + ax[1,2].set_title('(f)') + ax[1,2].set_yticks([0,300]) + ax[1,2].legend() print(np.trapz(sum_dist_norm, pf_x_1d)) print(np.trapz(mid_std_dist, pf_x_1d)) + plt.savefig('prediction_interval.pdf', format='pdf') + + + if __name__ == "__main__": - image_pf = 0.3 - pred_std = image_pf / 100 * 2 / 3 - std_dist_std = pred_std / 4 + image_pf = 0.1013 + pred_std = 0.00121 + std_dist_std = 
pred_std*0.27 time_bf = time.time() plot_likelihood_of_phi(image_pf, pred_std, std_dist_std) - print(f"time taken = {time.time()-time_bf}") - print(util.get_prediction_interval(image_pf, pred_std, std_dist_std)) + print(f'time taken = {time.time()-time_bf}') + print(util.get_prediction_interval(image_pf, pred_std, std_dist_std)) \ No newline at end of file diff --git a/representativity/correction_fitting/microlib_statistics.py b/representativity/correction_fitting/microlib_statistics.py old mode 100755 new mode 100644 index 229fd17..18bf145 --- a/representativity/correction_fitting/microlib_statistics.py +++ b/representativity/correction_fitting/microlib_statistics.py @@ -1,5 +1,5 @@ import os -from representativity.old import util +from representativity import util import torch import matplotlib.pyplot as plt import json @@ -8,99 +8,92 @@ from scipy.stats import norm -""" +''' File: microlib_statistics.py Description: This script is used to generate the statistics for the representativity of microstructures from the microlib dataset. The statistics generated are then saved in a json file, to be analyzed and used for representativity prediction of a new microstructure or micrograph. -""" - +''' def insert_v_names_in_all_data(all_data, mode, n, v_names, run_v_names, run_number=0): - """ - This function is used to insert the names of the variables that will be used to store the statistics - """ - dim_data = all_data[f"data_gen_{mode}"] + ''' + This function is used to insert the names of the variables that will be used to store the statistics''' + dim_data = all_data[f'data_gen_{mode}'] if n not in dim_data: dim_data[n] = {} for v_name in v_names: if v_name not in dim_data[n]: dim_data[n][v_name] = {} micro_data = dim_data[n] - if f"run_{run_number}" not in micro_data: - micro_data[f"run_{run_number}"] = {} - run_data = micro_data[f"run_{run_number}"] + if f'run_{run_number}' not in micro_data: + micro_data[f'run_{run_number}'] = {} + run_data = micro_data[f'run_{run_number}'] for run_v_name in run_v_names: if run_v_name not in run_data: run_data[run_v_name] = {} def run_ir_prediction(netG, n, mode, imsize, all_data, conf, run_number=0): - """ + ''' This function is used to predict the integral range of the microstructure. 
- """ - run_data = all_data[f"data_gen_{mode}"][n][f"run_{run_number}"] - lf = imsize // 32 + 2 # the size of G's input - single_img = util.generate_image(netG, lf=lf, threed=mode == "3D", reps=1) + ''' + run_data = all_data[f'data_gen_{mode}'][n][f'run_{run_number}'] + lf = imsize//32 + 2 # the size of G's input + single_img = util.generate_image(netG, lf=lf, threed=mode=='3D', reps=1) if single_img.any(): single_img = single_img.cpu()[0] - pred_err_vf, _, pred_ir_vf = util.make_error_prediction( - single_img, conf=conf, model_error=False, correction=False, mxtpc=200 - ) - print(f"pred ir {imsize} = {np.round(pred_ir_vf, 3)}") + pred_err_vf, _, pred_ir_vf = util.make_error_prediction(single_img, + conf=conf, model_error=False, correction=False, mxtpc=200) + print(f'pred ir {imsize} = {np.round(pred_ir_vf, 3)}') im_vf = single_img.mean().cpu().item() one_im_stat_pred = util.one_img_stat_analysis_error(single_img, im_vf) - print(f"one im stat pred ir = {one_im_stat_pred}") - if "pred_ir_one_im_fit_vf" not in run_data: - run_data["pred_ir_one_im_fit_vf"] = {} - run_data["pred_ir_one_im_fit_vf"][str(imsize)] = one_im_stat_pred - run_data["pred_ir_vf"][str(imsize)] = pred_ir_vf - run_data["pred_err_vf"][str(imsize)] = pred_err_vf - + print(f'one im stat pred ir = {one_im_stat_pred}') + if 'pred_ir_one_im_fit_vf' not in run_data: + run_data['pred_ir_one_im_fit_vf'] = {} + run_data['pred_ir_one_im_fit_vf'][str(imsize)] = one_im_stat_pred + run_data['pred_ir_vf'][str(imsize)] = pred_ir_vf + run_data['pred_err_vf'][str(imsize)] = pred_err_vf def run_statistical_fit_analysis(netG, n, mode, edge_lengths_fit, all_data): - """ - This function is used to run the statistical fit analysis on the microstructure, + ''' + This function is used to run the statistical fit analysis on the microstructure, and find the "true" integral range of the microstructure. - """ - imsize = 512 if mode == "3D" else 1600 - lf = imsize // 32 + 2 # the size of G's input - reps = 50 if mode == "3D" else 150 - many_imgs = util.generate_image(netG, lf=lf, threed=mode == "3D", reps=reps) + ''' + imsize = 512 if mode=='3D' else 1600 + lf = imsize//32 + 2 # the size of G's input + reps = 50 if mode=='3D' else 150 + many_imgs = util.generate_image(netG, lf=lf, threed=mode=='3D', reps=reps) vf = torch.mean(many_imgs).cpu().item() - print(f"{n} vf = {vf}") - all_data[f"data_gen_{mode}"][n]["vf"] = vf + print(f'{n} vf = {vf}') + all_data[f'data_gen_{mode}'][n]['vf'] = vf fit_ir_vf = util.stat_analysis_error(many_imgs, vf, edge_lengths_fit) - cur_fit_ir_vf = all_data[f"data_gen_{mode}"][n]["fit_ir_vf"] - print(f"{n} cur fit ir vf = {cur_fit_ir_vf}") - print(f"{n} fit ir vf = {fit_ir_vf}") + cur_fit_ir_vf = all_data[f'data_gen_{mode}'][n]['fit_ir_vf'] + print(f'{n} cur fit ir vf = {cur_fit_ir_vf}') + print(f'{n} fit ir vf = {fit_ir_vf}') # fit_ir_vf_oi = util.stat_analysis_error(many_imgs[0].unsqueeze(0), vf, edge_lengths_fit) fit_ir_vf_classic = util.stat_analysis_error_classic(many_imgs, vf) - print(f"{n} fit ir vf classic = {fit_ir_vf_classic}") - all_data[f"data_gen_{mode}"][n]["fit_ir_vf"] = fit_ir_vf + print(f'{n} fit ir vf classic = {fit_ir_vf_classic}') + all_data[f'data_gen_{mode}'][n]['fit_ir_vf'] = fit_ir_vf return fit_ir_vf, vf -def compare_statistical_fit_error( - n, mode, edge_lengths_pred, fit_ir_vf, vf, all_data, conf -): - """ +def compare_statistical_fit_error(n, mode, edge_lengths_pred, fit_ir_vf, vf, all_data, conf): + ''' This function is used to compare the statistical fit error to the prediction error. 
- """ - n_dims = 2 if mode == "2D" else 3 - img_sizes = [(l,) * n_dims for l in edge_lengths_pred] + ''' + n_dims = 2 if mode=='2D' else 3 + img_sizes = [(l,)*n_dims for l in edge_lengths_pred] fit_errs_vf = util.bernouli(vf, util.ns_from_dims(img_sizes, fit_ir_vf), conf=conf) for i in range(len(edge_lengths_pred)): imsize = edge_lengths_pred[i] - all_data[f"data_gen_{mode}"][n]["fit_err_vf"][imsize] = fit_errs_vf[i] - + all_data[f'data_gen_{mode}'][n]['fit_err_vf'][imsize] = fit_errs_vf[i] def json_preprocessing(): - """ + ''' This function is used to load the data from the microlib dataset, and to prepare the json file - """ + ''' # Load the statistics file with open("microlib_statistics_periodic.json", "r") as fp: @@ -109,42 +102,37 @@ def json_preprocessing(): # Dataset path and list of subfolders with open("micro_names.json", "r") as fp: micro_names = json.load(fp) - micros = [f"/home/amir/microlibDataset/{p}/{p}" for p in micro_names] + micros = [f'/home/amir/microlibDataset/{p}/{p}' for p in micro_names] # Load placeholder generator netG = util.load_generator(micros[0]) - v_names = ["vf", "fit_err_vf"] - run_v_names = ["pred_ir_vf", "pred_err_vf", "pred_ir_one_im_fit_vf"] + v_names = ['vf', 'fit_err_vf'] + run_v_names = ['pred_ir_vf', 'pred_err_vf', 'pred_ir_one_im_fit_vf'] - modes = ["2D", "3D"] + modes = ['2D', '3D'] for mode in modes: - if f"data_gen_{mode}" not in all_data: - all_data[f"data_gen_{mode}"] = {} + if f'data_gen_{mode}' not in all_data: + all_data[f'data_gen_{mode}'] = {} # Edge lengths for the experimental statistical analysis: - all_data["data_gen_2D"]["edge_lengths_fit"] = list( - range(500, 1000, 20) - ) # TODO change back to 500 - all_data["data_gen_3D"]["edge_lengths_fit"] = list(range(350, 450, 10)) + all_data['data_gen_2D']['edge_lengths_fit'] = list(range(500, 1000, 20)) # TODO change back to 500 + all_data['data_gen_3D']['edge_lengths_fit'] = list(range(350, 450, 10)) # Edge lengths for the predicted integral range: - all_data["data_gen_2D"]["edge_lengths_pred"] = list(range(8 * 32, 65 * 32, 4 * 32)) - all_data["data_gen_3D"]["edge_lengths_pred"] = list(range(4 * 32, 19 * 32, 1 * 32)) + all_data['data_gen_2D']['edge_lengths_pred'] = list(range(8*32, 65*32, 4*32)) + all_data['data_gen_3D']['edge_lengths_pred'] = list(range(4*32, 19*32, 1*32)) return all_data, micros, netG, v_names, run_v_names - -def run_microlib_statistics( - cur_modes=["2D", "3D"], run_s=False, run_p=True, run_number=0 -): - """ - This function is used to run the statistical analysis on the microlib dataset. +def run_microlib_statistics(cur_modes=['2D', '3D'], run_s=False, run_p=True, run_number=0): + ''' + This function is used to run the statistical analysis on the microlib dataset. It will generate the statistics for each microstructure in the dataset, and save it in a json file. param cur_modes: list of modes to run the statistical analysis on. param run_s: if True, run the statistical fit analysis. param run_p: if True, run the prediction of the integral range. param run_number: the number of the run for the integral range prediction. 
- """ + ''' all_data, micros, netG, v_names, run_v_names = json_preprocessing() @@ -156,63 +144,50 @@ def run_microlib_statistics( netG.load_state_dict(torch.load(p + "_Gen.pt")) except: # if the image is greayscale it's excepting because there's only 1 channel continue - + t_micro = time.time() for mode in cur_modes: - print(f"{mode} mode") + print(f'{mode} mode') - edge_lengths_fit = all_data[f"data_gen_{mode}"]["edge_lengths_fit"] - edge_lengths_pred = all_data[f"data_gen_{mode}"]["edge_lengths_pred"] - - n = p.split("/")[-1] - insert_v_names_in_all_data( - all_data, mode, n, v_names, run_v_names, run_number - ) # insert var names in all_data - - conf = 0.95 # confidence level for the prediction and stat. fit error + edge_lengths_fit = all_data[f'data_gen_{mode}']['edge_lengths_fit'] + edge_lengths_pred = all_data[f'data_gen_{mode}']['edge_lengths_pred'] + + n = p.split('/')[-1] + insert_v_names_in_all_data(all_data, mode, n, v_names, run_v_names, run_number) # insert var names in all_data + + conf = 0.95 # confidence level for the prediction and stat. fit error if run_p: - print(f"{n} starting prediction") + print(f'{n} starting prediction') for imsize in edge_lengths_pred: run_ir_prediction(netG, n, mode, imsize, all_data, conf, run_number) if run_s: - print(f"{n} starting statistical fit") - fit_ir_vf, vf = run_statistical_fit_analysis( - netG, n, mode, edge_lengths_fit, all_data - ) - compare_statistical_fit_error( - n, mode, edge_lengths_pred, fit_ir_vf, vf, all_data, conf - ) - + print(f'{n} starting statistical fit') + fit_ir_vf, vf = run_statistical_fit_analysis(netG, n, mode, edge_lengths_fit, all_data) + compare_statistical_fit_error(n, mode, edge_lengths_pred, fit_ir_vf, vf, all_data, conf) + with open(f"microlib_statistics_periodic.json", "w") as fp: - json.dump(all_data, fp) - - print( - f"\ntime for micro {n} {cur_modes} = {np.round((time.time()-t_micro)/60, 2)} minutes\n" - ) + json.dump(all_data, fp) - print(f"{_+1}/{len(micros)} microstructures done") - print(f"total time = {np.round((time.time()-total_time_0)/60, 2)} minutes") + print(f'\ntime for micro {n} {cur_modes} = {np.round((time.time()-t_micro)/60, 2)} minutes\n') + print(f'{_+1}/{len(micros)} microstructures done') + print(f'total time = {np.round((time.time()-total_time_0)/60, 2)} minutes') -def main_run_microlib_statistics( - cur_modes=["2D", "3D"], run_s=False, run_p=True, num_runs=5 -): +def main_run_microlib_statistics(cur_modes=['2D', '3D'], run_s=False, run_p=True, num_runs=5): # first run the statistical analysis on the microlib dataset if run_s: - print(f"Running statistical analysis fit on {cur_modes} mode(s)") + print(f'Running statistical analysis fit on {cur_modes} mode(s)') run_microlib_statistics(cur_modes=cur_modes, run_s=True, run_p=False) # then run the integral range prediction multiple times if run_p: - print(f"Running integral range prediction on {cur_modes} mode(s)") + print(f'Running integral range prediction on {cur_modes} mode(s)') for run_number in range(num_runs, 10): - print(f"Run number {run_number}\n") - run_microlib_statistics( - cur_modes, run_s=False, run_p=True, run_number=run_number - ) + print(f'Run number {run_number}\n') + run_microlib_statistics(cur_modes, run_s=False, run_p=True, run_number=run_number) -if __name__ == "__main__": - main_run_microlib_statistics(cur_modes=["3D"], run_s=False, run_p=True, num_runs=9) +if __name__ == '__main__': + main_run_microlib_statistics(cur_modes=['3D'], run_s=False, run_p=True, num_runs=9) diff --git 
a/representativity/correction_fitting/model_params.py b/representativity/correction_fitting/model_params.py old mode 100755 new mode 100644 index d382a79..98946d5 --- a/representativity/correction_fitting/model_params.py +++ b/representativity/correction_fitting/model_params.py @@ -1,5 +1,5 @@ import numpy as np -import representativity.old.util as util +import util import json from scipy.stats import norm from scipy.optimize import minimize @@ -20,35 +20,32 @@ # pred_err_sa = np.array([data[n]['pred_err_sa'] for n in data.keys()]) gen_data = datafin["generated_data"] -conf = 0.95 +conf=0.95 for micro_n in gen_data: cur_micro = gen_data[micro_n] - tpc_vf_dist, tpc_vf = np.arange(100 + 1, dtype=np.float64), cur_micro["tpc_vf"] + tpc_vf_dist, tpc_vf = np.arange(100+1, dtype=np.float64), cur_micro["tpc_vf"] pred_IR = util.tpc_to_ir(tpc_vf_dist, tpc_vf) - + k = 1000 - m_pred = util.ns_from_dims([np.array([1000, 1000])], pred_IR) - m_statistical = util.ns_from_dims([np.array([1000, 1000])], cur_micro["ir_vf"]) + m_pred = util.ns_from_dims([np.array([1000,1000])], pred_IR) + m_statistical = util.ns_from_dims([np.array([1000,1000])], cur_micro["ir_vf"]) vf = cur_micro["vf"] z = norm.interval(conf)[1] - pred_err = z * ((vf * (1 - vf) / m_pred[0]) ** 0.5) / vf * 100 - statistical_err = z * ((vf * (1 - vf) / m_statistical[0]) ** 0.5) / vf * 100 + pred_err = z*((vf*(1-vf)/m_pred[0])**0.5)/vf*100 + statistical_err = z*((vf*(1-vf)/m_statistical[0])**0.5)/vf*100 predicted_IR_err_vf.append(pred_err) stat_analysis_IR_err_vf.append(statistical_err) # print(stats.norm.interval(conf, scale=std_bern)[1], std_bern) -predicted_IR_err_vf, stat_analysis_IR_err_vf = np.array(predicted_IR_err_vf), np.array( - stat_analysis_IR_err_vf -) - +predicted_IR_err_vf, stat_analysis_IR_err_vf = np.array(predicted_IR_err_vf), np.array(stat_analysis_IR_err_vf) def scatter_plot_IR(stat_IR, pred_IR, std): - plt.scatter(stat_IR, pred_IR, s=4, label="Predictions") - x = np.arange(0, 20) - plt.plot(x, x, label="Ideal fit", c="orange") - plt.xlabel("VF error from statistical analysis") - plt.ylabel("VF error from tpc analysis") + plt.scatter(stat_IR, pred_IR, s=4, label='Predictions') + x = np.arange(0,20) + plt.plot(x,x,label = 'Ideal fit', c='orange') + plt.xlabel('VF error from statistical analysis') + plt.ylabel('VF error from tpc analysis') plt.locator_params(nbins=4) plt.xticks() ax = plt.gca() @@ -56,19 +53,16 @@ def scatter_plot_IR(stat_IR, pred_IR, std): # print(err) # ax.plot(x - x*err, x, c='black', ls='--', linewidth=1) # ax.plot(x ,x -x*err, label = f'95% confidence ', c='black', ls='--', linewidth=1) - ax.set_aspect("equal", adjustable="box") - plt.legend(loc="lower right") - plt.savefig("ir_pred.png") + ax.set_aspect('equal', adjustable='box') + plt.legend(loc='lower right') + plt.savefig('ir_pred.png') plt.show() - # it's not clear where does this 3 is coming from initial_guess = np.array([3, 0]) # slope and intercept initial guess minimise_args = (predicted_IR_err_vf, stat_analysis_IR_err_vf) -bounds = [(-10, 10), (-10, 10)] -best_slope_and_intercept = minimize( - util.mape_linear_objective, initial_guess, args=minimise_args, bounds=bounds -) +bounds = [(-10,10),(-10,10)] +best_slope_and_intercept = minimize(util.mape_linear_objective, initial_guess, args=minimise_args, bounds=bounds) print(best_slope_and_intercept) # scatter_plot_IR(stat_analysis_IR_vf, predicted_IR_vf) @@ -76,13 +70,14 @@ def scatter_plot_IR(stat_IR, pred_IR, std): slope, intercept = best_slope_and_intercept.x new_pred_err_vf = 
util.linear_fit(predicted_IR_err_vf, slope, intercept) -model_errors = (stat_analysis_IR_err_vf - new_pred_err_vf) / stat_analysis_IR_err_vf +model_errors = (stat_analysis_IR_err_vf - new_pred_err_vf)/stat_analysis_IR_err_vf mu, std = norm.fit(model_errors) scatter_plot_IR(stat_analysis_IR_err_vf, new_pred_err_vf, std) -print(f"number of micros = {len(stat_analysis_IR_err_vf)}") -print(f"mu = {mu}") -print(f"std = {std}") +print(f'number of micros = {len(stat_analysis_IR_err_vf)}') +print(f'mu = {mu}') +print(f'std = {std}') print(util.mape_linear_objective([2.7, intercept], *minimise_args)) + diff --git a/representativity/correction_fitting/prediction_error.py b/representativity/correction_fitting/prediction_error.py old mode 100755 new mode 100644 index b027686..c48cea7 --- a/representativity/correction_fitting/prediction_error.py +++ b/representativity/correction_fitting/prediction_error.py @@ -1,7 +1,7 @@ import json import numpy as np import time -from representativity.old import util +from representativity import util from scipy.stats import norm import matplotlib.pyplot as plt from scipy.optimize import minimize @@ -21,7 +21,7 @@ def error_by_size_estimation(dim, run_number=0, std_not_cls=True): edge_lengths_pred = data_dim["edge_lengths_pred"] stds = [] for edge_length in edge_lengths_pred: - _, _, err, std, pfs = comparison_results( + _, _, _, err, std, pfs = comparison_results( data_dim, micro_names, dim, @@ -50,40 +50,36 @@ def comparison_results( ): micros_data = [data_dim[n] for n in micro_names] pfs = np.array([m_data["vf"] for m_data in micros_data]) - fit_data_all = np.array([m_data["fit_ir_vf"] for m_data in micros_data]) - fit_err_pf = np.array([m_data["fit_err_vf"][edge_length] for m_data in micros_data]) - pred_data_all = np.array( + fit_cls_all = np.array([m_data["fit_ir_vf"] for m_data in micros_data]) + pred_cls_all = np.array( [ m_data[f"run_{run_number}"]["pred_ir_vf"][edge_length] for m_data in micros_data ] ) - # pred_ir_oi_pf = np.array([m_data[f'run_{run_number}']['pred_ir_one_im_fit_vf'][edge_length] for m_data in micros_data]) - - pred_err_pf = np.array( + pred_cls_oi_all = np.array( [ - m_data[f"run_{run_number}"]["pred_err_vf"][edge_length] + m_data[f"run_{run_number}"]["pred_ir_one_im_fit_vf"][edge_length] for m_data in micros_data ] ) + if std_not_cls: pfs = np.array(pfs) pfs_one_minus_pfs = pfs * (1 - pfs) dim_int = int(dim[0]) edge_length = int(edge_length) - pred_data_all = ( - (pred_data_all / edge_length) ** dim_int * pfs_one_minus_pfs + pred_cls_all = ( + (pred_cls_all / edge_length) ** dim_int * pfs_one_minus_pfs ) ** 0.5 - fit_data_all = ( - (fit_data_all / edge_length) ** dim_int * pfs_one_minus_pfs + fit_cls_all = ( + (fit_cls_all / edge_length) ** dim_int * pfs_one_minus_pfs ) ** 0.5 - # ir_results = [fit_ir_pf, pred_ir_pf, pred_ir_oi_pf] - ir_results = [fit_data_all, pred_data_all] - err_results = [fit_err_pf, pred_err_pf] - pred_data = ir_results[1] - fit_data = ir_results[0] + # ir_results = [fit_data_all, pred_data_all, pred_ir_oi_pf] - errs = (pred_data - fit_data) / pred_data # percentage error of the prediction + errs = ( + pred_cls_all - fit_cls_all + ) / pred_cls_all # percentage error of the prediction errs = np.sort(errs) # easier to see the distribution of errors std = np.std(errs) z = norm.interval(0.9)[1] @@ -92,7 +88,7 @@ def comparison_results( # print(f'mean = {np.mean(errs)}') # print(f'mape = {np.mean(np.abs(errs))}') # print(f'error = {err}') - return pred_data, fit_data, err, std, pfs + return pred_cls_all, fit_cls_all, 
pred_cls_oi_all, err, std, pfs def pred_vs_fit_all_data(dim, edge_length, num_runs=5, std_not_cls=True): @@ -103,24 +99,26 @@ def pred_vs_fit_all_data(dim, edge_length, num_runs=5, std_not_cls=True): pfs_all = [] for i in range(num_runs): data_micro = data_micros(dim) - pred_data, fit_data, _, std, pfs = comparison_results( + pred_data, fit_data, pred_data_oi, _, std, pfs = comparison_results( *data_micro, dim, edge_length, i, std_not_cls=std_not_cls ) pred_data_all.append(pred_data) # pred_data_oi_all.append(pred_oi_data) fit_data_all.append(fit_data) + pred_data_oi_all.append(pred_data_oi) stds.append(std) pfs_all.append(pfs) pred_data_all = np.concatenate(pred_data_all) # pred_data_oi_all = np.concatenate(pred_data_oi_all) fit_data_all = np.concatenate(fit_data_all) + pred_data_oi_all = np.concatenate(pred_data_oi_all) pfs_all = np.concatenate(pfs_all) std = np.array(stds).sum(axis=0) / num_runs - return pred_data_all, fit_data_all, std, pfs_all + return pred_data_all, fit_data_all, pred_data_oi_all, std, pfs_all def plot_pred_vs_fit(dim, edge_length, num_runs=5, std_not_cls=True): - pred_data_all, fit_data_all, _, pfs = pred_vs_fit_all_data( + pred_data_all, fit_data_all, _, _, pfs = pred_vs_fit_all_data( dim, edge_length, num_runs, std_not_cls=std_not_cls ) pred_data_all, fit_data_all = np.array(pred_data_all), np.array(fit_data_all) @@ -204,7 +202,7 @@ def optimal_slopes(dim, num_runs=5): slopes = [] stds = [] for edge_length in edge_lengths_pred: - pred_data, fit_data, std, pfs = pred_vs_fit_all_data( + pred_data, fit_data, _, std, pfs = pred_vs_fit_all_data( dim, str(edge_length), num_runs ) stds.append(std) diff --git a/representativity/validation.py b/representativity/validation.py old mode 100755 new mode 100644 index 0dd68d7..4d3b8db --- a/representativity/validation.py +++ b/representativity/validation.py @@ -1,4 +1,4 @@ -from representativity.old import util +from representativity import util from porespy.generators import * import os import matplotlib.pyplot as plt @@ -9,55 +9,43 @@ def factors_to_params(args, im_shape): - """ + ''' This function is used to convert the factors of the parameters to the actual parameters. 
- """ + ''' l = np.mean(im_shape) size = np.prod(im_shape) - l_matching = [match for match in args if "_factor_l" in match] - size_matching = [match for match in args if "_factor_size" in match] + l_matching = [match for match in args if '_factor_l' in match] + size_matching = [match for match in args if '_factor_size' in match] for match in l_matching: - arg = match.split("_")[0] - args[arg] = int(l / args.pop(match)) + arg = match.split('_')[0] + args[arg] = int(l/args.pop(match)) for match in size_matching: - arg = match.split("_")[0] - args[arg] = int(size / args.pop(match)) + arg = match.split('_')[0] + args[arg] = int(size/args.pop(match)) return args - def get_ps_generators(): - """Returns a dictionary with the porespy generators and their arguments.""" - ps_generators = { - blobs: { - "blobiness_factor_l": [50, 100, 150, 200, 250], - "porosity": list(np.arange(0.1, 0.6, 0.1)), - }, - fractal_noise: { - "frequency": list(np.arange(0.015, 0.05, 0.01)), - "octaves": [2, 7, 12], - "uniform": [True], - "mode": ["simplex", "value"], - }, - # voronoi_edges: {'r': [1,2,3], 'ncells_factor_size': np.array([100,1000,10000])} - } + '''Returns a dictionary with the porespy generators and their arguments.''' + ps_generators = {blobs: {'blobiness_factor_l': [50, 100,150,200, 250], 'porosity': list(np.arange(0.1,0.6,0.1))}, + fractal_noise: {'frequency': list(np.arange(0.015,0.05,0.01)), 'octaves': [2,7,12], 'uniform': [True], 'mode': ['simplex', 'value']}, + # voronoi_edges: {'r': [1,2,3], 'ncells_factor_size': np.array([100,1000,10000])} + } return ps_generators - def get_gen_name(generator, value_comb): - """Returns the name of the generator with the given values.""" + '''Returns the name of the generator with the given values.''' value_comb = list(value_comb) for i in range(len(value_comb)): value = value_comb[i] if isinstance(value, float): value_comb[i] = np.round(value, 3) value_comb = list(np.array(value_comb).astype(str)) - value_comb = "_".join(value_comb) - return f"{generator.__name__}_{value_comb}" - + value_comb = '_'.join(value_comb) + return f'{generator.__name__}_{value_comb}' def get_ps_gen_names(): - """Returns a list with the names of the porespy generators.""" + '''Returns a list with the names of the porespy generators.''' ps_generators = get_ps_generators() gen_names = [] for generator, params in ps_generators.items(): @@ -65,11 +53,10 @@ def get_ps_gen_names(): gen_names.append(get_gen_name(generator, value_comb)) return gen_names - def json_validation_preprocessing(): - """ + ''' This function is used to prepare the json file for the validation - """ + ''' # Load the statistics file if os.path.exists("validation.json"): @@ -85,34 +72,31 @@ def json_validation_preprocessing(): # TODO we need to see that the blobiness_factor is linear and ncells is quadratic or cubic. - modes = ["2D", "3D"] + modes = ['2D', '3D'] for mode in modes: - if f"validation_{mode}" not in all_data: - all_data[f"validation_{mode}"] = {} + if f'validation_{mode}' not in all_data: + all_data[f'validation_{mode}'] = {} for gen_name in gen_names: - if gen_name not in all_data[f"validation_{mode}"]: - all_data[f"validation_{mode}"][gen_name] = {} + if gen_name not in all_data[f'validation_{mode}']: + all_data[f'validation_{mode}'][gen_name] = {} # for v_name in v_names: # if v_name not in all_data[f'validation_{mode}'][gen_name]: # all_data[f'validation_{mode}'][gen_name][v_name] = {} # Large im sizes for stat. 
analysis: - all_data["validation_2D"]["large_im_size"] = [10000, 10000] # TODO make this bigger - all_data["validation_3D"]["large_im_size"] = [3000, 3000, 3000] + all_data['validation_2D']['large_im_size'] = [10000, 10000] # TODO make this bigger + all_data['validation_3D']['large_im_size'] = [3000, 3000, 3000] # Edge lengths for the experimental statistical analysis: - all_data["validation_2D"]["edge_lengths_fit"] = list( - range(500, 1000, 20) - ) # TODO change back to 500 - all_data["validation_3D"]["edge_lengths_fit"] = list(range(350, 450, 10)) + all_data['validation_2D']['edge_lengths_fit'] = list(range(500, 1000, 20)) # TODO change back to 500 + all_data['validation_3D']['edge_lengths_fit'] = list(range(350, 450, 10)) # Edge lengths for the predicted integral range: - all_data["validation_2D"]["edge_lengths_pred"] = [600, 800, 1000, 1200, 1400] - all_data["validation_3D"]["edge_lengths_pred"] = [300, 350, 400] + all_data['validation_2D']['edge_lengths_pred'] = [600, 800, 1000, 1200, 1400] + all_data['validation_3D']['edge_lengths_pred'] = [300, 350, 400] return all_data - def get_large_im_stack(generator, large_shape, large_im_repeats, args): large_ims = [] for _ in range(large_im_repeats): @@ -124,14 +108,13 @@ def get_large_im_stack(generator, large_shape, large_im_repeats, args): large_ims.append(large_im) return torch.stack(large_ims, axis=0) - def ps_error_prediction(dim, data, confidence, error_target): ps_generators = get_ps_generators() errs = [] true_clss = [] clss = [] one_im_clss = [] - large_shape = data[f"validation_{dim}"]["large_im_size"] + large_shape = data[f'validation_{dim}']['large_im_size'] large_im_repeats = 1 in_the_bounds_w_model = [] in_the_bounds_wo_model = [] @@ -140,86 +123,60 @@ def ps_error_prediction(dim, data, confidence, error_target): args = {key: value for key, value in zip(params.keys(), value_comb)} gen_name = get_gen_name(generator, value_comb) args = factors_to_params(args, im_shape=large_shape) - large_im_stack = get_large_im_stack( - generator, large_shape, large_im_repeats, args - ) + large_im_stack = get_large_im_stack(generator, large_shape, large_im_repeats, args) true_pf = torch.mean(large_im_stack) - edge_lengths_fit = data[f"validation_{dim}"]["edge_lengths_fit"] - true_cls = util.stat_analysis_error( - large_im_stack, true_pf, edge_lengths_fit - ) - print(f"Generator {gen_name} with {args}:") - print(f"True cls: {true_cls}") - data[f"validation_{dim}"][gen_name]["true_cls"] = true_cls - edge_lengths_pred = data[f"validation_{dim}"]["edge_lengths_pred"] + edge_lengths_fit = data[f'validation_{dim}']['edge_lengths_fit'] + true_cls = util.stat_analysis_error(large_im_stack, true_pf, edge_lengths_fit) + print(f'Generator {gen_name} with {args}:') + print(f'True cls: {true_cls}') + data[f'validation_{dim}'][gen_name]['true_cls'] = true_cls + edge_lengths_pred = data[f'validation_{dim}']['edge_lengths_pred'] for edge_length in edge_lengths_pred: for _ in range(20): - true_error = util.bernouli_from_cls( - true_cls, true_pf, [edge_length] * int(dim[0]) - ) - start_idx = [ - np.random.randint(0, large_shape[i] - edge_length) - for i in range(int(dim[0])) - ] - end_idx = [start_idx[i] + edge_length for i in range(int(dim[0]))] - small_im = large_im_stack[0][ - start_idx[0] : end_idx[0], start_idx[1] : end_idx[1] - ] + true_error = util.bernouli_from_cls(true_cls, true_pf, [edge_length]*int(dim[0])) + start_idx = [np.random.randint(0, large_shape[i]-edge_length) for i in range(int(dim[0]))] + end_idx = [start_idx[i]+edge_length for i in 
range(int(dim[0]))] + small_im = large_im_stack[0][start_idx[0]:end_idx[0], start_idx[1]:end_idx[1]] + # np.save(f'./small_im_{gen_name}_{args}_{edge_length}.npy', small_im) small_im_pf = torch.mean(small_im) - one_im_stat_analysis_cls = util.one_img_stat_analysis_error( - small_im, small_im.mean() - ) - print(f"One image stat analysis cls: {one_im_stat_analysis_cls}") + one_im_stat_analysis_cls = util.one_img_stat_analysis_error(small_im, small_im.mean()) + print(f'One image stat analysis cls: {one_im_stat_analysis_cls}') one_im_clss.append(one_im_stat_analysis_cls) for i in range(2): with_model = i == 0 - im_err, l_for_err_target, cls = util.make_error_prediction( - small_im, - conf=confidence, - err_targ=error_target, - model_error=with_model, - n_divisions=301, - ) + im_err, l_for_err_target, cls = util.make_error_prediction(small_im, + conf=confidence, err_targ=error_target, model_error=with_model, n_divisions=301) true_clss.append(true_cls) clss.append(cls) - bounds = [ - (1 - im_err) * small_im_pf, - (1 + im_err) * small_im_pf, - ] - print(f"Bounds: {bounds}") - print(f"True PF: {true_pf}") + bounds = [(1-im_err)*small_im_pf, (1+im_err)*small_im_pf] + print(f'Bounds: {bounds}') + print(f'True PF: {true_pf}') if true_pf >= bounds[0] and true_pf <= bounds[1]: if with_model: in_the_bounds_w_model.append(1) else: in_the_bounds_wo_model.append(1) - else: + else: if with_model: in_the_bounds_w_model.append(0) else: - in_the_bounds_wo_model.append(0) + in_the_bounds_wo_model.append(0) if with_model: - print("With model:") - print( - f"current right percentage: {np.mean(in_the_bounds_w_model)}" - ) + print('With model:') + print(f'current right percentage: {np.mean(in_the_bounds_w_model)}') else: - print("Without model:") - print( - f"current right percentage: {np.mean(in_the_bounds_wo_model)}" - ) - print(f"edge_length {edge_length}:") - print(f"cls: {cls}") - print(f"true error: {true_error[0]:.2f}") - print(f"error: {im_err*100:.2f}\n") - print(f"Length for error target: {l_for_err_target}") - if (in_the_bounds_wo_model[-1] == 1) and ( - in_the_bounds_w_model[-1] == 0 - ): - print("The model is not working properly. Exiting...") + print('Without model:') + print(f'current right percentage: {np.mean(in_the_bounds_wo_model)}') + print(f'edge_length {edge_length}:') + print(f'cls: {cls}') + print(f'true error: {true_error[0]:.2f}') + print(f'error: {im_err*100:.2f}\n') + print(f'Length for error target: {l_for_err_target}') + if (in_the_bounds_wo_model[-1] == 1) and (in_the_bounds_w_model[-1] == 0): + print('The model is not working properly. 
                        os.sys.exit()
-        print("\n")
-
+        print('\n')
+
     # plt.imshow(im[150:350,150:350])
     # plt.title(f'{generator.__name__} with {args}')
     # print(f'Error: {100*im_err:.2f} %')
@@ -231,15 +188,12 @@ def ps_error_prediction(dim, data, confidence, error_target):
     #     plt.close()
     return errs, true_clss, clss, one_im_clss

-
-if __name__ == "__main__":
-    shape = [1000, 1000]
+if __name__ == '__main__':
+    shape = [1000,1000]
     all_data = json_validation_preprocessing()
-    dim = "2D"
+    dim = '2D'
    # get porespy generators:
-    errs, true_clss, clss, one_im_clss = ps_error_prediction(
-        dim, all_data, confidence=0.95, error_target=0.05
-    )
+    errs, true_clss, clss, one_im_clss = ps_error_prediction(dim, all_data, confidence=0.95, error_target=0.05)
     # plt.scatter(true_clss, clss, label='CLS')
     # plt.scatter(true_clss, one_im_clss, label='One image stat analysis')
     # max_value = max(max(true_clss), max(clss), max(one_im_clss))
@@ -248,3 +202,6 @@ def ps_error_prediction(dim, data, confidence, error_target):
     # plt.ylabel('CLS')
     # plt.legend()
     # plt.show()
+
+
+
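[Editor's note, not part of the patch] The loop above is, at heart, a coverage test: ps_error_prediction crops random sub-images from one very large "ground truth" image, asks the model for a relative error bound on each crop's phase fraction, and counts how often the large image's phase fraction falls inside the implied interval. A well-calibrated predictor should land near the requested confidence (0.95 here). A minimal sketch of that check, with a hypothetical predict_rel_error callable standing in for util.make_error_prediction:

    import numpy as np

    def coverage_rate(large_im, true_pf, edge_length, predict_rel_error, n_trials=20):
        # crop random windows, predict a bound for each, count hits
        hits = []
        for _ in range(n_trials):
            i, j = (np.random.randint(0, s - edge_length) for s in large_im.shape)
            small = large_im[i:i + edge_length, j:j + edge_length]
            pf = small.mean()
            rel_err = predict_rel_error(small)  # e.g. 0.05 for a +/-5 % bound
            lower, upper = (1 - rel_err) * pf, (1 + rel_err) * pf
            hits.append(lower <= true_pf <= upper)
        # approaches the requested confidence level if the predictor is calibrated
        return float(np.mean(hits))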
diff --git a/tests/tpc_expectation_test.py b/tests/tpc_expectation_test.py
old mode 100755
new mode 100644
index 3fac312..0e27d37
--- a/tests/tpc_expectation_test.py
+++ b/tests/tpc_expectation_test.py
@@ -1,4 +1,4 @@
-from representativity.old import util
+from representativity import util
 import torch
 import matplotlib.pyplot as plt
 import matplotlib.mlab as mlab
@@ -9,86 +9,80 @@ import time
 from scipy.stats import norm

-
 def generate_sg_tpc(netG, mode, imsize):
-    """
+    '''
     This function is used to predict the integral range of the microstructure.
-    """
-    lf = imsize // 32 + 2  # the size of G's input
-    single_img = util.generate_image(netG, lf=lf, threed=mode == "3D", reps=1)
+    '''
+    lf = imsize//32 + 2  # the size of G's input
+    single_img = util.generate_image(netG, lf=lf, threed=mode=='3D', reps=1)
     if single_img.any():
         single_img = single_img.cpu()[0]
         dims = len(single_img.shape)
         tpc = util.tpc_radial(single_img, threed=dims == 3)
     return tpc

-
 def tpc_by_radius(tpc):
     tpc = np.array(tpc)
-    middle_idx = np.array(tpc.shape) // 2
-    pf = tpc[tuple(map(slice, middle_idx, middle_idx + 1))].item()
+    middle_idx = np.array(tpc.shape)//2
+    pf = tpc[tuple(map(slice, middle_idx, middle_idx+1))].item()
     # print(f'pf squared = {np.round(pf**2, 5)}')
     dist_arr = np.indices(tpc.shape)
     dist_arr = np.abs((dist_arr.T - middle_idx.T).T)
-    img_volume = np.prod(middle_idx + 1)
-    vec_arr = np.prod(middle_idx[0] + 1 - dist_arr, axis=0) / img_volume
+    img_volume = np.prod(middle_idx+1)
+    vec_arr = np.prod(middle_idx[0]+1 - dist_arr, axis=0)/img_volume
     dist_arr = np.sqrt(np.sum(dist_arr**2, axis=0))
-    end_dist = int(
-        np.max(dist_arr)
-    )  # TODO this needs to be changed to the maximum distanse from the center 200*sqrt(2)
-    sum_circle = np.sum(vec_arr[dist_arr <= end_dist])
+    end_dist = int(np.max(dist_arr))  # TODO this needs to be changed to the maximum distance from the center 200*sqrt(2)
+    sum_circle = np.sum(vec_arr[dist_arr<=end_dist])
     n_bins = 81
-    jump = sum_circle / n_bins
+    jump = sum_circle/n_bins
     dist_indices = [0]
     tpc_res = [pf]  # tpc_res is the tpc result of the tpc by growing radii
-    tpc_vec = vec_arr * tpc
-    for i in range(0, end_dist + 1, 1):
-        # for i in range(len(dist_indices)-1):
+    tpc_vec = vec_arr*tpc
+    for i in range(0, end_dist+1, 1):
+    # for i in range(len(dist_indices)-1):
         # dist_bool = (dist_arr>dist_indices[i]) & (dist_arr<=dist_indices[i+1])
-        dist_bool = (dist_arr >= dist_indices[-1]) & (dist_arr < i)
+        dist_bool = (dist_arr>=dist_indices[-1]) & (dist_arr<i)
         if np.sum(vec_arr[dist_bool]) > jump or i == end_dist:
             dist_indices.append(i)
-            tpc_res.append(np.sum(tpc_vec[dist_bool]) / np.sum(vec_arr[dist_bool]))
+            tpc_res.append(np.sum(tpc_vec[dist_bool])/np.sum(vec_arr[dist_bool]))
     return pf, pf**2, tpc_res, dist_indices

-
 def tpc_check():
     all_data, micros, netG, v_names, run_v_names = ms.json_preprocessing()
-
-    edge_lengths_pred = all_data[f"data_gen_2D"]["edge_lengths_pred"]
+
+    edge_lengths_pred = all_data[f'data_gen_2D']['edge_lengths_pred']
     for j, p in enumerate(micros):
         try:
             netG.load_state_dict(torch.load(p + "_Gen.pt"))
         except:  # if the image is grayscale it raises an exception because there's only 1 channel
             continue
-        n = p.split("/")[-1]
-        args = (edge_lengths_pred[10], netG, "2D")
+        n = p.split('/')[-1]
+        args = (edge_lengths_pred[10], netG, '2D')
         tpc_results, pfs, pf_squares = tpcs_radius(generate_sg_tpc, args)
-        # print(f'{len(pf_squares)} / {test_runs} done for {n}')
-        mean_tpc_results = np.mean(np.stack(tpc_results, axis=0), axis=0)
-        tpc_fig.plot(mean_tpc_results, label="mean tpc")
-        real_pf_squared = np.mean(pfs) ** 2
+        # print(f'{len(pf_squares)} / {test_runs} done for {n}')
+        mean_tpc_results = np.mean(np.stack(tpc_results,axis=0), axis=0)
+        tpc_fig.plot(mean_tpc_results, label='mean tpc')
+        real_pf_squared = np.mean(pfs)**2
         # print(f'real pf squared = {np.round(real_pf_squared, 6)}')
         # print(f'pf squared = {np.round(np.mean(pf_squares), 6)}')
-        tpc_fig.plot([real_pf_squared] * len(mean_tpc_results), label="real pf squared")
-        tpc_fig.plot([np.mean(pf_squares)] * len(mean_tpc_results), label="pf squared")
-        tpc_fig.xlabel("Growing Radius")
-        tpc_fig.ylabel("TPC")
+        tpc_fig.plot([real_pf_squared]*len(mean_tpc_results), label='real pf squared')
+        tpc_fig.plot([np.mean(pf_squares)]*len(mean_tpc_results), label='pf squared')
+        tpc_fig.xlabel('Growing Radius')
+        tpc_fig.ylabel('TPC')
         tpc_fig.legend()
-        tpc_fig.savefig(f"tpc_results/{n}_tpc.png")
+        tpc_fig.savefig(f'tpc_results/{n}_tpc.png')
         tpc_fig.close()
         # print(f'end tpc = {np.round(np.mean(end_tpc_results), 6)}')
         # print(f'end tpc std = {np.round(np.std(end_tpc_results), 6)}\n')
-        print(f"{p} done\n")
-
+        print(f'{p} done\n')

 def tpcs_radius(gen_func, test_runs, args):
     tpc_results = []
     tpcs = []
     pf_squares = []
     pfs = []
-    for _ in [args[0]] * test_runs:
+    for _ in [args[0]]*test_runs:
         img_tpc = gen_func(*args)
         tpcs.append(img_tpc)
         pf, pf_square, tpc_res, distances = tpc_by_radius(img_tpc)
@@ -96,47 +90,41 @@ def tpcs_radius(gen_func, test_runs, args):
         tpc_results.append(np.array(tpc_res))
         pf_squares.append(pf_square)
         if (len(pf_squares) % 10) == 0:
-            print(f"{len(pf_squares)} / {test_runs} done.")
+            print(f'{len(pf_squares)} / {test_runs} done.')
     return tpc_results, pfs, pf_squares, distances

-
 def make_tpc(img):
     dims = len(img.shape)
     tpc = util.tpc_radial(img, threed=dims == 3, periodic=False)
     return tpc

-
 def make_circles_tpc(imsize, circle_radius, pf):
     img = make_circles_2D(imsize, circle_radius, pf)
     return make_tpc(img)

-
 def make_circles_2D(imsize, circle_radius, pf):
-    """
-    This function is used to create an image with circles of the same size,
+    '''
+    This function is used to create an image with circles of the same size,
     which are randomly placed in the image. The pf is the volume fraction
     of the image, in expectation.
-    """
-    img = np.zeros([imsize + 2 * circle_radius] * 2)
-    circle_area = np.pi * circle_radius**2
+    '''
+    img = np.zeros([imsize+2*circle_radius]*2)
+    circle_area = np.pi*circle_radius**2
     # the probability of a pixel being in a circle (1 minus not appearing in any
     # circle around it):
-    p_of_center_circle = 1 - (1 - pf) ** (1 / circle_area)
+    p_of_center_circle = 1 - (1-pf)**(1/circle_area)
     circle_centers = np.random.rand(*img.shape) < p_of_center_circle
     circle_centers = np.array(np.where(circle_centers))
     time_before_circles = time.time()
     fill_img_with_circles(img, circle_radius, circle_centers)
     return img[circle_radius:-circle_radius, circle_radius:-circle_radius]

-
 def fill_img_with_circles(img, circle_radius, circle_centers):
-    """Fills the image with circles of the same size given by the cicle_radius,
-    with the centers given in circle_centers."""
+    '''Fills the image with circles of the same size given by the circle_radius,
+    with the centers given in circle_centers.'''
     dist_arr = np.indices(img.shape)
     dist_arr_T = dist_arr.T
-    dist_arr_reshape = dist_arr_T.reshape(
-        np.prod(dist_arr_T.shape[:2]), dist_arr_T.shape[-1]
-    )
+    dist_arr_reshape = dist_arr_T.reshape(np.prod(dist_arr_T.shape[:2]), dist_arr_T.shape[-1])
     distances = cdist(dist_arr_reshape, circle_centers.T)
     if distances.size == 0:
         return img
@@ -144,161 +132,100 @@ def fill_img_with_circles(img, circle_radius, circle_centers):
     img[min_distances <= circle_radius] = 1
     return img

-
-if __name__ == "__main__":
+if __name__ == '__main__':
     # tpc_check()
-    pfs = []
+    pfs =[]
     imsize = 100
     circle_size = 20
     pf = 0.5
-    args = (imsize, circle_size, pf)
+    args = (imsize, circle_size, pf)
     run_tests = 10000
-    fig, axs = plt.subplot_mosaic(
-        [
-            ["circle0", "circle1", "TPC figure", "TPC figure"],
-            ["circle2", "pf_hist", "TPC figure", "TPC figure"],
-        ]
-    )
-    fig.set_size_inches(16, 16 * 7 / 16)
+    fig, axs = plt.subplot_mosaic([['circle0', 'circle1', 'TPC figure', 'TPC figure'],
+                                   ['circle2', 'pf_hist', 'TPC figure', 'TPC figure']])
+    fig.set_size_inches(16,16*7/16)
     # axs['circle1'].set_aspect('equal')
     # tpc_fig.set_aspect('equal')
-    tpc_fig = axs["TPC figure"]
-    plt.figtext(
-        0.31,
-        0.9,
-        f"4 random {imsize}$^2$ images with $E[\Phi]$ = {pf} and circle diameter = {circle_size*2}",
-        ha="center",
-        va="center",
-        fontsize=12,
-    )
+    tpc_fig = axs['TPC figure']
+    plt.figtext(0.31, 0.9, f'4 random {imsize}$^2$ images with $E[\Phi]$ = {pf} and circle diameter = {circle_size*2}', ha='center', va='center', fontsize=12)

-    plt.suptitle(
-        f"Visual presentation of the proof of Theorem 2 in the simple case of random circles."
-    )
+    plt.suptitle(f'Visual presentation of the proof of Theorem 2 in the simple case of random circles.')
     n_circle_im = 3
     circle_ims = [make_circles_2D(imsize, circle_size, pf) for _ in range(n_circle_im)]
     for i in range(n_circle_im):
-        axs[f"circle{i}"].imshow(circle_ims[i], cmap="gray")
-        circle_pf = np.round(circle_ims[i].mean(), 3)
-        axs[f"circle{i}"].set_ylabel(f"$\Phi(\omega_{i})={circle_pf}$")
-        axs[f"circle{i}"].set_xlabel(f"$\omega_{i}$")
-        axs[f"circle{i}"].set_xticks([])
-        axs[f"circle{i}"].set_yticks([])
-
+        axs[f'circle{i}'].imshow(circle_ims[i], cmap='gray')
+        circle_pf = np.round(circle_ims[i].mean(),3)
+        axs[f'circle{i}'].set_ylabel(f'$\Phi(\omega_{i})={circle_pf}$')
+        axs[f'circle{i}'].set_xlabel(f'$\omega_{i}$')
+        axs[f'circle{i}'].set_xticks([])
+        axs[f'circle{i}'].set_yticks([])
+
+
     # im0_tpc = make_tpc(circle_ims[0])
     # im0_tpc_by_radius = tpc_by_radius(im0_tpc)[2]
     # tpc_fig.plot(im0_tpc_by_radius, label='TPC of $\omega_0$')
-
-    tpc_results, pfs, pf_squares, dist_len = tpcs_radius(
-        make_circles_tpc, run_tests, args=args
-    )
+
+    tpc_results, pfs, pf_squares, dist_len = tpcs_radius(make_circles_tpc, run_tests, args=args)
     bins = 30
-    axs["pf_hist"].hist(pfs, bins=bins, density=True)
-    axs["pf_hist"].set_xlim(0, 1)
+    axs['pf_hist'].hist(pfs, bins=bins, density=True)
+    axs['pf_hist'].set_xlim(0, 1)
     # add a 'best fit' line
     mu, sigma = norm.fit(pfs)
-    print(f"mu = {mu}, sigma = {sigma}")
+    print(f'mu = {mu}, sigma = {sigma}')
     y = norm.pdf(np.linspace(0, 1, 100), mu, sigma)
-    axs["pf_hist"].plot(np.linspace(0, 1, 100), y, linewidth=2)
+    axs['pf_hist'].plot(np.linspace(0, 1, 100), y, linewidth=2)
     dist_len = np.array(dist_len)
     mean_tpc = np.mean(tpc_results, axis=0)
-    tpc_fig.plot(mean_tpc, label="Mean TPC")
-    len_tpc = len(tpc_results[0])
+    tpc_fig.plot(mean_tpc, label='Mean TPC')
+    len_tpc = len(tpc_results[0])
     pf_squared = np.mean(pf_squares)
-    label_pf_squared = f"$E[\Phi^2]$ = {np.round(pf_squared, 4)}"
-    tpc_fig.plot([pf_squared] * len_tpc, label=label_pf_squared)
+    label_pf_squared = f'$E[\Phi^2]$ = {np.round(pf_squared, 4)}'
+    tpc_fig.plot([pf_squared]*len_tpc, label=label_pf_squared)
     # print(f'pf squared = {np.round(pf_squared, 7)}')
-    true_pf_squared = np.mean(pfs) ** 2
-    label_true_pf_squared = f"$E[\Phi]^2$ = {np.round(true_pf_squared, 4)}"
-    tpc_fig.plot([true_pf_squared] * len_tpc, label=label_true_pf_squared)
+    true_pf_squared = np.mean(pfs)**2
+    label_true_pf_squared = f'$E[\Phi]^2$ = {np.round(true_pf_squared, 4)}'
+    tpc_fig.plot([true_pf_squared]*len_tpc, label=label_true_pf_squared)
     # print(f'true pf squared = {np.round(true_pf_squared, 7)}')
-
-    tpc_fig.axvline(
-        x=np.where(dist_len == circle_size * 2)[0][0],
-        color="black",
-        linestyle="--",
-        label="Circle Diameter",
-    )
+
+    tpc_fig.axvline(x=np.where(dist_len==circle_size*2)[0][0], color='black', linestyle='--', label='Circle Diameter')
     len_tpc = len(tpc_results[0])
-    fill_1 = tpc_fig.fill_between(
-        np.arange(len_tpc),
-        mean_tpc,
-        [pf_squared] * len_tpc,
-        alpha=0.2,
-        where=mean_tpc >= pf_squared,
-        label=f"Area $A_1$",
-    )
-    fill_2 = tpc_fig.fill_between(
-        np.arange(len_tpc),
-        [pf_squared] * len_tpc,
-        mean_tpc,
-        alpha=0.2,
-        where=np.logical_and(
-            dist_len <= circle_size * 2, np.array(mean_tpc < pf_squared)
-        ),
-        label=f"Area $A_2$",
-    )
-    fill_3 = tpc_fig.fill_between(
-        np.arange(len_tpc),
-        [pf_squared] * len_tpc,
-        mean_tpc,
-        alpha=0.2,
-        where=dist_len >= circle_size * 2,
-        label=f"Area B",
-    )
+    fill_1 = tpc_fig.fill_between(np.arange(len_tpc), mean_tpc,[pf_squared]*len_tpc, alpha=0.2,
+            where=mean_tpc>=pf_squared,label = f'Area $A_1$')
+    fill_2 = tpc_fig.fill_between(np.arange(len_tpc), [pf_squared]*len_tpc,mean_tpc, alpha=0.2,
+            where=np.logical_and(dist_len<=circle_size*2, np.array(mean_tpc<pf_squared)),label = f'Area $A_2$')
+    fill_3 = tpc_fig.fill_between(np.arange(len_tpc), [pf_squared]*len_tpc,mean_tpc, alpha=0.2,
+            where=dist_len>=circle_size*2,label = f'Area B')
     text_jump = 0.017
-    tpc_fig.text(3, pf / 2, "$A_1$", fontsize=12)
-    tpc_fig.text(16.8, (pf_squared + true_pf_squared) / 2, "$A_2$", fontsize=12)
-    tpc_fig.text(40, (pf_squared + true_pf_squared) / 2 - 0.0005, "B", fontsize=12)
+    tpc_fig.text(3, pf/2, '$A_1$', fontsize=12)
+    tpc_fig.text(16.8, (pf_squared+true_pf_squared)/2, '$A_2$', fontsize=12)
+    tpc_fig.text(40, (pf_squared+true_pf_squared)/2 - 0.0005, 'B', fontsize=12)
     # tpc_fig.text(10, 0.079, '$\Phi$ calculates phase fraction.', fontsize=12)
     # tpc_fig.text(22, 0.068, 'The variance of $\Phi$ is:', fontsize=12)
-    tpc_fig.text(
-        22,
-        pf_squared + text_jump * 7,
-        r"How the model predicts the variance of $\Phi$:",
-        fontsize=12,
-    )
-    tpc_fig.text(
-        22, pf_squared + text_jump * 6, "$Var[\Phi]=E[\Phi^2]-E[\Phi]^2$", fontsize=12
-    )
-    tpc_fig.text(
-        22,
-        pf_squared + text_jump * 5,
-        r"$Var[\Phi]=\frac{1}{C_{40}}\cdot B$ (Normalization of B's width)",
-        fontsize=12,
-    )
-    tpc_fig.text(
-        22,
-        pf_squared + text_jump * 4,
-        r"$\sum_r{(E[T_r]-E[\Phi^2])}=0$, So",
-        fontsize=12,
-    )
-    tpc_fig.text(22, pf_squared + text_jump * 3, r"$A_1-A_2=B$", fontsize=12)
-    tpc_fig.text(22, pf_squared + text_jump * 2, "Which results in:", fontsize=12)
-    tpc_fig.text(
-        22,
-        pf_squared + text_jump * 1,
-        r"$Var[\Phi]=\frac{1}{C_{40}}\cdot (A_1-A_2)=E[\Psi]$",
-        fontsize=12,
-    )
-
-    tpc_fig.set_title(
-        f"Mean TPC of {run_tests} of these random {imsize}$^2$ circle images"
-    )
-
-    tpc_fig.set_ylim(true_pf_squared - 0.005, pf + 0.01)
+    tpc_fig.text(22, pf_squared+text_jump*7, r'How the model predicts the variance of $\Phi$:', fontsize=12)
+    tpc_fig.text(22, pf_squared+text_jump*6, '$Var[\Phi]=E[\Phi^2]-E[\Phi]^2$', fontsize=12)
+    tpc_fig.text(22, pf_squared+text_jump*5, r"$Var[\Phi]=\frac{1}{C_{40}}\cdot B$ (Normalization of B's width)", fontsize=12)
+    tpc_fig.text(22, pf_squared+text_jump*4, r'$\sum_r{(E[T_r]-E[\Phi^2])}=0$, So', fontsize=12)
+    tpc_fig.text(22, pf_squared+text_jump*3, r'$A_1-A_2=B$', fontsize=12)
+    tpc_fig.text(22, pf_squared+text_jump*2, 'Which results in:', fontsize=12)
+    tpc_fig.text(22, pf_squared+text_jump*1, r'$Var[\Phi]=\frac{1}{C_{40}}\cdot (A_1-A_2)=E[\Psi]$', fontsize=12)
+
+    tpc_fig.set_title(f'Mean TPC of {run_tests} of these random {imsize}$^2$ circle images')
+
+    tpc_fig.set_ylim(true_pf_squared-0.005, pf+0.01)
     dist_ticks = list(dist_len[:-5:5]) + [dist_len[-1]]
-    x_ticks_labels = list(np.arange(0, len_tpc - 5, 5)) + [len_tpc - 1]
+    x_ticks_labels = list(np.arange(0, len_tpc-5, 5)) + [len_tpc-1]
     tpc_fig.set_xticks(x_ticks_labels, dist_ticks)
-    tpc_fig.set_xlabel("TPC distance r")
-    tpc_fig.set_ylabel(f"Mean TPC $E[T_r]$", labelpad=-20)
+    tpc_fig.set_xlabel('TPC distance r')
+    tpc_fig.set_ylabel(f'Mean TPC $E[T_r]$', labelpad=-20)
     # tpc_fig.set_yscale('log')
-    tpc_fig.set_yticks(
-        [pf**2, np.round(pf_squared, 4), pf], [pf**2, np.round(pf_squared, 4), pf]
-    )
+    tpc_fig.set_yticks([pf**2, np.round(pf_squared, 4), pf], [pf**2, np.round(pf_squared, 4), pf])
     tpc_fig.legend()
-    plt.savefig(f"tpc_results/circles_tpc_visual_proof.pdf", format="pdf")
+    plt.savefig(f'tpc_results/circles_tpc_visual_proof.pdf', format='pdf')
+
+
+
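[Editor's note, not part of the patch] The center probability used by make_circles_2D follows from requiring the expected phase fraction to equal pf: a pixel stays empty only if no circle center lands within the disc of area A = pi*r^2 around it, so with independent per-pixel center probability p, P(pixel = 1) = 1 - (1 - p)^A, and solving for p gives p = 1 - (1 - pf)^(1/A), exactly the expression in the code. A quick Monte Carlo check of that expectation, written as an illustrative sketch independent of the repository code:

    import numpy as np
    from scipy.spatial.distance import cdist

    def mean_pf_of_circle_images(imsize=100, r=20, pf=0.5, trials=50):
        area = np.pi * r**2
        p_center = 1 - (1 - pf) ** (1 / area)
        pfs = []
        for _ in range(trials):
            shape = (imsize + 2 * r, imsize + 2 * r)  # pad so edge pixels see full discs
            img = np.zeros(shape)
            centers = np.argwhere(np.random.rand(*shape) < p_center)
            if len(centers):
                pixels = np.indices(shape).reshape(2, -1).T  # row-major pixel coords
                covered = cdist(pixels, centers).min(axis=1) <= r
                img.reshape(-1)[covered] = 1
            pfs.append(img[r:-r, r:-r].mean())
        return float(np.mean(pfs))  # should sit close to pf

Averaged over many images the phase fraction is unbiased, which is the E[Phi] = 0.5 that the pf_hist panel in the figure above is checking; the spread of that histogram is the Var[Phi] the annotated areas A_1, A_2, and B reconstruct from the mean TPC.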