From 35a877f25e76354f5b02a43a3e679b5b9be195ca Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 18 Dec 2024 17:26:24 +0100 Subject: [PATCH 01/28] Add documentation to ADA SVR --- doc_conf/references.bib | 17 ++++++++++++ hidimstat/adaptive_permutation_threshold.py | 29 ++++++++++++--------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/doc_conf/references.bib b/doc_conf/references.bib index 8fa0983..372bbdb 100644 --- a/doc_conf/references.bib +++ b/doc_conf/references.bib @@ -177,4 +177,21 @@ @article{liuFastPowerfulConditional2021 archiveprefix = {arxiv}, keywords = {Statistics - Methodology}, file = {/home/ahmad/Zotero/storage/8HRQZX3H/Liu et al. - 2021 - Fast and Powerful Conditional Randomization Testin.pdf;/home/ahmad/Zotero/storage/YFNDKN2B/2006.html} +} + +@article{gaonkar_deriving_2012, + title = {Deriving statistical significance maps for {SVM} based image classification and group comparisons}, + volume = {15}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3703958/}, + abstract = {Population based pattern analysis and classification for quantifying structural and functional differences between diverse groups has been shown to be a powerful tool for the study of a number of diseases, and is quite commonly used especially in neuroimaging. The alternative to these pattern analysis methods, namely mass univariate methods such as voxel based analysis and all related methods, cannot detect multivariate patterns associated with group differences, and are not particularly suitable for developing individual-based diagnostic and prognostic biomarkers. A commonly used pattern analysis tool is the support vector machine ({SVM}). Unlike univariate statistical frameworks for morphometry, analytical tools for statistical inference are unavailable for the {SVM}. In this paper, we show that null distributions ordinarily obtained by permutation tests using {SVMs} can be analytically approximated from the data. The analytical computation takes a small fraction of the time it takes to do an actual permutation test, thereby rendering it possible to quickly create statistical significance maps derived from {SVMs}. Such maps are critical for understanding imaging patterns of group differences and interpreting which anatomical regions are important in determining the classifier's decision.}, + pages = {723--730}, + number = {0}, + journaltitle = {Medical image computing and computer-assisted intervention : {MICCAI} ... International Conference on Medical Image Computing and Computer-Assisted Intervention}, + journal = {Med Image Comput Comput Assist Interv}, + author = {Gaonkar, Bilwaj and Davatzikos, Christos}, + urldate = {2024-12-16}, + year = {2012}, + pmid = {23285616}, + pmcid = {PMC3703958}, + file = {PubMed Central Full Text PDF:/home/likusch/Zotero/storage/DX8QQAF5/Gaonkar and Davatzikos - 2012 - Deriving statistical significance maps for SVM based image classification and group comparisons.pdf:application/pdf}, } \ No newline at end of file diff --git a/hidimstat/adaptive_permutation_threshold.py b/hidimstat/adaptive_permutation_threshold.py index 54c3cb1..df932ec 100644 --- a/hidimstat/adaptive_permutation_threshold.py +++ b/hidimstat/adaptive_permutation_threshold.py @@ -2,7 +2,10 @@ def ada_svr(X, y, rcond=1e-3): - """Statistical inference procedure presented in Gaonkar et al. [1]_. + """ + ADA-SVR: Adaptive Permutation Threshold Support Vector Regression + + Statistical inference procedure presented in :footcite:t:`gaonkar_deriving_2012`. Parameters ----------- @@ -15,6 +18,7 @@ def ada_svr(X, y, rcond=1e-3): rcond : float, optional (default=1e-3) Cutoff for small singular values. Singular values smaller than `rcond` * largest_singular_value are set to zero. + Deafult value is 1e-3. Returns ------- @@ -26,23 +30,22 @@ def ada_svr(X, y, rcond=1e-3): References ---------- - .. [1] Gaonkar, B., & Davatzikos, C. (2012, October). Deriving statistical - significance maps for SVM based image classification and group - comparisons. In International Conference on Medical Image Computing - and Computer-Assisted Intervention (pp. 723-730). Springer, Berlin, - Heidelberg. - """ + .. footbibliography:: + """ X = np.asarray(X) - K = np.linalg.pinv(np.dot(X, X.T), rcond=rcond) - sum_K = np.sum(K) - - L = -np.outer(np.sum(K, axis=0), np.sum(K, axis=1)) / sum_K - C = np.dot(X.T, K + L) + ## compute matrix C, (see eq.6 of [1]) + # invert matrix X*X' + XXT_inv = np.linalg.pinv(np.dot(X, X.T), rcond=rcond) + # partial computation of the 2nd term of the equation + sum_XXT_inv = np.sum(XXT_inv) + L = -np.outer(np.sum(XXT_inv, axis=0), np.sum(XXT_inv, axis=1)) / sum_XXT_inv + C = np.dot(X.T, XXT_inv + L) + ## compute vector W, (see eq.4 of [1]) beta_hat = np.dot(C, y) - + ## compute standard deviation of the distribution of W, (see eq.12 of [1]) scale = np.sqrt(np.sum(C**2, axis=1)) return beta_hat, scale From 45d9992038eefcad6701be9d5c5ea2386d0112df Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Wed, 18 Dec 2024 17:29:00 +0100 Subject: [PATCH 02/28] Change name of the file --- hidimstat/__init__.py | 2 +- hidimstat/{adaptive_permutation_threshold.py => ada_svr.py} | 0 hidimstat/test/test_adaptive_permutation_threshold.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename hidimstat/{adaptive_permutation_threshold.py => ada_svr.py} (100%) diff --git a/hidimstat/__init__.py b/hidimstat/__init__.py index cdc2188..24e0321 100644 --- a/hidimstat/__init__.py +++ b/hidimstat/__init__.py @@ -1,4 +1,4 @@ -from .adaptive_permutation_threshold import ada_svr +from .ada_svr import ada_svr from .clustered_inference import clustered_inference, hd_inference from .desparsified_lasso import desparsified_group_lasso, desparsified_lasso from .Dnn_learner_single import DnnLearnerSingle diff --git a/hidimstat/adaptive_permutation_threshold.py b/hidimstat/ada_svr.py similarity index 100% rename from hidimstat/adaptive_permutation_threshold.py rename to hidimstat/ada_svr.py diff --git a/hidimstat/test/test_adaptive_permutation_threshold.py b/hidimstat/test/test_adaptive_permutation_threshold.py index 6fca754..cbad161 100644 --- a/hidimstat/test/test_adaptive_permutation_threshold.py +++ b/hidimstat/test/test_adaptive_permutation_threshold.py @@ -5,7 +5,7 @@ import numpy as np from numpy.testing import assert_almost_equal -from hidimstat.adaptive_permutation_threshold import ada_svr +from hidimstat.ada_svr import ada_svr from hidimstat.scenario import multivariate_1D_simulation from hidimstat.stat_tools import pval_from_scale From e28b8b8546c8dcebacc59e605e2af3ca770464e2 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 19 Dec 2024 13:41:14 +0100 Subject: [PATCH 03/28] fix bug in example --- examples/plot_fmri_data_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/plot_fmri_data_example.py b/examples/plot_fmri_data_example.py index 25b89b8..dbe14c0 100644 --- a/examples/plot_fmri_data_example.py +++ b/examples/plot_fmri_data_example.py @@ -54,7 +54,7 @@ from sklearn.linear_model import Ridge from sklearn.utils import Bunch -from hidimstat.adaptive_permutation_threshold import ada_svr +from hidimstat.ada_svr import ada_svr from hidimstat.clustered_inference import clustered_inference from hidimstat.ensemble_clustered_inference import ensemble_clustered_inference from hidimstat.permutation_test import permutation_test, permutation_test_cv From 0a2fab41e1124d54eeb10450aaa4d1ed3ff1f650 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 19 Dec 2024 17:52:26 +0100 Subject: [PATCH 04/28] Fix some error in conf of sphinx --- doc_conf/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc_conf/conf.py b/doc_conf/conf.py index 0042a95..417bf98 100644 --- a/doc_conf/conf.py +++ b/doc_conf/conf.py @@ -217,9 +217,9 @@ "python": ("https://docs.python.org/3", None), "numpy": ("https://numpy.org/devdocs", None), "scipy": ("https://scipy.github.io/devdocs", None), - "matplotlib": ("https://matplotlib.org", None), + "matplotlib": ("https://matplotlib.org/stable/", None), "sklearn": ("https://scikit-learn.org/stable", None), - "numba": ("https://numba.pydata.org/numba-doc/latest", None), + "numba": ("https://numba.readthedocs.io/en/stable/", None), "joblib": ("https://joblib.readthedocs.io/en/latest", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "seaborn": ("https://seaborn.pydata.org/", None), @@ -228,7 +228,6 @@ examples_dirs = ["../examples"] gallery_dirs = ["auto_examples"] -import mne scrapers = ("matplotlib",) try: @@ -240,6 +239,7 @@ pass if any(x in scrapers for x in ("pyvista")): from traits.api import push_exception_handler + import mne push_exception_handler(reraise_exceptions=True) report_scraper = mne.report._ReportScraper() From 02adbb5784be79cf12f747ca46e7053f565fd8ca Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 19 Dec 2024 18:15:55 +0100 Subject: [PATCH 05/28] Remove all the warning and error for generate docstring --- doc_conf/conf.py | 2 +- .../plot_diabetes_variable_importance_example.py | 16 ++++++++-------- hidimstat/adaptive_permutation_threshold.py | 7 +++++-- hidimstat/knockoff_aggregation.py | 2 ++ hidimstat/knockoffs.py | 4 +++- hidimstat/loco.py | 5 +++-- hidimstat/multi_sample_split.py | 4 ++-- hidimstat/noise_std.py | 6 +++--- hidimstat/permutation_test.py | 6 +++--- hidimstat/scenario.py | 10 +++++----- hidimstat/standardized_svr.py | 2 +- hidimstat/stat_tools.py | 4 ++-- 12 files changed, 38 insertions(+), 30 deletions(-) diff --git a/doc_conf/conf.py b/doc_conf/conf.py index 417bf98..74002ef 100644 --- a/doc_conf/conf.py +++ b/doc_conf/conf.py @@ -90,7 +90,7 @@ # built documents. # # The short X.Y version. -from hidimstat._version import __version__ # noqa +from hidimstat import __version__ # The full version, including alpha/beta/rc tags. release = __version__ diff --git a/examples/plot_diabetes_variable_importance_example.py b/examples/plot_diabetes_variable_importance_example.py index 280f2b0..280f844 100644 --- a/examples/plot_diabetes_variable_importance_example.py +++ b/examples/plot_diabetes_variable_importance_example.py @@ -61,13 +61,13 @@ ############################################################################# # Load the diabetes dataset -# ------------------------------ +# ------------------------- diabetes = load_diabetes() X, y = diabetes.data, diabetes.target ############################################################################# # Fit a baseline model on the diabetes dataset -# ------------------------------ +# -------------------------------------------- # We use a Ridge regression model with a 10-fold cross-validation to fit the # diabetes dataset. @@ -88,7 +88,7 @@ print(f"Fold {i}: {mse}") ############################################################################# # Fit a baselien model on the diabetes dataset -# ------------------------------ +# -------------------------------------------- # We use a Ridge regression model with a 10-fold cross-validation to fit the # diabetes dataset. @@ -110,7 +110,7 @@ ############################################################################# # Measure the importance of variables using the CPI method -# ------------------------------ +# -------------------------------------------------------- cpi_importance_list = [] for i, (train_index, test_index) in enumerate(kf.split(X)): @@ -131,7 +131,7 @@ ############################################################################# # Measure the importance of variables using the LOCO method -# ------------------------------ +# --------------------------------------------------------- loco_importance_list = [] @@ -151,7 +151,7 @@ ############################################################################# # Measure the importance of variables using the permutation method -# ------------------------------ +# ---------------------------------------------------------------- pi_importance_list = [] @@ -172,7 +172,7 @@ ############################################################################# # Define a function to compute the p-value from importance values -# ------------------------------ +# --------------------------------------------------------------- def compute_pval(vim): mean_vim = np.mean(vim, axis=0) std_vim = np.std(vim, axis=0) @@ -182,7 +182,7 @@ def compute_pval(vim): ############################################################################# # Analyze the results -# ------------------------------ +# ------------------- cpi_vim_arr = np.array([x["importance"] for x in cpi_importance_list]) / 2 diff --git a/hidimstat/adaptive_permutation_threshold.py b/hidimstat/adaptive_permutation_threshold.py index 66e525d..6485ef4 100644 --- a/hidimstat/adaptive_permutation_threshold.py +++ b/hidimstat/adaptive_permutation_threshold.py @@ -2,10 +2,13 @@ def ada_svr(X, y, rcond=1e-3): - """Statistical inference procedure presented in Gaonkar et al. [1]_. + """ + Adaptative Permutation Threshold for SVR + + Statistical inference procedure presented in Gaonkar et al. [1]_. Parameters - ----------- + ---------- X : ndarray, shape (n_samples, n_features) Data. diff --git a/hidimstat/knockoff_aggregation.py b/hidimstat/knockoff_aggregation.py index e020fa5..36b285a 100644 --- a/hidimstat/knockoff_aggregation.py +++ b/hidimstat/knockoff_aggregation.py @@ -35,6 +35,8 @@ def knockoff_aggregation( random_state=None, ): """ + Aggregation of Multiple knockoffs + This function implements the aggregation of multiple knockoffs introduced by :footcite:t:`pmlr-v119-nguyen20a` diff --git a/hidimstat/knockoffs.py b/hidimstat/knockoffs.py index 214bc8e..6cb786d 100644 --- a/hidimstat/knockoffs.py +++ b/hidimstat/knockoffs.py @@ -29,7 +29,9 @@ def model_x_knockoff( n_jobs=1, seed=None, ): - """Model-X Knockoff inference procedure to control False Discoveries Rate, + """Model-X Knockoff + + It's an inference procedure to control False Discoveries Rate, based on :footcite:t:`candesPanningGoldModelX2017` Parameters diff --git a/hidimstat/loco.py b/hidimstat/loco.py index aa7f448..87cbe7b 100644 --- a/hidimstat/loco.py +++ b/hidimstat/loco.py @@ -9,8 +9,9 @@ class LOCO(BaseEstimator): """ - Leave-One-Covariate-Out (LOCO) algorithm as described in - :footcite:t:`Chamma_NeurIPS2023`. + Leave-One-Covariate-Out (LOCO) + + It's an algorithm as described in :footcite:t:`Chamma_NeurIPS2023`. Parameters diff --git a/hidimstat/multi_sample_split.py b/hidimstat/multi_sample_split.py index 60e444b..a4eb246 100644 --- a/hidimstat/multi_sample_split.py +++ b/hidimstat/multi_sample_split.py @@ -5,7 +5,7 @@ def aggregate_medians(list_one_sided_pval): """Aggregation of survival function values taking twice the median Parameters - ----------- + ---------- list_one_sided_pval : ndarray, shape (n_iter, n_features) List of one-sided p-values. @@ -38,7 +38,7 @@ def aggregate_quantiles(list_one_sided_pval, gamma_min=0.2): """Aggregation of survival function values by adaptive quantile procedure Parameters - ----------- + ---------- list_one_sided_pval : ndarray, shape (n_iter, n_features) List of one-sided p-values. diff --git a/hidimstat/noise_std.py b/hidimstat/noise_std.py index 4d953ea..276b2c7 100644 --- a/hidimstat/noise_std.py +++ b/hidimstat/noise_std.py @@ -9,7 +9,7 @@ def reid(X, y, eps=1e-2, tol=1e-4, max_iter=10000, n_jobs=1, seed=0): """Estimation of noise standard deviation using Reid procedure Parameters - ----------- + ---------- X : ndarray, shape (n_samples, n_features) Data. @@ -92,7 +92,7 @@ def group_reid( """Estimation of the covariance matrix using group Reid procedure Parameters - ----------- + ---------- X : ndarray, shape (n_samples, n_features) Data. @@ -265,7 +265,7 @@ def empirical_snr(X, y, beta, noise=None): """Compute the SNR for the linear model: y = X beta + noise Parameters - ----------- + ---------- X : ndarray or scipy.sparse matrix, shape (n_samples, n_features) Data. diff --git a/hidimstat/permutation_test.py b/hidimstat/permutation_test.py index 3492385..ed38a9c 100644 --- a/hidimstat/permutation_test.py +++ b/hidimstat/permutation_test.py @@ -22,7 +22,7 @@ def permutation_test_cv( """Cross-validated permutation test shuffling the target Parameters - ----------- + ---------- X : ndarray, shape (n_samples, n_features) Data. @@ -97,7 +97,7 @@ def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, ver """Permutation test shuffling the target Parameters - ----------- + ---------- X : ndarray, shape (n_samples, n_features) Data. @@ -167,7 +167,7 @@ def step_down_max_T(stat, permutation_stats): """Step-down maxT algorithm for computing adjusted p-values Parameters - ----------- + ---------- stat : ndarray, shape (n_features,) Statistic computed on the original (unpermutted) problem. diff --git a/hidimstat/scenario.py b/hidimstat/scenario.py index 9b8d4a5..a1e3cc4 100644 --- a/hidimstat/scenario.py +++ b/hidimstat/scenario.py @@ -20,7 +20,7 @@ def multivariate_1D_simulation( """Generate 1D data with Toeplitz design matrix Parameters - ----------- + ---------- n_samples : int Number of samples. @@ -82,7 +82,7 @@ def generate_2D_weight(shape, roi_size): """Create a 2D weight map with four ROIs Parameters - ----------- + ---------- shape : tuple (n_x, n_z) Shape of the data in the simulation. @@ -108,7 +108,7 @@ def generate_3D_weight(shape, roi_size): """Create a 3D weight map with five ROIs Parameters - ----------- + ---------- shape : tuple (n_x, n_y, n_z) Shape of the data in the simulation. @@ -147,7 +147,7 @@ def multivariate_simulation( """Generate a multivariate simulation with 2D or 3D data Parameters - ----------- + ---------- n_samples : int Number of samples. @@ -230,7 +230,7 @@ def multivariate_temporal_simulation( """Generate 1D temporal data with constant design matrix Parameters - ----------- + ---------- n_samples : int Number of samples. diff --git a/hidimstat/standardized_svr.py b/hidimstat/standardized_svr.py index e6dcca3..183b36d 100644 --- a/hidimstat/standardized_svr.py +++ b/hidimstat/standardized_svr.py @@ -9,7 +9,7 @@ def standardized_svr(X, y, Cs=np.logspace(-7, 1, 9), n_jobs=1): """Cross-validated SVR Parameters - ----------- + ---------- X : ndarray, shape (n_samples, n_features) Data. diff --git a/hidimstat/stat_tools.py b/hidimstat/stat_tools.py index 0b6f41f..0ced13e 100644 --- a/hidimstat/stat_tools.py +++ b/hidimstat/stat_tools.py @@ -273,7 +273,7 @@ def zscore_from_pval(pval, one_minus_pval=None, distrib="norm"): """Computing z-scores from one-sided p-values. Parameters - ----------- + ---------- pval : ndarray, shape (n_features,) p-value, with numerically accurate values for positive effects (ie., for p-value close to zero). @@ -361,7 +361,7 @@ def two_sided_pval_from_pval(pval, one_minus_pval=None, distrib="norm"): """Computing two-sided p-value from one-sided p-values. Parameters - ----------- + ---------- pval : ndarray, shape (n_features,) p-value, with numerically accurate values for positive effects (ie., for p-value close to zero). From 358bd6808f08eefd18062a56ecedfc5e284f6886 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 19 Dec 2024 18:27:27 +0100 Subject: [PATCH 06/28] Format files --- doc_conf/conf.py | 2 +- hidimstat/adaptive_permutation_threshold.py | 2 +- hidimstat/knockoff_aggregation.py | 4 ++-- hidimstat/knockoffs.py | 4 ++-- hidimstat/loco.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc_conf/conf.py b/doc_conf/conf.py index 74002ef..6918e08 100644 --- a/doc_conf/conf.py +++ b/doc_conf/conf.py @@ -90,7 +90,7 @@ # built documents. # # The short X.Y version. -from hidimstat import __version__ +from hidimstat import __version__ # The full version, including alpha/beta/rc tags. release = __version__ diff --git a/hidimstat/adaptive_permutation_threshold.py b/hidimstat/adaptive_permutation_threshold.py index 6485ef4..933fb9e 100644 --- a/hidimstat/adaptive_permutation_threshold.py +++ b/hidimstat/adaptive_permutation_threshold.py @@ -4,7 +4,7 @@ def ada_svr(X, y, rcond=1e-3): """ Adaptative Permutation Threshold for SVR - + Statistical inference procedure presented in Gaonkar et al. [1]_. Parameters diff --git a/hidimstat/knockoff_aggregation.py b/hidimstat/knockoff_aggregation.py index 36b285a..ceeed56 100644 --- a/hidimstat/knockoff_aggregation.py +++ b/hidimstat/knockoff_aggregation.py @@ -35,8 +35,8 @@ def knockoff_aggregation( random_state=None, ): """ - Aggregation of Multiple knockoffs - + Aggregation of Multiple knockoffs + This function implements the aggregation of multiple knockoffs introduced by :footcite:t:`pmlr-v119-nguyen20a` diff --git a/hidimstat/knockoffs.py b/hidimstat/knockoffs.py index 6cb786d..30eed4c 100644 --- a/hidimstat/knockoffs.py +++ b/hidimstat/knockoffs.py @@ -29,8 +29,8 @@ def model_x_knockoff( n_jobs=1, seed=None, ): - """Model-X Knockoff - + """Model-X Knockoff + It's an inference procedure to control False Discoveries Rate, based on :footcite:t:`candesPanningGoldModelX2017` diff --git a/hidimstat/loco.py b/hidimstat/loco.py index 87cbe7b..a3afd86 100644 --- a/hidimstat/loco.py +++ b/hidimstat/loco.py @@ -9,8 +9,8 @@ class LOCO(BaseEstimator): """ - Leave-One-Covariate-Out (LOCO) - + Leave-One-Covariate-Out (LOCO) + It's an algorithm as described in :footcite:t:`Chamma_NeurIPS2023`. From df8007860ca122cd0dc62e36f4ee6072466d91f1 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Fri, 20 Dec 2024 11:53:35 +0100 Subject: [PATCH 07/28] Include methods description in the examples --- doc_conf/conf.py | 2 + examples/README.txt | 2 +- examples/__init__.py | 0 examples/_utils/__init__.py | 0 examples/_utils/plot_dataset.py | 30 +++++++++++++++ examples/methods/README.txt | 9 +++++ examples/methods/ada_svr.py | 65 +++++++++++++++++++++++++++++++++ hidimstat/ada_svr.py | 14 +++++++ 8 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 examples/__init__.py create mode 100644 examples/_utils/__init__.py create mode 100644 examples/_utils/plot_dataset.py create mode 100644 examples/methods/README.txt create mode 100644 examples/methods/ada_svr.py diff --git a/doc_conf/conf.py b/doc_conf/conf.py index 6918e08..a511006 100644 --- a/doc_conf/conf.py +++ b/doc_conf/conf.py @@ -259,6 +259,8 @@ "abort_on_example_error": False, "image_scrapers": scrapers, "show_memory": True, + 'filename_pattern': r'\.py', + 'ignore_pattern': r'__init__\.py', # 'reference_url': { # 'numpy': 'http://docs.scipy.org/doc/numpy-1.9.1', # 'scipy': 'http://docs.scipy.org/doc/scipy-0.17.0/reference', diff --git a/examples/README.txt b/examples/README.txt index aab3104..389adc2 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -5,4 +5,4 @@ Examples Gallery .. contents:: Contents :local: - :depth: 3 + :depth: 0 diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/_utils/__init__.py b/examples/_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/_utils/plot_dataset.py b/examples/_utils/plot_dataset.py new file mode 100644 index 0000000..eb2b2b2 --- /dev/null +++ b/examples/_utils/plot_dataset.py @@ -0,0 +1,30 @@ +import numpy as np +import matplotlib.pyplot as plt + +def plot_dataset1D(X, y, beta, title='Toy dataset'): + """ + Plot a 1D toy dataset with the true regression line. + Parameters: + - X: Input features (n_samples, n_features) + - y: Vectors of the regression (n_samples,) + - beta: Coefficients of the true variaable of importance (n_features,) + - title: Title of the plot (str) + """ + # Create a figure and a set of subplots + fig, ([ax11, ax12], [ax21, ax22]) = plt.subplots(2, 2, width_ratios=[0.9, 0.01], height_ratios=[0.9, 0.1]) + ax11.imshow(X, aspect='auto', interpolation='nearest') + ax11.set_ylabel('n samples') + ax11.set_xlabel('n features') + ax11.set_title('X:data', fontdict={'fontweight': 'bold'}) + ax12.imshow(np.expand_dims(y,1), aspect='auto', interpolation='nearest') + ax12.set_title('y:regression', fontdict={'fontweight': 'bold'}) + ax12.yaxis.tick_right() + ax12.set_xticks([]) + ax21.imshow(np.expand_dims(beta,0), aspect='auto', interpolation='nearest') + ax21.set_xlabel('beta:variable of importance', fontdict={'fontweight': 'bold'}) + ax21.set_yticks([]) + ax22.axis('off') + plt.suptitle(title) + plt.subplots_adjust(hspace=0.3) + fig.tight_layout() + # plt.show() diff --git a/examples/methods/README.txt b/examples/methods/README.txt new file mode 100644 index 0000000..fc85296 --- /dev/null +++ b/examples/methods/README.txt @@ -0,0 +1,9 @@ +.. _general_examples: + +Description of the methods of the package +========================================= +The package contains the following methods: + +.. contents:: Contents + :local: + :depth: 0 diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py new file mode 100644 index 0000000..2e92b10 --- /dev/null +++ b/examples/methods/ada_svr.py @@ -0,0 +1,65 @@ +""" +ADA-SVR: Adaptive Permutation Threshold Support Vector Regression +================================================================== +Statistical inference procedure presented in :footcite:t:`gaonkar_deriving_2012`. +""" + +############################################################################# +# Imports needed for this script +# ------------------------------ +import matplotlib.pyplot as plt +import numpy as np +from hidimstat.ada_svr import ada_svr +from hidimstat.scenario import multivariate_1D_simulation +from examples._utils.plot_dataset import plot_dataset1D + +############################################################################# +# Generate toy dataset +# --------------------------- +# + +# Parameters for the generation of data +n_samples, n_features = 20, 50 +support_size = 4 + +X, y, beta, _ = multivariate_1D_simulation(n_samples=5, n_features=20, + support_size=1, sigma=0.1, + shuffle=False, seed=42) +plot_dataset1D(X=X, y=y, beta=beta) + +plt.figure() +plt.plot(np.arange(n_features)) +plt.xlabel("Features") +plt.ylabel("Values") +plt.show() + +############################################################################# +# Usage the methods +# ----------------- +# +# The method is called as follows: +# +# .. code-block:: python +# +# ada_svr(X, y, rcond=1e-3) +# +# where: +# +# - ``X`` is the design matrix of shape ``(n_samples, n_features)``. +# +# - ``y`` is the target vector of shape ``(n_samples,)``. +# +# - ``rcond`` is the cuoff for small singular values ``float``. +# +# The method returns a dictionary containing the following keys: +# +# - ``beta_hat`` is the estimated of vector of importances variable of shape ``(n_features,)`` +# +# - ``scale`` is the vector of the standard deviation of a gaussians distribution of beta_hat ``(n_features,)`` + + + +############################################################################# +# References +# ---------- +# .. footbibliography:: \ No newline at end of file diff --git a/hidimstat/ada_svr.py b/hidimstat/ada_svr.py index df932ec..33b045d 100644 --- a/hidimstat/ada_svr.py +++ b/hidimstat/ada_svr.py @@ -1,4 +1,5 @@ import numpy as np +from hidimstat.stat_tools import pval_from_scale def ada_svr(X, y, rcond=1e-3): @@ -49,3 +50,16 @@ def ada_svr(X, y, rcond=1e-3): scale = np.sqrt(np.sum(C**2, axis=1)) return beta_hat, scale + + +def get_pval(beta_hat, scale, distrib="norm", eps=1e-14): + """ + Computing one-sided p-values corrrected for multiple testing + from simple testing one-sided p-values. + + ################# + For details see: pval_from_scale + ################# + + """ + return pval_from_scale(beta_hat, scale, distrib=distrib, eps=eps) \ No newline at end of file From 2a8f3c4683e5fd195b7d7ccee5f76b7ef8456bde Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Fri, 20 Dec 2024 14:50:59 +0100 Subject: [PATCH 08/28] fix documentation --- doc_conf/index.rst | 19 +++++++++---------- examples/methods/README.txt | 2 -- examples/plot_knockoff_aggregation.py | 2 +- examples/plot_variable_importance_classif.py | 12 ++++++------ 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/doc_conf/index.rst b/doc_conf/index.rst index 2eb5224..cac85cc 100644 --- a/doc_conf/index.rst +++ b/doc_conf/index.rst @@ -46,8 +46,7 @@ is also needed to install ``pytest``. Documentation & Examples ------------------------ -Documentation about the main HiDimStat functions is available -`here `_ and examples are available `here `_. +Documentation of HiDimStat is composed of an `API `_ and `examples `_. As of now, there are three different examples (Python scripts) that illustrate how to use the main HiDimStat functions. @@ -118,15 +117,15 @@ Application to source localization (MEG/EEG data): Single/Group statistically validated importance using conditional permutations: -* Chamma, A., Thirion, B., & Engemann, D. (2024). **Variable importance in -high-dimensional settings requires grouping**. In Proceedings of the 38th -Conference of the Association for the Advancement of Artificial -Intelligence(AAAI 2024), Vancouver, Canada. +* Chamma, A., Thirion, B., & Engemann, D. (2024). Variable importance in + high-dimensional settings requires grouping. In Proceedings of the 38th + Conference of the Association for the Advancement of Artificial + Intelligence(AAAI 2024), Vancouver, Canada. -* Chamma, A., Engemann, D., & Thirion, B. (2023). **Statistically Valid Variable -Importance Assessment through Conditional Permutations**. In Proceedings of the -37th Conference on Neural Information Processing Systems (NeurIPS 2023), New -Orleans, USA. +* Chamma, A., Engemann, D., & Thirion, B. (2023). Statistically Valid Variable + Importance Assessment through Conditional Permutations. In Proceedings of the + 37th Conference on Neural Information Processing Systems (NeurIPS 2023), New + Orleans, USA. If you use our packages, we would appreciate citations to the relevant aforementioned papers. diff --git a/examples/methods/README.txt b/examples/methods/README.txt index fc85296..ade42f8 100644 --- a/examples/methods/README.txt +++ b/examples/methods/README.txt @@ -1,5 +1,3 @@ -.. _general_examples: - Description of the methods of the package ========================================= The package contains the following methods: diff --git a/examples/plot_knockoff_aggregation.py b/examples/plot_knockoff_aggregation.py index c56121d..a690480 100644 --- a/examples/plot_knockoff_aggregation.py +++ b/examples/plot_knockoff_aggregation.py @@ -1,6 +1,6 @@ """ Knockoff aggregation on simulated data -============================= +====================================== In this example, we show an example of variable selection using model-X Knockoffs introduced by :footcite:t:`Candes_2018`. A notable diff --git a/examples/plot_variable_importance_classif.py b/examples/plot_variable_importance_classif.py index 14718e0..bf5091c 100644 --- a/examples/plot_variable_importance_classif.py +++ b/examples/plot_variable_importance_classif.py @@ -20,7 +20,7 @@ ############################################################################# # Imports needed -# ------------------------------ +# -------------- import matplotlib.lines as mlines import matplotlib.pyplot as plt @@ -37,7 +37,7 @@ ############################################################################# # Generate the data -# ------------------------------ +# ----------------- # We generate the data using a multivariate normal distribution with a Toeplitz # correlation matrix. The target variable is generated using a non-linear function # of the features. To make the problem more intuitive, we generate a non-linear @@ -81,7 +81,7 @@ ############################################################################# # Visualize the data -# ------------------------------ +# ------------------ fig, axes = plt.subplots( 1, @@ -115,7 +115,7 @@ ############################################################################# # Variable importance inference -# ------------------------------ +# ----------------------------- # We use two different Support Vector Machine models, one with a linear kernel and # one with a polynomial kernel of degree 2, well specified to capture the non-linear # relationship between the features and the target variable. We then use the CPI and @@ -208,7 +208,7 @@ ############################################################################# # Compute the p-values for the variable importance -# ------------------------------ +# ------------------------------------------------ pval_arr = np.zeros((n_features, 3)) for j in range(n_features): @@ -218,7 +218,7 @@ ############################################################################# # Visualize the variable importance -# ------------------------------ +# --------------------------------- # Here we plot the variable importance and highlight the features that are considered # important, with a p-value lower than 0.05, using a diamond marker. We also highlight # the true important features, used to generate the target variable, with a star marker. From d3871f41e0510c69f845625634860a8abebd1794 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 14:38:58 +0100 Subject: [PATCH 09/28] Add example for ADA-SVR --- examples/_utils/plot_dataset.py | 120 +++++++++++++++++++++++---- examples/methods/ada_svr.py | 143 +++++++++++++++++++++++++------- hidimstat/__init__.py | 3 +- hidimstat/ada_svr.py | 7 +- hidimstat/permutation_test.py | 4 +- 5 files changed, 228 insertions(+), 49 deletions(-) diff --git a/examples/_utils/plot_dataset.py b/examples/_utils/plot_dataset.py index eb2b2b2..833345c 100644 --- a/examples/_utils/plot_dataset.py +++ b/examples/_utils/plot_dataset.py @@ -1,7 +1,9 @@ import numpy as np import matplotlib.pyplot as plt +from matplotlib.colors import LogNorm +from scipy import stats -def plot_dataset1D(X, y, beta, title='Toy dataset'): +def plot_dataset1D(X, y, beta, title="Toy dataset"): """ Plot a 1D toy dataset with the true regression line. Parameters: @@ -11,20 +13,110 @@ def plot_dataset1D(X, y, beta, title='Toy dataset'): - title: Title of the plot (str) """ # Create a figure and a set of subplots - fig, ([ax11, ax12], [ax21, ax22]) = plt.subplots(2, 2, width_ratios=[0.9, 0.01], height_ratios=[0.9, 0.1]) - ax11.imshow(X, aspect='auto', interpolation='nearest') - ax11.set_ylabel('n samples') - ax11.set_xlabel('n features') - ax11.set_title('X:data', fontdict={'fontweight': 'bold'}) - ax12.imshow(np.expand_dims(y,1), aspect='auto', interpolation='nearest') - ax12.set_title('y:regression', fontdict={'fontweight': 'bold'}) + fig, ([ax11, ax12], [ax21, ax22]) = plt.subplots( + 2, 2, width_ratios=[0.9, 0.01], height_ratios=[0.9, 0.1], figsize=(6.4, 4.8) + ) + im_X = ax11.imshow(X, aspect="auto", interpolation="nearest") + cbaxes_X = fig.add_axes([0.05, 0.275, 0.03, 0.6]) + col_X = plt.colorbar(im_X, cax=cbaxes_X, location="left") + col_X.ax.set_xlabel("X values", loc="center", labelpad=10) + col_X.ax.xaxis.set_label_position("top") + ax11.set_ylabel("n samples") + ax11.set_xlabel("n features") + ax11.set_title("X:data", fontdict={"fontweight": "bold"}) + im_Y = ax12.imshow(np.expand_dims(y, 1), aspect="auto", interpolation="nearest") + ax12.set_ylabel("y:regression ", fontdict={"fontweight": "bold"}) ax12.yaxis.tick_right() ax12.set_xticks([]) - ax21.imshow(np.expand_dims(beta,0), aspect='auto', interpolation='nearest') - ax21.set_xlabel('beta:variable of importance', fontdict={'fontweight': 'bold'}) + cbaxes_Y = fig.add_axes([0.95, 0.275, 0.03, 0.6]) + col_Y = plt.colorbar(im_Y, cax=cbaxes_Y, location="left") + col_Y.ax.set_xlabel("Y values ", loc="center", labelpad=10) + col_Y.ax.xaxis.set_label_position("top") + ax21.imshow(np.expand_dims(beta, 0), aspect="auto", interpolation="nearest") + ax21.set_xlabel("beta:variable of importance", fontdict={"fontweight": "bold"}) ax21.set_yticks([]) - ax22.axis('off') + ax22.axis("off") plt.suptitle(title) - plt.subplots_adjust(hspace=0.3) - fig.tight_layout() - # plt.show() + plt.subplots_adjust(hspace=0.3, left=0.15, right=0.85) + + +def plot_validate_variable_importance(beta, beta_hat, vmin=0., vmax=1.): + """ + Validate the variable importance estimation. + """ + fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6.4, 2.8)) + ax1.imshow(np.expand_dims(beta, 0), vmin=vmin, vmax=vmax) + ax1.set_xlabel("beta:variable of importance", fontdict={"fontweight": "bold"}) + ax1.set_yticks([]) + im = ax2.imshow(np.expand_dims(beta_hat, 0), vmin=vmin, vmax=vmax) + ax2.set_xlabel("beta hat:variable of importance", fontdict={"fontweight": "bold"}) + ax2.set_yticks([]) + plt.colorbar( + im, ax=ax2, orientation="horizontal", label="Variable importance", pad=0.5 + ) + plt.subplots_adjust(top=1.0, bottom=0.2) + + +def plot_pvalue_H0(beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_corrected, vmin=0., vmax=1.): + """ + Validate the variable is importance. + """ + fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(6.4, 4.8)) + im_beta_hat = ax1.imshow(np.expand_dims(beta_hat, 0), vmin=vmin, vmax=vmax) + ax1.set_title("beta hat:variable of importance", fontdict={"fontweight": "bold"}) + ax1.set_yticks([]) + colbar_beta_hat = plt.colorbar( + im_beta_hat, ax=ax1, orientation="horizontal", label="Variable importance", pad=0.2 + ) + colbar_beta_hat.ax.xaxis.labelpad = 0 + im_pvalue = ax2.imshow(np.expand_dims(pvalue, 0), norm=LogNorm(vmin=np.min(pvalue), vmax=np.max(pvalue)), cmap=plt.cm.viridis.reversed()) + ax2.set_title("pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + ax2.set_yticks([]) + colbar_pvalue = plt.colorbar( + im_pvalue, ax=ax2, orientation="horizontal", label="pvalue", pad=0.2 + ) + colbar_pvalue.ax.xaxis.labelpad = 0 + im_pvalue_corrected = ax3.imshow(np.expand_dims(pvalue_corrected, 0), norm=LogNorm(vmin=np.min(pvalue_corrected), vmax=np.max(pvalue_corrected)), cmap=plt.cm.viridis.reversed()) + ax3.set_title("corrected pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + ax3.set_yticks([]) + colbar_pvalue_corrected = plt.colorbar( + im_pvalue_corrected, ax=ax3, orientation="horizontal", label="corrected pvalue", pad=0.2 + ) + colbar_pvalue_corrected.ax.xaxis.labelpad = 0 + plt.subplots_adjust(top=1.0, hspace=0.3) + +def plot_pvalue_H1(beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_corrected, vmin=0., vmax=1.): + """ + Validate the variable is not importance. + """ + fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(6.4, 4.8)) + im_beta_hat = ax1.imshow(np.expand_dims(beta_hat, 0), vmin=vmin, vmax=vmax) + ax1.set_title("beta hat:variable of importance", fontdict={"fontweight": "bold"}) + ax1.set_yticks([]) + colbar_beta_hat = plt.colorbar( + im_beta_hat, ax=ax1, orientation="horizontal", label="Variable importance", pad=0.2 + ) + colbar_beta_hat.ax.xaxis.labelpad = 0 + im_pvalue = ax2.imshow(np.expand_dims(one_minus_pvalue_corrected, 0), norm=LogNorm(vmin=np.min(one_minus_pvalue), vmax=np.max(one_minus_pvalue_corrected)), cmap=plt.cm.viridis.reversed()) + ax2.set_title("pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + ax2.set_yticks([]) + colbar_pvalue = plt.colorbar( + im_pvalue, ax=ax2, orientation="horizontal", label="pvalue", pad=0.2 + ) + colbar_pvalue.ax.xaxis.labelpad = 0 + im_pvalue_corrected = ax3.imshow(np.expand_dims(one_minus_pvalue_corrected, 0), norm=LogNorm(vmin=np.min(one_minus_pvalue_corrected), vmax=np.max(one_minus_pvalue_corrected)), cmap=plt.cm.viridis.reversed()) + ax3.set_title("corrected pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + ax3.set_yticks([]) + colbar_pvalue_corrected = plt.colorbar( + im_pvalue_corrected, ax=ax3, orientation="horizontal", label="corrected pvalue", pad=0.2 + ) + colbar_pvalue_corrected.ax.xaxis.labelpad = 0 + plt.subplots_adjust(top=1.0, hspace=0.3) + +def plot_compare_proba_estimated(proba, beta_hat, scale): + fig, axs = plt.subplots(5, 5, figsize=(6.4, 4.8)) + for index, ax in enumerate(axs.flat): + ax.hist(proba[:,index], bins=100, density=True, alpha=0.5, color="b", label="proba") + x = np.linspace( - 3*scale[index], + 3*scale[index], 100) + ax.plot(x, stats.norm.pdf(x, 0.0, scale[index]), 'r-', lw=2, label="norm pdf") + ax.set_title(f"beta_hat[{index}]") \ No newline at end of file diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index 2e92b10..05873d5 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -9,57 +9,140 @@ # ------------------------------ import matplotlib.pyplot as plt import numpy as np -from hidimstat.ada_svr import ada_svr +from hidimstat.ada_svr import ada_svr, ada_svr_pvalue +from hidimstat.permutation_test import permutation_test +from sklearn.svm import SVR from hidimstat.scenario import multivariate_1D_simulation -from examples._utils.plot_dataset import plot_dataset1D +from examples._utils.plot_dataset import ( + plot_dataset1D, + plot_validate_variable_importance, + plot_pvalue_H0, + plot_pvalue_H1, + plot_compare_proba_estimated, +) ############################################################################# # Generate toy dataset -# --------------------------- +# -------------------- # # Parameters for the generation of data -n_samples, n_features = 20, 50 -support_size = 4 +n_samples, n_features = 20, 100 +support_size = 1 -X, y, beta, _ = multivariate_1D_simulation(n_samples=5, n_features=20, - support_size=1, sigma=0.1, - shuffle=False, seed=42) +X, y, beta, _ = multivariate_1D_simulation( + n_samples=n_samples, + n_features=n_features, + support_size=support_size, + sigma=0.1, + shuffle=False, + seed=42, +) plot_dataset1D(X=X, y=y, beta=beta) - -plt.figure() -plt.plot(np.arange(n_features)) -plt.xlabel("Features") -plt.ylabel("Values") -plt.show() - ############################################################################# # Usage the methods # ----------------- +# see the API for more details about the optional parameter: :py:func:`hidimstat.ada_svr` + +beta_hat, scale = ada_svr(X, y, rcond=1e-1) + +############################################################################# +# | **beta_hat** is the estimated variable of importance +# | **scale** is the standard deviation of the distribution of the coefficients + +############################################################################# +# Plot the results +# ---------------- # -# The method is called as follows: -# -# .. code-block:: python -# -# ada_svr(X, y, rcond=1e-3) -# -# where: -# -# - ``X`` is the design matrix of shape ``(n_samples, n_features)``. +plot_validate_variable_importance(beta, beta_hat) + +############################################################################# +# The result shows that the only variable of importance has a higher score +# but the difference between beta and beta_hat is quite different. + +############################################################################# # -# - ``y`` is the target vector of shape ``(n_samples,)``. +# The pvalue and the corrected pvalue can help to be confident in the previous results. + + +pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation = ( + ada_svr_pvalue(beta_hat, scale) +) +plot_pvalue_H0( + beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation +) + +############################################################################# +# The pvalue and the corrected pvalue show that the confidence in the variable of importance +# is high. The pvalue and the corrected pvalue are close to one. +# We can conclude that the variable of importance is estimated in this case. + +plot_pvalue_H1( + beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation +) +############################################################################# +# The results for the alternative hipothese shows that the confidence for not important variables +# is high. The 1-pvalues are not significant. However, the corrected 1-pvalue is close to one. +# We can conclude that the not importance variables are estimated in this case. + +############################################################################# # -# - ``rcond`` is the cuoff for small singular values ``float``. +# Principle of the methods +# ------------------------ +# The ADA-SVR method is a statistical inference procedure that estimates the variable of importance of a Support Vector Regression (SVR). +# The method is a simplification of the permutation test for SVR (see :py:func:`hidimstat.permutation_test`). +# The principle is to shuffle the target variable and to estimate the variable of importance with the SVR in order to estimate the distribution of the coefficients of the SVR. For ADA-SVR, the distribution of the coefficients of the SVR is assumed to be a normal distribution centred around zeros. ADA-SVR uses the central limit theorem to estimate the +# standard deviation of this normal distribution for each coefficient (see the following figure). # -# The method returns a dictionary containing the following keys: +# .. figure:: ../../../examples/figures/ada_svr_1.jpg +# :width: 400 +# :alt: Figure 1 of ::footcite:ct:`Gaonkar et al. 2012 `. +# :align: center +# :name: fig:ada_svr_1 +# +# Figure 1 of :footcite:ct:`Gaonkar et al. 2012 `. +# **Left**: Concept of support vector machines in 2-D space +# **Right**: Permutation testing for support vector machines +# + +############################################################################# +# Comparison with the permutation test for SVR for validating the approach +# ------------------------------------------------------------------------ # -# - ``beta_hat`` is the estimated of vector of importances variable of shape ``(n_features,)`` +estimator = SVR(kernel="linear", epsilon=0., gamma="scale", C=1.0) +estimator.fit(X, y) +beta_hat_svr = estimator.coef_ + +# compare that the coefficiants are the same that the one of SVR +assert np.max(np.abs(beta_hat-beta_hat_svr.T[:,0])) < 1e-4 + +############################################################################# +# This coefficient of SVR is an estimation of the importance of variables. +# For estimating the confidence interval of the importance of variables, +# ADA-SVR uses propose to estimate the distribution of the coefficients +# of SVR, instead of using the classical permutation test for this estimation. + +############################################################################# +# Estimation of the distribution of the coefficients of SVR +# --------------------------------------------------------- # -# - ``scale`` is the vector of the standard deviation of a gaussians distribution of beta_hat ``(n_features,)`` +proba = permutation_test( + X, y, estimator=estimator, n_permutations=10000, n_jobs=8, seed=42, proba=True +) +plot_compare_proba_estimated(proba, beta_hat, scale) +############################################################################# +# **Compare the distribution of the coefficients of SVR with the estimation of the distribution +# of the coefficients by ADA-SVR** +print("ADA-SVR assumes that the normal distribution of the coefficient is center around zeros.\n", + "Our estimation is that the maximum deviation is: {:.4f}\n".format(np.max(np.abs(np.mean(proba, axis=0)))), + "ADA-SVR provides that the standard deviation the normal distribution for each coefficients.\n", + "The maximum of difference between AdA-SVR estimation and our estimation is:{:.4f}".format( + np.max(np.abs(scale - np.std(proba, axis=0))/scale))) +plt.show() ############################################################################# # References # ---------- -# .. footbibliography:: \ No newline at end of file +# .. footbibliography:: diff --git a/hidimstat/__init__.py b/hidimstat/__init__.py index 24e0321..f861751 100644 --- a/hidimstat/__init__.py +++ b/hidimstat/__init__.py @@ -1,4 +1,4 @@ -from .ada_svr import ada_svr +from .ada_svr import ada_svr, ada_svr_pvalue from .clustered_inference import clustered_inference, hd_inference from .desparsified_lasso import desparsified_group_lasso, desparsified_lasso from .Dnn_learner_single import DnnLearnerSingle @@ -22,6 +22,7 @@ __all__ = [ "ada_svr", + "ada_svr_pvalue", "aggregate_quantiles", "clustered_inference", "dcrt_zero", diff --git a/hidimstat/ada_svr.py b/hidimstat/ada_svr.py index 33b045d..0016d26 100644 --- a/hidimstat/ada_svr.py +++ b/hidimstat/ada_svr.py @@ -6,10 +6,11 @@ def ada_svr(X, y, rcond=1e-3): """ ADA-SVR: Adaptive Permutation Threshold Support Vector Regression - Statistical inference procedure presented in :footcite:t:`gaonkar_deriving_2012`. + | Statistical inference procedure presented in :footcite:t:`gaonkar_deriving_2012`. + | For example of usage see :ref:`example `. Parameters - ----------- + ---------- X : ndarray, shape (n_samples, n_features) Data. @@ -52,7 +53,7 @@ def ada_svr(X, y, rcond=1e-3): return beta_hat, scale -def get_pval(beta_hat, scale, distrib="norm", eps=1e-14): +def ada_svr_pvalue(beta_hat, scale, distrib="norm", eps=1e-14): """ Computing one-sided p-values corrrected for multiple testing from simple testing one-sided p-values. diff --git a/hidimstat/permutation_test.py b/hidimstat/permutation_test.py index ed38a9c..6a23a67 100644 --- a/hidimstat/permutation_test.py +++ b/hidimstat/permutation_test.py @@ -93,7 +93,7 @@ def permutation_test_cv( return pval_corr, one_minus_pval_corr -def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, verbose=1): +def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, verbose=1, proba=False): """Permutation test shuffling the target Parameters @@ -140,6 +140,8 @@ def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, ver ) permutation_stats = np.array(permutation_stats) + if proba: + return permutation_stats[:,0,:] two_sided_pval_corr = step_down_max_T(stat, permutation_stats) stat_sign = np.sign(stat) From f8684696cb4d1d3955a0c30d5481f4e5bb49ce7d Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 14:48:56 +0100 Subject: [PATCH 10/28] Add functions for get pvalue and fix format and doctsring --- doc_conf/api.rst | 1 + hidimstat/ada_svr.py | 10 ++++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/doc_conf/api.rst b/doc_conf/api.rst index 885873f..f5b4176 100644 --- a/doc_conf/api.rst +++ b/doc_conf/api.rst @@ -16,6 +16,7 @@ Functions :toctree: generated/ ada_svr + ada_svr_pvalue aggregate_quantiles clustered_inference data_simulation diff --git a/hidimstat/ada_svr.py b/hidimstat/ada_svr.py index 0016d26..30c5fa0 100644 --- a/hidimstat/ada_svr.py +++ b/hidimstat/ada_svr.py @@ -57,10 +57,8 @@ def ada_svr_pvalue(beta_hat, scale, distrib="norm", eps=1e-14): """ Computing one-sided p-values corrrected for multiple testing from simple testing one-sided p-values. - - ################# - For details see: pval_from_scale - ################# - + + For details see: :py:func:`hidimstat.pval_from_scale` + """ - return pval_from_scale(beta_hat, scale, distrib=distrib, eps=eps) \ No newline at end of file + return pval_from_scale(beta_hat, scale, distrib=distrib, eps=eps) From b1c14e147c5718afb135906855adb1d2b2a30beb Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 14:49:27 +0100 Subject: [PATCH 11/28] Fix format --- examples/methods/ada_svr.py | 62 +++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index 05873d5..60a94bf 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -42,7 +42,8 @@ ############################################################################# # Usage the methods # ----------------- -# see the API for more details about the optional parameter: :py:func:`hidimstat.ada_svr` +# see the API for more details about the optional parameter: +# :py:func:`hidimstat.ada_svr` beta_hat, scale = ada_svr(X, y, rcond=1e-1) @@ -62,7 +63,8 @@ ############################################################################# # -# The pvalue and the corrected pvalue can help to be confident in the previous results. +# The pvalue and the corrected pvalue can help to be confident in the previous +# results. pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation = ( @@ -73,53 +75,61 @@ ) ############################################################################# -# The pvalue and the corrected pvalue show that the confidence in the variable of importance -# is high. The pvalue and the corrected pvalue are close to one. +# The pvalue and the corrected pvalue show that the confidence in the variable +# of importance is high. The pvalue and the corrected pvalue are close to one. # We can conclude that the variable of importance is estimated in this case. plot_pvalue_H1( beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_correlation ) ############################################################################# -# The results for the alternative hipothese shows that the confidence for not important variables -# is high. The 1-pvalues are not significant. However, the corrected 1-pvalue is close to one. -# We can conclude that the not importance variables are estimated in this case. +# The results for the alternative hipothese shows that the confidence for not +# important variables is high. The 1-pvalues are not significant. However, the +# corrected 1-pvalue is close to one. We can conclude that the not importance +# variables are estimated in this case. ############################################################################# # # Principle of the methods # ------------------------ -# The ADA-SVR method is a statistical inference procedure that estimates the variable of importance of a Support Vector Regression (SVR). -# The method is a simplification of the permutation test for SVR (see :py:func:`hidimstat.permutation_test`). -# The principle is to shuffle the target variable and to estimate the variable of importance with the SVR in order to estimate the distribution of the coefficients of the SVR. For ADA-SVR, the distribution of the coefficients of the SVR is assumed to be a normal distribution centred around zeros. ADA-SVR uses the central limit theorem to estimate the -# standard deviation of this normal distribution for each coefficient (see the following figure). +# The ADA-SVR method is a statistical inference procedure that estimates the +# variable of importance of a Support Vector Regression (SVR). +# The method is a simplification of the permutation test for SVR +# (see :py:func:`hidimstat.permutation_test`). +# The principle is to shuffle the target variable and to estimate the variable +# of importance with the SVR in order to estimate the distribution of the +# coefficients of the SVR. For ADA-SVR, the distribution of the coefficients of +# the SVR is assumed to be a normal distribution centred around zeros. ADA-SVR +# uses the central limit theorem to estimate the +# standard deviation of this normal distribution for each coefficient +# (see the :ref:`. # :align: center # :name: fig:ada_svr_1 -# +# # Figure 1 of :footcite:ct:`Gaonkar et al. 2012 `. # **Left**: Concept of support vector machines in 2-D space # **Right**: Permutation testing for support vector machines -# +# ############################################################################# # Comparison with the permutation test for SVR for validating the approach # ------------------------------------------------------------------------ # -estimator = SVR(kernel="linear", epsilon=0., gamma="scale", C=1.0) +estimator = SVR(kernel="linear", epsilon=0.0, gamma="scale", C=1.0) estimator.fit(X, y) beta_hat_svr = estimator.coef_ # compare that the coefficiants are the same that the one of SVR -assert np.max(np.abs(beta_hat-beta_hat_svr.T[:,0])) < 1e-4 +assert np.max(np.abs(beta_hat - beta_hat_svr.T[:, 0])) < 1e-4 ############################################################################# # This coefficient of SVR is an estimation of the importance of variables. # For estimating the confidence interval of the importance of variables, -# ADA-SVR uses propose to estimate the distribution of the coefficients +# ADA-SVR uses propose to estimate the distribution of the coefficients # of SVR, instead of using the classical permutation test for this estimation. ############################################################################# @@ -133,13 +143,19 @@ plot_compare_proba_estimated(proba, beta_hat, scale) ############################################################################# -# **Compare the distribution of the coefficients of SVR with the estimation of the distribution -# of the coefficients by ADA-SVR** -print("ADA-SVR assumes that the normal distribution of the coefficient is center around zeros.\n", - "Our estimation is that the maximum deviation is: {:.4f}\n".format(np.max(np.abs(np.mean(proba, axis=0)))), - "ADA-SVR provides that the standard deviation the normal distribution for each coefficients.\n", - "The maximum of difference between AdA-SVR estimation and our estimation is:{:.4f}".format( - np.max(np.abs(scale - np.std(proba, axis=0))/scale))) +# **Compare the distribution of the coefficients of SVR with the +# estimation of the distribution of the coefficients by ADA-SVR** +print( + "ADA-SVR assumes that the normal distribution of the coefficient is", + "center around zeros.\n", + "Our estimation is that the maximum deviation is: {:.4f}\n".format( + np.max(np.abs(np.mean(proba, axis=0))) + ), + "ADA-SVR provides that the standard deviation the normal distribution", + "for each coefficients.\n", + "The maximum of difference between AdA-SVR estimation and our estimation", + "is:{:.4f}".format(np.max(np.abs(scale - np.std(proba, axis=0)) / scale)), +) plt.show() ############################################################################# From 7cf0c4db093e31b6e8e7acecbcd9466dd5fd2e1a Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 14:50:43 +0100 Subject: [PATCH 12/28] Add figure for ADA-SVR --- examples/figures/ada_svr_1.jpg | Bin 0 -> 25807 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/figures/ada_svr_1.jpg diff --git a/examples/figures/ada_svr_1.jpg b/examples/figures/ada_svr_1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b5f67d66e294910808c6016a6c97191fc715dae7 GIT binary patch literal 25807 zcmbrl1ymf}wl3NPX`DcC*8suYEx5Zj79`L#65KsNaEIW*-Q5%1-QC^YdHnm`z0cYI zIcL24?yc@QdUUNC-8E~iHSPQ6>)h)a;Ju85v;+VO3JQ?$_5!>v0z?7uaBx64Sa={1 zh=2f(h>VGXjD&=Yhw%XolL()Lm7giKa-NPvU76t@(T)!s;X;h z>*^aCn>sqXx_f&2`UfT^r>19S=jIpIH#WDncXs#o56&+xudZ)y@9rP|(hCXz{r{@< zFU|g@7y6rC?_gk{VSs<>1@+GPO`y?XV98kEFhrDrUu-{+v--kgevZzp{DVNjrgDa5 zWH*k8P07Aab^e!X|IqAzPcgs$BhCJ$*njA?06>O@dYe3Gbbt`x_Tf`{(QV~^wRC=w zzyzQ#a{Nq$?wW#>L{)ymQPxwxgRl836^OJLnU#617`2O-Mdo3yOmc8|tW|lC|J%_e zf%VNoiz5e>`V=V_whH<485h~rm%eAPbWMtwK&YA;_9a0md;B+~UqYhlHG_@YVV!P8 ziMBDg(49VBcf^5(7G-`r0hr3d9U(No-u++6S{uz>ugY0MN?u*Osld=JPGxc2kE$bN z)}$V2Aw<3l+lgiuMiqDNlW*tQzdU4ewMFo@hy#5-W+j(JNNK>7E1}6a0x*@u|9jE= zsb}#*YY;J$Kx^{~z=*7VZ}ZSD`-9JG;6)Ce$J@aA4|)_wP(idw^PgIY=kOl1A4ZmFB1UvH^S zqpnX9?WH&S^!ti*>I$GNj``n;W0|pv=Gji(LTii3{w=al4TbV8ng}!TGQlFS*5PVa zJQ|T1R_YJTH!z32jQy6|LHasDNpx{GoCY2YF?<>M=>|$|dpAT89!EY#C+2vcr}NY9 z^O*N(rj8DzJ&1#9{DhKPF+<2Ox+(J%U2s`*k)rm?D%j2TZZS`427BmYUsX=4tML+B ztaF#d1(j{)`w13V-_`t)lM}GNv9X3az=lO=+1fiD+!UW~BL8`Xd^X`=5tDP38U0rJ zDu2aj;8l9{q#e!V#4{6F=S7pvKe&9eS$^A6`|aNn7;PH@wM_~3d|LA z5U4FUOkOnlp1_9(z{gl|5MFzWM}U3YX@r8riS{9u3tdEOP0smOjM6_7`$bRrQWvT|VjAhqQ>&LLwUYpP4yLccmfze0Fiqh5(QbA%9S(Uz9 zj&d;y-y6TO)jcfv81)eQf+Q@+&;UFaSO9YHa@MCzOr z2QMdY14;~UIx~@5kO!_|6TaH z-iNLiY?tf7e)>}Slw%=qnD6%5dubTLun*l%Sm7I$S%d3?%?9uFL~Zi*$O0u3lyP_S z5KfmoX`s2*w^xHco`sj@2b{{UWSISQ7IY3gaDE4*=Gk9qYPuYx6F^&+>VVV(`J+c zNT_I;N8Y^jW?Y=MGM&hpJH$clhe}}l9GzM}-5$>R5?7f5tR=gE`vg4)BvRoiS2KM(eH$9hyEXs$7Cu^~^e%|1Lrqqd;Wf1C|!u-e8L&aZ~S5q8j9>5W2z@j(IDKa{vl zr^TT$R#V1g|EO`OSwC3{^XK%^AP^a~G;P|%U*VeN5JL6fZ=KF!Mv+;;?dOw~AguED zaksNg&Me$dr*5mpzExV(^Q`pGn?ZjCnD#NN#|JS2uD0qQsl(gf-&wUtuie)W49=kO zo5$#u7-FdTDXjQYI^6z375aSsQqsO~XpaGj(q0lr#;GGCi_jZSv`p~K2IKbe#{k`G zA?F`G4RpuJCLP@KCmreV7(F&?Sh2q4JfDP)c-l3To}#;OqS#H!`w;|G<9n=Fele7$ zp#Gwu+`!K%nOU;Twca;(CpVW>9nnLcbI)7mgn|0Z< z?nSXGG^KGsoIosWTZFbb_RqTg?_o<_51AT&9Z!BRepocs#$4^jr~A*EN`1V$sS(qk zJJ&hd66hBO)#&Isvt~5XM|i#$5mu0LI%WKf;M3?@f%en3HhHuRF`HhWmV#%-=TLg@ zPSk1+jV5=O`^_QKws4P2S&=i7D!vLBjx~_5RxJpn)WE}^9=|s;Nf4@)$5QiE3|YA{ zzHiUA2NIIFzBB=LOGs$iBtTkBGGiV#BlHtut~5m0<+1c%#F_&q(B^@i640A@L7$_F zdHTtO0U2I2@23B@VhAO>(e9zZLN^4JuYmEaX15K&zgb*3UbT%zi!kixtJ)w|}7g;cw_KUOSkI#yBl6_wCL6;d#3S6hT8_9`w}qf%43 zwe>R;@#?rOsuJ67X9H5ZFoft!_c=d->?9uT$5YvRre5pukp%61V5SgpTBpZr9Z(owINjipa*mATJvt!c|SY!^F7dHv;Vh& znzDNNiT+3r=zBmOOL#ZtT6DVYoyk;%T;h94b~Bm-JjKeaWpPhSXBPY_?ssMMLy%kA&*aI*`OUTm4mW5YJTc{#w6+Bl>rvrUX#d7H6>Hr z2z;hSBNkrCC(XWnFoUZ}5-o&zho%IHl}Zt7@g(-Xhc&21;t|^QkItZZB$j#6kB^&_ zRoqYU#>Re>JvTTLViGOQD%zi9pHZ%U1(cA`y#iWL(GEUMgK-?~u?qj(l`CqVo5m&d z&CcQ|)o}6J3k`e$JZ+_Z$K$3TVWA)l$+t1if36m(2&Sr=+)6nPb>?A}7Z`|r%qH7b z02*vSo3G&?I7T9g6zO}G5j2P5<%*z8g5C>`qumm$mf;$PrQ#1YpVg)0=u&M^`pX20 zC)M2bkV`{t$U%ppwT3z3J?PxV2fOj5#beGbn}qA(vR3qwZlNm_($Hju0O;^MRC74X zIe#(+Wgu2Mk$#7j(00CBf`SR#f(CKy-c;@#aj>^PvYb`4sG853e0t7lT%ov*wEaEm z*%|)}wO-;JE~&hmGTE^&PGZjSlX{>l@XeC&05gf`6dDt;d!?&Gi$jW^^!sJ0~!bZS&JHU$ZqFprcBMxY9F6bi#)`Qn6o2yg`x zbe)&dX7 zkIPvngTih&vF+N zQ)C<(6P~{GFVurwl?i_BFZS5+o+wi@zoZ|lQ%y|lSNS9O!W^+i!S#`*t_1g0lEf0v zX}uHmH7KZ$qODUL0{RdA6H@51qIluLdPCAn3S(U8fBy7r_H)n;{<=%2u*$Q*jv2*- ze7Z+PowiPbt}KYNK8Z-?h|{F3`#QXR8=a%*XY-K}y7ST*}`Ao)h`*&}ho{La3!VoxJ7 z38njJ*K>OCdEekk0M{S7ZwlEcC+I&9 zl*iD+Ee{egq<-Xss`w~+pN7ET8|mR#ilM9_NhK5W598@(h1GvozQGjqgjX;(cW^0F zt#8++>+Z*B+t>={$?C$dsR3^Y&jXB>^ELlZv%JpOzSTJAJ@?MwnYgxe!+>C)nBeSq zr|KfKuT)p;e4(k?h8jwdpQP02KnW=$oU?+BwIrP=2|?8l>L)54K_>}PO?gn0*$<}L z=JlfeOJTW+ew&LQu?#Vde91n1?ZCUD(H<{Cf9NR zj);Ao>8=!+z*2zHxYoWGWpe*$-`1}e$b&cU5yXGyT2Ij6y(B}x(TCCZaALuoR;1@K zpZ2Cb)Jqs0mKhfne4BI)WLtk+s~H?XsMu?kP78z5Waa0W-s9kN!SyYyVZJ1xTxki%JkB~UZ(IxrqU49y(HlB zHf5XWuFwP44)lpurd@B)G89tn%$7>I( z>yD{AIX%A|d{d9Vg%iwC-BLl1)c9rrt8CMymOIw0gg7Bnv<2GB&C#mwb8&D*#NG6s zPfkxNl*O^kxc{EVqDO1RG>n~^G$d&2j2wvL^8e7irA)^>zv8G@9!9qB8XpMR>x9)Z z!3WRD8yE@OvC;*#zh!8%+1_*5o38)~DhFPVNo~a&q{o2h<)4c@k0bt-n|OF-PXF$G zI1Ckb?bSILcvsac5=Qp9#5U_;6^-6<9@6P1T5A<+#=RX64uS33?Und~O81`?hg{%9 zTvUM(ILQKzNezUUNy9rciBQZI1%{`?y8|=H>YG0G?|fdDWm05uk7)NT zax0FhabnENH>O^w(-&SfHLAyrBBAk(A()JCE&P>E_Y=c~Qrak5lp@=NSpc4*pbWZm z0Xw63pGBUbuDjN`-qzfsfkW-HOf~J}fmZA&-g3X{$f(b{;t{w!dK`A^l(zZ@R>aU1 z3#ybG{DIP(H}bt{*}sTj zEykrpJ_&QPcj4jnNPz8=FNcLBTbtXa7y`>3t;!b%LkIT)uhJ1%)rI+K)gQBqh+UEG z3)kUo3l3Dk7qY~W1kY(!c}62>4-yZzG3$TmgrBf4SD=JAdL_CLhO~38d$S zMXto{7=q^#cE~uiKRc&aulu6NuWiCw?MEqkyXgNl%r<_PE7|TD$_lAuQOj1f!qU z@nM#frME{5cPVCCA6;Y1wcw$Al%hw@mSs|X7f~7pv z5JhGcnu)0GvCt=+s%$UoCx6bq&4&-CI{D@<2MDdl9e2Xwu)9hTi&c$8! zOJt&U75^(>R#{7*@I0cp$f8LMC%H6yifialpuoozO5;l^hU?+my_~D(f&ewkH6bZy-W|`dgA1Brk;Mx&8~bPo%>Xa)_bu@df!_t zTpo&6@#1}Si-bE;`oUWgB7w+S!UX+2df7tiV@JiaD<)A^MT|iLI0-x$=W@35!2ENJ z)eVhkt6vLjdraH+xkkw0v5%uHKL*wuw%ko-?SJjZ?$?!64`~0~HRMr$&}h|}+ewy@ zr?yW$r}pRP+K>&^6V;vEY>4T?mi$WR!_#T_)Y*oHrTAwR{YhvOGB_wewu`Y~CpLb( zlGL-kb$@;Avv0>a{uGFMy zM?ytGQWrVa_lP@Bdh9<8yJlO6Ye`(23;P4sb;6`Vq#zXA(`zNir#3o?b4pkt}2>jEb$sL&LuG}>5 z(An;wL!MkV!SzL#IS%j(HMD=Dm2*(8a?3(-fVw zbf8rknOK69hEXlbua@$lF2LG-{CKJ|C1wYD*8kJmyiJ<~l4*AK?x1Q^Z!4leb3RZ&0t!K67b9aRILOp(wuCnD9cr*o%10dS@$%krMtE;h@+2Uj6!#9emYQ#G}cWWAI)ikRKOYIoH){`W^my6l9H?_LERF%X@1cj;1Ac= zN_o~Buras|g7MNS$*YwsP>j*suVw`4iaT2obz#s)OJzn+ZM}D&svTTRIBP3mKs+8V zC%J`jsE` z&AW5T$3M^F8$bx<1Awj|QsgjJZ48o{lHW@Tl}piIvTun9KaTLK9pz7s{87&7+khIv zt%mg>Mu#FRfL3-TA=r_1K50Zdltjf|PxE`t6Mf=HtC!SE+A!7Z_nUb!hE>PkUs@{H z{9mvp78XlOYrau0iz9v_gG-xl^$;^f)%u`<9wp9zRoG$c^nyK;=~{DFs|v}dkF+P6 zqJI7e#@#|dz$H_BzT))vL3QvD%CS&7z|koyrKEM`Es=-K6jVm|9pyhgy2s4Z1IfLKRRq0H$A{~vlU4N1hzbh0fEy^z_&#u5@ZR>F=!3H z`R&JMYTRz3+53^*a$^T6evV^1QvGyPV(Z5v)J$0NV49_xP*wZ^9xnl^Xl-dy41s+3 zt0Y$7x6nrc8h`F=DK(Gnt*r99#8)!dO>tl~u-*wXbN4?zjsZahcg@n373n7gKW)T5 zNWxSu%9Qnuti!o(#y+TSpG%}!ZQ!v(3S(AG19{YUqSZ}ShB7s&DW8HO-7W{cg9NE! z4o=()OVkDlSvEc*mKir{J6owKwbPKID!9+Q0z#fp?8^@p!Vjo2T}v6h)Vq3JSrPU~ z9QTMohX4*o>T}_<;|@6ectp&Xh8n)XtB8#9M+xSrf(*td`k6rLRoG=OI&x-uVJp7X zexAiW*BOWw&~oSBnRNyQYHPbR9(1jc4CxaQ*ncCN6j&lh4C*A3pw#wnK=S>D^&!o{ zrjy6k{0-w&5$qj2pM`v4FJ1vy0Jpg0l|;;P%;fMaX_8fmBoTw=DYMA7jN7qmECmBB zaIp86w{2`M?h@Oy)Jy(zObp7h&hr%@bD4qef^n#DmJ#w%md2X#G_Mlm>6>edkE>Qp zXcCr5J!P+P+(rV~yIf(qF)i&n_SjhAZ(&X{3V#pKgfT_ncRm#+{=qMj&GEAZzcjj* z(wDhG?|X#Qh;6rwqbIwp6?%Iuh91foQ*vXRTfwMMfNNd&*{Zs1LGCohfD~%hZMe0T z6pmwi;-pcT^V%6;qiFvXfOH*7S4z3WugTnr(qi}HBurm9M8YyMDuFNpO5d{pdX6TC z2$lh0OM)r11^eb~=d63*(xs5GS*ELUlpo29ihlAccKcvt7Qo4f-BN^4Dh7mlW9to7 zIEyb`9SpTnn6YnI`$FPOT`$u);UT+@_obPp$jZ@atRu#n`<4RBPbv%QXWeoLt=PikN;>o}3 zIqlM?jG=2LC>*LgAdkHT?-VZeDe~a)f9Qc2ucwXr^Fx6PI=Lbo4kum7D>|*z0qB#| zvFTp%OWo%men)i~)t?h5VqE&fzycm7!_?HH<8`Wv8C|{ggRsEs0>}#_i2?hl-P3!lxvrFZmJxsciJzEXWl^^D5TPPD|MIdFe>-R3g03`1mIydSE^s| zhjUznx^%iKde-Ys;m+(O8^5QAx7(@#eM5Ko9dN6x_1z?`u zX{{i?0x~mX-=fQaU$Ey)J|27U$5i4fjSX_F)xT z#_X@ENJJ%b0-BGVZ4&0t`Tqd>LujD?9q^of1>m@ZDvy^t99jujA~am$4KL*iBLef6 z37=0Sz6phLgl|9^Ye+(Py;?a_3iPzRP5LeL2bPA!0+r@S9o8IkrdmwbhYdR%Ne~Wa z^<}XmhsT%eVpxRng2WPa&PIwi_*zw@7!B+PF&aY1$70}){Q7T2peZvcBjhQ06UsMS z85}%i)oKIrtX?K}#EEAm!-~I5hrXM$NkWsOGr1~tXCgGm_eySZEDz)tCCVyPem`MC z0W7nhBdRbz&XL%BximO2Kz3VXS9Kx_7`2E}ZuKdH8BcC3LT18Xh3kJ`66~^~)2Wl1P zbUW9>+mz?b9w63|cgN_&D_{?0L+A-h_5~AaQOy|x*J>+{@ghWvQFsI>mrfV<3JA!| ziEPbr`R?gZ=-rV*-L;Hio=(di?z!psX<$Jv=e?VMH^B7H?qE>o93^bl3v2RA!ZH3i zWA?YmwY1qlH=4pm)P=UmBIszx>N`qj_)C=*1fDK*OB4$>}|fH`au@`q@B_j6U96rDR) z+YdS=Hn<_0BrZY7Z2tQ*jScj*qjw5N)+Uo%kTf$nhZ^6Y_l#@MgL{C%=o47JO9sRn zi_~_PHc36(LS0?`Ik^g1cN=#KrUi07-;T<)$XmCtHRxWfJ!}>f#QoCkc-Y)QUu!Lo zKYctZwh>Ea`x-S2h=ZLER0;8B#H2OCl_y)f&7RVf8HL4(PT%wwe^Q6*vx@w=CJZT{ z!k|-+SG=Iks&*JG_S1iJjSip+c^v|?UaF9b%GVYXzuFZ7D$rDq!}5CYJu&E$mIaA zzwf26Nux)h3!y5k=(E*5Fem8R|GsV$&RFc&kDeE_dJ3qiS%Aca{ zZ0~~u%y%n)^PG&dO#E!HW$7j_vucnSliqnQj9Sr37BW^^rhoB26f)xnsL zVL1tkXE*SV;1pE8%{fk|Ci&;Ykv3hGffW-BMi}oyAl+?Z({(XdI@eeUCCidtMU*t5 zy9SPtju@j4fQL$?#QDzDj1-CLX(;9Nr;m@>u_&_8nn0nI7ZGM_U2F5q*T%-c1EiRh z&%cZ@t?rlQxKvZXm`$t;1G1!xiO|+g+f&y2vDzvgOnz2wL^ARk&KK|Q&HE5^MYrFhx)^Ny zCbA#ph~GGcIiyb~t{p$tcwk`cTBMLsqLV^)mkZJI$oQzhcJ&K2kmN^`dP6gB+&ZG3 zt~kaYAi zU|6;B z*)Pzo zs&mw+y0MW;S;8#!)wB{}3LuG&t#nO#M=V(H1IWCMpvBj$ZsMvj%QoJjKFt5lS0w{w zPt0(y$w}yNTBxJgMe%cN=L1h|vT1^5)W9NJt#mK6kj!$HG_?u3RjFM0odmzoD?nI) z>lFY+@#b0-|9OJ#f1Q{q1NLC+>^^;bAX*7U9{E_;P@68S7>_EvNO7|nBjBcjH!-}P z2J^X_9ff6)khV6==e!Z2LR($9zE@B#j|1b&5Ed*O2_h0st}HObHPoLFJ3 zSHL>v+3qudz5NL$@`)Dh`xCbWs&8)eD?s1&%}H9mak<*hez=&u1RTF1#DF&h0Ws+F znPQL!m#x4^V(uT5u9K0A<3T%nAH`+j&Ua0SZ;Z&lSdsrB@80&@0#3543Qd%0+8iKz zDpsj1*Qn=xeH4j+Y%g)HT2~@ zWP9R-yrDMG9SZY@t#yDDM*s7Q)|uOPqG5pBTZqM1_$B;FsO*C?Z$WJ!8bdaL!EH=m z7fBQCwQHxBDoDs^)DJ zikm9Ma7qIrFed0LK5R9ENoQ6}D@=7CT{Q5b**f0{2x(vquUykz%Vs|g$l|x2cU?Zx zxPN+$wP%T`BM9J^ItHB%hftZ7Zb$ETQcJB3znfC`g>ofEgSk%GOFRlUM}X9B=e<)T z6OAdupC9#`w2Vcoh3I7`4=k8IJ!&U_8o5|Wsa6RAd}wE7ld_Y?mR4$@us}effKKB^ z*#d|N5{tIW)VEW{E?bHL@5l{kazV9b2zf`$Hf!VAS#Z6tXPR>}TX(LvSS3vu92f1{ zCwR?+hg+s=b(U9x`L0Res@48tVaPj%}1(Z?np*65JOIdQe;NoNuj)_!^w$M8a;(tae3lS*#vjAkn0^l-!Xd zXZmhQ%%9Z9#rwBezC~`?_f`((7Rf?}`jAo*B|j~zlGu?db7Ll|qannfaS=aZJX%5J z;vD)6$|wAwFHQ`Uk=k@OvcoKWv3;R@En8k8y!PWwp6hy3Dw6!tpRgPZ@wDvVuk(-b zM&LXxWUw3Dk3Y|#;TafIKc^!RD5Feq57i&79!wj*kRo(cY$t_81U82h+UCybt%CHI zbVRA9%j7<=z8l{%JQk>;fsU=5wk-32fL*@p^?F+v*N}O;ny5Y-@j41iZAUN<*1Q5x zoUS!i<*I60nwsltL-<0o@?6!0!H8kV>33KbZ>65no?KH1b)u~!vSQe=Ghid@W$~{N zn+);_@D_iggB&feIfC=esBRPYibxjS%${KwyifNEkIRp3kdPoh@)mFFn7K36{GH@1 zN+^$VLNPVdbe|*}BbCo#s}?{xP6^Jc`;#~fyR8jfzCQbx#LK>g4y(cA{5t*zZkA_M zAv!ze1M%@YVurskI8)p4xPSMPmsF3@lrgSuSO+^(f^A^0hGr)8gc9MuU`Py=Y5DsG ze&(hm3^QYYH2CqLz8yY1#mO6eK@np7K^T372kO(NJygAaKSln84NausBDuK-3)7QUPFQMVsYDrYZq+hT1o^O=FJ zj1>MX9Wgw@y1QYlQ7%G}+<@bPGTy@tjd0_SxkSsYS14adQb=8<5*3*x8-V9G z;wL@18)IjKzX)a&f<~R%8$XB@H{M_)gwHSg?uVVHjsZ2rU5DGdwFJA33s&cJwHFml zZWYlIccikM zYqPTIQ;xC6IyCQjfS$LuP?6FOSb1^Kw^f_yvRUd9;#ATKi9UrUi2@PBa_>9fm{d zMD?s6U0_}TsSag+A+*w$oPX%(U@_I`9HEy4n&b)OabY)+k!S>1`BaF43;q)^;a@bA za>2XM^LO6BgC>;R+MN1#2U`TC(Ft9;VW@)V)TEC00*CF=SR2}yPbE;m1s@`DQqn8- zn*W|g^*0{;|F>CnaqfL=RWSJRap5=zTUj`89Mx$p@@J&yJaCV0x(#ai0EYzCdvbn= zd8x&z0ftnXXm;w*6v112ud(^)AoquqTYfZde2gSfCyv7^l_>TWDD zzl6a4{;~(yzsjXH0V`zim*~uR$cNwD0Mo;{?!3cII&B!a!LlG?=JJ&H9uAJRQ!_FMCQ_2eLQuTVjeDj7J*tR|1># zA}5s20BHnf3!YJ2db@3~??T09tR0xp3P}gg>Wg%79!=x577KLPi2RImuBiTPw9RxQk ziT>p6>Ga;@wgj_#i62+HYBGNq5UMYri1Pe|9T}e@Push|lyU#YMh0GR6v87jyQz$8 z7@CM(fSpf?hJQmqNr3t#ROKJro-Dm2@emum+#R~7-RI6-rbO}a(|+D`0Vm?0#;aQ$5F@vhiRN&7V^D``%|xAZMQ`_&$(k;c4>+2 zLI(qR#$c^?viL%hq&hT1>{9A`mo2+zP!BQH1cz_FbkQG%z6!q+?nFiEW7d{1{$C<| zaLlZ(n}Rh^vm$4kooAFR9gGPyr6$!TAL^)r2U#?)w}XiT>uLnxP!Q+W{B{l;okpw) zSqKfGeCYZVLmWp@=a(e&?~CayG^>B60|se6_|~xOa#IHo@eJFb;oOx8d^LlQT}T;x z)G8IP1l}s!gq@D^@Vk!zmAN`wiFag^c9U;p3%8{M2IZ3|OqQ#`J3qW6=#5 z`F0fmC_io@Nw?!Y=r=ZHfbsf9y^O|Q?f@^=Wdyp8dfXc=!?%Ec>!x_}jzp^Q9Cw!T z!*hX5>@mjsKr#7{xmSR5q|83eFI4s?)-mE~h8)N|dBn#+4tGC~h6V9h!z-w0TeTMX zv$ccSe&@W^6dt(=T^>WwZCOEP51OI>_MWG>^OX{eMrA)r4vRc(f|zhQ832dW?i;UJ zsqC9o(|n(Cq5H;^t#lN+$}FxgBNN(ccq?cc9eD4HDnWbE-nn;qQJxKHq~o_f3UrJR zt6$1_Ss0!-xUX%t(Q$P@m_=H>;DHl(i4I3gJ19W?nNjpHfROpeL%E#%KdDtHQIJTNb%hYxu=LnX!8rrc)9m3|o!R2sry?v$ zJ|wA_F9MxUVVaU_$~!_o7=P^OX&4HI`f87Egz)ZxC5kU(&lRtD4`{M#c>YNj{^utu z#Rl-{dkeu^`^{YY6|i_ks3!fk69p5ydgVaiD@3q3J#GP|1NXkd z8>~ksq&>?%iZ;h_Q|gYATfx0bVN5-Sn)8ULM*^DUc!X`6u=&zlXB^4;aRe>%O0dU= zXOzH$d&0wts&%+)a=`%PSGf#|A^;!cPdPpfYn<%{T%E%0*YnC}NhG4MG>7@g%HR-Q zPC|!iI;&kHVK#^Bk`w$;xYMq_sN>!r5fDeA74N?OC|mgzPs3bAjYm_dBSV{*B-#G! z%`FS=*ML6S)HULtH-{m!g`rj!+|9)S_6u*}UG0?HP73j30cOcrL_PC0-OJfQ`;SvS z6f(ed-t>6R$91}_)_^Pg@Gvf?(Kb9K)(qoki($J-v^#OfuT99BlC7~A3Q`L#6@i$4;jEtJ8_boU@VoCW8BN z2Jz#Bx#QB~>6pbqLT0*IhF3tu8mff-f||WNRtppT1*I)1{m3qqWFZH5(fDJTk&EEa zytxq<_unQ(zknbhMkZw!x+e}J-Yz%1R0)}jTrs`j!NWqlH|9T}yy z3bP~@GOu8UjXTj6XLN3t#>O;}1ZNlOXEe*{0~#pKGJD_Tq9&_k3!8${CZv9HEgk)B zC`z=UjFHlDVT2kt<(fD5Dh!XwBn->&x5g#i&))st5?{KfUI7zVWL(^?jEiL+7DG3Tv!fzHbG|)qWIOfFM<4m=|9@-w4ivuD7`kTXpCfh-` z5`4B-r9x;2irG&*%`?V6axbRcGac(9gXwR}?}$VDWjJR=G1c{3^NS25X`-|=`wnD7 zbR4TMOz$yUFU%!GP{Bj^;67ussULa8o1KTP*phuORlQ#RVHJX3Ivl+%=(~STPs$1nk>3h4rRY*?d-zp@??GK!-?^<3i zUu0KO-Z~%fysxYPZf_|Ey8gl_dsMeQa!hyEk1{0cDW_j(z~}6mQXSWtGzrQCc}AKA zE_ej=J_Wm#BwWCwT6=XwVlhavcvX?G5*%AZFY@P|G!&01ac9`$qh?E0aiIple~S3( zQ-D$t0NMu@!0-m!;JWoD8GFKI!dM>r6G)VXza%}|o`%fSHpmuhSDs@r-;j*X5 z*yg2*Z+|ZZJ!kfnRN5s&h~)ZdtC|)}!WLi7xu3f{nEF<-qTUNrWabvp z2OcaC41yU>D749*>WsDJ?PE+OjIg8=%$b4Ccrvr z30QdNF=t6^+v_n+y%bnpxvytl(#pECO>}gPK*9A%I@}j9Nb+@Yu?)pRX*$i9?^jj` zE%Lwe&;Kqb{fpE3L-C#w;|Pobd`VmJ)#mt7LRy*~Vy{rguXV#jML;WsIpL%QrjS1)rU&>Q{6GL`Y@&t03NF4FuOJPZV)+ zg<;Gv;GJQaBQUL!H%65QfycQ8_<^mkfjRVY z_BwcKU3i47(NXHb_ifgycg&~?2i2)3|Exv??wqWQFmdjOUoWB^so?5u?mJWEw!)Pt1)Al+h z8-|@#nkW&%LhgeGuapQL3as!=E3Pii33Ke*hwpABiiPBhGzR1vQQ1C7sQ!3c_Nf{Y z)j5J`W+t;u#8o|0MXWOW>;!y1YKCTr4SPI>TxyTvv!aTHBLZhTWjlY%=Vp*OA;>odeg4$oo^k>=m*$)Gcz&rwmZGn)KqoH z(3!sv(1ctfw7#|yG7B6`#ZLb}rF>^tQ(L!gC;|e4^eP=`B1P#PB{b5HQbnnuh8lV&Gzn7n&Hlc7&OQ5k&cFL(Wj%S;nq|#3*BtL0;~g*S zxxh%)Mke?&#jj$=cbNY0goJRShv`*ZHowDR2#5 zuYc)DYwY0w*S{%rf79sx^Eky>tabE3_LXRx^1I(yYkCKN^IU8ir^CmZC%inHw9 z)`%%y@s)A--umJpuhrWpaUR_FEmPCUZ8qxeTHnIfnOHuDME|%J+F%FlGB?yDz%$@K z9!hyH7K$1rbJgGLZdYL%dDC!q8hBykC%6PSW-{s~lCBLu{IM5$_a0k;F!%r~nT+Mz|dV=I-(+Zm5Xd~8+1zlWEHk43HOU0pEXNG%|u zW^@&0FY|t8`*Ke>JKK!ucKO)Z{-PPvOOfF<)n_wm)RG4Zkt(pZLV^AseTzh;0_lbc8!nY<$_}o zrMr#HYT4<*(F>Aez%Lo=b^rqQJbfi9kjWObrl#PoNY4?K7?9#!9|I1LU!6yLRccnc z%9`BPt=LixmxEnw_}5N)T{N$*q|T)ugbA5d7g{>|%}aPNz6jlDnrKr(iOxUvP%j%0 z1BOZaEPH6}KB|QH?nzfY%8w@tOyN>z?9vxXz!~em0LC~%5^-eUx!#}q zp*WF2{W1K_r3F*fCK=xG-~XV39i3o;po@PI1K!DwdR`8_Exa8ci)eNu$gLs zi@?t#{Vjs()S-a8?#K{46BpzR(oXxf zKG+XVn&iev#jgDd;eF=P3Zi&-y+AcAw`h^#x^kSUmMZ-n5dB`4j+z11bCN1oedtVp!yHtm=}>6qg-kK%SRgr zmS9p8a49R+sM{4rgRJCyse_IXHSNI)USv1~`rY=5!3f7zWR4KOCMXCj{0s11wmRdO zChV7)NCQ`qhhmcF-{SFly~+lWAeP5 zN>EFsurS^6wmuefDP^y?*&*nwt1&_m^bLbDrsVz8F#N3pe<@kGchIeK@~}M2TY6tM z*sRD;j`0Z zjOvU!3-dm+SyU5kI{6-ysC!HUP4!5De%q)GaaU|87}PG9-I;K9<&k|M>E&{FYZOu! z^jKyQ8l{Jnmnd|u5BlSu1%Zg9afvY79S}>8o+@BRwK|=gJW9-aoWP_#5PXugICau| zJ3NqeNrD7fl?`g@%p?{SvQ-wW}KhFk=FbdS;~RPC)yiA zK@|64o+bWbkt%>K!boDR9ieDAhONM(9JSB^uOVbOv3YkTN}=u4;g#GJ)h`xTpsRWK z-<}+T57@VV0XRj7a2V2G>%0i(ffWyRVfJ*M?@ zc~aDWl;G|k3)M^>2sR3I(Qo0H+g!=l_c^_cI04E!U^XMfzK(UU28OSa|FPcKIOzXU z$~xe{T^GyelZXefOGfgwJ6=mqop;jgwX{bUBT%gVp}k+AIV~_2}oZX`9G5Wp2hX)DMbl79X|WWk-Xg;zLFFdPNv5^w6Vpb3os3^oNAmM+mz) zaekYs`D|a-Nm;Mp+3lFeZ482`Gfw4Kx$iYghgV(s6?-Zc%t7-*;>pUDFfl^UpU9I6 zW{*!#7b(Ww&b344iA?#1bLZNc#e-u&$9j`F61G(HRs$Sqyr?fJ>umdh)!juv!^-?J zMvnMdtvE;{M-7y#DFIDgg_Rofz08St1+OBI(v@T8P-ovWIN8m21S%ey>&dtY| zAR;$$bgIscnxzmf*&oa;qOV@2+If?dnEHR@Mf_GPmYk8CA)^&wyK*Cx26*C0vn^Ht z;WC!sxxWQgBLA`I{V(cS6-!-@-PS*3)3dTan?J9hWdlq2ITF&F;qOie1k>B77aS_d zmX*!bi!aYV+?%Cqt7~=k43BQ_tkQl>o_4_?mU{CeT>Q!Tu9DK08Q94ENBfV4=`SHd z1wmled1mpUyXuUu72lb}{)7!zHZXKF##<vu+6O{taoHD-!Ab8Bw~7iAh9 zog6Rbpo+B=guV!Wy2CKy{#X;}JNOS%>%{&zFpIOe;E6z@F zui{!N$fHbrM-hVBbSGtE%~V58_-gxJv%mkHBPuekM#uXP9h?8;cY59ZpSGhl_|)2` zm)RpmQS_$OEaTwd^%#sceJFtZHiTMHKd2AjZzF)YRusTY{r)6>qViw@BCbBA!4Y*d z$Z95m?u`Cp6TRX(O1fnIGt47&|5WbH{`%VspspC4?RqK!Yh^?F7a+eUeC5&dwOUYK z+<6##Z-@N2-%m=$o3x2hN9=GLI{6+>97!$^!MCUecdI${W7i z?&u_#{T6ygz-sCddC1i;W7jk(ynlxE-Sgr3rp?0Lp`O;RywG)B9@I62P8c@DVW2=W z%b9Z~zUkt|+GeEbgxm*oBK7=*X#c`m-Qc=o{o%PSNsgSpd8osdHRu_w=9Yyl$L#$3 z#TSt>q&+Fb(AE0)W}AMisqNVzYHy^%%W#ay?@A71nmpNy%V3kFHZSSWs{j?81p33bBj94jv$BD0kc~`wJ zfiQf`i+K93`t46k*7qi*HeYi}KsK7J-*~9t&?HVLJ0ed2F8+Nj#vaq4MAh{Yb3T_jEQ?r-E58%8iPMaT;C@Q_y#88x#fXqRA73&MY@s7a_qexLt)noHukq@_Q z&Zv}tz17yNjG}knbAy{rh?%-bM5Wn978zH_GXdwZLVrh>YF5JkP<~ge(@^?tSymQm zkFk!=6aFPzzStJ1_(G$5K9NEkaIuGqkV6S^_vG?TgqnSQTu=I3};=eJGy17GqVUIyTLso6ULgT^K06h$N1N^suhvx;i~Seo%lBZjUKj zJ>YkRI6J^LJh<(@-B)&ROBzrZ%%^bbwVk&mYhwq8e4XtMl9v+(63D8GdT%kxuRUmX zIqM{INe^C*K0DC*GlsGTb$!#sBZTnHo<_~&S)S@ei;BGCoHDaL&Q0<~hzqGP?~-v4 zI*Fvo(5uQ z^{kC}UqBfszDNQlZ;k%uDX;P+?EXEOE_=_e#SV$*UTCk!zAda91MTHTaMMdp1k@yE~hIakERg#y%l zd@xt4Fb~BE0XQxbW$Q~yKli2Yjo@^8(Vi^i?=Yt@kUweRm`(E9Ey)}BqFO?Gn1p;}4OUO{ZaJMf?5J3G z|64Rb^#A3S{_h;M|DWrIw5GfHH)_JPfMsE&rp{D0%W$u7K|fl&fj~d%wE;zeCjMe> z6_T~_pz7A4gxjE!4UznFg2ueW3Lh!>!%o3rwve#2%5XQj>bNR*(jGDKlJu z(7XPoceQR}{Y)KsZpHMd2Evtz0(r!x$8?WSZkUzklT=mN)gv^73*s5|UpmC8mM7Q= zTK?OvlU_8^2dCB{IvQd&@m0iry`4-#Mp=YoVz3VXW@6&eTnl-}0(x*;P^ljFGZC;Y zmLw0Yf^mu-wWZkk*B`4CeIkZyP8)jM2GvIkmof7kN8VIIlVT7zWSM5S_-+tK&%NMhSF4eX6j#_>`Dx`r~)QM*-`K+UQ@lmF%NYnFQ>E8b!|$yhV7Ri%5_ zq`U#iOi0S!Yl&Y{4Xs3~LR{i%T= z{w^Rp&oHbMuZ1t>TBs8I1D6p&@->~-klNk>LUl@052e7{iyC&$!^gfj$1LGA86!U4 z(v^JKF}0L{&CM!vBJ^4!V8qCW)nml+p~UD%;e!puxU?Rw@Myjm`FrRilK$m6*-)=v zxUDxjW{2hW&TI%d>2~QqLO=f%dH(15*D-5)BG%O?OE39nTRciVx-sfKsm99jq*=;r zs9|TWEd_i=5K`@1mU%EO@hjaWgQTcFY)SUP_XxSg@IC}evBBx<{C59A4eI~_OGMTz zr+*hJ5a6KrwIu}1lZF!DO&Cf_eDcr-o-{bx;PuDseM3c+Ll0`H*@qB8FG+^Br4`XOTxuz-qqyMr!mX36tWa&c*_PBl1iw|H}7*) zI^joSbspae2R2{FE`WNCUlq_=LX3ED=85o=HQE_=Ol;QA5c9RkqFn7uBRRYIR%h1p z@9R;s36{J2XJLB1)69xxRa(EL#>GB&yJ$T|=9I4_r*$(OOoogRX4b??d=jCV-)?MC zgP&NdKdODQ=*H_{BEt>tw`hzHacr6W`P2mj=p~QviLwwX;JfBkw+UJYtJTaIauM}f z&wqSyqOsY$OoOel8oRdmvQw*8qU}-65V`nZFzFR3iIdXe2oS&*-z<%jKZ^YUyFJw& zx6l2vq`lWb%B!PD9U!)AqB)|(ydf{LMwV4&WgZhIVwGfNB-U#)zkyIqx~>}i83lXO zoKLCjXl(ea+t`n1jh5cTurESiTvHx5%6DzHdI)r5IUlt|PtsQ~NiQ%fTjehEJLI|4 zkhC4Yj&CloZLv~Fo#?8cqc^d0FaywxvebG(uN))fv;^DZYGx}}8z~OCl;19FWIY&d zH~;rE&v;rvC?a&zYdD;sqE~*gJprB(D?D`kn5ulJ@f0tK9qRiL;;WqsvlX$lYkbxJ z#@-mAm`%EHY6LiB|3DK|aOX&-D!r-5b>no_0@PCDSp5=t@tm-Hx_O!ZJkh?8@g@&n}|_mpTjE zw_>l&ig#prhx;bNViXitiSXs9se)u)BHm6CAn9t#wlmiS%KClr!tPDCoYent=M!Rz zHrA+1)1R{gYOYlQLhw=nKkSH}z(Vw2t8vg1$dl(btOlD!HiXPWh~}*(4qwI31KVSa zQ{(B41jGcox1dy$o(gwNnlYUp`RXSajNp^SMn%uus_v98XhX#7-LFPEtiq6O!6Ev@ zt?ByR-r_-hd^zmwvZq~JRi289NQovRwV9_ZK{b9tl;XtEy388u&@w&*IqeK*RUo^` zH&-E=I|swDJ4sf3!a3y&@=qdPK&50a-86{Q}HWQHPqGJBDsa()iz z1AMa1yEem4C5ta4eMIptG#`>87zm z_<1}N9cnJu)VT+?04OFuMbhFOY?}9r?><3h4$m}P*;0bz!n~`uYna^6e` z1qlW68Mlrf-J4GJyzmTkpB9p@8(QRWJJ>pkm$@ji#*EYjgq42Eqg-0H9rbdO`0}R~ z7kZ=#rncf(moy`m83u&7pA+IM^Odwe9cJve2I>+k=y>JIG)@*xUyD`F?p#P=b}kIF zJ_vihLLH~#?&M%T{R@Cqmsjy9a{dAd^v0{afRXu5|1P`t4@wE< z;0HDe&vAK+MGNycndpO26!+XFWZ+jvxli}+ihk#*XAJr!1$uIofuLwDwpi|up>KKXbYBjCyHfl~g1u1E zu^JrJ`JF6GsmK5HMf&gGn4W0VPcq0)(SwsMCCj`nS(A~BfO56c9dF??pQqYo;`}sS zO;mTfNAOqd$i4QIA9SUX(NU056e$Q+{jPcWb$&o$V@Qn_RxCJ>XdOSr8k}sS;D=tO z)(ACmE#3(bdhpAG%X(3@4pg5J^;Da~V~JDWQ&+63;F{aV2kkl+URSdxG6BJ#4Rb!KJkx%J*>af5Ne8|n=lx+zA0HynkAB-P7^BIhKea21$IdbzAimuI+ zIL-V$#jx+fNm|JCjAr|b*o=~8#Ttazp#7MrHHY-n0A~nD&Rl1`zV&r+;xxh%HZQ*s zb4AEq+0s}oVZWZLA{v?kzNxu>N+eE!cHPhM$!0z4bGs~SdW_C{F+;P0WA;m?uZhsO z(9BjsJTWBhs(72XJE7kdK1J%a2kn9bS~_DJQjqWp#3a|TitiL%S3HVW(E9FTQev<} zHg2tLyZqQxjZT_$`ofpPw7qvZ6T8huHhAr*U2PHnG|toO046BXe2%_$!oq470gXW-}vY^j-7qF z!9V1OFN*&j3Ub%FG#u=56N=PiB7pG|%vAbm3v25}>BE~eAx_H28q(yHx(yV~mGj%O z!kZ=DOog~CLFUxeR`e|*KLCD@u-QMg5+Ly zJiKM}okUDS?$Q=;$}jox`v)C4@>$PNL(R*Z_pqk&zI+NY`IMGfeg2+H&h*3Nedi61 z2oCaxlN13y+aZosu9*kb(FIGp$e1)1jh};cc8IZ%8HSo|6udt0p_R4FA@g^Xmn;oj zh{dkFGURnrwIs6V4G+$L0W4BU`(?BJ^L}IOq*W2tlcx%3HyLJ;BPQC*=Q?xDif3$v~|1HOfcyM_h?IpuoD+)2FsA6zn= z7FMjRtgEr7oza<&1voGaMlFD0R&#u5!iUumQ?ie@ztJ3~a*gYuizm+cl)%D&vJ&xS z(Arr+b}5cB`bFexIDNm({f-A%TRm$5$XPFeb^vVV(SU2C_(0xgjEqeG9a?>21JG^7 zN)@G*REa(U=Hsv549*ik5V~!xZ>Y$5W&5bn;x&8VTUiZ|hcp5UfSZpz6*SjxbWGqA zRr}{U&-9_4IiU{X>+7MP;CBA2vHW-~xpCkMh01g;72e_wH%iuMEk)9;1bcPGhR1nd zQ9J&fPVfYK`p3O6i`ncn`yig&;&C;A)!&ctwdKgLq+k!%;;$ae(f2WnIZO58?;Q2b zQl4dxyaa9^{Co!mY55=VWO5U+ z;+^j-rE1Zou?L?(C9qaUzA~?b&$*YlPSmp+^iZo&3j!kq%b|A&@(CvLaL-6+YFY~# z_Mw9tj4H~{(E-4sqEc)n1k)vbIF=Rg|JF@z@L&F6=-)Wp9ydyTjJPVrj3w?-YE0!y zP5QP94J5pC{p6QegY=ZfU26GIS6IkzY!HD nCh~c!LQ*#G?{}<#bBGfC&A1W!SJPrRN;9tPzW^xZUvvKlChb%! literal 0 HcmV?d00001 From 71d407efc982e71b1aaf9ca619a795a8255538ff Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 14:59:48 +0100 Subject: [PATCH 13/28] Add function for plotting elements --- examples/_utils/plot_dataset.py | 172 +++++++++++++++++++++++++++----- 1 file changed, 148 insertions(+), 24 deletions(-) diff --git a/examples/_utils/plot_dataset.py b/examples/_utils/plot_dataset.py index 833345c..becebef 100644 --- a/examples/_utils/plot_dataset.py +++ b/examples/_utils/plot_dataset.py @@ -3,19 +3,24 @@ from matplotlib.colors import LogNorm from scipy import stats + def plot_dataset1D(X, y, beta, title="Toy dataset"): """ Plot a 1D toy dataset with the true regression line. Parameters: - X: Input features (n_samples, n_features) - y: Vectors of the regression (n_samples,) - - beta: Coefficients of the true variaable of importance (n_features,) + - beta: Coefficients of the true variable of importance (n_features,) - title: Title of the plot (str) + + Returns: + - a figure with 3 subplots """ # Create a figure and a set of subplots fig, ([ax11, ax12], [ax21, ax22]) = plt.subplots( 2, 2, width_ratios=[0.9, 0.01], height_ratios=[0.9, 0.1], figsize=(6.4, 4.8) ) + # plot data im_X = ax11.imshow(X, aspect="auto", interpolation="nearest") cbaxes_X = fig.add_axes([0.05, 0.275, 0.03, 0.6]) col_X = plt.colorbar(im_X, cax=cbaxes_X, location="left") @@ -24,6 +29,7 @@ def plot_dataset1D(X, y, beta, title="Toy dataset"): ax11.set_ylabel("n samples") ax11.set_xlabel("n features") ax11.set_title("X:data", fontdict={"fontweight": "bold"}) + # plot regression im_Y = ax12.imshow(np.expand_dims(y, 1), aspect="auto", interpolation="nearest") ax12.set_ylabel("y:regression ", fontdict={"fontweight": "bold"}) ax12.yaxis.tick_right() @@ -32,91 +38,209 @@ def plot_dataset1D(X, y, beta, title="Toy dataset"): col_Y = plt.colorbar(im_Y, cax=cbaxes_Y, location="left") col_Y.ax.set_xlabel("Y values ", loc="center", labelpad=10) col_Y.ax.xaxis.set_label_position("top") + # plot beta ax21.imshow(np.expand_dims(beta, 0), aspect="auto", interpolation="nearest") ax21.set_xlabel("beta:variable of importance", fontdict={"fontweight": "bold"}) ax21.set_yticks([]) ax22.axis("off") + plt.suptitle(title) plt.subplots_adjust(hspace=0.3, left=0.15, right=0.85) -def plot_validate_variable_importance(beta, beta_hat, vmin=0., vmax=1.): +def plot_validate_variable_importance(beta, beta_hat, vmin=0.0, vmax=1.0): """ - Validate the variable importance estimation. + Plot for validating of the variable importance estimation. + Parameters: + - beta: Coefficients of the true variable of importance (n_features,) + - beta_hat: Coefficients of the estimated variable of importance (n_features,) + - vmin: Minimum value of the colorbar (float) + - vmax: Maximum value of the colorbar (float) + + Returns: + - a figure with 2 subplots """ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6.4, 2.8)) + # plot beta ax1.imshow(np.expand_dims(beta, 0), vmin=vmin, vmax=vmax) ax1.set_xlabel("beta:variable of importance", fontdict={"fontweight": "bold"}) ax1.set_yticks([]) + # plot beta_hat im = ax2.imshow(np.expand_dims(beta_hat, 0), vmin=vmin, vmax=vmax) ax2.set_xlabel("beta hat:variable of importance", fontdict={"fontweight": "bold"}) ax2.set_yticks([]) plt.colorbar( im, ax=ax2, orientation="horizontal", label="Variable importance", pad=0.5 ) + plt.subplots_adjust(top=1.0, bottom=0.2) -def plot_pvalue_H0(beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_corrected, vmin=0., vmax=1.): +def plot_pvalue_H0( + beta_hat, + pvalue, + pvalue_corrected, + one_minus_pvalue, + one_minus_pvalue_corrected, + vmin=0.0, + vmax=1.0, +): """ - Validate the variable is importance. + Plot for the confidence in the hipothse that the variables are important. + Parameters: + - beta_hat: Coefficients of the estimated variable of importance (n_features,) + - pvalue: pvalue of each variable of importance (n_features,) + - pvalue_corrected: corrected pvalue of each variable of importance (n_features,) + - one_minus_pvalue: 1 - pvalue of each variable of importance (n_features,) + - one_minus_pvalue_corrected: 1 - corrected pvalue of each variable of importance (n_features,) + - vmin: Minimum value of the colorbar (float) + - vmax: Maximum value of the colorbar (float) + + Returns: + - a figure with 3 subplots """ fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(6.4, 4.8)) + # plot beta_hat im_beta_hat = ax1.imshow(np.expand_dims(beta_hat, 0), vmin=vmin, vmax=vmax) ax1.set_title("beta hat:variable of importance", fontdict={"fontweight": "bold"}) ax1.set_yticks([]) colbar_beta_hat = plt.colorbar( - im_beta_hat, ax=ax1, orientation="horizontal", label="Variable importance", pad=0.2 + im_beta_hat, + ax=ax1, + orientation="horizontal", + label="Variable importance", + pad=0.2, ) colbar_beta_hat.ax.xaxis.labelpad = 0 - im_pvalue = ax2.imshow(np.expand_dims(pvalue, 0), norm=LogNorm(vmin=np.min(pvalue), vmax=np.max(pvalue)), cmap=plt.cm.viridis.reversed()) - ax2.set_title("pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + # plot pvalue + im_pvalue = ax2.imshow( + np.expand_dims(pvalue, 0), + norm=LogNorm(vmin=np.min(pvalue), vmax=np.max(pvalue)), + cmap=plt.cm.viridis.reversed(), + ) + ax2.set_title( + "pvalue of each variable of importance", fontdict={"fontweight": "bold"} + ) ax2.set_yticks([]) colbar_pvalue = plt.colorbar( im_pvalue, ax=ax2, orientation="horizontal", label="pvalue", pad=0.2 ) colbar_pvalue.ax.xaxis.labelpad = 0 - im_pvalue_corrected = ax3.imshow(np.expand_dims(pvalue_corrected, 0), norm=LogNorm(vmin=np.min(pvalue_corrected), vmax=np.max(pvalue_corrected)), cmap=plt.cm.viridis.reversed()) - ax3.set_title("corrected pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + # plot pvalue_corrected + im_pvalue_corrected = ax3.imshow( + np.expand_dims(pvalue_corrected, 0), + norm=LogNorm(vmin=np.min(pvalue_corrected), vmax=np.max(pvalue_corrected)), + cmap=plt.cm.viridis.reversed(), + ) + ax3.set_title( + "corrected pvalue of each variable of importance", + fontdict={"fontweight": "bold"}, + ) ax3.set_yticks([]) colbar_pvalue_corrected = plt.colorbar( - im_pvalue_corrected, ax=ax3, orientation="horizontal", label="corrected pvalue", pad=0.2 + im_pvalue_corrected, + ax=ax3, + orientation="horizontal", + label="corrected pvalue", + pad=0.2, ) colbar_pvalue_corrected.ax.xaxis.labelpad = 0 + plt.subplots_adjust(top=1.0, hspace=0.3) - -def plot_pvalue_H1(beta_hat, pvalue, pvalue_corrected, one_minus_pvalue, one_minus_pvalue_corrected, vmin=0., vmax=1.): + + +def plot_pvalue_H1( + beta_hat, + pvalue, + pvalue_corrected, + one_minus_pvalue, + one_minus_pvalue_corrected, + vmin=0.0, + vmax=1.0, +): """ - Validate the variable is not importance. + Plot for the confidence in the hipothse that the variables are not important. + Parameters: + - beta_hat: Coefficients of the estimated variable of importance (n_features,) + - pvalue: pvalue of each variable of importance (n_features,) + - pvalue_corrected: corrected pvalue of each variable of importance (n_features,) + - one_minus_pvalue: 1 - pvalue of each variable of importance (n_features,) + - one_minus_pvalue_corrected: 1 - corrected pvalue of each variable of importance (n_features,) + - vmin: Minimum value of the colorbar (float) + - vmax: Maximum value of the colorbar (float) + + Returns: + - a figure with 3 subplots """ fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(6.4, 4.8)) im_beta_hat = ax1.imshow(np.expand_dims(beta_hat, 0), vmin=vmin, vmax=vmax) ax1.set_title("beta hat:variable of importance", fontdict={"fontweight": "bold"}) ax1.set_yticks([]) colbar_beta_hat = plt.colorbar( - im_beta_hat, ax=ax1, orientation="horizontal", label="Variable importance", pad=0.2 + im_beta_hat, + ax=ax1, + orientation="horizontal", + label="Variable importance", + pad=0.2, ) colbar_beta_hat.ax.xaxis.labelpad = 0 - im_pvalue = ax2.imshow(np.expand_dims(one_minus_pvalue_corrected, 0), norm=LogNorm(vmin=np.min(one_minus_pvalue), vmax=np.max(one_minus_pvalue_corrected)), cmap=plt.cm.viridis.reversed()) - ax2.set_title("pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + im_pvalue = ax2.imshow( + np.expand_dims(one_minus_pvalue_corrected, 0), + norm=LogNorm( + vmin=np.min(one_minus_pvalue), vmax=np.max(one_minus_pvalue_corrected) + ), + cmap=plt.cm.viridis.reversed(), + ) + ax2.set_title( + "pvalue of each variable of importance", fontdict={"fontweight": "bold"} + ) ax2.set_yticks([]) colbar_pvalue = plt.colorbar( im_pvalue, ax=ax2, orientation="horizontal", label="pvalue", pad=0.2 ) colbar_pvalue.ax.xaxis.labelpad = 0 - im_pvalue_corrected = ax3.imshow(np.expand_dims(one_minus_pvalue_corrected, 0), norm=LogNorm(vmin=np.min(one_minus_pvalue_corrected), vmax=np.max(one_minus_pvalue_corrected)), cmap=plt.cm.viridis.reversed()) - ax3.set_title("corrected pvalue of each variable of importance", fontdict={"fontweight": "bold"}) + im_pvalue_corrected = ax3.imshow( + np.expand_dims(one_minus_pvalue_corrected, 0), + norm=LogNorm( + vmin=np.min(one_minus_pvalue_corrected), + vmax=np.max(one_minus_pvalue_corrected), + ), + cmap=plt.cm.viridis.reversed(), + ) + ax3.set_title( + "corrected pvalue of each variable of importance", + fontdict={"fontweight": "bold"}, + ) ax3.set_yticks([]) colbar_pvalue_corrected = plt.colorbar( - im_pvalue_corrected, ax=ax3, orientation="horizontal", label="corrected pvalue", pad=0.2 + im_pvalue_corrected, + ax=ax3, + orientation="horizontal", + label="corrected pvalue", + pad=0.2, ) colbar_pvalue_corrected.ax.xaxis.labelpad = 0 plt.subplots_adjust(top=1.0, hspace=0.3) + def plot_compare_proba_estimated(proba, beta_hat, scale): + """ + Plot the comparison of the estimated probability with the normal distribution. + Parameters: + - proba: Probability of each coefficent (n_features, n_permutations) + - beta_hat: Coefficients of the estimated variable of importance (n_features,) + - scale: Standard deviation of the distribution of the coefficients (n_features,) + + Returns: + - a figure with 5*5 subplots + """ fig, axs = plt.subplots(5, 5, figsize=(6.4, 4.8)) for index, ax in enumerate(axs.flat): - ax.hist(proba[:,index], bins=100, density=True, alpha=0.5, color="b", label="proba") - x = np.linspace( - 3*scale[index], + 3*scale[index], 100) - ax.plot(x, stats.norm.pdf(x, 0.0, scale[index]), 'r-', lw=2, label="norm pdf") - ax.set_title(f"beta_hat[{index}]") \ No newline at end of file + # plot the histogram of the proba + ax.hist( + proba[:, index], bins=100, density=True, alpha=0.5, color="b", label="proba" + ) + # plot the normal distribution + x = np.linspace(-3 * scale[index], +3 * scale[index], 100) + ax.plot(x, stats.norm.pdf(x, 0.0, scale[index]), "r-", lw=2, label="norm pdf") + ax.set_title(f"beta_hat[{index}]") From a5302527f803fa2001546d731acc667cc615de07 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 15:01:41 +0100 Subject: [PATCH 14/28] Fix typo --- hidimstat/permutation_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hidimstat/permutation_test.py b/hidimstat/permutation_test.py index 6a23a67..81f36de 100644 --- a/hidimstat/permutation_test.py +++ b/hidimstat/permutation_test.py @@ -93,7 +93,9 @@ def permutation_test_cv( return pval_corr, one_minus_pval_corr -def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, verbose=1, proba=False): +def permutation_test( + X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, verbose=1, proba=False +): """Permutation test shuffling the target Parameters @@ -141,7 +143,7 @@ def permutation_test(X, y, estimator, n_permutations=1000, seed=0, n_jobs=1, ver permutation_stats = np.array(permutation_stats) if proba: - return permutation_stats[:,0,:] + return permutation_stats[:, 0, :] two_sided_pval_corr = step_down_max_T(stat, permutation_stats) stat_sign = np.sign(stat) From 7ab4bfb40a8f2030e0f21436524e8b5cd927b35c Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 15:13:37 +0100 Subject: [PATCH 15/28] remove unecessary line --- examples/methods/ada_svr.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index 60a94bf..6ae963b 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -156,7 +156,6 @@ "The maximum of difference between AdA-SVR estimation and our estimation", "is:{:.4f}".format(np.max(np.abs(scale - np.std(proba, axis=0)) / scale)), ) -plt.show() ############################################################################# # References From 83f049e75f078b865eaca670ae1d6346ae884401 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Mon, 23 Dec 2024 15:23:22 +0100 Subject: [PATCH 16/28] unecessary option --- examples/methods/ada_svr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index 6ae963b..c30ba26 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -45,7 +45,7 @@ # see the API for more details about the optional parameter: # :py:func:`hidimstat.ada_svr` -beta_hat, scale = ada_svr(X, y, rcond=1e-1) +beta_hat, scale = ada_svr(X, y) ############################################################################# # | **beta_hat** is the estimated variable of importance From 5887b1b02c299d66d467b63e6644c47db29e8513 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 12:16:53 +0100 Subject: [PATCH 17/28] Add a section in examples --- doc_conf/references.bib | 9 ++++++++- examples/methods/ada_svr.py | 40 +++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/doc_conf/references.bib b/doc_conf/references.bib index 372bbdb..f299f8e 100644 --- a/doc_conf/references.bib +++ b/doc_conf/references.bib @@ -194,4 +194,11 @@ @article{gaonkar_deriving_2012 pmid = {23285616}, pmcid = {PMC3703958}, file = {PubMed Central Full Text PDF:/home/likusch/Zotero/storage/DX8QQAF5/Gaonkar and Davatzikos - 2012 - Deriving statistical significance maps for SVM based image classification and group comparisons.pdf:application/pdf}, -} \ No newline at end of file +} + +@book{molnar2020interpretable, + title={Interpretable machine learning}, + author={Molnar, Christoph}, + year={2020}, + publisher={Lulu. com} +} diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index c30ba26..0a771f0 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -157,6 +157,46 @@ "is:{:.4f}".format(np.max(np.abs(scale - np.std(proba, axis=0)) / scale)), ) +############################################################################# +# Assumptions, Advantages and disadvantages +# ----------------------------------------- +# +# **Assumptions**: +# +# - The distribution of the coefficients of SVR is normal centred around zeros. +# - The method is valid for large sample sizes. +# - The method has the linear models assumptions: linearity, normality, +# homoscedasticity, independence, fixed features, absence of multicollinearity +# (see the book of :footcite:ct:`Molnar 2012` +# for details) +# +# **Advantages**: +# +# - The method is fast because it uses the central limit theorem to estimate +# the standard deviation of the distribution of the coefficients of SVR. +# - The method is simple to implement. +# - The method is a simplification of the permutation test for SVR. +# - The method has the advantage of linear models: transparency of +# the prediction, high level of collective experiance and expertise +# and a guarantee of convergence. (see the book of +# :footcite:ct:`Molnar 2012` for details) +# +# **Disadvantages**: +# +# - The method assumes that the distribution of the coefficients of SVR is normal centred around zeros. +# - The method is not valid for small sample sizes. +# - The method has all the disadvantages of linear models: only for linear +# realtionships, not good predicting performance, unintuitive. +# (see the book of +# :footcite:ct:`Molnar 2012` for details) +# +# **Conclusion**: +# +# The method is a good alternative to the permutation test for SVR when the +# distribution of the coefficients of SVR is normal centred around zeros. +# The method is a simplification of the permutation test for SVR. + + ############################################################################# # References # ---------- From 05f47a7a9b770b845520f1ca71b6885ed780cc30 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 12:32:41 +0100 Subject: [PATCH 18/28] Fix typo --- examples/methods/ada_svr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index 0a771f0..fd667cd 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -158,7 +158,7 @@ ) ############################################################################# -# Assumptions, Advantages and disadvantages +# Assumptions, Advantages and Disadvantages # ----------------------------------------- # # **Assumptions**: From e6076763f3607bc4b8d8650c538b40a59ba47ebd Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Thu, 26 Dec 2024 13:40:06 +0100 Subject: [PATCH 19/28] Apply suggestions from code review fix typo errors Co-authored-by: bthirion --- examples/_utils/plot_dataset.py | 2 +- examples/methods/ada_svr.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/_utils/plot_dataset.py b/examples/_utils/plot_dataset.py index becebef..1b25db1 100644 --- a/examples/_utils/plot_dataset.py +++ b/examples/_utils/plot_dataset.py @@ -159,7 +159,7 @@ def plot_pvalue_H1( vmax=1.0, ): """ - Plot for the confidence in the hipothse that the variables are not important. + Plot for the confidence in the hypotheses that the variables are not important. Parameters: - beta_hat: Coefficients of the estimated variable of importance (n_features,) - pvalue: pvalue of each variable of importance (n_features,) diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index fd667cd..8429b96 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -186,7 +186,8 @@ # - The method assumes that the distribution of the coefficients of SVR is normal centred around zeros. # - The method is not valid for small sample sizes. # - The method has all the disadvantages of linear models: only for linear -# realtionships, not good predicting performance, unintuitive. +# relationships, not good predicting performance, unintuitive. +``` unintuitive: WDYM ? # (see the book of # :footcite:ct:`Molnar 2012` for details) # From 2e4037a2c821b9fdb011b6f7c17f7d8943af0bfa Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 13:41:33 +0100 Subject: [PATCH 20/28] Fix include copyright figure --- examples/figures/ada_svr_1.jpg | Bin 25807 -> 0 bytes examples/methods/ada_svr.py | 13 +------------ 2 files changed, 1 insertion(+), 12 deletions(-) delete mode 100644 examples/figures/ada_svr_1.jpg diff --git a/examples/figures/ada_svr_1.jpg b/examples/figures/ada_svr_1.jpg deleted file mode 100644 index b5f67d66e294910808c6016a6c97191fc715dae7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25807 zcmbrl1ymf}wl3NPX`DcC*8suYEx5Zj79`L#65KsNaEIW*-Q5%1-QC^YdHnm`z0cYI zIcL24?yc@QdUUNC-8E~iHSPQ6>)h)a;Ju85v;+VO3JQ?$_5!>v0z?7uaBx64Sa={1 zh=2f(h>VGXjD&=Yhw%XolL()Lm7giKa-NPvU76t@(T)!s;X;h z>*^aCn>sqXx_f&2`UfT^r>19S=jIpIH#WDncXs#o56&+xudZ)y@9rP|(hCXz{r{@< zFU|g@7y6rC?_gk{VSs<>1@+GPO`y?XV98kEFhrDrUu-{+v--kgevZzp{DVNjrgDa5 zWH*k8P07Aab^e!X|IqAzPcgs$BhCJ$*njA?06>O@dYe3Gbbt`x_Tf`{(QV~^wRC=w zzyzQ#a{Nq$?wW#>L{)ymQPxwxgRl836^OJLnU#617`2O-Mdo3yOmc8|tW|lC|J%_e zf%VNoiz5e>`V=V_whH<485h~rm%eAPbWMtwK&YA;_9a0md;B+~UqYhlHG_@YVV!P8 ziMBDg(49VBcf^5(7G-`r0hr3d9U(No-u++6S{uz>ugY0MN?u*Osld=JPGxc2kE$bN z)}$V2Aw<3l+lgiuMiqDNlW*tQzdU4ewMFo@hy#5-W+j(JNNK>7E1}6a0x*@u|9jE= zsb}#*YY;J$Kx^{~z=*7VZ}ZSD`-9JG;6)Ce$J@aA4|)_wP(idw^PgIY=kOl1A4ZmFB1UvH^S zqpnX9?WH&S^!ti*>I$GNj``n;W0|pv=Gji(LTii3{w=al4TbV8ng}!TGQlFS*5PVa zJQ|T1R_YJTH!z32jQy6|LHasDNpx{GoCY2YF?<>M=>|$|dpAT89!EY#C+2vcr}NY9 z^O*N(rj8DzJ&1#9{DhKPF+<2Ox+(J%U2s`*k)rm?D%j2TZZS`427BmYUsX=4tML+B ztaF#d1(j{)`w13V-_`t)lM}GNv9X3az=lO=+1fiD+!UW~BL8`Xd^X`=5tDP38U0rJ zDu2aj;8l9{q#e!V#4{6F=S7pvKe&9eS$^A6`|aNn7;PH@wM_~3d|LA z5U4FUOkOnlp1_9(z{gl|5MFzWM}U3YX@r8riS{9u3tdEOP0smOjM6_7`$bRrQWvT|VjAhqQ>&LLwUYpP4yLccmfze0Fiqh5(QbA%9S(Uz9 zj&d;y-y6TO)jcfv81)eQf+Q@+&;UFaSO9YHa@MCzOr z2QMdY14;~UIx~@5kO!_|6TaH z-iNLiY?tf7e)>}Slw%=qnD6%5dubTLun*l%Sm7I$S%d3?%?9uFL~Zi*$O0u3lyP_S z5KfmoX`s2*w^xHco`sj@2b{{UWSISQ7IY3gaDE4*=Gk9qYPuYx6F^&+>VVV(`J+c zNT_I;N8Y^jW?Y=MGM&hpJH$clhe}}l9GzM}-5$>R5?7f5tR=gE`vg4)BvRoiS2KM(eH$9hyEXs$7Cu^~^e%|1Lrqqd;Wf1C|!u-e8L&aZ~S5q8j9>5W2z@j(IDKa{vl zr^TT$R#V1g|EO`OSwC3{^XK%^AP^a~G;P|%U*VeN5JL6fZ=KF!Mv+;;?dOw~AguED zaksNg&Me$dr*5mpzExV(^Q`pGn?ZjCnD#NN#|JS2uD0qQsl(gf-&wUtuie)W49=kO zo5$#u7-FdTDXjQYI^6z375aSsQqsO~XpaGj(q0lr#;GGCi_jZSv`p~K2IKbe#{k`G zA?F`G4RpuJCLP@KCmreV7(F&?Sh2q4JfDP)c-l3To}#;OqS#H!`w;|G<9n=Fele7$ zp#Gwu+`!K%nOU;Twca;(CpVW>9nnLcbI)7mgn|0Z< z?nSXGG^KGsoIosWTZFbb_RqTg?_o<_51AT&9Z!BRepocs#$4^jr~A*EN`1V$sS(qk zJJ&hd66hBO)#&Isvt~5XM|i#$5mu0LI%WKf;M3?@f%en3HhHuRF`HhWmV#%-=TLg@ zPSk1+jV5=O`^_QKws4P2S&=i7D!vLBjx~_5RxJpn)WE}^9=|s;Nf4@)$5QiE3|YA{ zzHiUA2NIIFzBB=LOGs$iBtTkBGGiV#BlHtut~5m0<+1c%#F_&q(B^@i640A@L7$_F zdHTtO0U2I2@23B@VhAO>(e9zZLN^4JuYmEaX15K&zgb*3UbT%zi!kixtJ)w|}7g;cw_KUOSkI#yBl6_wCL6;d#3S6hT8_9`w}qf%43 zwe>R;@#?rOsuJ67X9H5ZFoft!_c=d->?9uT$5YvRre5pukp%61V5SgpTBpZr9Z(owINjipa*mATJvt!c|SY!^F7dHv;Vh& znzDNNiT+3r=zBmOOL#ZtT6DVYoyk;%T;h94b~Bm-JjKeaWpPhSXBPY_?ssMMLy%kA&*aI*`OUTm4mW5YJTc{#w6+Bl>rvrUX#d7H6>Hr z2z;hSBNkrCC(XWnFoUZ}5-o&zho%IHl}Zt7@g(-Xhc&21;t|^QkItZZB$j#6kB^&_ zRoqYU#>Re>JvTTLViGOQD%zi9pHZ%U1(cA`y#iWL(GEUMgK-?~u?qj(l`CqVo5m&d z&CcQ|)o}6J3k`e$JZ+_Z$K$3TVWA)l$+t1if36m(2&Sr=+)6nPb>?A}7Z`|r%qH7b z02*vSo3G&?I7T9g6zO}G5j2P5<%*z8g5C>`qumm$mf;$PrQ#1YpVg)0=u&M^`pX20 zC)M2bkV`{t$U%ppwT3z3J?PxV2fOj5#beGbn}qA(vR3qwZlNm_($Hju0O;^MRC74X zIe#(+Wgu2Mk$#7j(00CBf`SR#f(CKy-c;@#aj>^PvYb`4sG853e0t7lT%ov*wEaEm z*%|)}wO-;JE~&hmGTE^&PGZjSlX{>l@XeC&05gf`6dDt;d!?&Gi$jW^^!sJ0~!bZS&JHU$ZqFprcBMxY9F6bi#)`Qn6o2yg`x zbe)&dX7 zkIPvngTih&vF+N zQ)C<(6P~{GFVurwl?i_BFZS5+o+wi@zoZ|lQ%y|lSNS9O!W^+i!S#`*t_1g0lEf0v zX}uHmH7KZ$qODUL0{RdA6H@51qIluLdPCAn3S(U8fBy7r_H)n;{<=%2u*$Q*jv2*- ze7Z+PowiPbt}KYNK8Z-?h|{F3`#QXR8=a%*XY-K}y7ST*}`Ao)h`*&}ho{La3!VoxJ7 z38njJ*K>OCdEekk0M{S7ZwlEcC+I&9 zl*iD+Ee{egq<-Xss`w~+pN7ET8|mR#ilM9_NhK5W598@(h1GvozQGjqgjX;(cW^0F zt#8++>+Z*B+t>={$?C$dsR3^Y&jXB>^ELlZv%JpOzSTJAJ@?MwnYgxe!+>C)nBeSq zr|KfKuT)p;e4(k?h8jwdpQP02KnW=$oU?+BwIrP=2|?8l>L)54K_>}PO?gn0*$<}L z=JlfeOJTW+ew&LQu?#Vde91n1?ZCUD(H<{Cf9NR zj);Ao>8=!+z*2zHxYoWGWpe*$-`1}e$b&cU5yXGyT2Ij6y(B}x(TCCZaALuoR;1@K zpZ2Cb)Jqs0mKhfne4BI)WLtk+s~H?XsMu?kP78z5Waa0W-s9kN!SyYyVZJ1xTxki%JkB~UZ(IxrqU49y(HlB zHf5XWuFwP44)lpurd@B)G89tn%$7>I( z>yD{AIX%A|d{d9Vg%iwC-BLl1)c9rrt8CMymOIw0gg7Bnv<2GB&C#mwb8&D*#NG6s zPfkxNl*O^kxc{EVqDO1RG>n~^G$d&2j2wvL^8e7irA)^>zv8G@9!9qB8XpMR>x9)Z z!3WRD8yE@OvC;*#zh!8%+1_*5o38)~DhFPVNo~a&q{o2h<)4c@k0bt-n|OF-PXF$G zI1Ckb?bSILcvsac5=Qp9#5U_;6^-6<9@6P1T5A<+#=RX64uS33?Und~O81`?hg{%9 zTvUM(ILQKzNezUUNy9rciBQZI1%{`?y8|=H>YG0G?|fdDWm05uk7)NT zax0FhabnENH>O^w(-&SfHLAyrBBAk(A()JCE&P>E_Y=c~Qrak5lp@=NSpc4*pbWZm z0Xw63pGBUbuDjN`-qzfsfkW-HOf~J}fmZA&-g3X{$f(b{;t{w!dK`A^l(zZ@R>aU1 z3#ybG{DIP(H}bt{*}sTj zEykrpJ_&QPcj4jnNPz8=FNcLBTbtXa7y`>3t;!b%LkIT)uhJ1%)rI+K)gQBqh+UEG z3)kUo3l3Dk7qY~W1kY(!c}62>4-yZzG3$TmgrBf4SD=JAdL_CLhO~38d$S zMXto{7=q^#cE~uiKRc&aulu6NuWiCw?MEqkyXgNl%r<_PE7|TD$_lAuQOj1f!qU z@nM#frME{5cPVCCA6;Y1wcw$Al%hw@mSs|X7f~7pv z5JhGcnu)0GvCt=+s%$UoCx6bq&4&-CI{D@<2MDdl9e2Xwu)9hTi&c$8! zOJt&U75^(>R#{7*@I0cp$f8LMC%H6yifialpuoozO5;l^hU?+my_~D(f&ewkH6bZy-W|`dgA1Brk;Mx&8~bPo%>Xa)_bu@df!_t zTpo&6@#1}Si-bE;`oUWgB7w+S!UX+2df7tiV@JiaD<)A^MT|iLI0-x$=W@35!2ENJ z)eVhkt6vLjdraH+xkkw0v5%uHKL*wuw%ko-?SJjZ?$?!64`~0~HRMr$&}h|}+ewy@ zr?yW$r}pRP+K>&^6V;vEY>4T?mi$WR!_#T_)Y*oHrTAwR{YhvOGB_wewu`Y~CpLb( zlGL-kb$@;Avv0>a{uGFMy zM?ytGQWrVa_lP@Bdh9<8yJlO6Ye`(23;P4sb;6`Vq#zXA(`zNir#3o?b4pkt}2>jEb$sL&LuG}>5 z(An;wL!MkV!SzL#IS%j(HMD=Dm2*(8a?3(-fVw zbf8rknOK69hEXlbua@$lF2LG-{CKJ|C1wYD*8kJmyiJ<~l4*AK?x1Q^Z!4leb3RZ&0t!K67b9aRILOp(wuCnD9cr*o%10dS@$%krMtE;h@+2Uj6!#9emYQ#G}cWWAI)ikRKOYIoH){`W^my6l9H?_LERF%X@1cj;1Ac= zN_o~Buras|g7MNS$*YwsP>j*suVw`4iaT2obz#s)OJzn+ZM}D&svTTRIBP3mKs+8V zC%J`jsE` z&AW5T$3M^F8$bx<1Awj|QsgjJZ48o{lHW@Tl}piIvTun9KaTLK9pz7s{87&7+khIv zt%mg>Mu#FRfL3-TA=r_1K50Zdltjf|PxE`t6Mf=HtC!SE+A!7Z_nUb!hE>PkUs@{H z{9mvp78XlOYrau0iz9v_gG-xl^$;^f)%u`<9wp9zRoG$c^nyK;=~{DFs|v}dkF+P6 zqJI7e#@#|dz$H_BzT))vL3QvD%CS&7z|koyrKEM`Es=-K6jVm|9pyhgy2s4Z1IfLKRRq0H$A{~vlU4N1hzbh0fEy^z_&#u5@ZR>F=!3H z`R&JMYTRz3+53^*a$^T6evV^1QvGyPV(Z5v)J$0NV49_xP*wZ^9xnl^Xl-dy41s+3 zt0Y$7x6nrc8h`F=DK(Gnt*r99#8)!dO>tl~u-*wXbN4?zjsZahcg@n373n7gKW)T5 zNWxSu%9Qnuti!o(#y+TSpG%}!ZQ!v(3S(AG19{YUqSZ}ShB7s&DW8HO-7W{cg9NE! z4o=()OVkDlSvEc*mKir{J6owKwbPKID!9+Q0z#fp?8^@p!Vjo2T}v6h)Vq3JSrPU~ z9QTMohX4*o>T}_<;|@6ectp&Xh8n)XtB8#9M+xSrf(*td`k6rLRoG=OI&x-uVJp7X zexAiW*BOWw&~oSBnRNyQYHPbR9(1jc4CxaQ*ncCN6j&lh4C*A3pw#wnK=S>D^&!o{ zrjy6k{0-w&5$qj2pM`v4FJ1vy0Jpg0l|;;P%;fMaX_8fmBoTw=DYMA7jN7qmECmBB zaIp86w{2`M?h@Oy)Jy(zObp7h&hr%@bD4qef^n#DmJ#w%md2X#G_Mlm>6>edkE>Qp zXcCr5J!P+P+(rV~yIf(qF)i&n_SjhAZ(&X{3V#pKgfT_ncRm#+{=qMj&GEAZzcjj* z(wDhG?|X#Qh;6rwqbIwp6?%Iuh91foQ*vXRTfwMMfNNd&*{Zs1LGCohfD~%hZMe0T z6pmwi;-pcT^V%6;qiFvXfOH*7S4z3WugTnr(qi}HBurm9M8YyMDuFNpO5d{pdX6TC z2$lh0OM)r11^eb~=d63*(xs5GS*ELUlpo29ihlAccKcvt7Qo4f-BN^4Dh7mlW9to7 zIEyb`9SpTnn6YnI`$FPOT`$u);UT+@_obPp$jZ@atRu#n`<4RBPbv%QXWeoLt=PikN;>o}3 zIqlM?jG=2LC>*LgAdkHT?-VZeDe~a)f9Qc2ucwXr^Fx6PI=Lbo4kum7D>|*z0qB#| zvFTp%OWo%men)i~)t?h5VqE&fzycm7!_?HH<8`Wv8C|{ggRsEs0>}#_i2?hl-P3!lxvrFZmJxsciJzEXWl^^D5TPPD|MIdFe>-R3g03`1mIydSE^s| zhjUznx^%iKde-Ys;m+(O8^5QAx7(@#eM5Ko9dN6x_1z?`u zX{{i?0x~mX-=fQaU$Ey)J|27U$5i4fjSX_F)xT z#_X@ENJJ%b0-BGVZ4&0t`Tqd>LujD?9q^of1>m@ZDvy^t99jujA~am$4KL*iBLef6 z37=0Sz6phLgl|9^Ye+(Py;?a_3iPzRP5LeL2bPA!0+r@S9o8IkrdmwbhYdR%Ne~Wa z^<}XmhsT%eVpxRng2WPa&PIwi_*zw@7!B+PF&aY1$70}){Q7T2peZvcBjhQ06UsMS z85}%i)oKIrtX?K}#EEAm!-~I5hrXM$NkWsOGr1~tXCgGm_eySZEDz)tCCVyPem`MC z0W7nhBdRbz&XL%BximO2Kz3VXS9Kx_7`2E}ZuKdH8BcC3LT18Xh3kJ`66~^~)2Wl1P zbUW9>+mz?b9w63|cgN_&D_{?0L+A-h_5~AaQOy|x*J>+{@ghWvQFsI>mrfV<3JA!| ziEPbr`R?gZ=-rV*-L;Hio=(di?z!psX<$Jv=e?VMH^B7H?qE>o93^bl3v2RA!ZH3i zWA?YmwY1qlH=4pm)P=UmBIszx>N`qj_)C=*1fDK*OB4$>}|fH`au@`q@B_j6U96rDR) z+YdS=Hn<_0BrZY7Z2tQ*jScj*qjw5N)+Uo%kTf$nhZ^6Y_l#@MgL{C%=o47JO9sRn zi_~_PHc36(LS0?`Ik^g1cN=#KrUi07-;T<)$XmCtHRxWfJ!}>f#QoCkc-Y)QUu!Lo zKYctZwh>Ea`x-S2h=ZLER0;8B#H2OCl_y)f&7RVf8HL4(PT%wwe^Q6*vx@w=CJZT{ z!k|-+SG=Iks&*JG_S1iJjSip+c^v|?UaF9b%GVYXzuFZ7D$rDq!}5CYJu&E$mIaA zzwf26Nux)h3!y5k=(E*5Fem8R|GsV$&RFc&kDeE_dJ3qiS%Aca{ zZ0~~u%y%n)^PG&dO#E!HW$7j_vucnSliqnQj9Sr37BW^^rhoB26f)xnsL zVL1tkXE*SV;1pE8%{fk|Ci&;Ykv3hGffW-BMi}oyAl+?Z({(XdI@eeUCCidtMU*t5 zy9SPtju@j4fQL$?#QDzDj1-CLX(;9Nr;m@>u_&_8nn0nI7ZGM_U2F5q*T%-c1EiRh z&%cZ@t?rlQxKvZXm`$t;1G1!xiO|+g+f&y2vDzvgOnz2wL^ARk&KK|Q&HE5^MYrFhx)^Ny zCbA#ph~GGcIiyb~t{p$tcwk`cTBMLsqLV^)mkZJI$oQzhcJ&K2kmN^`dP6gB+&ZG3 zt~kaYAi zU|6;B z*)Pzo zs&mw+y0MW;S;8#!)wB{}3LuG&t#nO#M=V(H1IWCMpvBj$ZsMvj%QoJjKFt5lS0w{w zPt0(y$w}yNTBxJgMe%cN=L1h|vT1^5)W9NJt#mK6kj!$HG_?u3RjFM0odmzoD?nI) z>lFY+@#b0-|9OJ#f1Q{q1NLC+>^^;bAX*7U9{E_;P@68S7>_EvNO7|nBjBcjH!-}P z2J^X_9ff6)khV6==e!Z2LR($9zE@B#j|1b&5Ed*O2_h0st}HObHPoLFJ3 zSHL>v+3qudz5NL$@`)Dh`xCbWs&8)eD?s1&%}H9mak<*hez=&u1RTF1#DF&h0Ws+F znPQL!m#x4^V(uT5u9K0A<3T%nAH`+j&Ua0SZ;Z&lSdsrB@80&@0#3543Qd%0+8iKz zDpsj1*Qn=xeH4j+Y%g)HT2~@ zWP9R-yrDMG9SZY@t#yDDM*s7Q)|uOPqG5pBTZqM1_$B;FsO*C?Z$WJ!8bdaL!EH=m z7fBQCwQHxBDoDs^)DJ zikm9Ma7qIrFed0LK5R9ENoQ6}D@=7CT{Q5b**f0{2x(vquUykz%Vs|g$l|x2cU?Zx zxPN+$wP%T`BM9J^ItHB%hftZ7Zb$ETQcJB3znfC`g>ofEgSk%GOFRlUM}X9B=e<)T z6OAdupC9#`w2Vcoh3I7`4=k8IJ!&U_8o5|Wsa6RAd}wE7ld_Y?mR4$@us}effKKB^ z*#d|N5{tIW)VEW{E?bHL@5l{kazV9b2zf`$Hf!VAS#Z6tXPR>}TX(LvSS3vu92f1{ zCwR?+hg+s=b(U9x`L0Res@48tVaPj%}1(Z?np*65JOIdQe;NoNuj)_!^w$M8a;(tae3lS*#vjAkn0^l-!Xd zXZmhQ%%9Z9#rwBezC~`?_f`((7Rf?}`jAo*B|j~zlGu?db7Ll|qannfaS=aZJX%5J z;vD)6$|wAwFHQ`Uk=k@OvcoKWv3;R@En8k8y!PWwp6hy3Dw6!tpRgPZ@wDvVuk(-b zM&LXxWUw3Dk3Y|#;TafIKc^!RD5Feq57i&79!wj*kRo(cY$t_81U82h+UCybt%CHI zbVRA9%j7<=z8l{%JQk>;fsU=5wk-32fL*@p^?F+v*N}O;ny5Y-@j41iZAUN<*1Q5x zoUS!i<*I60nwsltL-<0o@?6!0!H8kV>33KbZ>65no?KH1b)u~!vSQe=Ghid@W$~{N zn+);_@D_iggB&feIfC=esBRPYibxjS%${KwyifNEkIRp3kdPoh@)mFFn7K36{GH@1 zN+^$VLNPVdbe|*}BbCo#s}?{xP6^Jc`;#~fyR8jfzCQbx#LK>g4y(cA{5t*zZkA_M zAv!ze1M%@YVurskI8)p4xPSMPmsF3@lrgSuSO+^(f^A^0hGr)8gc9MuU`Py=Y5DsG ze&(hm3^QYYH2CqLz8yY1#mO6eK@np7K^T372kO(NJygAaKSln84NausBDuK-3)7QUPFQMVsYDrYZq+hT1o^O=FJ zj1>MX9Wgw@y1QYlQ7%G}+<@bPGTy@tjd0_SxkSsYS14adQb=8<5*3*x8-V9G z;wL@18)IjKzX)a&f<~R%8$XB@H{M_)gwHSg?uVVHjsZ2rU5DGdwFJA33s&cJwHFml zZWYlIccikM zYqPTIQ;xC6IyCQjfS$LuP?6FOSb1^Kw^f_yvRUd9;#ATKi9UrUi2@PBa_>9fm{d zMD?s6U0_}TsSag+A+*w$oPX%(U@_I`9HEy4n&b)OabY)+k!S>1`BaF43;q)^;a@bA za>2XM^LO6BgC>;R+MN1#2U`TC(Ft9;VW@)V)TEC00*CF=SR2}yPbE;m1s@`DQqn8- zn*W|g^*0{;|F>CnaqfL=RWSJRap5=zTUj`89Mx$p@@J&yJaCV0x(#ai0EYzCdvbn= zd8x&z0ftnXXm;w*6v112ud(^)AoquqTYfZde2gSfCyv7^l_>TWDD zzl6a4{;~(yzsjXH0V`zim*~uR$cNwD0Mo;{?!3cII&B!a!LlG?=JJ&H9uAJRQ!_FMCQ_2eLQuTVjeDj7J*tR|1># zA}5s20BHnf3!YJ2db@3~??T09tR0xp3P}gg>Wg%79!=x577KLPi2RImuBiTPw9RxQk ziT>p6>Ga;@wgj_#i62+HYBGNq5UMYri1Pe|9T}e@Push|lyU#YMh0GR6v87jyQz$8 z7@CM(fSpf?hJQmqNr3t#ROKJro-Dm2@emum+#R~7-RI6-rbO}a(|+D`0Vm?0#;aQ$5F@vhiRN&7V^D``%|xAZMQ`_&$(k;c4>+2 zLI(qR#$c^?viL%hq&hT1>{9A`mo2+zP!BQH1cz_FbkQG%z6!q+?nFiEW7d{1{$C<| zaLlZ(n}Rh^vm$4kooAFR9gGPyr6$!TAL^)r2U#?)w}XiT>uLnxP!Q+W{B{l;okpw) zSqKfGeCYZVLmWp@=a(e&?~CayG^>B60|se6_|~xOa#IHo@eJFb;oOx8d^LlQT}T;x z)G8IP1l}s!gq@D^@Vk!zmAN`wiFag^c9U;p3%8{M2IZ3|OqQ#`J3qW6=#5 z`F0fmC_io@Nw?!Y=r=ZHfbsf9y^O|Q?f@^=Wdyp8dfXc=!?%Ec>!x_}jzp^Q9Cw!T z!*hX5>@mjsKr#7{xmSR5q|83eFI4s?)-mE~h8)N|dBn#+4tGC~h6V9h!z-w0TeTMX zv$ccSe&@W^6dt(=T^>WwZCOEP51OI>_MWG>^OX{eMrA)r4vRc(f|zhQ832dW?i;UJ zsqC9o(|n(Cq5H;^t#lN+$}FxgBNN(ccq?cc9eD4HDnWbE-nn;qQJxKHq~o_f3UrJR zt6$1_Ss0!-xUX%t(Q$P@m_=H>;DHl(i4I3gJ19W?nNjpHfROpeL%E#%KdDtHQIJTNb%hYxu=LnX!8rrc)9m3|o!R2sry?v$ zJ|wA_F9MxUVVaU_$~!_o7=P^OX&4HI`f87Egz)ZxC5kU(&lRtD4`{M#c>YNj{^utu z#Rl-{dkeu^`^{YY6|i_ks3!fk69p5ydgVaiD@3q3J#GP|1NXkd z8>~ksq&>?%iZ;h_Q|gYATfx0bVN5-Sn)8ULM*^DUc!X`6u=&zlXB^4;aRe>%O0dU= zXOzH$d&0wts&%+)a=`%PSGf#|A^;!cPdPpfYn<%{T%E%0*YnC}NhG4MG>7@g%HR-Q zPC|!iI;&kHVK#^Bk`w$;xYMq_sN>!r5fDeA74N?OC|mgzPs3bAjYm_dBSV{*B-#G! z%`FS=*ML6S)HULtH-{m!g`rj!+|9)S_6u*}UG0?HP73j30cOcrL_PC0-OJfQ`;SvS z6f(ed-t>6R$91}_)_^Pg@Gvf?(Kb9K)(qoki($J-v^#OfuT99BlC7~A3Q`L#6@i$4;jEtJ8_boU@VoCW8BN z2Jz#Bx#QB~>6pbqLT0*IhF3tu8mff-f||WNRtppT1*I)1{m3qqWFZH5(fDJTk&EEa zytxq<_unQ(zknbhMkZw!x+e}J-Yz%1R0)}jTrs`j!NWqlH|9T}yy z3bP~@GOu8UjXTj6XLN3t#>O;}1ZNlOXEe*{0~#pKGJD_Tq9&_k3!8${CZv9HEgk)B zC`z=UjFHlDVT2kt<(fD5Dh!XwBn->&x5g#i&))st5?{KfUI7zVWL(^?jEiL+7DG3Tv!fzHbG|)qWIOfFM<4m=|9@-w4ivuD7`kTXpCfh-` z5`4B-r9x;2irG&*%`?V6axbRcGac(9gXwR}?}$VDWjJR=G1c{3^NS25X`-|=`wnD7 zbR4TMOz$yUFU%!GP{Bj^;67ussULa8o1KTP*phuORlQ#RVHJX3Ivl+%=(~STPs$1nk>3h4rRY*?d-zp@??GK!-?^<3i zUu0KO-Z~%fysxYPZf_|Ey8gl_dsMeQa!hyEk1{0cDW_j(z~}6mQXSWtGzrQCc}AKA zE_ej=J_Wm#BwWCwT6=XwVlhavcvX?G5*%AZFY@P|G!&01ac9`$qh?E0aiIple~S3( zQ-D$t0NMu@!0-m!;JWoD8GFKI!dM>r6G)VXza%}|o`%fSHpmuhSDs@r-;j*X5 z*yg2*Z+|ZZJ!kfnRN5s&h~)ZdtC|)}!WLi7xu3f{nEF<-qTUNrWabvp z2OcaC41yU>D749*>WsDJ?PE+OjIg8=%$b4Ccrvr z30QdNF=t6^+v_n+y%bnpxvytl(#pECO>}gPK*9A%I@}j9Nb+@Yu?)pRX*$i9?^jj` zE%Lwe&;Kqb{fpE3L-C#w;|Pobd`VmJ)#mt7LRy*~Vy{rguXV#jML;WsIpL%QrjS1)rU&>Q{6GL`Y@&t03NF4FuOJPZV)+ zg<;Gv;GJQaBQUL!H%65QfycQ8_<^mkfjRVY z_BwcKU3i47(NXHb_ifgycg&~?2i2)3|Exv??wqWQFmdjOUoWB^so?5u?mJWEw!)Pt1)Al+h z8-|@#nkW&%LhgeGuapQL3as!=E3Pii33Ke*hwpABiiPBhGzR1vQQ1C7sQ!3c_Nf{Y z)j5J`W+t;u#8o|0MXWOW>;!y1YKCTr4SPI>TxyTvv!aTHBLZhTWjlY%=Vp*OA;>odeg4$oo^k>=m*$)Gcz&rwmZGn)KqoH z(3!sv(1ctfw7#|yG7B6`#ZLb}rF>^tQ(L!gC;|e4^eP=`B1P#PB{b5HQbnnuh8lV&Gzn7n&Hlc7&OQ5k&cFL(Wj%S;nq|#3*BtL0;~g*S zxxh%)Mke?&#jj$=cbNY0goJRShv`*ZHowDR2#5 zuYc)DYwY0w*S{%rf79sx^Eky>tabE3_LXRx^1I(yYkCKN^IU8ir^CmZC%inHw9 z)`%%y@s)A--umJpuhrWpaUR_FEmPCUZ8qxeTHnIfnOHuDME|%J+F%FlGB?yDz%$@K z9!hyH7K$1rbJgGLZdYL%dDC!q8hBykC%6PSW-{s~lCBLu{IM5$_a0k;F!%r~nT+Mz|dV=I-(+Zm5Xd~8+1zlWEHk43HOU0pEXNG%|u zW^@&0FY|t8`*Ke>JKK!ucKO)Z{-PPvOOfF<)n_wm)RG4Zkt(pZLV^AseTzh;0_lbc8!nY<$_}o zrMr#HYT4<*(F>Aez%Lo=b^rqQJbfi9kjWObrl#PoNY4?K7?9#!9|I1LU!6yLRccnc z%9`BPt=LixmxEnw_}5N)T{N$*q|T)ugbA5d7g{>|%}aPNz6jlDnrKr(iOxUvP%j%0 z1BOZaEPH6}KB|QH?nzfY%8w@tOyN>z?9vxXz!~em0LC~%5^-eUx!#}q zp*WF2{W1K_r3F*fCK=xG-~XV39i3o;po@PI1K!DwdR`8_Exa8ci)eNu$gLs zi@?t#{Vjs()S-a8?#K{46BpzR(oXxf zKG+XVn&iev#jgDd;eF=P3Zi&-y+AcAw`h^#x^kSUmMZ-n5dB`4j+z11bCN1oedtVp!yHtm=}>6qg-kK%SRgr zmS9p8a49R+sM{4rgRJCyse_IXHSNI)USv1~`rY=5!3f7zWR4KOCMXCj{0s11wmRdO zChV7)NCQ`qhhmcF-{SFly~+lWAeP5 zN>EFsurS^6wmuefDP^y?*&*nwt1&_m^bLbDrsVz8F#N3pe<@kGchIeK@~}M2TY6tM z*sRD;j`0Z zjOvU!3-dm+SyU5kI{6-ysC!HUP4!5De%q)GaaU|87}PG9-I;K9<&k|M>E&{FYZOu! z^jKyQ8l{Jnmnd|u5BlSu1%Zg9afvY79S}>8o+@BRwK|=gJW9-aoWP_#5PXugICau| zJ3NqeNrD7fl?`g@%p?{SvQ-wW}KhFk=FbdS;~RPC)yiA zK@|64o+bWbkt%>K!boDR9ieDAhONM(9JSB^uOVbOv3YkTN}=u4;g#GJ)h`xTpsRWK z-<}+T57@VV0XRj7a2V2G>%0i(ffWyRVfJ*M?@ zc~aDWl;G|k3)M^>2sR3I(Qo0H+g!=l_c^_cI04E!U^XMfzK(UU28OSa|FPcKIOzXU z$~xe{T^GyelZXefOGfgwJ6=mqop;jgwX{bUBT%gVp}k+AIV~_2}oZX`9G5Wp2hX)DMbl79X|WWk-Xg;zLFFdPNv5^w6Vpb3os3^oNAmM+mz) zaekYs`D|a-Nm;Mp+3lFeZ482`Gfw4Kx$iYghgV(s6?-Zc%t7-*;>pUDFfl^UpU9I6 zW{*!#7b(Ww&b344iA?#1bLZNc#e-u&$9j`F61G(HRs$Sqyr?fJ>umdh)!juv!^-?J zMvnMdtvE;{M-7y#DFIDgg_Rofz08St1+OBI(v@T8P-ovWIN8m21S%ey>&dtY| zAR;$$bgIscnxzmf*&oa;qOV@2+If?dnEHR@Mf_GPmYk8CA)^&wyK*Cx26*C0vn^Ht z;WC!sxxWQgBLA`I{V(cS6-!-@-PS*3)3dTan?J9hWdlq2ITF&F;qOie1k>B77aS_d zmX*!bi!aYV+?%Cqt7~=k43BQ_tkQl>o_4_?mU{CeT>Q!Tu9DK08Q94ENBfV4=`SHd z1wmled1mpUyXuUu72lb}{)7!zHZXKF##<vu+6O{taoHD-!Ab8Bw~7iAh9 zog6Rbpo+B=guV!Wy2CKy{#X;}JNOS%>%{&zFpIOe;E6z@F zui{!N$fHbrM-hVBbSGtE%~V58_-gxJv%mkHBPuekM#uXP9h?8;cY59ZpSGhl_|)2` zm)RpmQS_$OEaTwd^%#sceJFtZHiTMHKd2AjZzF)YRusTY{r)6>qViw@BCbBA!4Y*d z$Z95m?u`Cp6TRX(O1fnIGt47&|5WbH{`%VspspC4?RqK!Yh^?F7a+eUeC5&dwOUYK z+<6##Z-@N2-%m=$o3x2hN9=GLI{6+>97!$^!MCUecdI${W7i z?&u_#{T6ygz-sCddC1i;W7jk(ynlxE-Sgr3rp?0Lp`O;RywG)B9@I62P8c@DVW2=W z%b9Z~zUkt|+GeEbgxm*oBK7=*X#c`m-Qc=o{o%PSNsgSpd8osdHRu_w=9Yyl$L#$3 z#TSt>q&+Fb(AE0)W}AMisqNVzYHy^%%W#ay?@A71nmpNy%V3kFHZSSWs{j?81p33bBj94jv$BD0kc~`wJ zfiQf`i+K93`t46k*7qi*HeYi}KsK7J-*~9t&?HVLJ0ed2F8+Nj#vaq4MAh{Yb3T_jEQ?r-E58%8iPMaT;C@Q_y#88x#fXqRA73&MY@s7a_qexLt)noHukq@_Q z&Zv}tz17yNjG}knbAy{rh?%-bM5Wn978zH_GXdwZLVrh>YF5JkP<~ge(@^?tSymQm zkFk!=6aFPzzStJ1_(G$5K9NEkaIuGqkV6S^_vG?TgqnSQTu=I3};=eJGy17GqVUIyTLso6ULgT^K06h$N1N^suhvx;i~Seo%lBZjUKj zJ>YkRI6J^LJh<(@-B)&ROBzrZ%%^bbwVk&mYhwq8e4XtMl9v+(63D8GdT%kxuRUmX zIqM{INe^C*K0DC*GlsGTb$!#sBZTnHo<_~&S)S@ei;BGCoHDaL&Q0<~hzqGP?~-v4 zI*Fvo(5uQ z^{kC}UqBfszDNQlZ;k%uDX;P+?EXEOE_=_e#SV$*UTCk!zAda91MTHTaMMdp1k@yE~hIakERg#y%l zd@xt4Fb~BE0XQxbW$Q~yKli2Yjo@^8(Vi^i?=Yt@kUweRm`(E9Ey)}BqFO?Gn1p;}4OUO{ZaJMf?5J3G z|64Rb^#A3S{_h;M|DWrIw5GfHH)_JPfMsE&rp{D0%W$u7K|fl&fj~d%wE;zeCjMe> z6_T~_pz7A4gxjE!4UznFg2ueW3Lh!>!%o3rwve#2%5XQj>bNR*(jGDKlJu z(7XPoceQR}{Y)KsZpHMd2Evtz0(r!x$8?WSZkUzklT=mN)gv^73*s5|UpmC8mM7Q= zTK?OvlU_8^2dCB{IvQd&@m0iry`4-#Mp=YoVz3VXW@6&eTnl-}0(x*;P^ljFGZC;Y zmLw0Yf^mu-wWZkk*B`4CeIkZyP8)jM2GvIkmof7kN8VIIlVT7zWSM5S_-+tK&%NMhSF4eX6j#_>`Dx`r~)QM*-`K+UQ@lmF%NYnFQ>E8b!|$yhV7Ri%5_ zq`U#iOi0S!Yl&Y{4Xs3~LR{i%T= z{w^Rp&oHbMuZ1t>TBs8I1D6p&@->~-klNk>LUl@052e7{iyC&$!^gfj$1LGA86!U4 z(v^JKF}0L{&CM!vBJ^4!V8qCW)nml+p~UD%;e!puxU?Rw@Myjm`FrRilK$m6*-)=v zxUDxjW{2hW&TI%d>2~QqLO=f%dH(15*D-5)BG%O?OE39nTRciVx-sfKsm99jq*=;r zs9|TWEd_i=5K`@1mU%EO@hjaWgQTcFY)SUP_XxSg@IC}evBBx<{C59A4eI~_OGMTz zr+*hJ5a6KrwIu}1lZF!DO&Cf_eDcr-o-{bx;PuDseM3c+Ll0`H*@qB8FG+^Br4`XOTxuz-qqyMr!mX36tWa&c*_PBl1iw|H}7*) zI^joSbspae2R2{FE`WNCUlq_=LX3ED=85o=HQE_=Ol;QA5c9RkqFn7uBRRYIR%h1p z@9R;s36{J2XJLB1)69xxRa(EL#>GB&yJ$T|=9I4_r*$(OOoogRX4b??d=jCV-)?MC zgP&NdKdODQ=*H_{BEt>tw`hzHacr6W`P2mj=p~QviLwwX;JfBkw+UJYtJTaIauM}f z&wqSyqOsY$OoOel8oRdmvQw*8qU}-65V`nZFzFR3iIdXe2oS&*-z<%jKZ^YUyFJw& zx6l2vq`lWb%B!PD9U!)AqB)|(ydf{LMwV4&WgZhIVwGfNB-U#)zkyIqx~>}i83lXO zoKLCjXl(ea+t`n1jh5cTurESiTvHx5%6DzHdI)r5IUlt|PtsQ~NiQ%fTjehEJLI|4 zkhC4Yj&CloZLv~Fo#?8cqc^d0FaywxvebG(uN))fv;^DZYGx}}8z~OCl;19FWIY&d zH~;rE&v;rvC?a&zYdD;sqE~*gJprB(D?D`kn5ulJ@f0tK9qRiL;;WqsvlX$lYkbxJ z#@-mAm`%EHY6LiB|3DK|aOX&-D!r-5b>no_0@PCDSp5=t@tm-Hx_O!ZJkh?8@g@&n}|_mpTjE zw_>l&ig#prhx;bNViXitiSXs9se)u)BHm6CAn9t#wlmiS%KClr!tPDCoYent=M!Rz zHrA+1)1R{gYOYlQLhw=nKkSH}z(Vw2t8vg1$dl(btOlD!HiXPWh~}*(4qwI31KVSa zQ{(B41jGcox1dy$o(gwNnlYUp`RXSajNp^SMn%uus_v98XhX#7-LFPEtiq6O!6Ev@ zt?ByR-r_-hd^zmwvZq~JRi289NQovRwV9_ZK{b9tl;XtEy388u&@w&*IqeK*RUo^` zH&-E=I|swDJ4sf3!a3y&@=qdPK&50a-86{Q}HWQHPqGJBDsa()iz z1AMa1yEem4C5ta4eMIptG#`>87zm z_<1}N9cnJu)VT+?04OFuMbhFOY?}9r?><3h4$m}P*;0bz!n~`uYna^6e` z1qlW68Mlrf-J4GJyzmTkpB9p@8(QRWJJ>pkm$@ji#*EYjgq42Eqg-0H9rbdO`0}R~ z7kZ=#rncf(moy`m83u&7pA+IM^Odwe9cJve2I>+k=y>JIG)@*xUyD`F?p#P=b}kIF zJ_vihLLH~#?&M%T{R@Cqmsjy9a{dAd^v0{afRXu5|1P`t4@wE< z;0HDe&vAK+MGNycndpO26!+XFWZ+jvxli}+ihk#*XAJr!1$uIofuLwDwpi|up>KKXbYBjCyHfl~g1u1E zu^JrJ`JF6GsmK5HMf&gGn4W0VPcq0)(SwsMCCj`nS(A~BfO56c9dF??pQqYo;`}sS zO;mTfNAOqd$i4QIA9SUX(NU056e$Q+{jPcWb$&o$V@Qn_RxCJ>XdOSr8k}sS;D=tO z)(ACmE#3(bdhpAG%X(3@4pg5J^;Da~V~JDWQ&+63;F{aV2kkl+URSdxG6BJ#4Rb!KJkx%J*>af5Ne8|n=lx+zA0HynkAB-P7^BIhKea21$IdbzAimuI+ zIL-V$#jx+fNm|JCjAr|b*o=~8#Ttazp#7MrHHY-n0A~nD&Rl1`zV&r+;xxh%HZQ*s zb4AEq+0s}oVZWZLA{v?kzNxu>N+eE!cHPhM$!0z4bGs~SdW_C{F+;P0WA;m?uZhsO z(9BjsJTWBhs(72XJE7kdK1J%a2kn9bS~_DJQjqWp#3a|TitiL%S3HVW(E9FTQev<} zHg2tLyZqQxjZT_$`ofpPw7qvZ6T8huHhAr*U2PHnG|toO046BXe2%_$!oq470gXW-}vY^j-7qF z!9V1OFN*&j3Ub%FG#u=56N=PiB7pG|%vAbm3v25}>BE~eAx_H28q(yHx(yV~mGj%O z!kZ=DOog~CLFUxeR`e|*KLCD@u-QMg5+Ly zJiKM}okUDS?$Q=;$}jox`v)C4@>$PNL(R*Z_pqk&zI+NY`IMGfeg2+H&h*3Nedi61 z2oCaxlN13y+aZosu9*kb(FIGp$e1)1jh};cc8IZ%8HSo|6udt0p_R4FA@g^Xmn;oj zh{dkFGURnrwIs6V4G+$L0W4BU`(?BJ^L}IOq*W2tlcx%3HyLJ;BPQC*=Q?xDif3$v~|1HOfcyM_h?IpuoD+)2FsA6zn= z7FMjRtgEr7oza<&1voGaMlFD0R&#u5!iUumQ?ie@ztJ3~a*gYuizm+cl)%D&vJ&xS z(Arr+b}5cB`bFexIDNm({f-A%TRm$5$XPFeb^vVV(SU2C_(0xgjEqeG9a?>21JG^7 zN)@G*REa(U=Hsv549*ik5V~!xZ>Y$5W&5bn;x&8VTUiZ|hcp5UfSZpz6*SjxbWGqA zRr}{U&-9_4IiU{X>+7MP;CBA2vHW-~xpCkMh01g;72e_wH%iuMEk)9;1bcPGhR1nd zQ9J&fPVfYK`p3O6i`ncn`yig&;&C;A)!&ctwdKgLq+k!%;;$ae(f2WnIZO58?;Q2b zQl4dxyaa9^{Co!mY55=VWO5U+ z;+^j-rE1Zou?L?(C9qaUzA~?b&$*YlPSmp+^iZo&3j!kq%b|A&@(CvLaL-6+YFY~# z_Mw9tj4H~{(E-4sqEc)n1k)vbIF=Rg|JF@z@L&F6=-)Wp9ydyTjJPVrj3w?-YE0!y zP5QP94J5pC{p6QegY=ZfU26GIS6IkzY!HD nCh~c!LQ*#G?{}<#bBGfC&A1W!SJPrRN;9tPzW^xZUvvKlChb%! diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index fd667cd..4b5c9a9 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -102,17 +102,7 @@ # the SVR is assumed to be a normal distribution centred around zeros. ADA-SVR # uses the central limit theorem to estimate the # standard deviation of this normal distribution for each coefficient -# (see the :ref:`. -# :align: center -# :name: fig:ada_svr_1 -# -# Figure 1 of :footcite:ct:`Gaonkar et al. 2012 `. -# **Left**: Concept of support vector machines in 2-D space -# **Right**: Permutation testing for support vector machines +# (for details see figure 1 of ::footcite:ct:`Gaonkar et al. 2012 `). # ############################################################################# @@ -174,7 +164,6 @@ # # - The method is fast because it uses the central limit theorem to estimate # the standard deviation of the distribution of the coefficients of SVR. -# - The method is simple to implement. # - The method is a simplification of the permutation test for SVR. # - The method has the advantage of linear models: transparency of # the prediction, high level of collective experiance and expertise From ca9575eb159bf89dbcedb558454ac32cbfadfa5d Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 13:41:58 +0100 Subject: [PATCH 21/28] Fix format of the docstring --- examples/_utils/plot_dataset.py | 128 +++++++++++++++++++++----------- 1 file changed, 84 insertions(+), 44 deletions(-) diff --git a/examples/_utils/plot_dataset.py b/examples/_utils/plot_dataset.py index becebef..cb17f9b 100644 --- a/examples/_utils/plot_dataset.py +++ b/examples/_utils/plot_dataset.py @@ -7,14 +7,21 @@ def plot_dataset1D(X, y, beta, title="Toy dataset"): """ Plot a 1D toy dataset with the true regression line. - Parameters: - - X: Input features (n_samples, n_features) - - y: Vectors of the regression (n_samples,) - - beta: Coefficients of the true variable of importance (n_features,) - - title: Title of the plot (str) - - Returns: - - a figure with 3 subplots + + Parameters + ---------- + X : ndarray, shape (n_samples, n_features) + Input features. + y : ndarray, shape (n_samples,) + Vectors of the regression + beta : ndarray, shape (n_features,) + Coefficients of the true variable of importance + title : str + Title of the plot + + Returns + ------- + a figure with 3 subplots """ # Create a figure and a set of subplots fig, ([ax11, ax12], [ax21, ax22]) = plt.subplots( @@ -51,14 +58,21 @@ def plot_dataset1D(X, y, beta, title="Toy dataset"): def plot_validate_variable_importance(beta, beta_hat, vmin=0.0, vmax=1.0): """ Plot for validating of the variable importance estimation. - Parameters: - - beta: Coefficients of the true variable of importance (n_features,) - - beta_hat: Coefficients of the estimated variable of importance (n_features,) - - vmin: Minimum value of the colorbar (float) - - vmax: Maximum value of the colorbar (float) - - Returns: - - a figure with 2 subplots + + Parameters + ---------- + beta : ndarray, shape (n_features,) + Coefficients of the true variable of importance + beta_hat : ndarray, shape (n_features,) + Coefficients of the estimated variable of importance + vmin : float + Minimum value of the colorbar + vmax : float + Maximum value of the colorbar + + Returns + ------- + a figure with 2 subplots """ fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6.4, 2.8)) # plot beta @@ -87,17 +101,27 @@ def plot_pvalue_H0( ): """ Plot for the confidence in the hipothse that the variables are important. - Parameters: - - beta_hat: Coefficients of the estimated variable of importance (n_features,) - - pvalue: pvalue of each variable of importance (n_features,) - - pvalue_corrected: corrected pvalue of each variable of importance (n_features,) - - one_minus_pvalue: 1 - pvalue of each variable of importance (n_features,) - - one_minus_pvalue_corrected: 1 - corrected pvalue of each variable of importance (n_features,) - - vmin: Minimum value of the colorbar (float) - - vmax: Maximum value of the colorbar (float) - - Returns: - - a figure with 3 subplots + + Parameters + ---------- + beta_hat : ndarray, shape (n_features,) + Coefficients of the estimated variable of importance + pvalue : ndarray, shape (n_features,) + pvalue of each variable of importance + pvalue_corrected : ndarray, shape (n_features,) + corrected pvalue of each variable of importance + one_minus_pvalue : ndarray, shape (n_features,) + 1 - pvalue of each variable of importance + one_minus_pvalue_corrected : ndarray, shape (n_features,) + 1 - corrected pvalue of each variable of importance + vmin : float + Minimum value of the colorbar + vmax : float + Maximum value of the colorbar + + Returns + ------- + a figure with 3 subplots """ fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(6.4, 4.8)) # plot beta_hat @@ -160,17 +184,27 @@ def plot_pvalue_H1( ): """ Plot for the confidence in the hipothse that the variables are not important. - Parameters: - - beta_hat: Coefficients of the estimated variable of importance (n_features,) - - pvalue: pvalue of each variable of importance (n_features,) - - pvalue_corrected: corrected pvalue of each variable of importance (n_features,) - - one_minus_pvalue: 1 - pvalue of each variable of importance (n_features,) - - one_minus_pvalue_corrected: 1 - corrected pvalue of each variable of importance (n_features,) - - vmin: Minimum value of the colorbar (float) - - vmax: Maximum value of the colorbar (float) - - Returns: - - a figure with 3 subplots + + Parameters + ---------- + beta_hat : ndarray, shape (n_features,) + Coefficients of the estimated variable of importance + pvalue : ndarray, shape (n_features,) + pvalue of each variable of importance + pvalue_corrected : ndarray, shape (n_features,) + corrected pvalue of each variable of importance + one_minus_pvalue : ndarray, shape (n_features,) + 1 - pvalue of each variable of importance + one_minus_pvalue_corrected : ndarray, shape (n_features,) + 1 - corrected pvalue of each variable of importance + vmin : float + Minimum value of the colorbar + vmax : float + Maximum value of the colorbar + + Returns + ------- + a figure with 3 subplots """ fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(6.4, 4.8)) im_beta_hat = ax1.imshow(np.expand_dims(beta_hat, 0), vmin=vmin, vmax=vmax) @@ -226,13 +260,19 @@ def plot_pvalue_H1( def plot_compare_proba_estimated(proba, beta_hat, scale): """ Plot the comparison of the estimated probability with the normal distribution. - Parameters: - - proba: Probability of each coefficent (n_features, n_permutations) - - beta_hat: Coefficients of the estimated variable of importance (n_features,) - - scale: Standard deviation of the distribution of the coefficients (n_features,) - Returns: - - a figure with 5*5 subplots + Parameters + ---------- + proba : ndarray, shape (n_features, n_permutations) + Probability of each coefficent + beta_hat : ndarray, shape (n_features,) + Coefficients of the estimated variable of importance + scale : ndarray, shape (n_features,) + Standard deviation of the distribution of the coefficients + + Returns + ------- + a figure with 5*5 subplots """ fig, axs = plt.subplots(5, 5, figsize=(6.4, 4.8)) for index, ax in enumerate(axs.flat): From 5c42cc31e36ac820af9fb5ad4623c3dc2969fc13 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 13:43:13 +0100 Subject: [PATCH 22/28] Remove a comment of advantages --- examples/methods/ada_svr.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index 4b5c9a9..f2bfc28 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -164,7 +164,6 @@ # # - The method is fast because it uses the central limit theorem to estimate # the standard deviation of the distribution of the coefficients of SVR. -# - The method is a simplification of the permutation test for SVR. # - The method has the advantage of linear models: transparency of # the prediction, high level of collective experiance and expertise # and a guarantee of convergence. (see the book of From ded4d5165b4d372db2412d94e66472840895bfe4 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 14:03:55 +0100 Subject: [PATCH 23/28] Change folder for plotting result --- examples/methods/ada_svr.py | 2 +- {examples/_utils => hidimstat/visualisation}/__init__.py | 0 {examples/_utils => hidimstat/visualisation}/plot_dataset.py | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename {examples/_utils => hidimstat/visualisation}/__init__.py (100%) rename {examples/_utils => hidimstat/visualisation}/plot_dataset.py (100%) diff --git a/examples/methods/ada_svr.py b/examples/methods/ada_svr.py index 7af5d2f..8034617 100644 --- a/examples/methods/ada_svr.py +++ b/examples/methods/ada_svr.py @@ -13,7 +13,7 @@ from hidimstat.permutation_test import permutation_test from sklearn.svm import SVR from hidimstat.scenario import multivariate_1D_simulation -from examples._utils.plot_dataset import ( +from hidimstat.visualisation.plot_dataset import ( plot_dataset1D, plot_validate_variable_importance, plot_pvalue_H0, diff --git a/examples/_utils/__init__.py b/hidimstat/visualisation/__init__.py similarity index 100% rename from examples/_utils/__init__.py rename to hidimstat/visualisation/__init__.py diff --git a/examples/_utils/plot_dataset.py b/hidimstat/visualisation/plot_dataset.py similarity index 100% rename from examples/_utils/plot_dataset.py rename to hidimstat/visualisation/plot_dataset.py From ebad58c9b6e76142e2008b981d2d335b750a956a Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 14:04:28 +0100 Subject: [PATCH 24/28] Not use example as packages --- examples/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 examples/__init__.py diff --git a/examples/__init__.py b/examples/__init__.py deleted file mode 100644 index e69de29..0000000 From 64116e9dcd87127425f387dbec19510fb34e970b Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Thu, 26 Dec 2024 14:06:56 +0100 Subject: [PATCH 25/28] Change name of the file for methods --- examples/{methods => inference_model}/README.txt | 0 examples/{methods => inference_model}/ada_svr.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename examples/{methods => inference_model}/README.txt (100%) rename examples/{methods => inference_model}/ada_svr.py (100%) diff --git a/examples/methods/README.txt b/examples/inference_model/README.txt similarity index 100% rename from examples/methods/README.txt rename to examples/inference_model/README.txt diff --git a/examples/methods/ada_svr.py b/examples/inference_model/ada_svr.py similarity index 100% rename from examples/methods/ada_svr.py rename to examples/inference_model/ada_svr.py From 16cdad6832ba25332b6d2638a4fd83ecb7ac2400 Mon Sep 17 00:00:00 2001 From: lionel kusch Date: Tue, 31 Dec 2024 11:52:23 +0100 Subject: [PATCH 26/28] Update hidimstat/visualisation/plot_dataset.py fixe typo Co-authored-by: Joseph Paillard --- hidimstat/visualisation/plot_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hidimstat/visualisation/plot_dataset.py b/hidimstat/visualisation/plot_dataset.py index b34308e..672dc4e 100644 --- a/hidimstat/visualisation/plot_dataset.py +++ b/hidimstat/visualisation/plot_dataset.py @@ -100,7 +100,7 @@ def plot_pvalue_H0( vmax=1.0, ): """ - Plot for the confidence in the hipothse that the variables are important. + Plot for the confidence in the hypothesis that the variables are important. Parameters ---------- From e8959625f14911a4ad645e173cc54bc84c19934f Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 31 Dec 2024 11:59:51 +0100 Subject: [PATCH 27/28] Remove previous file --- .../adaptive_permutation_threshold.py | 74 ------------------- 1 file changed, 74 deletions(-) delete mode 100644 src/hidimstat/adaptive_permutation_threshold.py diff --git a/src/hidimstat/adaptive_permutation_threshold.py b/src/hidimstat/adaptive_permutation_threshold.py deleted file mode 100644 index 933fb9e..0000000 --- a/src/hidimstat/adaptive_permutation_threshold.py +++ /dev/null @@ -1,74 +0,0 @@ -import numpy as np - - -def ada_svr(X, y, rcond=1e-3): - """ - Adaptative Permutation Threshold for SVR - - Statistical inference procedure presented in Gaonkar et al. [1]_. - - Parameters - ---------- - X : ndarray, shape (n_samples, n_features) - Data. - - y : ndarray, shape (n_samples,) - Target. - - rcond : float, optional (default=1e-3) - Cutoff for small singular values. Singular values smaller - than `rcond` * largest_singular_value are set to zero. - - Returns - ------- - beta_hat : array, shape (n_features,) - Estimated parameter vector. - - scale : ndarray, shape (n_features,) - Value of the standard deviation of the parameters. - - References - ---------- - .. [1] Gaonkar, B., & Davatzikos, C. (2012, October). Deriving statistical - significance maps for SVM based image classification and group - comparisons. In International Conference on Medical Image Computing - and Computer-Assisted Intervention (pp. 723-730). Springer, Berlin, - Heidelberg. - """ - - X = np.asarray(X) - n_samples, n_features = X.shape - - K = _manual_inversion(np.dot(X, X.T), rcond=rcond) - sum_K = np.sum(K) - - L = -np.outer(np.sum(K, axis=0), np.sum(K, axis=1)) / sum_K - C = np.dot(X.T, K + L) - - beta_hat = np.dot(C, y) - - scale = np.sqrt(np.sum(C**2, axis=1)) - - return beta_hat, scale - - -def _manual_inversion(X, rcond=1e-3, full_rank=False): - "Inverting taking care of low eigenvalues to increase numerical stability" - - X = np.asarray(X) - n_samples, n_features = X.shape - - if n_samples != n_features: - raise ValueError("The matrix is not a square matrix") - - U, s, V = np.linalg.svd(X, full_matrices=False) - rank = np.sum(s > rcond * s.max()) - s_inv = np.zeros(np.size(s)) - s_inv[:rank] = 1 / s[:rank] - - if full_rank: - s_inv[rank:] = 1 / (rcond * s.max()) - - X_inv = np.linalg.multi_dot([U, np.diag(s_inv), V]) - - return X_inv From 6e2d4c907f8db2e9db02fb2c52b55429802f28c6 Mon Sep 17 00:00:00 2001 From: kusch lionel Date: Tue, 31 Dec 2024 12:11:33 +0100 Subject: [PATCH 28/28] Fix format --- doc_conf/conf.py | 4 ++-- examples/inference_model/ada_svr.py | 16 ++++++++-------- src/hidimstat/ada_svr.py | 2 ++ src/hidimstat/visualisation/plot_dataset.py | 6 +++--- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/doc_conf/conf.py b/doc_conf/conf.py index a511006..f88fd1e 100644 --- a/doc_conf/conf.py +++ b/doc_conf/conf.py @@ -259,8 +259,8 @@ "abort_on_example_error": False, "image_scrapers": scrapers, "show_memory": True, - 'filename_pattern': r'\.py', - 'ignore_pattern': r'__init__\.py', + "filename_pattern": r"\.py", + "ignore_pattern": r"__init__\.py", # 'reference_url': { # 'numpy': 'http://docs.scipy.org/doc/numpy-1.9.1', # 'scipy': 'http://docs.scipy.org/doc/scipy-0.17.0/reference', diff --git a/examples/inference_model/ada_svr.py b/examples/inference_model/ada_svr.py index 8034617..d7cf06a 100644 --- a/examples/inference_model/ada_svr.py +++ b/examples/inference_model/ada_svr.py @@ -155,27 +155,27 @@ # # - The distribution of the coefficients of SVR is normal centred around zeros. # - The method is valid for large sample sizes. -# - The method has the linear models assumptions: linearity, normality, +# - The method has the linear models assumptions: linearity, normality, # homoscedasticity, independence, fixed features, absence of multicollinearity # (see the book of :footcite:ct:`Molnar 2012` # for details) # # **Advantages**: -# +# # - The method is fast because it uses the central limit theorem to estimate # the standard deviation of the distribution of the coefficients of SVR. -# - The method has the advantage of linear models: transparency of -# the prediction, high level of collective experiance and expertise -# and a guarantee of convergence. (see the book of +# - The method has the advantage of linear models: transparency of +# the prediction, high level of collective experiance and expertise +# and a guarantee of convergence. (see the book of # :footcite:ct:`Molnar 2012` for details) # # **Disadvantages**: # # - The method assumes that the distribution of the coefficients of SVR is normal centred around zeros. # - The method is not valid for small sample sizes. -# - The method has all the disadvantages of linear models: only for linear -# relationships, not good predicting performance, unintuitive. -# (see the book of +# - The method has all the disadvantages of linear models: only for linear +# relationships, not good predicting performance, unintuitive. +# (see the book of # :footcite:ct:`Molnar 2012` for details) # # **Conclusion**: diff --git a/src/hidimstat/ada_svr.py b/src/hidimstat/ada_svr.py index 30c5fa0..5e12cd0 100644 --- a/src/hidimstat/ada_svr.py +++ b/src/hidimstat/ada_svr.py @@ -1,6 +1,8 @@ import numpy as np from hidimstat.stat_tools import pval_from_scale +__all__ = ["ada_svr", "ada_svr_pvalue"] + def ada_svr(X, y, rcond=1e-3): """ diff --git a/src/hidimstat/visualisation/plot_dataset.py b/src/hidimstat/visualisation/plot_dataset.py index 672dc4e..8ad8cac 100644 --- a/src/hidimstat/visualisation/plot_dataset.py +++ b/src/hidimstat/visualisation/plot_dataset.py @@ -7,7 +7,7 @@ def plot_dataset1D(X, y, beta, title="Toy dataset"): """ Plot a 1D toy dataset with the true regression line. - + Parameters ---------- X : ndarray, shape (n_samples, n_features) @@ -58,7 +58,7 @@ def plot_dataset1D(X, y, beta, title="Toy dataset"): def plot_validate_variable_importance(beta, beta_hat, vmin=0.0, vmax=1.0): """ Plot for validating of the variable importance estimation. - + Parameters ---------- beta : ndarray, shape (n_features,) @@ -184,7 +184,7 @@ def plot_pvalue_H1( ): """ Plot for the confidence in the hypotheses that the variables are not important. - + Parameters ---------- beta_hat : ndarray, shape (n_features,)