diff --git a/doc_conf/api.rst b/doc_conf/api.rst index 885873f3..c6cb3d57 100644 --- a/doc_conf/api.rst +++ b/doc_conf/api.rst @@ -20,8 +20,10 @@ Functions clustered_inference data_simulation desparsified_lasso + desparsified_lasso_pvalue + desparsified_group_lasso_pvalue ensemble_clustered_inference - group_reid + reid hd_inference knockoff_aggregation model_x_knockoff diff --git a/doc_conf/references.bib b/doc_conf/references.bib index 8fa09830..91d0ed97 100644 --- a/doc_conf/references.bib +++ b/doc_conf/references.bib @@ -177,4 +177,103 @@ @article{liuFastPowerfulConditional2021 archiveprefix = {arxiv}, keywords = {Statistics - Methodology}, file = {/home/ahmad/Zotero/storage/8HRQZX3H/Liu et al. - 2021 - Fast and Powerful Conditional Randomization Testin.pdf;/home/ahmad/Zotero/storage/YFNDKN2B/2006.html} -} \ No newline at end of file +} + +@article{zhang2014confidence, + title={Confidence intervals for low dimensional parameters in high dimensional linear models}, + author={Zhang, Cun-Hui and Zhang, Stephanie S}, + journal={Journal of the Royal Statistical Society Series B: Statistical Methodology}, + volume={76}, + number={1}, + pages={217--242}, + year={2014}, + publisher={Oxford University Press} +} + +@article{van2014asymptotically, + title={On asymptotically optimal confidence regions and tests for high-dimensional models}, + author={van de Geer, Sara and B{\"u}hlmann, Peter and Ritov, Ya'acov and Dezeure, Ruben}, + journal={The Annals of Statistics}, + pages={1166--1202}, + year={2014}, + publisher={JSTOR} +} + +@article{javanmard2014confidence, + title={Confidence intervals and hypothesis testing for high-dimensional regression}, + author={Javanmard, Adel and Montanari, Andrea}, + journal={The Journal of Machine Learning Research}, + volume={15}, + number={1}, + pages={2869--2909}, + year={2014}, + publisher={JMLR. 
org} +} + +@article{bellec2022biasing, + title={De-biasing the lasso with degrees-of-freedom adjustment}, + author={Bellec, Pierre C and Zhang, Cun-Hui}, + journal={Bernoulli}, + volume={28}, + number={2}, + pages={713--743}, + year={2022}, + publisher={Bernoulli Society for Mathematical Statistics and Probability} +} + +@article{celentano2023lasso, + title={The lasso with general gaussian designs with applications to hypothesis testing}, + author={Celentano, Michael and Montanari, Andrea and Wei, Yuting}, + journal={The Annals of Statistics}, + volume={51}, + number={5}, + pages={2194--2220}, + year={2023}, + publisher={Institute of Mathematical Statistics} +} + +@phdthesis{chevalier2020statistical, + title={Statistical control of sparse models in high dimension}, + author={Chevalier, J{\'e}r{\^o}me-Alexis}, + year={2020}, + school={Universit{\'e} Paris-Saclay} +} + +@article{fan2012variance, + title={Variance estimation using refitted cross-validation in ultrahigh dimensional regression}, + author={Fan, Jianqing and Guo, Shaojun and Hao, Ning}, + journal={Journal of the Royal Statistical Society Series B: Statistical Methodology}, + volume={74}, + number={1}, + pages={37--65}, + year={2012}, + publisher={Oxford University Press} +} + +@article{reid2016study, + title={A study of error variance estimation in lasso regression}, + author={Reid, Stephen and Tibshirani, Robert and Friedman, Jerome}, + journal={Statistica Sinica}, + pages={35--67}, + year={2016}, + publisher={JSTOR} +} + +@article{chevalier2020statistical, + title={Statistical control for spatio-temporal meg/eeg source imaging with desparsified mutli-task lasso}, + author={Chevalier, J{\'e}r{\^o}me-Alexis and Salmon, Joseph and Gramfort, Alexandre and Thirion, Bertrand}, + journal={Advances in Neural Information Processing Systems}, + volume={33}, + pages={1759--1770}, + year={2020} +} + +@article{eshel2003yule, + title={The yule walker equations for the AR coefficients}, + author={Eshel, Gidon}, + journal={Internet resource}, + volume={2}, + pages={68--73}, + year={2003} +} + diff --git a/examples/plot_2D_simulation_example.py b/examples/plot_2D_simulation_example.py index b19168d6..07890e5d 100644 --- a/examples/plot_2D_simulation_example.py +++ b/examples/plot_2D_simulation_example.py @@ -61,10 +61,13 @@ from sklearn.feature_extraction import image from hidimstat.clustered_inference import clustered_inference -from hidimstat.desparsified_lasso import desparsified_lasso +from hidimstat.desparsified_lasso import ( + desparsified_lasso, + desparsified_lasso_pvalue, +) from hidimstat.ensemble_clustered_inference import ensemble_clustered_inference from hidimstat.scenario import multivariate_simulation -from hidimstat.stat_tools import pval_from_cb, zscore_from_pval +from hidimstat.stat_tools import zscore_from_pval ############################################################################# # Specific plotting functions @@ -237,8 +240,10 @@ def plot(maps, titles): # and referred to as Desparsified Lasso. 
# compute desparsified lasso -beta_hat, cb_min, cb_max = desparsified_lasso(X_init, y, n_jobs=n_jobs) -pval, pval_corr, one_minus_pval, one_minus_pval_corr = pval_from_cb(cb_min, cb_max) +beta_hat, sigma_hat, omega_diag = desparsified_lasso(X_init, y, n_jobs=n_jobs) +pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = ( + desparsified_lasso_pvalue(X_init.shape[0], beta_hat, sigma_hat, omega_diag) +) # compute estimated support (first method) zscore = zscore_from_pval(pval, one_minus_pval) diff --git a/src/hidimstat/__init__.py b/src/hidimstat/__init__.py index cdc21880..b9ae28b3 100644 --- a/src/hidimstat/__init__.py +++ b/src/hidimstat/__init__.py @@ -1,12 +1,16 @@ from .adaptive_permutation_threshold import ada_svr from .clustered_inference import clustered_inference, hd_inference -from .desparsified_lasso import desparsified_group_lasso, desparsified_lasso +from .desparsified_lasso import ( + desparsified_lasso, + desparsified_lasso_pvalue, + desparsified_group_lasso_pvalue, +) from .Dnn_learner_single import DnnLearnerSingle from .ensemble_clustered_inference import ensemble_clustered_inference from .knockoff_aggregation import knockoff_aggregation from .knockoffs import model_x_knockoff from .multi_sample_split import aggregate_quantiles -from .noise_std import group_reid, reid +from .noise_std import reid from .permutation_test import permutation_test_cv from .scenario import multivariate_1D_simulation from .standardized_svr import standardized_svr @@ -26,10 +30,11 @@ "clustered_inference", "dcrt_zero", "desparsified_lasso", - "desparsified_group_lasso", + "desparsified_lasso_pvalue", + "desparsified_group_lasso_pvalue", "DnnLearnerSingle", "ensemble_clustered_inference", - "group_reid", + "reid", "hd_inference", "knockoff_aggregation", "model_x_knockoff", diff --git a/src/hidimstat/clustered_inference.py b/src/hidimstat/clustered_inference.py index 5202f0a8..69166c39 100644 --- a/src/hidimstat/clustered_inference.py +++ b/src/hidimstat/clustered_inference.py @@ -1,10 +1,12 @@ import numpy as np from sklearn.preprocessing import StandardScaler from sklearn.utils import resample -from sklearn.utils.validation import check_memory -from .desparsified_lasso import desparsified_group_lasso, desparsified_lasso -from .stat_tools import pval_from_cb +from .desparsified_lasso import ( + desparsified_lasso, + desparsified_lasso_pvalue, + desparsified_group_lasso_pvalue, +) def _subsampling(n_samples, train_size, groups=None, seed=0): @@ -45,7 +47,7 @@ def _ward_clustering(X_init, ward, train_index): return X_reduced, ward -def hd_inference(X, y, method, n_jobs=1, memory=None, verbose=0, **kwargs): +def hd_inference(X, y, method, n_jobs=1, verbose=0, **kwargs): """Wrap-up high-dimensional inference procedures Parameters @@ -65,11 +67,6 @@ def hd_inference(X, y, method, n_jobs=1, memory=None, verbose=0, **kwargs): n_jobs : int or None, optional (default=1) Number of CPUs to use during parallel steps such as inference. - memory : str or joblib.Memory object, optional (default=None) - Used to cache the output of the computation of the clustering - and the inference. By default, no caching is done. If a string is - given, it is the path to the caching directory. - verbose: int, optional (default=1) The verbosity level. If `verbose > 0`, we print a message before runing the clustered inference. 
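As an illustrative, standalone sketch (not part of this patch), the refactored two-step single-response API can be exercised on synthetic data: desparsified_lasso returns the de-biased estimates, the noise level and the precision-matrix diagonal, and desparsified_lasso_pvalue turns them into p-values and confidence bounds. All data below is simulated for the example.

    import numpy as np
    from hidimstat.desparsified_lasso import (
        desparsified_lasso,
        desparsified_lasso_pvalue,
    )

    rng = np.random.default_rng(0)
    n_samples, n_features = 60, 40
    X = rng.standard_normal((n_samples, n_features))
    beta_true = np.zeros(n_features)
    beta_true[:3] = 2.0  # small active support
    y = X @ beta_true + 0.5 * rng.standard_normal(n_samples)

    # Step 1: de-biased estimates, noise level and precision-matrix diagonal
    beta_hat, sigma_hat, omega_diag = desparsified_lasso(X, y, n_jobs=1)

    # Step 2: p-values and 95% confidence bounds derived from the step-1 outputs
    pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = (
        desparsified_lasso_pvalue(
            X.shape[0], beta_hat, sigma_hat, omega_diag, confidence=0.95
        )
    )
    print("selected features:", np.where(pval_corr < 0.05)[0])
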
@@ -96,34 +93,28 @@ def hd_inference(X, y, method, n_jobs=1, memory=None, verbose=0, **kwargs): one_minus_pval_corr : ndarray, shape (n_features,) One minus the p-value corrected for multiple testing. """ - - if method == "desparsified-lasso": - - beta_hat, cb_min, cb_max = desparsified_lasso( - X, - y, - confidence=0.95, - n_jobs=n_jobs, - memory=memory, - verbose=verbose, - **kwargs, - ) - pval, pval_corr, one_minus_pval, one_minus_pval_corr = pval_from_cb( - cb_min, cb_max, confidence=0.95 - ) - - elif method == "desparsified-group-lasso": - - beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( - desparsified_group_lasso( - X, y, n_jobs=n_jobs, memory=memory, verbose=verbose, **kwargs + if method not in ["desparsified-lasso", "desparsified-group-lasso"]: + raise ValueError("Unknow method") + group = method == "desparsified-group-lasso" + print("hd_inference", group, kwargs) + beta_hat, theta_hat, omega_diag = desparsified_lasso( + X, y, group=group, n_jobs=n_jobs, verbose=verbose, **kwargs + ) + if not group: + pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = ( + desparsified_lasso_pvalue( + X.shape[0], + beta_hat, + theta_hat, + omega_diag, + confidence=0.95, + **kwargs, ) ) - else: - - raise ValueError("Unknow method") - + pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( + desparsified_group_lasso_pvalue(beta_hat, theta_hat, omega_diag, **kwargs) + ) return beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr @@ -178,7 +169,6 @@ def clustered_inference( method="desparsified-lasso", seed=0, n_jobs=1, - memory=None, verbose=1, **kwargs, ): @@ -220,11 +210,6 @@ def clustered_inference( n_jobs : int or None, optional (default=1) Number of CPUs to use during parallel steps such as inference. - memory : str or joblib.Memory object, optional (default=None) - Used to cache the output of the computation of the clustering - and the inference. By default, no caching is done. If a string is - given, it is the path to the caching directory. - verbose: int, optional (default=1) The verbosity level. If `verbose > 0`, we print a message before runing the clustered inference. @@ -257,9 +242,6 @@ def clustered_inference( Spatially relaxed inference on high-dimensional linear models. arXiv preprint arXiv:2106.02590. 
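For the higher-level entry point, a minimal usage sketch of clustered_inference (not taken from the patch; the FeatureAgglomeration setup mirrors the pattern used in test_clustered_inference.py and the data is synthetic):

    import numpy as np
    from sklearn.cluster import FeatureAgglomeration
    from hidimstat.clustered_inference import clustered_inference

    rng = np.random.default_rng(1)
    n_samples, n_features, n_clusters = 50, 100, 10
    X_init = rng.standard_normal((n_samples, n_features))
    y = X_init[:, :5].sum(axis=1) + 0.1 * rng.standard_normal(n_samples)

    # Ward clustering of features, then inference on the reduced problem
    ward = FeatureAgglomeration(n_clusters=n_clusters, linkage="ward")
    beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
        clustered_inference(X_init, y, ward, n_clusters, method="desparsified-lasso")
    )
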
""" - - memory = check_memory(memory) - n_samples, n_features = X_init.shape if verbose > 0: @@ -273,20 +255,26 @@ def clustered_inference( train_index = _subsampling(n_samples, train_size, groups=groups, seed=seed) # Clustering - X, ward = memory.cache(_ward_clustering)(X_init, ward, train_index) + X, ward = _ward_clustering(X_init, ward, train_index) # Preprocessing X = StandardScaler().fit_transform(X) y = y - np.mean(y) # Inference: computing reduced parameter vector and stats + print("Clustered inference", kwargs) beta_hat_, pval_, pval_corr_, one_minus_pval_, one_minus_pval_corr_ = hd_inference( - X, y, method, n_jobs=n_jobs, memory=memory, **kwargs + X, y, method, n_jobs=n_jobs, **kwargs ) # De-grouping beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = _degrouping( - ward, beta_hat_, pval_, pval_corr_, one_minus_pval_, one_minus_pval_corr_ + ward, + beta_hat_, + pval_, + pval_corr_, + one_minus_pval_, + one_minus_pval_corr_, ) return beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr diff --git a/src/hidimstat/desparsified_lasso.py b/src/hidimstat/desparsified_lasso.py index aaa5e03e..4e2fd2a5 100644 --- a/src/hidimstat/desparsified_lasso.py +++ b/src/hidimstat/desparsified_lasso.py @@ -4,141 +4,111 @@ from scipy import stats from scipy.linalg import inv from sklearn.linear_model import Lasso -from sklearn.utils.validation import check_memory -from .noise_std import group_reid, reid -from .stat_tools import pval_from_two_sided_pval_and_sign - - -def _compute_all_residuals( - X, alphas, gram, max_iter=5000, tol=1e-3, method="lasso", n_jobs=1, verbose=0 -): - """Nodewise Lasso. Compute all the residuals: regressing each column of the - design matrix against the other columns""" - - n_samples, n_features = X.shape - - results = Parallel(n_jobs=n_jobs, verbose=verbose)( - delayed(_compute_residuals)( - X=X, - column_index=i, - alpha=alphas[i], - gram=gram, - max_iter=max_iter, - tol=tol, - method=method, - ) - for i in range(n_features) - ) - - results = np.asarray(results, dtype=object) - Z = np.stack(results[:, 0], axis=1) - omega_diag = np.stack(results[:, 1]) - - return Z, omega_diag - - -def _compute_residuals( - X, column_index, alpha, gram, max_iter=5000, tol=1e-3, method="lasso" -): - """Compute the residuals of the regression of a given column of the - design matrix against the other columns""" - - n_samples, n_features = X.shape - i = column_index - - X_new = np.delete(X, i, axis=1) - y = np.copy(X[:, i]) - - if method == "lasso": - - gram_ = np.delete(np.delete(gram, i, axis=0), i, axis=1) - clf = Lasso(alpha=alpha, precompute=gram_, max_iter=max_iter, tol=tol) - - else: - - ValueError("The only regression method available is 'lasso'") - - clf.fit(X_new, y) - z = y - clf.predict(X_new) - - omega_diag_i = n_samples * np.sum(z**2) / np.dot(y, z) ** 2 - - return z, omega_diag_i +from hidimstat.noise_std import reid +from hidimstat.stat_tools import pval_from_two_sided_pval_and_sign +from hidimstat.stat_tools import pval_from_cb def desparsified_lasso( X, y, dof_ajdustement=False, - confidence=0.95, max_iter=5000, tol=1e-3, - residual_method="lasso", alpha_max_fraction=0.01, + eps=1e-2, + tol_reid=1e-4, + n_split=5, n_jobs=1, - memory=None, + seed=0, verbose=0, + group=False, + cov=None, + noise_method="AR", + order=1, + fit_Y=True, + stationary=True, ): - """Desparsified Lasso with confidence intervals + """ + Desparsified Lasso with confidence intervals + + Algorithm based on Algorithm 1 of d-Lasso and d-MTLasso in + :cite:`chevalier2020statistical`. 
Parameters ---------- X : ndarray, shape (n_samples, n_features) - Data. + Input data matrix. - y : ndarray, shape (n_samples,) - Target. + y : ndarray, shape (n_samples,) or (n_samples, n_times) + Target vector for single response or matrix for multiple + responses. dof_ajdustement : bool, optional (default=False) - If True, makes the degrees of freedom adjustement (cf. [4]_ and [5]_). - Otherwise, the original Desparsified Lasso estimator is computed - (cf. [1]_ and [2]_ and [3]_). - - confidence : float, optional (default=0.95) - Confidence level used to compute the confidence intervals. - Each value should be in the range [0, 1]. + If True, applies degrees of freedom adjustment. + If False, computes original Desparsified Lasso estimator. max_iter : int, optional (default=5000) - The maximum number of iterations when regressing, by Lasso, - each column of the design matrix against the others. + Maximum iterations for Nodewise Lasso regressions. tol : float, optional (default=1e-3) - The tolerance for the optimization of the Lasso problems: if the - updates are smaller than `tol`, the optimization code checks the - dual gap for optimality and continues until it is smaller than `tol`. - - residual_method : str, optional (default='lasso') - Method used for computing the residuals of the Nodewise Lasso. - Currently the only method available is 'lasso'. + Convergence tolerance for optimization. alpha_max_fraction : float, optional (default=0.01) - Only used if method='lasso'. - Then alpha = alpha_max_fraction * alpha_max. + Fraction of max alpha used for Lasso regularization. - n_jobs : int or None, optional (default=1) - Number of CPUs to use during the Nodewise Lasso. + eps : float, optional (default=1e-2) + Small constant used in noise estimation. + + tol_reid : float, optional (default=1e-4) + Tolerance for Reid estimation. + + n_split : int, optional (default=5) + Number of splits for cross-validation in Reid procedure. + + n_jobs : int, optional (default=1) + Number of parallel jobs. Use -1 for all CPUs. + + seed : int, default=0 + Random seed for reproducibility. - memory : str or joblib.Memory object, optional (default=None) - Used to cache the output of the computation of the Nodewise Lasso. - By default, no caching is done. If a string is given, it is the path - to the caching directory. + verbose : int, default=0 + Verbosity level for logging. - verbose: int, optional (default=1) - The verbosity level: if non zero, progress messages are printed - when computing the Nodewise Lasso in parralel. - The frequency of the messages increases with the verbosity level. + group : bool, default=False + If True, use group Lasso for multiple responses. + + cov : ndarray, shape (n_times, n_times), default=None + Temporal covariance matrix of the noise. + If None, it is estimated. + + noise_method : {'AR', 'simple'}, default='AR' + Method to estimate noise covariance: + - 'simple': Uses median correlation between consecutive + timepoints + - 'AR': Fits autoregressive model of specified order + + order : int, default=1 + Order of AR model when noise_method='AR'. Must be < n_times. + + fit_Y : bool, default=True + Whether to fit Y in noise estimation. + + stationary : bool, default=True + Whether to assume stationary noise in estimation. Returns ------- - beta_hat : array, shape (n_features,) - Estimated parameter vector. + beta_hat : ndarray, shape (n_features,) or (n_features, n_times) + Desparsified Lasso coefficient estimates. 
- cb_min : array, shape (n_features) - Lower bound of the confidence intervals on the parameter vector. + sigma_hat/theta_hat : float or ndarray, shape (n_times, n_times) + Estimated noise level (single response) or precision matrix + (multiple responses). - cb_max : array, shape (n_features) - Upper bound of the confidence intervals on the parameter vector. + omega_diag : ndarray, shape (n_features,) + Diagonal elements of the precision matrix. Notes ----- @@ -152,271 +122,378 @@ def desparsified_lasso( References ---------- - .. [1] Zhang, C. H., & Zhang, S. S. (2014). Confidence intervals for - low dimensional parameters in high dimensional linear models. - Journal of the Royal Statistical Society: Series B: Statistical - Methodology, 217-242. - - .. [2] Van de Geer, S., Bühlmann, P., Ritov, Y. A., & Dezeure, R. (2014). - On asymptotically optimal confidence regions and tests for - high-dimensional models. Annals of Statistics, 42(3), 1166-1202. - - .. [3] Javanmard, A., & Montanari, A. (2014). Confidence intervals and - hypothesis testing for high-dimensional regression. The Journal - of Machine Learning Research, 15(1), 2869-2909. - - .. [4] Bellec, P. C., & Zhang, C. H. (2019). De-biasing the lasso with - degrees-of-freedom adjustment. arXiv preprint arXiv:1902.08885. - - .. [5] Celentano, M., Montanari, A., & Wei, Y. (2020). The Lasso with - general Gaussian designs with applications to hypothesis testing. - arXiv preprint arXiv:2007.13716. + .. footbibliography:: """ - X = np.asarray(X) - - n_samples, n_features = X.shape - - memory = check_memory(memory) + X_ = np.asarray(X) + + n_samples, n_features = X_.shape + if group: + n_times = y.shape[1] + if cov is not None and cov.shape != (n_times, n_times): + raise ValueError( + f'Shape of "cov" should be ({n_times}, {n_times}),' + + f' the shape of "cov" was ({cov.shape}) instead' + ) + + # centering the data and the target variable + y_ = y - np.mean(y) + X_ = X_ - np.mean(X_, axis=0) + + # Lasso regression and noise standard deviation estimation + # TODO: other estimation of the noise standard deviation? + sigma_hat, beta_reid = reid( + X_, + y_, + eps=eps, + tol=tol_reid, + max_iter=max_iter, + n_split=n_split, + n_jobs=n_jobs, + seed=seed, + # for group + group=group, + method=noise_method, + order=order, + fit_Y=fit_Y, + stationary=stationary, + ) - y = y - np.mean(y) - X = X - np.mean(X, axis=0) - gram = np.dot(X.T, X) - gram_nodiag = gram - np.diag(np.diag(gram)) + # compute the Gram matrix + gram = np.dot(X_.T, X_) + gram_nodiag = np.copy(gram) + np.fill_diagonal(gram_nodiag, 0) + # define the alphas for the Nodewise Lasso + # TODO why don't use the function _alpha_max instead of this? 
list_alpha_max = np.max(np.abs(gram_nodiag), axis=0) / n_samples alphas = alpha_max_fraction * list_alpha_max # Calculating precision matrix (Nodewise Lasso) - Z, omega_diag = memory.cache(_compute_all_residuals, ignore=["n_jobs"])( - X, + Z, omega_diag = _compute_all_residuals( + X_, alphas, gram, max_iter=max_iter, tol=tol, - method=residual_method, n_jobs=n_jobs, verbose=verbose, ) - # Lasso regression - sigma_hat, beta_lasso = reid(X, y, n_jobs=n_jobs) - # Computing the degrees of freedom adjustement if dof_ajdustement: - coef_max = np.max(np.abs(beta_lasso)) - support = np.sum(np.abs(beta_lasso) > 0.01 * coef_max) + coef_max = np.max(np.abs(beta_reid)) + support = np.sum(np.abs(beta_reid) > 0.01 * coef_max) support = min(support, n_samples - 1) dof_factor = n_samples / (n_samples - support) else: dof_factor = 1 # Computing Desparsified Lasso estimator and confidence intervals - beta_bias = dof_factor * np.dot(y.T, Z) / np.sum(X * Z, axis=0) + # Estimating the coefficient vector + beta_bias = dof_factor * np.dot(y_.T, Z) / np.sum(X_ * Z, axis=0) - P = ((Z.T.dot(X)).T / np.sum(X * Z, axis=0)).T + # beta hat + P = (np.dot(X_.T, Z) / np.sum(X_ * Z, axis=0)).T P_nodiag = P - np.diag(np.diag(P)) Id = np.identity(n_features) P_nodiag = dof_factor * P_nodiag + (dof_factor - 1) * Id - - beta_hat = beta_bias - P_nodiag.dot(beta_lasso) - + beta_hat = beta_bias.T - P_nodiag.dot(beta_reid.T) + # confidence intervals omega_diag = omega_diag * dof_factor**2 - omega_invsqrt_diag = omega_diag ** (-0.5) + if not group: + return beta_hat, sigma_hat, omega_diag + else: + cov_hat = sigma_hat + if cov is not None: + cov_hat = cov + theta_hat = n_samples * inv(cov_hat) + return beta_hat, theta_hat, omega_diag + + +def desparsified_lasso_pvalue( + n_samples, + beta_hat, + sigma_hat, + omega_diag, + confidence=0.95, + distribution="norm", + eps=1e-14, + confidence_interval_only=False, +): + """ + Calculate confidence intervals and p-values for desparsified lasso estimators. + This function computes confidence intervals for the desparsified lasso + estimator beta_hat. + It can also return p-values derived from these confidence intervals. + Parameters + ---------- + n_samples : float + The number of samples + beta_hat : ndarray, shape (n_features,) + The desparsified lasso coefficient estimates. + sigma_hat : float + Estimated noise level. + omega_diag : ndarray, shape (n_features,) + Diagonal elements of the precision matrix estimate. + confidence : float, default=0.95 + Confidence level for intervals, must be in [0, 1]. + distribution : str, default="norm" + Distribution to use for p-value calculation. + Currently only "norm" supported. + eps : float, default=1e-14 + Small value to avoid numerical issues in p-value calculation. + confidence_interval_only : bool, optional (default=False) + If True, return only confidence intervals. + If False, also return p-values. 
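A standalone numeric sketch of the interval formula documented for this helper, cb = beta_hat +/- z_{(1+confidence)/2} * sigma_hat * sqrt(omega_jj / n). The array values below are made up purely for illustration.

    import numpy as np
    from scipy import stats

    n_samples = 100
    beta_hat = np.array([1.2, 0.1, -0.8])
    sigma_hat = 0.5
    omega_diag = np.array([1.1, 0.9, 1.3])  # diagonal of the precision-matrix estimate
    confidence = 0.95

    quantile = stats.norm.ppf(1 - (1 - confidence) / 2)  # about 1.96
    radius = quantile * sigma_hat * np.sqrt(omega_diag / n_samples)
    cb_min, cb_max = beta_hat - radius, beta_hat + radius
    print(np.round(cb_min, 3), np.round(cb_max, 3))
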
+ Returns + ------- + If confidence_interval_only=True: + cb_min : ndarray, shape (n_features,) + Lower bounds of confidence intervals + cb_max : ndarray, shape (n_features,) + Upper bounds of confidence intervals + If confidence_interval_only=False: + pval : ndarray, shape (n_features,) + P-values + pval_corr : ndarray, shape (n_features,) + Corrected p-values + one_minus_pval : ndarray, shape (n_features,) + 1 - p-values + one_minus_pval_corr : ndarray, shape (n_features,) + 1 - corrected p-values + cb_min : ndarray, shape (n_features,) + Lower bounds of confidence intervals + cb_max : ndarray, shape (n_features,) + Upper bounds of confidence intervals + """ + # define the quantile for the confidence intervals quantile = stats.norm.ppf(1 - (1 - confidence) / 2) - + # TODO:why the double inverse of omega_diag? + omega_invsqrt_diag = omega_diag ** (-0.5) confint_radius = np.abs( quantile * sigma_hat / (np.sqrt(n_samples) * omega_invsqrt_diag) ) cb_max = beta_hat + confint_radius cb_min = beta_hat - confint_radius - return beta_hat, cb_min, cb_max + if confidence_interval_only: + return cb_min, cb_max + pval, pval_corr, one_minus_pval, one_minus_pval_corr = pval_from_cb( + cb_min, cb_max, confidence=confidence, distribution=distribution, eps=eps + ) + return pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max -def desparsified_group_lasso( - X, - Y, - cov=None, - test="chi2", - max_iter=5000, - tol=1e-3, - residual_method="lasso", - alpha_max_fraction=0.01, - noise_method="AR", - order=1, - n_jobs=1, - memory=None, - verbose=0, -): - """Desparsified Group Lasso + +def desparsified_group_lasso_pvalue(beta_hat, theta_hat, omega_diag, test="chi2"): + """ + Compute p-values for the desparsified group Lasso estimator using + chi-squared or F tests Parameters ---------- - X : ndarray, shape (n_samples, n_features) - Data. + beta_hat : ndarray, shape (n_features, n_times) + Estimated parameter matrix from desparsified group Lasso. - Y : ndarray, shape (n_samples, n_times) - Target. + theta_hat : ndarray, shape (n_times, n_times) + Estimated precision matrix (inverse covariance). - cov : ndarray, shape (n_times, n_times), optional (default=None) - If None, a temporal covariance matrix of the noise is estimated. - Otherwise, `cov` is the temporal covariance matrix of the noise. + omega_diag : ndarray, shape (n_features,) + Diagonal elements of the precision matrix. - test : str, optional (default='chi2') - Statistical test used to compute p-values. 'chi2' corresponds - to a chi-squared test and 'F' corresponds to an F-test. + test : {'chi2', 'F'}, default='chi2' + Statistical test for computing p-values: + - 'chi2': Chi-squared test (recommended for large samples) + - 'F': F-test (better for small samples) - max_iter : int, optional (default=5000) - The maximum number of iterations when regressing, by Lasso, - each column of the design matrix against the others. + Returns + ------- + pval : ndarray, shape (n_features,) + Raw p-values, numerically accurate for positive effects + (p-values close to 0). - tol : float, optional (default=1e-3) - The tolerance for the optimization of the Lasso problems: if the - updates are smaller than `tol`, the optimization code checks the - dual gap for optimality and continues until it is smaller than `tol`. + pval_corr : ndarray, shape (n_features,) + P-values corrected for multiple testing using + Benjamini-Hochberg procedure. - residual_method : str, optional (default='lasso') - Method used for computing the residuals of the Nodewise Lasso. 
- Currently the only method available is 'lasso'. + one_minus_pval : ndarray, shape (n_features,) + 1 - p-values, numerically accurate for negative effects + (p-values close to 1). - alpha_max_fraction : float, optional (default=0.01) - Only used if method='lasso'. - Then alpha = alpha_max_fraction * alpha_max. + one_minus_pval_corr : ndarray, shape (n_features,) + 1 - corrected p-values. - noise_method : str, optional (default='simple') - If 'simple', the correlation matrix is estimated by taking the - median of the correlation between two consecutive time steps - and the noise standard deviation for each time step is estimated - by taking the median of the standard deviations for every time step. - If 'AR', the order of the AR model is given by `order` and - Yule-Walker method is used to estimate the covariance matrix. + Notes + ----- + The chi-squared test assumes asymptotic normality while the F-test + makes no such assumption and is preferable for small sample sizes. + P-values are computed based on score statistics from the estimated + coefficients and precision matrix. + """ + n_features, n_times = beta_hat.shape + n_samples = omega_diag.shape[0] - order : int, optional (default=1) - If `method=AR`, `order` gives the order of the estimated autoregressive - model. `order` must be smaller than the number of time steps. + # Compute the two-sided p-values + if test == "chi2": + chi2_scores = np.diag(multi_dot([beta_hat, theta_hat, beta_hat.T])) / omega_diag + two_sided_pval = np.minimum(2 * stats.chi2.sf(chi2_scores, df=n_times), 1.0) + elif test == "F": + f_scores = ( + np.diag(multi_dot([beta_hat, theta_hat, beta_hat.T])) / omega_diag / n_times + ) + two_sided_pval = np.minimum( + 2 * stats.f.sf(f_scores, dfd=n_samples, dfn=n_times), 1.0 + ) + else: + raise ValueError(f"Unknown test '{test}'") - n_jobs : int or None, optional (default=1) - Number of CPUs to use during the Nodewise Lasso. + # Compute the p-values + sign_beta = np.sign(np.sum(beta_hat, axis=1)) + pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( + pval_from_two_sided_pval_and_sign(two_sided_pval, sign_beta) + ) - memory : str or joblib.Memory object, optional (default=None) - Used to cache the output of the computation of the Nodewise Lasso. - By default, no caching is done. If a string is given, it is the path - to the caching directory. + return pval, pval_corr, one_minus_pval, one_minus_pval_corr - verbose: int, optional (default=1) - The verbosity level: if non zero, progress messages are printed - when computing the Nodewise Lasso in parralel. - The frequency of the messages increases with the verbosity level. - Returns - ------- - beta_hat : ndarray, shape (n_features, n_times) - Estimated parameter matrix. +def _compute_all_residuals( + X, alphas, gram, max_iter=5000, tol=1e-3, n_jobs=1, verbose=0 +): + """ + Nodewise Lasso for computing residuals and precision matrix diagonal. - pval : ndarray, shape (n_features,) - p-value, with numerically accurate values for - positive effects (ie., for p-value close to zero). + For each feature, fits a Lasso regression against all other features + to estimate the precision matrix and residuals needed for the + desparsified Lasso estimator. - pval_corr : ndarray, shape (n_features,) - p-value corrected for multiple testing. + Parameters + ---------- + X : ndarray, shape (n_samples, n_features) + Input data matrix. 
- one_minus_pval : ndarray, shape (n_features,) - One minus the p-value, with numerically accurate values - for negative effects (ie., for p-value close to one). + alphas : ndarray, shape (n_features,) + Lasso regularization parameters, one per feature. + + gram : ndarray, shape (n_features, n_features) + Precomputed Gram matrix X.T @ X to speed up computations. + + max_iter : int, optional (default=5000) + Maximum number of iterations for Lasso optimization. + + tol : float, optional (default=1e-3) + Convergence tolerance for Lasso optimization. + + n_jobs : int or None, optional (default=1) + Number of parallel jobs. None means using all processors. + + verbose : int, optional (default=0) + Controls the verbosity when fitting the models: + 0 = silent + 1 = progress bar + >1 = more detailed output + + Returns + ------- + Z : ndarray, shape (n_samples, n_features) + Matrix of residuals from nodewise regressions. + + omega_diag : ndarray, shape (n_features,) + Diagonal entries of the precision matrix estimate. - one_minus_pval_corr : ndarray, shape (n_features,) - One minus the p-value corrected for multiple testing. Notes ----- - The columns of `X` and the matrix `Y` are always centered, this ensures - that the intercepts of the Nodewise Lasso problems are all equal to zero - and the intercept of the noise model is also equal to zero. Since - the values of the intercepts are not of interest, the centering avoids - the consideration of unecessary additional parameters. - Also, you may consider to center and scale `X` beforehand, notably if - the data contained in `X` has not been prescaled from measurements. + This implements the nodewise Lasso procedure from :cite:`chevalier2020statistical` + for estimating entries of the precision matrix needed in the + desparsified Lasso. The procedure regresses each feature against + all others using Lasso to obtain residuals and precision matrix estimates. References ---------- - .. [1] Chevalier, J. A., Gramfort, A., Salmon, J., & Thirion, B. (2020). - Statistical control for spatio-temporal MEG/EEG source imaging with - desparsified multi-task Lasso. In NeurIPS 2020-34h Conference on - Neural Information Processing Systems. + .. 
footbibliography:: """ - X = np.asarray(X) - n_samples, n_features = X.shape - n_times = Y.shape[1] - - memory = check_memory(memory) - if cov is not None and cov.shape != (n_times, n_times): - raise ValueError( - f'Shape of "cov" should be ({n_times}, {n_times}),' - + f' the shape of "cov" was ({cov.shape}) instead' + results = Parallel(n_jobs=n_jobs, verbose=verbose)( + delayed(_compute_residuals)( + X=X, + column_index=i, + alpha=alphas[i], + gram=gram, + max_iter=max_iter, + tol=tol, ) + for i in range(n_features) + ) - Y = Y - np.mean(Y) - X = X - np.mean(X, axis=0) - gram = np.dot(X.T, X) - gram_nodiag = gram - np.diag(np.diag(gram)) + # Unpacking the results + results = np.asarray(results, dtype=object) + Z = np.stack(results[:, 0], axis=1) + omega_diag = np.stack(results[:, 1]) - list_alpha_max = np.max(np.abs(gram_nodiag), axis=0) / n_samples - alphas = alpha_max_fraction * list_alpha_max + return Z, omega_diag - # Calculating precision matrix (Nodewise Lasso) - Z, omega_diag = memory.cache(_compute_all_residuals, ignore=["n_jobs"])( - X, - alphas, - gram, - max_iter=max_iter, - tol=tol, - method=residual_method, - n_jobs=n_jobs, - verbose=verbose, - ) - # Group Lasso regression - cov_hat, beta_mtl = group_reid( - X, Y, method=noise_method, order=order, n_jobs=n_jobs - ) +def _compute_residuals(X, column_index, alpha, gram, max_iter=5000, tol=1e-3): + """ + Compute nodewise Lasso regression for desparsified Lasso estimation - if cov is not None: - cov_hat = cov + For feature i, regresses X[:,i] against all other features to + obtain residuals and precision matrix diagonal entry needed for debiasing. - theta_hat = n_samples * inv(cov_hat) + Parameters + ---------- + X : ndarray, shape (n_samples, n_features) + Centered input data matrix - # Estimating the coefficient vector - beta_bias = Y.T.dot(Z) / np.sum(X * Z, axis=0) + column_index : int + Index i of feature to regress - beta_mtl = beta_mtl.T - beta_bias = beta_bias.T + alpha : float + Lasso regularization parameter - P = (np.dot(X.T, Z) / np.sum(X * Z, axis=0)).T - P_nodiag = P - np.diag(np.diag(P)) + gram : ndarray, shape (n_features, n_features) + Precomputed X.T @ X matrix - beta_hat = beta_bias - P_nodiag.dot(beta_mtl) + max_iter : int, default=5000 + Maximum Lasso iterations - if test == "chi2": + tol : float, default=1e-3 + Optimization tolerance - chi2_scores = np.diag(multi_dot([beta_hat, theta_hat, beta_hat.T])) / omega_diag - two_sided_pval = np.minimum(2 * stats.chi2.sf(chi2_scores, df=n_times), 1.0) + Returns + ------- + z : ndarray, shape (n_samples,) + Residuals from regression - if test == "F": + omega_diag_i : float + Diagonal entry i of precision matrix estimate, + computed as n * ||z||^2 / ^2 - f_scores = ( - np.diag(multi_dot([beta_hat, theta_hat, beta_hat.T])) / omega_diag / n_times - ) - two_sided_pval = np.minimum( - 2 * stats.f.sf(f_scores, dfd=n_samples, dfn=n_times), 1.0 - ) + Notes + ----- + Uses sklearn's Lasso with precomputed Gram matrix for efficiency. + """ - sign_beta = np.sign(np.sum(beta_hat, axis=1)) - pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( - pval_from_two_sided_pval_and_sign(two_sided_pval, sign_beta) - ) + n_samples, n_features = X.shape + i = column_index - return beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr + # Removing the column to regress against the others + X_new = np.delete(X, i, axis=1) + y_new = np.copy(X[:, i]) + + # Method used for computing the residuals of the Nodewise Lasso. 
+ # here we use the Lasso method + gram_ = np.delete(np.delete(gram, i, axis=0), i, axis=1) + clf = Lasso(alpha=alpha, precompute=gram_, max_iter=max_iter, tol=tol) + + # Fitting the Lasso model and computing the residuals + clf.fit(X_new, y_new) + z = y_new - clf.predict(X_new) + + # Computing the diagonal of the covariance matrix + omega_diag_i = n_samples * np.sum(z**2) / np.dot(y_new, z) ** 2 + + return z, omega_diag_i diff --git a/src/hidimstat/ensemble_clustered_inference.py b/src/hidimstat/ensemble_clustered_inference.py index 2c4233e6..ab289f68 100644 --- a/src/hidimstat/ensemble_clustered_inference.py +++ b/src/hidimstat/ensemble_clustered_inference.py @@ -57,7 +57,6 @@ def ensemble_clustered_inference( gamma_min=0.2, n_bootstraps=25, n_jobs=1, - memory=None, verbose=1, **kwargs, ): @@ -113,11 +112,6 @@ def ensemble_clustered_inference( Number of CPUs used to compute several clustered inference algorithms at the same time. - memory : str, optional (default=None) - Used to cache the output of the computation of the clustering - and the inference. By default, no caching is done. If a string is - given, it is the path to the caching directory. - verbose: int, optional (default=1) The verbosity level. If `verbose > 0`, we print a message before runing the clustered inference. @@ -150,13 +144,6 @@ def ensemble_clustered_inference( Spatially relaxed inference on high-dimensional linear models. arXiv preprint arXiv:2106.02590. """ - - if memory is not None and not isinstance(memory, str): - raise ValueError( - "'memory' must be None or a string corresponding " - + "to the path of the caching directory." - ) - # Clustered inference algorithms results = Parallel(n_jobs=n_jobs, verbose=verbose)( delayed(clustered_inference)( @@ -169,7 +156,6 @@ def ensemble_clustered_inference( method=inference_method, seed=i, n_jobs=1, - memory=memory, verbose=verbose, **kwargs, ) diff --git a/src/hidimstat/noise_std.py b/src/hidimstat/noise_std.py index 276b2c7d..4ad5347b 100644 --- a/src/hidimstat/noise_std.py +++ b/src/hidimstat/noise_std.py @@ -5,175 +5,105 @@ from sklearn.model_selection import KFold -def reid(X, y, eps=1e-2, tol=1e-4, max_iter=10000, n_jobs=1, seed=0): - """Estimation of noise standard deviation using Reid procedure - - Parameters - ---------- - X : ndarray, shape (n_samples, n_features) - Data. - - y : ndarray, shape (n_samples,) - Target. - - eps: float, optional (default=1e-2) - Length of the cross-validation path. - eps=1e-2 means that alpha_min / alpha_max = 1e-2. - - tol : float, optional (default=1e-4) - The tolerance for the optimization: if the updates are smaller - than `tol`, the optimization code checks the dual gap for optimality - and continues until it is smaller than `tol`. - - max_iter : int, optional (default=1e4) - The maximum number of iterations. - - n_jobs : int or None, optional (default=1) - Number of CPUs to use during the cross validation. - - seed: int, optional (default=0) - Seed passed in the KFold object which is used to cross-validate - LassoCV. This seed controls the partitioning randomness. - - Returns - ------- - sigma_hat : float - Estimated noise standard deviation. - - beta_hat : array, shape (n_features,) - Estimated parameter vector. - - References - ---------- - .. [1] Reid, S., Tibshirani, R., & Friedman, J. (2016). A study of error - variance estimation in lasso regression. Statistica Sinica, 35-67. 
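A standalone sketch of the nodewise-Lasso step for a single column i, as documented above: regress X[:, i] on the remaining columns, keep the residual z and the precision-diagonal estimate n * ||z||^2 / <x_i, z>^2. The regularization strength alpha is arbitrary here; in the library it comes from alpha_max_fraction.

    import numpy as np
    from sklearn.linear_model import Lasso

    rng = np.random.default_rng(3)
    n_samples, n_features, i = 80, 20, 0
    X = rng.standard_normal((n_samples, n_features))
    X = X - X.mean(axis=0)  # centering, as in desparsified_lasso

    # regress column i against all other columns
    X_minus_i = np.delete(X, i, axis=1)
    x_i = X[:, i]
    clf = Lasso(alpha=0.05, fit_intercept=False, max_iter=5000, tol=1e-3)
    clf.fit(X_minus_i, x_i)
    z = x_i - clf.predict(X_minus_i)

    # diagonal entry i of the precision-matrix estimate
    omega_diag_i = n_samples * np.sum(z**2) / np.dot(x_i, z) ** 2
    print(omega_diag_i)
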
- """ - - X = np.asarray(X) - n_samples, n_features = X.shape - - if max_iter // 5 <= n_features: - max_iter = n_features * 5 - print(f"'max_iter' has been increased to {max_iter}") - - cv = KFold(n_splits=5, shuffle=True, random_state=seed) - - clf_lasso_cv = LassoCV( - eps=eps, fit_intercept=False, cv=cv, tol=tol, max_iter=max_iter, n_jobs=n_jobs - ) - - clf_lasso_cv.fit(X, y) - beta_hat = clf_lasso_cv.coef_ - residual = clf_lasso_cv.predict(X) - y - coef_max = np.max(np.abs(beta_hat)) - support = np.sum(np.abs(beta_hat) > tol * coef_max) - - # avoid dividing by 0 - support = min(support, n_samples - 1) - - sigma_hat = norm(residual) / np.sqrt(n_samples - support) - - return sigma_hat, beta_hat - - -def group_reid( +def reid( X, - Y, - fit_Y=True, - stationary=True, - method="simple", - order=1, + y, eps=1e-2, tol=1e-4, max_iter=10000, + n_split=5, n_jobs=1, seed=0, + group=False, + fit_Y=True, + stationary=True, + method="simple", + order=1, ): - """Estimation of the covariance matrix using group Reid procedure + """ + Residual sum of squares based estimators for noise standard deviation + estimation. + + This implementation follows the procedure described in + :footcite:`fan2012variance` and :cite:`reid2016study`. It uses Lasso with + cross-validation to estimate both the noise standard deviation and model + coefficients. + + For group, the implementation is based on the procedure + from :footcite:`chevalier2020statistical`. Parameters ---------- X : ndarray, shape (n_samples, n_features) - Data. + Input data matrix. + + y : ndarray, shape (n_samples,)/(n_samples, n_times) + Target vector. The time means the presence of groups. - Y : ndarray, shape (n_samples, n_times) - Target. + eps : float, optional (default=1e-2) + Length of the cross-validation path, where alpha_min / alpha_max = eps. + Smaller values create a finer grid. - fit_Y : bool, optional (default=True) - If True, Y will be regressed against X by MultiTaskLassoCV - and the covariance matrix is estimated on the residuals. - Otherwise, covariance matrix is estimated directly on Y. + tol : float, optional (default=1e-4) + Tolerance for optimization convergence. The algorithm stops + when updates are smaller than tol and dual gap is smaller than tol. - stationary : bool, optional (default=True) - If True, noise is considered to have the same magnitude for each - time step. Otherwise, magnitude of the noise is not constant. + max_iter : int, optional (default=10000) + Maximum number of iterations for the optimization algorithm. - method : str, optional (default='simple') - If 'simple', the correlation matrix is estimated by taking the - median of the correlation between two consecutive time steps - and the noise standard deviation for each time step is estimated - by taking the median of the standard deviations for every time step. - If 'AR', the order of the AR model is given by `order` and - Yule-Walker method is used to estimate the covariance matrix. + n_split : int, optional (default=5) + Number of folds for cross-validation. - order : int, optional (default=1) - If `stationary=True` and `method=AR`, `order` gives the - order of the estimated autoregressive model. `order` must - be smaller than the number of time steps. + n_jobs : int, optional (default=1) + Number of parallel jobs for cross-validation. + -1 means using all processors. - eps : float, optional (default=1e-2) - Length of the cross-validation path. - eps=1e-2 means that alpha_min / alpha_max = 1e-2. 
+ seed : int, optional (default=0) + Random seed for reproducible cross-validation splits. - tol : float, optional (default=1e-4) - The tolerance for the optimization: if the updates are smaller - than `tol`, the optimization code checks the dual gap for optimality - and continues until it is smaller than `tol`. + fit_Y : bool, (default=True) + Whether to use MultiTaskLassoCV to fit Y against X. + If False, covariance is estimated directly from Y. - max_iter : int, optional (default=1e4) - The maximum number of iterations. + stationary : bool, (default=True) + Whether noise has constant magnitude across time steps. - n_jobs : int or None, optional (default=1) - Number of CPUs to use during the cross validation. + method : {'simple', 'AR'}, (default='simple') + Covariance estimation method: + - 'simple': Uses median correlation between consecutive time steps + - 'AR': Uses Yule-Walker method with specified order - seed: int, optional (default=0) - Seed passed in the KFold object which is used to cross-validate - LassoCV. This seed controls also the partitioning randomness. + order : int, default=1 + Order of AR model when method='AR'. Must be < n_times. Returns ------- - cov_hat : ndarray, shape (n_times, n_times) - Estimated covariance matrix. + sigma_hat/cov_hat : float/ndarray, shape (n_times, n_times) + Estimated noise standard deviation based on residuals + or estimated covariance matrix for group. - beta_hat : ndarray, shape (n_features, n_times) - Estimated parameter matrix. + beta_hat : ndarray, shape (n_features,)/(n_features, n_times) + Estimated sparse coefficient vector from Lasso regression. References ---------- - .. [1] Chevalier, J. A., Gramfort, A., Salmon, J., & Thirion, B. (2020). - Statistical control for spatio-temporal MEG/EEG source imaging with - desparsified multi-task Lasso. In NeurIPS 2020-34h Conference on - Neural Information Processing Systems. + .. 
footbibliography:: """ - X = np.asarray(X) - n_samples, n_features = X.shape - n_times = Y.shape[1] - - if method == "simple": - print("Group reid: simple cov estimation") - else: - print(f"Group reid: {method}{order} cov estimation") - - if (max_iter // 5) <= n_features: - max_iter = n_features * 5 - print(f"'max_iter' has been increased to {max_iter}") - - cv = KFold(n_splits=5, shuffle=True, random_state=seed) - + X_ = np.asarray(X) + n_samples, n_features = X_.shape + if group: + n_times = y.shape[1] if fit_Y: - - clf_mtlcv = MultiTaskLassoCV( + # check if max_iter is large enough + if max_iter // n_split <= n_features: + max_iter = n_features * n_split + print(f"'max_iter' has been increased to {max_iter}") + + # use the cross-validation for define the best alpha of Lasso + cv = KFold(n_splits=n_split, shuffle=True, random_state=seed) + Refit_CV = MultiTaskLassoCV if group else LassoCV + clf_cv = Refit_CV( eps=eps, fit_intercept=False, cv=cv, @@ -181,107 +111,146 @@ def group_reid( max_iter=max_iter, n_jobs=n_jobs, ) + clf_cv.fit(X_, y) - clf_mtlcv.fit(X, Y) - beta_hat = clf_mtlcv.coef_ - residual = clf_mtlcv.predict(X) - Y - row_max = np.max(np.sum(np.abs(beta_hat), axis=0)) - support = np.sum(np.sum(np.abs(beta_hat), axis=0) > tol * row_max) + # Estimate the support of the variable importance + beta_hat = clf_cv.coef_ + residual = clf_cv.predict(X_) - y - # avoid dividing by 0 - support = min(support, n_samples - 1) + # get the number of non-zero coefficients + coef_sum = np.sum(np.abs(beta_hat), axis=0) + size_support = np.sum(coef_sum > tol * coef_sum.max()) + # avoid dividing by 0 + size_support = min(size_support, n_samples - 1) else: - + # null model + # TODO Why do we need a null model? beta_hat = np.zeros((n_features, n_times)) - residual = np.copy(Y) - support = 0 + residual = np.copy(y) + size_support = 0 - sigma_hat_raw = norm(residual, axis=0) / np.sqrt(n_samples - support) + # estimate the noise standard deviation (eq. 7 in `fan2012variance`) + sigma_hat_raw = norm(residual, axis=0) / np.sqrt(n_samples - size_support) - if stationary: - sigma_hat = np.median(sigma_hat_raw) * np.ones(n_times) - corr_emp = np.corrcoef(residual.T) - else: - sigma_hat = sigma_hat_raw - residual_rescaled = residual / sigma_hat - corr_emp = np.corrcoef(residual_rescaled.T) - - # Median method - if not stationary or method == "simple": - - rho_hat = np.median(np.diag(corr_emp, 1)) - corr_hat = toeplitz(np.geomspace(1, rho_hat ** (n_times - 1), n_times)) - cov_hat = np.outer(sigma_hat, sigma_hat) * corr_hat - - # Yule-Walker method - elif stationary and method == "AR": - - if order > n_times - 1: - raise ValueError( - "The requested AR order is to high with " - + "respect to the number of time steps." 
- ) - - rho_ar = np.zeros(order + 1) - rho_ar[0] = 1 - - for i in range(1, order + 1): - rho_ar[i] = np.median(np.diag(corr_emp, i)) - - A = toeplitz(rho_ar[:-1]) - coef_ar = solve(A, rho_ar[1:]) - - residual_estimate = np.zeros((n_samples, n_times - order)) - - for i in range(order): - # time window used to estimate the residual from AR model - start = order - i - 1 - end = -i - 1 - residual_estimate += coef_ar[i] * residual[:, start:end] - - residual_diff = residual[:, order:] - residual_estimate - sigma_eps = np.median(norm(residual_diff, axis=0) / np.sqrt(n_samples)) - - rho_ar_full = np.zeros(n_times) - rho_ar_full[: rho_ar.size] = rho_ar - - for i in range(order + 1, n_times): - start = i - order - end = i - rho_ar_full[i] = np.dot(coef_ar[::-1], rho_ar_full[start:end]) - - corr_hat = toeplitz(rho_ar_full) - sigma_hat[:] = sigma_eps / np.sqrt((1 - np.dot(coef_ar, rho_ar[1:]))) - cov_hat = np.outer(sigma_hat, sigma_hat) * corr_hat + if not group: + return sigma_hat_raw, beta_hat + ## Computation of the covariance matrix for group else: - raise ValueError("Unknown method for estimating the covariance matrix") - - return cov_hat, beta_hat + if method == "simple": + print("Group reid: simple cov estimation") + elif method == "AR": + print(f"Group reid: {method}{order} cov estimation") + if order > n_times - 1: + raise ValueError( + "The requested AR order is to high with " + + "respect to the number of time steps." + ) + elif not stationary: + raise ValueError( + "The AR method is not compatible with the non-stationary" + + " noise assumption." + ) + else: + raise ValueError("Unknown method for estimating the covariance matrix") + ## compute emperical correlation of the residual + if stationary: + # consideration of stationary noise + # (section 2.5 of `chevalier2020statistical`) + sigma_hat = np.median(sigma_hat_raw) * np.ones(n_times) + # compute rho from the empirical correlation matrix + # (section 2.5 of `chevalier2020statistical`) + corr_emp = np.corrcoef(residual.T) + else: + sigma_hat = sigma_hat_raw + residual_rescaled = residual / sigma_hat + corr_emp = np.corrcoef(residual_rescaled.T) + + # TODO: Why the name of the method is different + # than the name of the "function"? 
+ # Median method + if not stationary or method == "simple": + rho_hat = np.median(np.diag(corr_emp, 1)) + # estimate M (section 2.5 of `chevalier2020statistical`) + corr_hat = toeplitz(np.geomspace(1, rho_hat ** (n_times - 1), n_times)) + cov_hat = np.outer(sigma_hat, sigma_hat) * corr_hat + + # Yule-Walker method (algorithm in section 3 of `eshel2003yule`) + elif stationary and method == "AR": + # compute the autocorrelation coefficients of the AR model + rho_ar = np.zeros(order + 1) + rho_ar[0] = 1 + + for i in range(1, order + 1): + rho_ar[i] = np.median(np.diag(corr_emp, i)) + + # solve the Yule-Walker equations (see eq.2 in `eshel2003yule`) + R = toeplitz(rho_ar[:-1]) + coef_ar = solve(R, rho_ar[1:]) + + # estimate the variance of the noise from the AR model + residual_estimate = np.zeros((n_samples, n_times - order)) + for i in range(order): + # time window used to estimate the residual from AR model + start = order - i - 1 + end = -i - 1 + residual_estimate += coef_ar[i] * residual[:, start:end] + residual_diff = residual[:, order:] - residual_estimate + sigma_eps = np.median(norm(residual_diff, axis=0) / np.sqrt(n_samples)) + + # estimation of the autocorrelation matrices + rho_ar_full = np.zeros(n_times) + rho_ar_full[: rho_ar.size] = rho_ar + for i in range(order + 1, n_times): + start = i - order + end = i + rho_ar_full[i] = np.dot(coef_ar[::-1], rho_ar_full[start:end]) + corr_hat = toeplitz(rho_ar_full) + + # estimation of the variance of an AR process + # from wikipedia it should be: + # VAR(X_t)=\frac{\sigma_\epsilon^2}{1-\phi^2} + # TODO there is a short difference between the code + # and the above formula + sigma_hat[:] = sigma_eps / np.sqrt((1 - np.dot(coef_ar, rho_ar[1:]))) + # estimation of the covariance based on the + # correlation matrix and sigma + # COV(X_t, X_t) = COR(X_t, X_t) * \sigma^2 + cov_hat = np.outer(sigma_hat, sigma_hat) * corr_hat + + return cov_hat, beta_hat def empirical_snr(X, y, beta, noise=None): - """Compute the SNR for the linear model: y = X beta + noise + """ + Compute the empirical signal-to-noise ratio (SNR) for + the linear model y = X @ beta + noise. Parameters ---------- - X : ndarray or scipy.sparse matrix, shape (n_samples, n_features) - Data. + X : ndarray, shape (n_samples, n_features) + Design matrix. y : ndarray, shape (n_samples,) - Target. + Target vector. beta : ndarray, shape (n_features,) - True parameter vector. + Parameter vector. - noise : ndarray, shape (n_samples,), optional (default=None) - True error vector. + noise : ndarray, shape (n_samples,), optional + Noise vector. If None, computed as y - X @ beta. Returns ------- snr_hat : float - Empirical signal-to-noise ratio. + Empirical SNR computed as var(signal) / var(noise). + + Notes + ----- + SNR measures the ratio of signal power to noise power, + indicating model estimation quality. + Higher values suggest better signal recovery. 
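A standalone sketch of the Yule-Walker step used above for method='AR' with order=1, assuming simulated AR(1) residuals with a known lag-1 correlation: estimate the autocorrelation from the empirical correlation matrix, solve the (here 1x1) Yule-Walker system, and rebuild the full Toeplitz correlation matrix.

    import numpy as np
    from scipy.linalg import toeplitz, solve

    rng = np.random.default_rng(4)
    n_samples, n_times, rho_true = 200, 6, 0.6

    # simulate AR(1) residuals with lag-1 correlation rho_true and unit variance
    noise = rng.standard_normal((n_samples, n_times))
    residual = np.copy(noise)
    for t in range(1, n_times):
        residual[:, t] = (
            rho_true * residual[:, t - 1] + np.sqrt(1 - rho_true**2) * noise[:, t]
        )

    corr_emp = np.corrcoef(residual.T)
    order = 1
    rho_ar = np.array(
        [1.0] + [np.median(np.diag(corr_emp, k)) for k in range(1, order + 1)]
    )
    coef_ar = solve(toeplitz(rho_ar[:-1]), rho_ar[1:])  # Yule-Walker equations

    # extend the autocorrelation sequence and build the full correlation matrix
    rho_full = np.zeros(n_times)
    rho_full[: rho_ar.size] = rho_ar
    for k in range(order + 1, n_times):
        rho_full[k] = np.dot(coef_ar[::-1], rho_full[k - order : k])
    corr_hat = toeplitz(rho_full)
    print(np.round(coef_ar, 2))  # should be close to rho_true
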
""" X = np.asarray(X) @@ -290,8 +259,7 @@ def empirical_snr(X, y, beta, noise=None): if noise is None: noise = y - signal - sig_signal = np.linalg.norm(signal - np.mean(signal)) - sig_noise = np.linalg.norm(noise - np.mean(noise)) - snr_hat = (sig_signal / sig_noise) ** 2 + # compute signal-to-noise ratio + snr_hat = np.var(signal) / np.var(noise) return snr_hat diff --git a/src/hidimstat/stat_tools.py b/src/hidimstat/stat_tools.py index 0ced13e9..e3640882 100644 --- a/src/hidimstat/stat_tools.py +++ b/src/hidimstat/stat_tools.py @@ -54,7 +54,7 @@ def pval_corr_from_pval(one_sided_pval): return one_sided_pval_corr -def pval_from_scale(beta, scale, distrib="norm", eps=1e-14): +def pval_from_scale(beta, scale, distribution="norm", eps=1e-14): """Computing one-sided p-values from the value of the parameter and its scale. @@ -66,7 +66,7 @@ def pval_from_scale(beta, scale, distrib="norm", eps=1e-14): scale : ndarray, shape (n_features,) Value of the standard deviation of the parameters. - distrib : str, opitonal (default='norm') + distribution : str, opitonal (default='norm') Type of distribution assumed for the underlying estimator. 'norm' means normal and is the only value accepted at the moment. @@ -97,7 +97,7 @@ def pval_from_scale(beta, scale, distrib="norm", eps=1e-14): pval = np.zeros(n_features) + 0.5 one_minus_pval = np.zeros(n_features) + 0.5 - if distrib == "norm": + if distribution == "norm": pval[index_no_nan] = norm.sf(beta[index_no_nan], scale=scale[index_no_nan]) one_minus_pval[index_no_nan] = norm.cdf( @@ -113,7 +113,7 @@ def pval_from_scale(beta, scale, distrib="norm", eps=1e-14): return pval, pval_corr, one_minus_pval, one_minus_pval_corr -def zscore_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm"): +def zscore_from_cb(cb_min, cb_max, confidence=0.95, distribution="norm"): """Computing z-scores from confidence intervals. Parameters @@ -128,7 +128,7 @@ def zscore_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm"): Confidence level used to compute the confidence intervals. Each value should be in the range [0, 1]. - distrib : str, opitonal (default='norm') + distribution : str, opitonal (default='norm') Type of distribution assumed for the underlying estimator. 'norm' means normal and is the only value accepted at the moment. @@ -138,7 +138,7 @@ def zscore_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm"): z-scores. """ - if distrib == "norm": + if distribution == "norm": quantile = norm.ppf(1 - (1 - confidence) / 2) beta_hat = (cb_min + cb_max) / 2 @@ -148,7 +148,7 @@ def zscore_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm"): return zscore -def pval_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm", eps=1e-14): +def pval_from_cb(cb_min, cb_max, confidence=0.95, distribution="norm", eps=1e-14): """Computing one-sided p-values from confidence intervals. Parameters @@ -163,7 +163,7 @@ def pval_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm", eps=1e-14): Confidence level used to compute the confidence intervals. Each value should be in the range [0, 1]. - distrib : str, opitonal (default='norm') + distribution : str, opitonal (default='norm') Type of distribution assumed for the underlying estimator. 'norm' means normal and is the only value accepted at the moment. @@ -187,9 +187,11 @@ def pval_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm", eps=1e-14): One minus the p-value corrected for multiple testing. 
""" - zscore = zscore_from_cb(cb_min, cb_max, confidence=confidence, distrib=distrib) + zscore = zscore_from_cb( + cb_min, cb_max, confidence=confidence, distribution=distribution + ) - if distrib == "norm": + if distribution == "norm": pval = norm.sf(zscore) one_minus_pval = norm.cdf(zscore) @@ -203,7 +205,7 @@ def pval_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm", eps=1e-14): return pval, pval_corr, one_minus_pval, one_minus_pval_corr -def two_sided_pval_from_zscore(zscore, distrib="norm"): +def two_sided_pval_from_zscore(zscore, distribution="norm"): """Computing two-sided p-values from z-scores. Parameters @@ -211,7 +213,7 @@ def two_sided_pval_from_zscore(zscore, distrib="norm"): zscore : ndarray, shape (n_features,) z-scores. - distrib : str, opitonal (default='norm') + distribution : str, opitonal (default='norm') Type of distribution assumed for the underlying estimator. 'norm' means normal and is the only value accepted at the moment. @@ -225,7 +227,7 @@ def two_sided_pval_from_zscore(zscore, distrib="norm"): """ n_features = zscore.size - if distrib == "norm": + if distribution == "norm": two_sided_pval = 2 * norm.sf(np.abs(zscore)) two_sided_pval_corr = np.minimum(1, two_sided_pval * n_features) @@ -233,7 +235,7 @@ def two_sided_pval_from_zscore(zscore, distrib="norm"): return two_sided_pval, two_sided_pval_corr -def two_sided_pval_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm"): +def two_sided_pval_from_cb(cb_min, cb_max, confidence=0.95, distribution="norm"): """Computing two-sided p-values from confidence intervals. Parameters @@ -248,7 +250,7 @@ def two_sided_pval_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm"): Confidence level used to compute the confidence intervals. Each value should be in the range [0, 1]. - distrib : str, opitonal (default='norm') + distribution : str, opitonal (default='norm') Type of distribution assumed for the underlying estimator. 'norm' means normal and is the only value accepted at the moment. @@ -260,16 +262,18 @@ def two_sided_pval_from_cb(cb_min, cb_max, confidence=0.95, distrib="norm"): two_sided_pval_corr : ndarray, shape (n_features,) Two-sided p-values corrected for multiple testing. """ - zscore = zscore_from_cb(cb_min, cb_max, confidence=confidence, distrib=distrib) + zscore = zscore_from_cb( + cb_min, cb_max, confidence=confidence, distribution=distribution + ) two_sided_pval, two_sided_pval_corr = two_sided_pval_from_zscore( - zscore, distrib=distrib + zscore, distribution=distribution ) return two_sided_pval, two_sided_pval_corr -def zscore_from_pval(pval, one_minus_pval=None, distrib="norm"): +def zscore_from_pval(pval, one_minus_pval=None, distribution="norm"): """Computing z-scores from one-sided p-values. Parameters @@ -282,7 +286,7 @@ def zscore_from_pval(pval, one_minus_pval=None, distrib="norm"): One minus the p-value, with numerically accurate values for negative effects (ie., for p-value close to one). - distrib : str, opitonal (default='norm') + distribution : str, opitonal (default='norm') Type of distribution assumed for the underlying estimator. 'norm' means normal and is the only value accepted at the moment. @@ -292,7 +296,7 @@ def zscore_from_pval(pval, one_minus_pval=None, distrib="norm"): z-scores. 
""" - if distrib == "norm": + if distribution == "norm": zscore = norm.isf(pval) @@ -357,7 +361,7 @@ def pval_from_two_sided_pval_and_sign(two_sided_pval, parameter_sign, eps=1e-14) return pval, pval_corr, one_minus_pval, one_minus_pval_corr -def two_sided_pval_from_pval(pval, one_minus_pval=None, distrib="norm"): +def two_sided_pval_from_pval(pval, one_minus_pval=None, distribution="norm"): """Computing two-sided p-value from one-sided p-values. Parameters @@ -370,7 +374,7 @@ def two_sided_pval_from_pval(pval, one_minus_pval=None, distrib="norm"): One minus the p-value, with numerically accurate values for negative effects (ie., for p-value close to one). - distrib : str, opitonal (default='norm') + distribution : str, opitonal (default='norm') Type of distribution assumed for the underlying estimator. 'norm' means normal and is the only value accepted at the moment. @@ -383,10 +387,10 @@ def two_sided_pval_from_pval(pval, one_minus_pval=None, distrib="norm"): Two-sided p-values corrected for multiple testing. """ - zscore = zscore_from_pval(pval, one_minus_pval, distrib=distrib) + zscore = zscore_from_pval(pval, one_minus_pval, distribution=distribution) two_sided_pval, two_sided_pval_corr = two_sided_pval_from_zscore( - zscore, distrib=distrib + zscore, distribution=distribution ) return two_sided_pval, two_sided_pval_corr diff --git a/test/test_clustered_inference.py b/test/test_clustered_inference.py index 94e04ef0..fb7450d9 100644 --- a/test/test_clustered_inference.py +++ b/test/test_clustered_inference.py @@ -2,6 +2,7 @@ Test the clustered_inference module """ +import pytest import numpy as np from numpy.testing import assert_almost_equal from sklearn.cluster import FeatureAgglomeration @@ -51,6 +52,12 @@ def test_clustered_inference(): n_clusters=n_clusters, connectivity=connectivity, linkage="ward" ) + # test exception + with pytest.raises(ValueError, match="Unknow method"): + beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( + clustered_inference(X_init, y, ward, n_clusters, method="test") + ) + beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = ( clustered_inference(X_init, y, ward, n_clusters) ) diff --git a/test/test_desparsified_lasso.py b/test/test_desparsified_lasso.py index 713bd2c5..89953fc4 100644 --- a/test/test_desparsified_lasso.py +++ b/test/test_desparsified_lasso.py @@ -2,11 +2,16 @@ Test the desparsified_lasso module """ +import pytest import numpy as np from numpy.testing import assert_almost_equal, assert_equal from scipy.linalg import toeplitz -from hidimstat.desparsified_lasso import desparsified_group_lasso, desparsified_lasso +from hidimstat.desparsified_lasso import ( + desparsified_lasso, + desparsified_lasso_pvalue, + desparsified_group_lasso_pvalue, +) from hidimstat.scenario import ( multivariate_1D_simulation, multivariate_temporal_simulation, @@ -18,7 +23,7 @@ def test_desparsified_lasso(): a support of size 1. 
     Computing 99% confidence bounds and checking that they contains
     the true parameter vector."""
-    n_samples, n_features = 50, 50
+    n_samples, n_features = 52, 50
     support_size = 1
     sigma = 0.1
     rho = 0.0
@@ -32,20 +37,42 @@ def test_desparsified_lasso():
         shuffle=False,
         seed=2,
     )
+    expected_pval_corr = np.concatenate(
+        (np.zeros(support_size), 0.5 * np.ones(n_features - support_size))
+    )
 
-    beta_hat, cb_min, cb_max = desparsified_lasso(X, y, confidence=0.99)
+    beta_hat, sigma_hat, omega_diag = desparsified_lasso(X, y)
+    pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = (
+        desparsified_lasso_pvalue(
+            X.shape[0], beta_hat, sigma_hat, omega_diag, confidence=0.99
+        )
+    )
 
     assert_almost_equal(beta_hat, beta, decimal=1)
     assert_equal(cb_min < beta, True)
     assert_equal(cb_max > beta, True)
+    assert_almost_equal(pval_corr, expected_pval_corr, decimal=1)
 
-    beta_hat, cb_min, cb_max = desparsified_lasso(
-        X, y, dof_ajdustement=True, confidence=0.99
+    beta_hat, sigma_hat, omega_diag = desparsified_lasso(X, y, dof_ajdustement=True)
+    pval, pval_corr, one_minus_pval, one_minus_pval_corr, cb_min, cb_max = (
+        desparsified_lasso_pvalue(
+            X.shape[0], beta_hat, sigma_hat, omega_diag, confidence=0.99
+        )
     )
-
+    cb_min_bis, cb_max_bis = desparsified_lasso_pvalue(
+        X.shape[0],
+        beta_hat,
+        sigma_hat,
+        omega_diag,
+        confidence=0.99,
+        confidence_interval_only=True,
+    )
+    assert np.all(cb_min_bis == cb_min)
+    assert np.all(cb_max_bis == cb_max)
     assert_almost_equal(beta_hat, beta, decimal=1)
     assert_equal(cb_min < beta, True)
     assert_equal(cb_max > beta, True)
+    assert_almost_equal(pval_corr, expected_pval_corr, decimal=1)
 
 
 def test_desparsified_group_lasso():
@@ -72,8 +99,9 @@
         rho_noise=rho,
     )
 
-    beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
-        desparsified_group_lasso(X, Y, cov=cov)
+    beta_hat, theta_hat, omega_diag = desparsified_lasso(X, Y, group=True, cov=cov)
+    pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
+        desparsified_group_lasso_pvalue(beta_hat, theta_hat, omega_diag)
     )
 
     expected_pval_corr = np.concatenate(
@@ -83,8 +111,9 @@
     assert_almost_equal(beta_hat, beta, decimal=1)
     assert_almost_equal(pval_corr, expected_pval_corr, decimal=1)
 
-    beta_hat, pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
-        desparsified_group_lasso(X, Y, test="F")
+    beta_hat, theta_hat, omega_diag = desparsified_lasso(X, Y, group=True)
+    pval, pval_corr, one_minus_pval, one_minus_pval_corr = (
+        desparsified_group_lasso_pvalue(beta_hat, theta_hat, omega_diag, test="F")
     )
 
     assert_almost_equal(beta_hat, beta, decimal=1)
@@ -93,5 +122,8 @@
     # Testing error is raised when the covariance matrix has wrong shape
     bad_cov = np.delete(cov, 0, axis=1)
     np.testing.assert_raises(
-        ValueError, desparsified_group_lasso, X=X, Y=Y, cov=bad_cov
+        ValueError, desparsified_lasso, X=X, y=Y, group=True, cov=bad_cov
     )
+
+    with pytest.raises(ValueError, match=f"Unknown test 'r2'"):
+        desparsified_group_lasso_pvalue(beta_hat, theta_hat, omega_diag, test="r2")
diff --git a/test/test_noise_std.py b/test/test_noise_std.py
index cb890529..2e90e069 100644
--- a/test/test_noise_std.py
+++ b/test/test_noise_std.py
@@ -2,11 +2,12 @@
 Test the noise_std module
 """
 
+import pytest
 import numpy as np
 from numpy.testing import assert_almost_equal
 from scipy.linalg import toeplitz
 
-from hidimstat.noise_std import empirical_snr, group_reid, reid
+from hidimstat.noise_std import empirical_snr, reid
 from hidimstat.scenario import (
     multivariate_1D_simulation,
     multivariate_temporal_simulation,
@@ -84,13 +85,13 @@
     )
 
     # max_iter=1 to get a better coverage
-    cov_hat, _ = group_reid(X, Y, tol=1e-3, max_iter=1)
+    cov_hat, _ = reid(X, Y, group=True, tol=1e-3, max_iter=1)
 
     error_ratio = cov_hat / cov
     assert_almost_equal(np.max(error_ratio), 1.0, decimal=0)
     assert_almost_equal(np.log(np.min(error_ratio)), 0.0, decimal=1)
 
-    cov_hat, _ = group_reid(X, Y, method="AR")
+    cov_hat, _ = reid(X, Y, group=True, method="AR")
 
     error_ratio = cov_hat / cov
     assert_almost_equal(np.max(error_ratio), 1.0, decimal=0)
@@ -110,25 +111,57 @@
         seed=1,
     )
 
-    cov_hat, _ = group_reid(X, Y)
+    cov_hat, _ = reid(X, Y, group=True)
 
     error_ratio = cov_hat / cov
     assert_almost_equal(np.max(error_ratio), 1.0, decimal=0)
     assert_almost_equal(np.log(np.min(error_ratio)), 0.0, decimal=1)
 
-    cov_hat, _ = group_reid(X, Y, fit_Y=False, stationary=False)
+    cov_hat, _ = reid(X, Y, group=True, fit_Y=False, stationary=False)
 
     error_ratio = cov_hat / cov
     assert_almost_equal(np.max(error_ratio), 1.0, decimal=0)
     assert_almost_equal(np.log(np.min(error_ratio)), 0.0, decimal=0)
 
-    cov_hat, _ = group_reid(X, Y, method="AR")
+    cov_hat, _ = reid(X, Y, group=True, method="AR")
 
     error_ratio = cov_hat / cov
     assert_almost_equal(np.max(error_ratio), 1.0, decimal=0)
     assert_almost_equal(np.log(np.min(error_ratio)), 0.0, decimal=1)
 
 
+def test_reid_exception():
+    "Test for testing the exceptions on the arguments of reid function"
+    n_samples, n_features = 50, 30
+    n_times = 10
+    sigma = 1.0
+    rho = 0.9
+
+    # First expe
+    # ##########
+    support_size = 2
+
+    X, y, beta, noise = multivariate_temporal_simulation(
+        n_samples=n_samples,
+        n_features=n_features,
+        n_times=n_times,
+        support_size=support_size,
+        sigma=sigma,
+        rho_noise=rho,
+    )
+
+    with pytest.raises(
+        ValueError, match="Unknown method for estimating the covariance matrix"
+    ):
+        _, _ = reid(X, y, method="test", group=True)
+    with pytest.raises(
+        ValueError, match="The AR method is not compatible with the non-stationary"
+    ):
+        _, _ = reid(X, y, method="AR", stationary=False, group=True)
+    with pytest.raises(ValueError, match="The requested AR order is to high with"):
+        _, _ = reid(X, y, method="AR", order=1e4, group=True)
+
+
 def test_empirical_snr():
     """Computing empirical signal to noise ratio from the target `y`,
     the data `X` and the true parameter vector `beta` in a simple