diff --git a/DESCRIPTION b/DESCRIPTION index 38c4c3c37..48466d6ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.15.5 +Version: 1.15.6 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", diff --git a/NAMESPACE b/NAMESPACE index 43c48df48..771585489 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -160,6 +160,7 @@ exportMethods(addLDA) exportMethods(addMediation) exportMethods(addNMF) exportMethods(addNotContaminantQC) +exportMethods(addPERMANOVA) exportMethods(addPerSampleDominantFeatures) exportMethods(addPerSampleDominantTaxa) exportMethods(addRDA) @@ -202,6 +203,7 @@ exportMethods(getLDA) exportMethods(getMediation) exportMethods(getNMDS) exportMethods(getNMF) +exportMethods(getPERMANOVA) exportMethods(getPrevalence) exportMethods(getPrevalent) exportMethods(getPrevalentAbundance) @@ -409,6 +411,7 @@ importFrom(utils,read.delim) importFrom(utils,read.table) importFrom(utils,unzip) importFrom(vegan,"sppscores<-") +importFrom(vegan,adonis2) importFrom(vegan,anova.cca) importFrom(vegan,avgdist) importFrom(vegan,betadisper) diff --git a/NEWS b/NEWS index 17936409b..951328dfd 100644 --- a/NEWS +++ b/NEWS @@ -160,3 +160,4 @@ computation Changes in version 1.15.x + subsetBy*: added update.tree argument + agglomerateBy*: Add na.rm option for excluding NA counts ++ Added getPERMANOVA and addPERMANOVA functions to calculate PERMANOVA diff --git a/R/addDivergence.R b/R/addDivergence.R index a3337c581..3e9f3a16d 100644 --- a/R/addDivergence.R +++ b/R/addDivergence.R @@ -13,7 +13,7 @@ #' (Default: \code{"median"}) #' #' @param ... optional arguments passed to -#' \code{\link[mia:addDissimilarity]{addDissimilarity}}. Additionally: +#' \code{\link[=addDissimilarity]{addDissimilarity}}. Additionally: #' \itemize{ #' \item \code{dimred}: \code{Character scalar}. Specifies the name of #' dimension reduction result from \code{reducedDim(x)}. If used, these @@ -36,8 +36,8 @@ #' #' @seealso #' \itemize{ -#' \item \code{\link[mia:addAlpha]{addAlpha}} -#' \item \code{\link[mia:addDissimilarity]{addDissimilarity}} +#' \item \code{\link[=addAlpha]{addAlpha}} +#' \item \code{\link[=addDissimilarity]{addDissimilarity}} #' \item \code{\link[scater:plotColData]{plotColData}} #' } #' diff --git a/R/estimateDiversity.R b/R/estimateDiversity.R index 445627284..6d9dfa5bb 100644 --- a/R/estimateDiversity.R +++ b/R/estimateDiversity.R @@ -164,9 +164,9 @@ #' @seealso #' \code{\link[scater:plotColData]{plotColData}} #' \itemize{ -#' \item \code{\link[mia:estimateRichness]{estimateRichness}} -#' \item \code{\link[mia:estimateEvenness]{estimateEvenness}} -#' \item \code{\link[mia:estimateDominance]{estimateDominance}} +#' \item \code{\link[=estimateRichness]{estimateRichness}} +#' \item \code{\link[=estimateEvenness]{estimateEvenness}} +#' \item \code{\link[=estimateDominance]{estimateDominance}} #' \item \code{\link[vegan:diversity]{diversity}} #' \item \code{\link[vegan:specpool]{estimateR}} #' } diff --git a/R/estimateDominance.R b/R/estimateDominance.R index dc789cebe..bc8a342ba 100644 --- a/R/estimateDominance.R +++ b/R/estimateDominance.R @@ -162,9 +162,9 @@ #' #' @seealso #' \itemize{ -#' \item \code{\link[mia:estimateRichness]{estimateRichness}} -#' \item \code{\link[mia:estimateEvenness]{estimateEvenness}} -#' \item \code{\link[mia:estimateDiversity]{estimateDiversity}} +#' \item \code{\link[=estimateRichness]{estimateRichness}} +#' \item \code{\link[=estimateEvenness]{estimateEvenness}} +#' \item \code{\link[=estimateDiversity]{estimateDiversity}} #' } #' #' @name .estimate_dominance diff --git a/R/estimateEvenness.R b/R/estimateEvenness.R index 1b9154749..cd542d021 100644 --- a/R/estimateEvenness.R +++ b/R/estimateEvenness.R @@ -99,9 +99,9 @@ #' @seealso #' \code{\link[scater:plotColData]{plotColData}} #' \itemize{ -#' \item{\code{\link[mia:estimateRichness]{estimateRichness}}} -#' \item{\code{\link[mia:estimateDominance]{estimateDominance}}} -#' \item{\code{\link[mia:estimateDiversity]{estimateDiversity}}} +#' \item{\code{\link[=estimateRichness]{estimateRichness}}} +#' \item{\code{\link[=estimateDominance]{estimateDominance}}} +#' \item{\code{\link[=estimateDiversity]{estimateDiversity}}} #' } #' #' @name .estimate_evenness diff --git a/R/getPERMANOVA.R b/R/getPERMANOVA.R new file mode 100644 index 000000000..d92c79602 --- /dev/null +++ b/R/getPERMANOVA.R @@ -0,0 +1,246 @@ +#' @name +#' getPERMANOVA +#' +#' @title +#' Calculate PERMANOVA (Permutational Multivariate Analysis of Variance) +#' +#' @description +#' These functions perform PERMANOVA to assess the significance of group +#' differences based on a specified dissimilarity matrix. The results can be +#' returned directly or added to metadata in an object of class +#' \code{TreeSummarizedExperiment}. +#' +#' @details +#' PERMANOVA is a non-parametric method used to test whether the centroids of +#' different groups (as defined by the formula or covariates) are significantly +#' different in terms of multivariate space. +#' +#' PERMANOVA relies on the assumption of group homogeneity, meaning the groups +#' should be distinct and have similar variances within each group. This +#' assumption is essential as PERMANOVA is sensitive to differences in +#' within-group dispersion, which can otherwise confound results. +#' This is why the functions return homogeneity test results by default. +#' +#' The functions utilize \code{\link[vegan:adonis2]{vegan::adonis2}} to compute +#' PERMANOVA. For homogeneity testing, +#' \code{\link[vegan:betadisper]{vegan::betadisper}} along +#' with \code{\link[vegan:permutest]{vegan::permutest}} are utilized by default, +#' which allow testing for equal dispersion across groups and validate the +#' homogeneity assumption. +#' +#' PERMANOVA and distance-based redundancy analysis (dbRDA) are closely related +#' methods for analyzing multivariate data. PERMANOVA is non-parametric, making +#' fewer assumptions about the data. In contrast, dbRDA assumes a linear +#' relationship when constructing the ordination space, although it also +#' employs a PERMANOVA-like approach to test the significance of predictors +#' within this space. dbRDA offers a broader scope overall, as it provides +#' visualization of the constrained ordination, which can reveal patterns and +#' relationships. However, when the underlying data structure is non-linear, +#' the results from the two methods can differ significantly due to dbRDA's +#' reliance on linear assumptions. +#' +#' @return +#' \code{getPERMANOVA} returns the PERMANOVA results or a list containing the +#' PERMANOVA results and homogeneity test results if +#' \code{test.homogeneity = TRUE}. \code{addPERMANOVA} adds these results to +#' metadata of \code{x}. +#' +#' @inheritParams runCCA +#' +#' @param name \code{Character scalar}. A name for the results that will be +#' stored to metadata. (Default: \code{"permanova"}) +#' +#' @param method \code{Character scalar}. A dissimilarity metric used in +#' PERMANOVA and group dispersion calculation. (Default: \code{"bray"}) +#' +#' @param test.homogeneity \code{Logical scalar}. Should the homogeneity of +#' group dispersions be evaluated? (Default: \code{TRUE}) +#' +#' @param ... additional arguments passed to \code{vegan::adonis2}. +#' \itemize{ +#' \item \code{by}: \code{Character scalar}. Specifies how significance is +#' calculated. (Default: \code{"margin"}) +#' +#' \item \code{na.action}: \code{function}. Action to take when missing +#' values for any of the variables in \code{formula} are encountered. +#' (Default: \code{na.fail}) +#' +#' \item \code{full} \code{Logical scalar}. should all the results from the +#' homogeneity calculations be returned. When \code{FALSE}, only +#' summary tables are returned. (Default: \code{FALSE}) +#' +#' \item \code{homogeneity.test}: \code{Character scalar}. Specifies +#' the significance test used to analyse +#' \code{\link[vegan:betadisper]{vegan::betadisper}} results. +#' Options include 'permanova' +#' (\code{\link[vegan:permutest]{vegan::permutest}}), 'anova' +#' (\code{\link[stats:anova]{stats::anova}}) and 'tukeyhsd' +#' (\code{\link[stats:TukeyHSD]{stats::TukeyHSD}}). +#' (Default: \code{"permanova"}) +#' } +#' +#' @examples +#' +#' data(GlobalPatterns) +#' tse <- GlobalPatterns +#' +#' # Apply relative transformation +#' tse <- transformAssay(tse, method = "relabundance") +#' # Perform PERMANOVA +#' tse <- addPERMANOVA( +#' tse, +#' assay.type = "relabundance", +#' method = "bray", +#' formula = x ~ SampleType, +#' permutations = 99 +#' ) +#' # The results are stored to metadata +#' metadata(tse)[["permanova"]] +#' +#' # Calculate dbRDA +#' rda_res <- getRDA( +#' tse, assay.type = "relabundance", method = "bray", +#' formula = x ~ SampleType, permutations = 99) +#' # Significance results are similar to PERMANOVA +#' attr(rda_res, "significance") +#' +#' @seealso +#' For more details on the actual implementation see +#' \code{\link[vegan:adonis2]{vegan::adonis2}}, +#' \code{\link[vegan:betadisper]{vegan::betadisper}}, and +#' \code{\link[vegan:permutest]{vegan::permutest}}. See also +#' \code{\link[=runCCA]{addCCA}} and \code{\link[=runCCA]{addRDA}} +#' +NULL + +#' @rdname getPERMANOVA +setGeneric("getPERMANOVA", signature = c("x"), function(x, ...) + standardGeneric("getPERMANOVA")) + +#' @rdname getPERMANOVA +setGeneric("addPERMANOVA", signature = c("x"), function(x, ...) + standardGeneric("addPERMANOVA")) + +#' @export +#' @rdname getPERMANOVA +setMethod("getPERMANOVA", "SingleCellExperiment", function(x, ...){ + # Get altexp if specified + x <- .check_and_get_altExp(x, ...) + res <- callNextMethod(x, ...) + return(res) + } +) + +#' @export +#' @rdname getPERMANOVA +setMethod("getPERMANOVA", "SummarizedExperiment", + function( + x, assay.type = "counts", formula = NULL, col.var = NULL, ...){ + ############################# Input check ############################## + # Assay must be present + .check_assay_present(assay.type, x) + # Formula must be either correctly specified or not specified + if( !(is.null(formula) || is(formula, "formula")) ){ + stop("'formula' must be formula or NULL.", call. = FALSE) + } + # User can also specify covariates with col.var + if( !(is.null(col.var) || (is.character(col.var) && + all(col.var %in% colnames(colData(x))))) ){ + stop("'col.var' must specify column from colData(x) or be NULL.", + call. = FALSE) + } + ########################### Input check end ############################ + # Get abundance table, formula and related sample metadata as DF + mat <- assay(x, assay.type) + temp <- .get_formula_and_covariates(x, formula, col.var) + formula <- temp[["formula"]] + covariates <- temp[["variables"]] + # Calculate PERMANOVA with matrix method + res <- getPERMANOVA(mat, formula = formula, data = covariates, ...) + return(res) + } +) + +#' @export +#' @rdname getPERMANOVA +setMethod("getPERMANOVA", "ANY", function( + x, formula, data, method = "bray", test.homogeneity = TRUE, ...){ + if( !is.matrix(x) ){ + stop("'x' must be matrix.", call. = FALSE) + } + if( !is(formula, "formula") ){ + stop("'formula' must be formula or NULL.", call. = FALSE) + } + if( !(is.data.frame(data) || is.matrix(data) || is(data, "DFrame")) ){ + stop("'data' must be data.frame or coarcible to one.", call. = FALSE) + } + if( ncol(x) != nrow(data) ){ + stop("Number of columns in 'x' should match with number of rows in ", + "'data'.", call. = FALSE) + } + if( !.is_a_string(method) ){ + stop("'method' must be a single character value.", call. = FALSE) + } + if( !.is_a_bool(test.homogeneity) ){ + stop("'test.homogeneity' must be TRUE or FALSE.", call. = FALSE) + } + # Calculate PERMANOVA + res <- .calculate_permanova( + x, formula = formula, data = data, method = method, ...) + # Test homogeneity + if( test.homogeneity ){ + homogeneity <- .calculate_homogeneity(x, data, method = method, ...) + res <- list(permanova = res, homogeneity = homogeneity) + } + return(res) +}) + +#' @export +#' @rdname getPERMANOVA +setMethod("addPERMANOVA", "SummarizedExperiment", + function(x, name = "permanova", ...){ + if( !.is_a_string(name) ){ + stop("'name' must be a single character value.", call. = FALSE) + } + # Calculate permanova + res <- getPERMANOVA(x, ...) + # Addresults to metadata + x <- .add_values_to_metadata(x, name, res) + return(x) + } +) + +################################ HELP FUNCTIONS ################################ + +# This function is internal function to perform PERMANOVA from abundance +# matrix, formula and sample metadata table. +#' @importFrom vegan adonis2 +.calculate_permanova <- function( + x, formula, data, by = "margin", na.action = na.fail, ...){ + # + if( !.is_a_string(by) ){ + stop("'by' must be a single character value.", call. = FALSE) + } + # + # Get abundance data into correct orientation. Samples must be in rows and + # features in columns. Also ensure that the abundance table is matrix. + x <- as.matrix(t(x)) + # Covariate table must be data.frame + data <- data.frame(data, check.names = FALSE) + # Instead of letting na.action pass through, give informative error + # about missing values. + if( any(is.na(data)) && isTRUE(all.equal(na.action, na.fail)) ){ + stop("Variables contain missing values. Set na.action to na.exclude ", + "to remove samples with missing values.", call. = FALSE) + } + # This next step ensures that the formula points correctly to abundance + # table + formula <- as.character(formula) + formula[[2]] <- "x" + formula <- as.formula( + paste(as.character(formula)[c(2,1,3)], collapse = " ")) + # Calculate PERMANOVA + res <- adonis2( + formula = formula, data = data, by = by, na.action = na.action, ...) + return(res) +} diff --git a/R/runCCA.R b/R/runCCA.R index 33aa1c432..e6400b1fb 100644 --- a/R/runCCA.R +++ b/R/runCCA.R @@ -21,7 +21,7 @@ #' \code{SummarizedExperiment},\code{col.var} can be used to specify variables #' from \code{colData}. #' -#' @param variables Deprecated. Use \code{"col.var"} instead. +#' @param variables Deprecated. Use \code{col.var} instead. #' #' @param test.signif \code{Logical scalar}. Should the PERMANOVA and analysis #' of multivariate homogeneity of group dispersions be performed. @@ -30,34 +30,41 @@ #' @param altexp \code{Character scalar} or \code{integer scalar}. Specifies an #' alternative experiment containing the input data. #' -#' @param name \code{Character scalar}. A name for the column of the -#' \code{colData} where results will be stored. (Default: \code{"CCA"}) +#' @param name \code{Character scalar}. A name for the \code{reducedDim()} +#' where results will be stored. (Default: \code{"CCA"}) #' #' @param exprs_values Deprecated. Use \code{assay.type} instead. #' #' @param ... additional arguments passed to vegan::cca or vegan::dbrda and #' other internal functions. #' \itemize{ -#' \item{\code{method} a dissimilarity measure to be applied in dbRDA and -#' possible following homogeneity test. (By default: -#' \code{method="euclidean"})} -#' \item{\code{scale}: \code{Logical scalar}. Should the expression values be +#' \item \code{method} a dissimilarity measure to be applied in dbRDA and +#' possible following homogeneity test. (Default: \code{"euclidean"}) +#' +#' \item \code{scale}: \code{Logical scalar}. Should the expression values be #' standardized? \code{scale} is disabled when using \code{*RDA} functions. -#' Please scale before performing RDA. (Default: \code{TRUE})} -#' \item{\code{na.action}: \code{function}. Action to take when missing +#' Please scale before performing RDA. (Default: \code{TRUE}) +#' +#' \item \code{na.action}: \code{function}. Action to take when missing #' values for any of the variables in \code{formula} are encountered. -#' (Default: \code{na.fail})} -#' \item{\code{full} \code{Logical scalar}. should all the results from the -#' significance calculations be returned. When \code{full=FALSE}, only -#' summary tables are returned. (Default: \code{FALSE})} -#' \item{\code{homogeneity.test}: \code{Character scalar}. Specifies -#' the significance test used to analyse \code{vegan::betadisper} results. -#' Options include 'permanova' (\code{vegan::permutest}), 'anova' -#' (\code{stats::anova}) and 'tukeyhsd' (\code{stats::TukeyHSD}). -#' (By default: \code{homogeneity.test="permanova"})} -#' \item{\code{permutations} a numeric value specifying the number of +#' (Default: \code{na.fail}) +#' +#' \item \code{full} \code{Logical scalar}. Should all the results from the +#' significance calculations be returned. When \code{FALSE}, only +#' summary tables are returned. (Default: \code{FALSE}) +#' +#' \item \code{homogeneity.test}: \code{Character scalar}. Specifies +#' the significance test used to analyse +#' \code{\link[vegan:betadisper]{vegan::betadisper}} results. +#' Options include 'permanova' +#' (\code{\link[vegan:permutest]{vegan::permutest}}), 'anova' +#' (\code{\link[stats:anova]{stats::anova}}) and 'tukeyhsd' +#' (\code{\link[stats:TukeyHSD]{stats::TukeyHSD}}). +#' (Default: \code{"permanova"}) +#' +#' \item \code{permutations} a numeric value specifying the number of #' permutations for significance testing in \code{vegan::anova.cca}. -#' (By default: \code{permutations=999})} +#' (Default: \code{999}) #' } #' #' @details @@ -68,7 +75,8 @@ #' #' Significance tests are done with \code{vegan:anova.cca} (PERMANOVA). Group #' dispersion, i.e., homogeneity within groups is analyzed with -#' \code{vegan:betadisper} (multivariate homogeneity of groups dispersions +#' \code{\link[vegan:betadisper]{vegan::betadisper}} +#' (multivariate homogeneity of groups dispersions #' (variances)) and statistical significance of homogeneity is tested with a #' test specified by \code{homogeneity.test} parameter. #' @@ -566,22 +574,13 @@ setMethod("addRDA", "SingleCellExperiment", } # Test association of variables to ordination -#' @importFrom vegan anova.cca vegdist betadisper +#' @importFrom vegan anova.cca .test_rda_vars <- function( - mat, rda, variables, permanova_model, by = "margin", full = FALSE, - homogeneity.test = "permanova", method = distance, - distance = "euclidean", ...){ + mat, rda, variables, permanova_model, by = "margin", full = FALSE, ...){ # Check full parameter if( !.is_a_bool(full) ){ stop("'full' must be TRUE or FALSE.", call. = FALSE) } - # Check homogeneity.test - if( !(is.character(homogeneity.test) && - length(homogeneity.test) == 1 && - homogeneity.test %in% c("permanova", "anova", "tukeyhsd")) ){ - stop("'homogeneity.test' must be 'permanova', 'anova', or 'tukeyhsd'.", - call. = FALSE) - } # # Perform PERMANOVA permanova <- anova.cca(rda, by = by, ...) @@ -597,17 +596,50 @@ setMethod("addRDA", "SingleCellExperiment", # Add info about explained variance permanova_tab[ , "Explained variance"] <- permanova_tab[ , 2] / permanova_tab[ , "Total variance"] - + # Perform homogeneity analysis - mat <- t(mat) - # Get the dissimilarity matrix based on original dissimilarity index - # provided by user. If the analysis is CCA, disable method; calculate - # always euclidean distances because CCA is based on Euclidean distances. - if( length(class(rda)) == 1 && is(rda, 'cca') ){ - dist_mat <- vegdist(mat, method = "euclidean") - } else{ - dist_mat <- vegdist(mat, method = method, ...) + homogeneity <- .calculate_homogeneity(mat, variables, full = full, ...) + + # Return whole data or just a tables + permanova_res <- permanova_tab + if( full ){ + permanova_res <- list( + summary = permanova_tab, + model = permanova_model, + variables = permanova) + } + res <- list(permanova = permanova_res, homogeneity = homogeneity) + return(res) +} + +#' @importFrom vegan vegdist betadisper +.calculate_homogeneity <- function( + mat, variables, method = distance, distance = "euclidean", + homogeneity.test = "permanova", full = FALSE, ...){ + # Check homogeneity.test + if( !(.is_a_string(homogeneity.test) && + homogeneity.test %in% c("permanova", "anova", "tukeyhsd")) ){ + stop("'homogeneity.test' must be 'permanova', 'anova', or 'tukeyhsd'.", + call. = FALSE) + } + # Check full parameter + if( !.is_a_bool(full) ){ + stop("'full' must be TRUE or FALSE.", call. = FALSE) } + # Check that there are more than one group per variable. If there are only + # one group, we cannot calculate homogeneity for those variables. + cols <- vapply( + variables, function(x) length(unique(x))>1, FUN.VALUE = logical(1)) + if( any(!cols) ){ + warning("The following variables contain only one group. Cannot ", + "calculate homogeneity for them: '", + paste0(names(cols)[!cols], collapse = "','"), "'", call. = FALSE) + variables <- variables[, cols, drop = FALSE] + } + # + # Calculate dissimilarity matrix + mat <- t(mat) + diss_mat <- vegdist(mat, method = method, ...) # For all variables run the analysis homogeneity <- lapply(colnames(variables), function(x){ # Get variable values @@ -617,9 +649,9 @@ setMethod("addRDA", "SingleCellExperiment", # changed to zero" Suppress possible messages: # "missing observations due to 'group' removed" suppressWarnings( - suppressMessages( - betadisper_res <- betadisper(dist_mat, group = var) - ) + suppressMessages( + betadisper_res <- betadisper(diss_mat, group = var) + ) ) # Run significance test significance <- .homogeneity_significance( @@ -640,23 +672,10 @@ setMethod("addRDA", "SingleCellExperiment", # Get tables homogeneity_tab <- lapply(homogeneity, function(x) x[["table"]]) # Combine tables - homogeneity_tab <- do.call(rbind, homogeneity_tab) - - # Return whole data or just a tables + res <- do.call(rbind, homogeneity_tab) + # Return either only summary table or whole result including fitterd models if( full ){ - res <- list( - permanova = list( - summary = permanova_tab, - model = permanova_model, - variables = permanova), - homogeneity = list( - summary = homogeneity_tab, - variables = homogeneity_model) - ) - } else{ - res <- list( - permanova = permanova_tab, - homogeneity = homogeneity_tab) + res <- list(summary = res, variables = homogeneity_model) } return(res) } @@ -666,14 +685,12 @@ setMethod("addRDA", "SingleCellExperiment", #' @importFrom vegan permutest .homogeneity_significance <- function(betadisper_res, homogeneity.test, ...){ # Run specified significance test - if( homogeneity.test == "anova" ){ - res <- anova(betadisper_res, ...) - } else if ( homogeneity.test == "tukeyhsd" ){ - res <- TukeyHSD(betadisper_res, ...) - } else{ - res <- permutest(betadisper_res, ...) - } - + FUN <- switch(homogeneity.test, + "permanova" = permutest, + "anova" = anova, + "tukeyhsd" = TukeyHSD + ) + res <- do.call(FUN, c(list(betadisper_res), ...)) # Get summary table from the results if( homogeneity.test == "anova" ){ tab <- as.data.frame(res) @@ -682,7 +699,6 @@ setMethod("addRDA", "SingleCellExperiment", } else{ tab <- res[["tab"]] } - # Modify permanova/anova table if( homogeneity.test != "tukeyhsd" ){ # Add info about total variance @@ -694,7 +710,6 @@ setMethod("addRDA", "SingleCellExperiment", # Get only groups row (drop residuals row) tab <- tab[1, ] } - # Create list from the object and results res <- list( obj = res, diff --git a/man/addDivergence.Rd b/man/addDivergence.Rd index 82ebfbf98..714bf0a7a 100644 --- a/man/addDivergence.Rd +++ b/man/addDivergence.Rd @@ -36,7 +36,7 @@ getDivergence( in metadata of the output. (Default: \code{method})} \item{...}{optional arguments passed to -\code{\link[mia:addDissimilarity]{addDissimilarity}}. Additionally: +\code{\link[=addDissimilarity]{addDissimilarity}}. Additionally: \itemize{ \item \code{dimred}: \code{Character scalar}. Specifies the name of dimension reduction result from \code{reducedDim(x)}. If used, these @@ -98,8 +98,8 @@ colData(tse) } \seealso{ \itemize{ -\item \code{\link[mia:addAlpha]{addAlpha}} -\item \code{\link[mia:addDissimilarity]{addDissimilarity}} +\item \code{\link[=addAlpha]{addAlpha}} +\item \code{\link[=addDissimilarity]{addDissimilarity}} \item \code{\link[scater:plotColData]{plotColData}} } } diff --git a/man/getPERMANOVA.Rd b/man/getPERMANOVA.Rd new file mode 100644 index 000000000..3544e58c6 --- /dev/null +++ b/man/getPERMANOVA.Rd @@ -0,0 +1,150 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/getPERMANOVA.R +\name{getPERMANOVA} +\alias{getPERMANOVA} +\alias{addPERMANOVA} +\alias{getPERMANOVA,SingleCellExperiment-method} +\alias{getPERMANOVA,SummarizedExperiment-method} +\alias{getPERMANOVA,ANY-method} +\alias{addPERMANOVA,SummarizedExperiment-method} +\title{Calculate PERMANOVA (Permutational Multivariate Analysis of Variance)} +\usage{ +getPERMANOVA(x, ...) + +addPERMANOVA(x, ...) + +\S4method{getPERMANOVA}{SingleCellExperiment}(x, ...) + +\S4method{getPERMANOVA}{SummarizedExperiment}(x, assay.type = "counts", formula = NULL, col.var = NULL, ...) + +\S4method{getPERMANOVA}{ANY}(x, formula, data, method = "bray", test.homogeneity = TRUE, ...) + +\S4method{addPERMANOVA}{SummarizedExperiment}(x, name = "permanova", ...) +} +\arguments{ +\item{x}{\code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.} + +\item{...}{additional arguments passed to \code{vegan::adonis2}. +\itemize{ +\item \code{by}: \code{Character scalar}. Specifies how significance is +calculated. (Default: \code{"margin"}) + +\item \code{na.action}: \code{function}. Action to take when missing +values for any of the variables in \code{formula} are encountered. +(Default: \code{na.fail}) + +\item \code{full} \code{Logical scalar}. should all the results from the +homogeneity calculations be returned. When \code{FALSE}, only +summary tables are returned. (Default: \code{FALSE}) + +\item \code{homogeneity.test}: \code{Character scalar}. Specifies +the significance test used to analyse +\code{\link[vegan:betadisper]{vegan::betadisper}} results. +Options include 'permanova' +(\code{\link[vegan:permutest]{vegan::permutest}}), 'anova' +(\code{\link[stats:anova]{stats::anova}}) and 'tukeyhsd' +(\code{\link[stats:TukeyHSD]{stats::TukeyHSD}}). +(Default: \code{"permanova"}) +}} + +\item{assay.type}{\code{Character scalar}. Specifies which assay to use for +calculation. (Default: \code{"counts"})} + +\item{formula}{\code{formula}. If \code{x} is a +\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} +a formula can be supplied. Based on the right-hand side of the given formula +\code{colData} is subset to \code{col.var}. + +\code{col.var} and \code{formula} can be missing, which turns the CCA +analysis into a CA analysis and dbRDA into PCoA/MDS.} + +\item{col.var}{\code{Character scalar}. When \code{x} is a +\code{SummarizedExperiment},\code{col.var} can be used to specify variables +from \code{colData}.} + +\item{data}{\code{data.frame} or coarcible to one. The covariance table +including covariates defined by \code{formula}.} + +\item{method}{\code{Character scalar}. A dissimilarity metric used in +PERMANOVA and group dispersion calculation. (Default: \code{"bray"})} + +\item{test.homogeneity}{\code{Logical scalar}. Should the homogeneity of +group dispersions be evaluated? (Default: \code{TRUE})} + +\item{name}{\code{Character scalar}. A name for the results that will be +stored to metadata. (Default: \code{"permanova"})} +} +\value{ +\code{getPERMANOVA} returns the PERMANOVA results or a list containing the +PERMANOVA results and homogeneity test results if +\code{test.homogeneity = TRUE}. \code{addPERMANOVA} adds these results to +metadata of \code{x}. +} +\description{ +These functions perform PERMANOVA to assess the significance of group +differences based on a specified dissimilarity matrix. The results can be +returned directly or added to metadata in an object of class +\code{TreeSummarizedExperiment}. +} +\details{ +PERMANOVA is a non-parametric method used to test whether the centroids of +different groups (as defined by the formula or covariates) are significantly +different in terms of multivariate space. + +PERMANOVA relies on the assumption of group homogeneity, meaning the groups +should be distinct and have similar variances within each group. This +assumption is essential as PERMANOVA is sensitive to differences in +within-group dispersion, which can otherwise confound results. +This is why the functions return homogeneity test results by default. + +The functions utilize \code{\link[vegan:adonis2]{vegan::adonis2}} to compute +PERMANOVA. For homogeneity testing, +\code{\link[vegan:betadisper]{vegan::betadisper}} along +with \code{\link[vegan:permutest]{vegan::permutest}} are utilized by default, +which allow testing for equal dispersion across groups and validate the +homogeneity assumption. + +PERMANOVA and distance-based redundancy analysis (dbRDA) are closely related +methods for analyzing multivariate data. PERMANOVA is non-parametric, making +fewer assumptions about the data. In contrast, dbRDA assumes a linear +relationship when constructing the ordination space, although it also +employs a PERMANOVA-like approach to test the significance of predictors +within this space. dbRDA offers a broader scope overall, as it provides +visualization of the constrained ordination, which can reveal patterns and +relationships. However, when the underlying data structure is non-linear, +the results from the two methods can differ significantly due to dbRDA's +reliance on linear assumptions. +} +\examples{ + +data(GlobalPatterns) +tse <- GlobalPatterns + +# Apply relative transformation +tse <- transformAssay(tse, method = "relabundance") +# Perform PERMANOVA +tse <- addPERMANOVA( + tse, + assay.type = "relabundance", + method = "bray", + formula = x ~ SampleType, + permutations = 99 + ) +# The results are stored to metadata +metadata(tse)[["permanova"]] + +# Calculate dbRDA +rda_res <- getRDA( + tse, assay.type = "relabundance", method = "bray", + formula = x ~ SampleType, permutations = 99) +# Significance results are similar to PERMANOVA +attr(rda_res, "significance") + +} +\seealso{ +For more details on the actual implementation see +\code{\link[vegan:adonis2]{vegan::adonis2}}, +\code{\link[vegan:betadisper]{vegan::betadisper}}, and +\code{\link[vegan:permutest]{vegan::permutest}}. See also +\code{\link[=runCCA]{addCCA}} and \code{\link[=runCCA]{addRDA}} +} diff --git a/man/runCCA.Rd b/man/runCCA.Rd index 18b61b488..1fd0057f2 100644 --- a/man/runCCA.Rd +++ b/man/runCCA.Rd @@ -71,26 +71,33 @@ runRDA(x, ...) \item{...}{additional arguments passed to vegan::cca or vegan::dbrda and other internal functions. \itemize{ -\item{\code{method} a dissimilarity measure to be applied in dbRDA and -possible following homogeneity test. (By default: -\code{method="euclidean"})} -\item{\code{scale}: \code{Logical scalar}. Should the expression values be +\item \code{method} a dissimilarity measure to be applied in dbRDA and +possible following homogeneity test. (Default: \code{"euclidean"}) + +\item \code{scale}: \code{Logical scalar}. Should the expression values be standardized? \code{scale} is disabled when using \code{*RDA} functions. -Please scale before performing RDA. (Default: \code{TRUE})} -\item{\code{na.action}: \code{function}. Action to take when missing +Please scale before performing RDA. (Default: \code{TRUE}) + +\item \code{na.action}: \code{function}. Action to take when missing values for any of the variables in \code{formula} are encountered. -(Default: \code{na.fail})} -\item{\code{full} \code{Logical scalar}. should all the results from the -significance calculations be returned. When \code{full=FALSE}, only -summary tables are returned. (Default: \code{FALSE})} -\item{\code{homogeneity.test}: \code{Character scalar}. Specifies -the significance test used to analyse \code{vegan::betadisper} results. -Options include 'permanova' (\code{vegan::permutest}), 'anova' -(\code{stats::anova}) and 'tukeyhsd' (\code{stats::TukeyHSD}). -(By default: \code{homogeneity.test="permanova"})} -\item{\code{permutations} a numeric value specifying the number of +(Default: \code{na.fail}) + +\item \code{full} \code{Logical scalar}. Should all the results from the +significance calculations be returned. When \code{FALSE}, only +summary tables are returned. (Default: \code{FALSE}) + +\item \code{homogeneity.test}: \code{Character scalar}. Specifies +the significance test used to analyse +\code{\link[vegan:betadisper]{vegan::betadisper}} results. +Options include 'permanova' +(\code{\link[vegan:permutest]{vegan::permutest}}), 'anova' +(\code{\link[stats:anova]{stats::anova}}) and 'tukeyhsd' +(\code{\link[stats:TukeyHSD]{stats::TukeyHSD}}). +(Default: \code{"permanova"}) + +\item \code{permutations} a numeric value specifying the number of permutations for significance testing in \code{vegan::anova.cca}. -(By default: \code{permutations=999})} +(Default: \code{999}) }} \item{formula}{\code{formula}. If \code{x} is a @@ -108,7 +115,7 @@ including covariates defined by \code{formula}.} \code{SummarizedExperiment},\code{col.var} can be used to specify variables from \code{colData}.} -\item{variables}{Deprecated. Use \code{"col.var"} instead.} +\item{variables}{Deprecated. Use \code{col.var} instead.} \item{test.signif}{\code{Logical scalar}. Should the PERMANOVA and analysis of multivariate homogeneity of group dispersions be performed. @@ -124,8 +131,8 @@ calculation. (Default: \code{"counts"})} \item{altexp}{\code{Character scalar} or \code{integer scalar}. Specifies an alternative experiment containing the input data.} -\item{name}{\code{Character scalar}. A name for the column of the -\code{colData} where results will be stored. (Default: \code{"CCA"})} +\item{name}{\code{Character scalar}. A name for the \code{reducedDim()} +where results will be stored. (Default: \code{"CCA"})} } \value{ For \code{getCCA} a matrix with samples as rows and CCA dimensions as @@ -148,7 +155,8 @@ which turns the CCA analysis into a CA analysis and dbRDA into PCoA/MDS. Significance tests are done with \code{vegan:anova.cca} (PERMANOVA). Group dispersion, i.e., homogeneity within groups is analyzed with -\code{vegan:betadisper} (multivariate homogeneity of groups dispersions +\code{\link[vegan:betadisper]{vegan::betadisper}} +(multivariate homogeneity of groups dispersions (variances)) and statistical significance of homogeneity is tested with a test specified by \code{homogeneity.test} parameter. } diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 9cdb3d10c..c5521f7b5 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -4,6 +4,7 @@ reference: - contents: - getCrossAssociation - getMediation + - getPERMANOVA - title: Clustering - contents: diff --git a/tests/testthat/test-getPERMANOVA.R b/tests/testthat/test-getPERMANOVA.R new file mode 100644 index 000000000..0e586264d --- /dev/null +++ b/tests/testthat/test-getPERMANOVA.R @@ -0,0 +1,211 @@ +context("PERMANOVA") +# Sample data setup +data(GlobalPatterns, package = "mia") +tse <- GlobalPatterns +tse <- transformAssay(tse, method = "relabundance") + +test_that("getPERMANOVA works on SummarizedExperiment", { + # Test basic PERMANOVA without homogeneity check + res <- getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + test.homogeneity = FALSE, + permutations = 99 + ) + expect_s3_class(res, "anova.cca") + + # Test PERMANOVA with homogeneity check enabled + res <- getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + test.homogeneity = TRUE, + permutations = 99 + ) + expect_type(res, "list") + expect_true("permanova" %in% names(res)) + expect_true("homogeneity" %in% names(res)) + expect_s3_class(res$permanova, "anova.cca") + expect_s3_class(res$homogeneity, "data.frame") + + # Test full results with nested structure validation for detailed outputs + res <- getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + test.homogeneity = TRUE, + full = TRUE, + permutations = 99 + ) + expect_type(res, "list") + expect_s3_class(res[[2]][[2]][[1]][[1]], "betadisper") + expect_s3_class(res[[2]][[2]][[1]][[2]], "permutest.betadisper") +}) + +test_that("addPERMANOVA stores results in metadata", { + # Test that addPERMANOVA saves results in metadata as expected + tse_with_meta <- addPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + permutations = 99, + name = "permanova_test" + ) + metadata_res <- metadata(tse_with_meta)[["permanova_test"]] + expect_type(metadata_res, "list") + expect_true("permanova" %in% names(metadata_res)) + expect_true("homogeneity" %in% names(metadata_res)) + expect_s3_class(metadata_res$permanova, "anova.cca") +}) + +test_that("getPERMANOVA input validations", { + # Check error handling for a nonexistent assay type + expect_error( + getPERMANOVA(tse, assay.type = "nonexistent", formula = x ~ SampleType) + ) + # Check that an incorrect formula type raises an error + expect_error( + getPERMANOVA(tse, assay.type = "relabundance", formula = "SampleType"), + "'formula' must be formula or NULL." + ) + # Check that dimension mismatches between matrix and covariate data raise an error + expect_error( + getPERMANOVA( + assay(tse[, 1:10]), formula = x ~ SampleType, data = colData(tse) + ), + "Number of columns in 'x' should match with number of rows in 'data'" + ) + # Check that invalid homogeneity test settings raise an error + expect_error( + getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + test.homogeneity = "invalid" + ), + "'test.homogeneity' must be TRUE or FALSE" + ) + + # Invalid assay type (non-existent type) + expect_error( + getPERMANOVA(tse, assay.type = "nonexistent", formula = x ~ SampleType) + ) + + # Invalid variable + expect_error( + getPERMANOVA(tse, assay.type = "relabundance", col.var = "invalid") + ) + + # Incorrect permutations + expect_error( + getPERMANOVA( + tse, assay.type = "relabundance", formula = x ~ SampleType, + permutations = -10 + ) + ) + + # Incorrect 'by' parameter + expect_error( + getPERMANOVA( + tse, assay.type = "relabundance", formula = x ~ SampleType, + by = "invalid_option" + ) + ) + + # Missing or incorrect 'homogeneity.test' option + expect_error( + getPERMANOVA( + tse, assay.type = "relabundance", formula = x ~ SampleType, + homogeneity.test = "unsupported_test" + ) + ) +}) + +test_that("getPERMANOVA works on SingleCellExperiment", { + # Convert tse to SingleCellExperiment format and test getPERMANOVA functionality + sce <- as(tse, "SingleCellExperiment") + res <- getPERMANOVA( + sce, assay.type = "relabundance", + formula = x ~ SampleType, + permutations = 99 + ) + expect_s3_class(res$permanova, "anova.cca") + expect_s3_class(res$homogeneity, "data.frame") +}) + +test_that("getPERMANOVA 'by' and 'homogeneity.test' options", { + # Test 'by' parameter with 'terms' for PERMANOVA + res_by_terms <- getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, by = "terms" + ) + expect_s3_class(res_by_terms$permanova, "anova.cca") + + # Test homogeneity test with ANOVA option + res_anova <- getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + homogeneity.test = "anova" + ) + expect_s3_class(res_anova$homogeneity, "data.frame") + + # Test homogeneity test with Tukey HSD and full results + res_tukey <- getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + homogeneity.test = "tukeyhsd", + full = TRUE + ) + expect_s3_class(res_tukey[[2]][[2]][[1]][[1]], "betadisper") + expect_s3_class(res_tukey[[2]][[2]][[1]][[2]], "TukeyHSD") +}) + +test_that("getPERMANOVA handles edge cases", { + # Test handling of a missing formula (default behavior) + expect_no_error(getPERMANOVA(tse, assay.type = "relabundance")) + + # Test for error when permutations count is zero + expect_error( + getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, permutations = 0 + ) + ) + + # Test for error when input matrix is NULL + expect_error(getPERMANOVA(NULL, formula = x ~ SampleType)) + + # Test for warning when only one level exists in the factor variable + tse_subset <- tse[, tse$SampleType == "Soil"] + expect_warning( + getPERMANOVA( + tse_subset, assay.type = "relabundance", + formula = x ~ SampleType + ) + ) +}) + +test_that("getPERMANOVA matches direct calculations", { + # Perform direct calculations with vegan package for comparison + permanova_direct <- vegan::adonis2( + t(assay(tse, "relabundance")) ~ SampleType, + data = colData(tse), + permutations = 99 + ) + homogeneity_direct <- vegan::betadisper( + vegdist(t(assay(tse, "relabundance"))), + group = tse$SampleType + ) + + # Run the getPERMANOVA function and compare results + res <- getPERMANOVA( + tse, assay.type = "relabundance", + formula = x ~ SampleType, + test.homogeneity = TRUE, + full = TRUE, + permutations = 99 + ) + + # Verify permanova results match + expect_equal(res$permanova$aov.tab, permanova_direct$aov.tab) + + # Verify homogeneity results match + expect_equal( + res[[2]][[2]][[1]][[1]]$distances, homogeneity_direct$distances) +})