From a2bc35dda55f4276b3506c664c756e4d868f8f23 Mon Sep 17 00:00:00 2001 From: Tuomas Borman <60338854+TuomasBorman@users.noreply.github.com> Date: Fri, 17 Jan 2025 14:57:24 +0200 Subject: [PATCH] Add addPrevalence function (#667) --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ NEWS | 1 + R/AllGenerics.R | 5 +++ R/getPrevalence.R | 55 +++++++++++++++++++++++-------- man/getPrevalence.Rd | 13 +++++++- tests/testthat/test-5prevalence.R | 25 ++++++++++---- 7 files changed, 81 insertions(+), 22 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4edf2c8cf..6faae2775 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mia Type: Package -Version: 1.15.9 +Version: 1.15.10 Authors@R: c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = "tuomas.v.borman@utu.fi", diff --git a/NAMESPACE b/NAMESPACE index 7e510f3e3..aebee0c72 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export(addNMF) export(addNotContaminantQC) export(addPerSampleDominantFeatures) export(addPerSampleDominantTaxa) +export(addPrevalence) export(addRDA) export(addTaxonomyTree) export(agglomerateByPrevalence) @@ -181,6 +182,7 @@ exportMethods(addNotContaminantQC) exportMethods(addPERMANOVA) exportMethods(addPerSampleDominantFeatures) exportMethods(addPerSampleDominantTaxa) +exportMethods(addPrevalence) exportMethods(addRDA) exportMethods(addTaxonomyTree) exportMethods(agglomerateByPrevalence) diff --git a/NEWS b/NEWS index 6e12e235e..b773d3fac 100644 --- a/NEWS +++ b/NEWS @@ -163,3 +163,4 @@ Changes in version 1.15.x + Added getPERMANOVA and addPERMANOVA functions to calculate PERMANOVA + update.tree = TRUE by default + Add functions for classifying taxa based on their average abundance ++ Add addPrevalence function that adds results of getPrevalence to rowData diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 8084a15bb..20668220f 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -388,3 +388,8 @@ setGeneric("getAbundanceClass", signature = "x", function(x, ...) #' @export setGeneric("addAbundanceClass", signature = "x", function(x, ...) standardGeneric("addAbundanceClass")) + +#' @rdname getPrevalence +#' @export +setGeneric("addPrevalence", signature = "x", function(x, ...) + standardGeneric("addPrevalence")) diff --git a/R/getPrevalence.R b/R/getPrevalence.R index 2ebe74607..2177fd613 100644 --- a/R/getPrevalence.R +++ b/R/getPrevalence.R @@ -4,10 +4,10 @@ #' \code{\link{SummarizedExperiment-class}} object. #' #' @inheritParams getDissimilarity -#' +#' #' @param x #' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}. -#' +#' #' @param assay_name Deprecated. Use \code{assay.type} instead. #' #' @param detection \code{Numeric scalar}. Detection threshold for @@ -18,7 +18,7 @@ #' #' @param include.lowest \code{Logical scalar}. Should the lower boundary of the #' detection and prevalence cutoffs be included? (Default: \code{FALSE}) -#' +#' #' @param include_lowest Deprecated. Use \code{include.lowest} instead. #' #' @param sort \code{Logical scalar}. Should the result be sorted by prevalence? @@ -29,10 +29,14 @@ #' #' @param na.rm \code{Logical scalar}. Should NA values be omitted? #' (Default: \code{TRUE}) -#' +#' #' @param update.tree \code{Logical scalar}. Should #' \code{rowTree()} also be agglomerated? (Default: \code{TRUE}) #' +#' @param name \code{Character scalar}. Specifies name of column in +#' \code{rowData} where the results will be stored. +#' (Default: \code{"prevalence"}) +#' #' @param ... additional arguments #' \itemize{ #' \item If \code{!is.null(rank)} arguments are passed on to @@ -52,7 +56,7 @@ #' the detection threshold. For \code{SummarizedExperiment} objects, the #' prevalence is calculated for the selected taxonomic rank, otherwise for the #' rows. The absolute population prevalence can be obtained by multiplying the -#' prevalence by the number of samples (\code{ncol(x)}). +#' prevalence by the number of samples (\code{ncol(x)}). #' #' The core abundance index from \code{getPrevalentAbundance} gives the relative #' proportion of the core species (in between 0 and 1). The core taxa are @@ -72,7 +76,8 @@ #' \itemize{ #' \item \code{getPrevalence} returns a \code{numeric} vector with the #' names being set to either the row names of \code{x} or the names after -#' agglomeration. +#' agglomeration. \code{addPrevalence} adds these results to +#' \code{rowData(x)}. #' #' \item \code{getPrevalentAbundance} returns a \code{numeric} vector with #' the names corresponding to the column name of \code{x} and include the @@ -171,10 +176,32 @@ #' NULL +#' @rdname getPrevalence +#' @export +setMethod("addPrevalence", signature = c(x = "SummarizedExperiment"), + function(x, name = "prevalence", ...){ + if( !.is_a_string(name) ){ + stop("'name' must be a single character value.", call. = FALSE) + } + # Agglomerate data if specified + x <- .merge_features(x, ...) + # Sorting is disabled as it messes up the order of taxa. Moreover, we + # do not want to agglomerate the data again. + args <- c(list(x = x), list(...)) + args <- args[ !names(args) %in% c("sort", "rank") ] + # Calculate + res <- do.call(getPrevalence, args) + # Add results to rowData + res <- list(res) + x <- .add_values_to_colData(x, res, name, MARGIN = 1L) + return(x) + } +) + #' @rdname getPrevalence #' @export setMethod("getPrevalence", signature = c(x = "ANY"), function( - x, detection = 0, include.lowest = include_lowest, include_lowest = FALSE, + x, detection = 0, include.lowest = include_lowest, include_lowest = FALSE, sort = FALSE, na.rm = TRUE, ...){ # input check if (!.is_numeric_string(detection)) { @@ -313,7 +340,7 @@ NULL #' @rdname getPrevalence #' @export setMethod("getPrevalent", signature = c(x = "ANY"), - function(x, prevalence = 50/100, include.lowest = include_lowest, + function(x, prevalence = 50/100, include.lowest = include_lowest, include_lowest = FALSE, ...){ .get_prevalent_taxa(x, rank = NULL, prevalence = prevalence, include.lowest = include.lowest, ...) @@ -368,7 +395,7 @@ NULL #' @rdname getPrevalence #' @export setMethod("getRare", signature = c(x = "ANY"), - function(x, prevalence = 50/100, include.lowest = include_lowest, + function(x, prevalence = 50/100, include.lowest = include_lowest, include_lowest = FALSE, ...){ .get_rare_taxa(x, rank = NULL, prevalence = prevalence, include.lowest = include.lowest, ...) @@ -477,14 +504,14 @@ setMethod("getPrevalentAbundance", signature = c(x = "SummarizedExperiment"), ############################# agglomerateByPrevalence ########################## #' Agglomerate data based on population prevalence -#' +#' #' @name agglomerateByPrevalence -#' +#' #' @inheritParams agglomerateByRank -#' +#' #' @param other.name \code{Character scalar}. Used as the label for the #' summary of non-prevalent taxa. (default: \code{"Other"}) -#' +#' #' @param other_label Deprecated. use \code{other.name} instead. #' #' @details @@ -561,7 +588,7 @@ setMethod("agglomerateByPrevalence", signature = c(x = "SummarizedExperiment"), #' @rdname agglomerateByPrevalence #' @export -setMethod("agglomerateByPrevalence", +setMethod("agglomerateByPrevalence", signature = c(x = "TreeSummarizedExperiment"), function(x, rank = NULL, other.name = other_label, other_label = "Other", update.tree = TRUE, ...){ diff --git a/man/getPrevalence.Rd b/man/getPrevalence.Rd index 3d810df2a..7faa0342a 100644 --- a/man/getPrevalence.Rd +++ b/man/getPrevalence.Rd @@ -7,6 +7,8 @@ \alias{subsetByPrevalent} \alias{subsetByRare} \alias{getPrevalentAbundance} +\alias{addPrevalence} +\alias{addPrevalence,SummarizedExperiment-method} \alias{getPrevalence,ANY-method} \alias{getPrevalence,SummarizedExperiment-method} \alias{getPrevalent,ANY-method} @@ -38,6 +40,10 @@ getPrevalentAbundance( ... ) +addPrevalence(x, ...) + +\S4method{addPrevalence}{SummarizedExperiment}(x, name = "prevalence", ...) + \S4method{getPrevalence}{ANY}( x, detection = 0, @@ -129,6 +135,10 @@ calculation. (Default: \code{"counts"})} \item{assay_name}{Deprecated. Use \code{assay.type} instead.} +\item{name}{\code{Character scalar}. Specifies name of column in +\code{rowData} where the results will be stored. +(Default: \code{"prevalence"})} + \item{detection}{\code{Numeric scalar}. Detection threshold for absence/presence. If \code{as_relative = FALSE}, it sets the counts threshold for a taxon to be considered present. @@ -164,7 +174,8 @@ All other functions return a named vectors: \itemize{ \item \code{getPrevalence} returns a \code{numeric} vector with the names being set to either the row names of \code{x} or the names after -agglomeration. +agglomeration. \code{addPrevalence} adds these results to +\code{rowData(x)}. \item \code{getPrevalentAbundance} returns a \code{numeric} vector with the names corresponding to the column name of \code{x} and include the diff --git a/tests/testthat/test-5prevalence.R b/tests/testthat/test-5prevalence.R index b8172a490..1973191bf 100644 --- a/tests/testthat/test-5prevalence.R +++ b/tests/testthat/test-5prevalence.R @@ -117,6 +117,19 @@ test_that("getPrevalence", { res <- getPrevalence( tse, na.rm = TRUE, rank = "Genus", empty.rm = TRUE) expect_true( all(res == ref, na.rm = TRUE) ) + + # Test that results of addPrevalence equals to getPrevalence + # sort should be disabled + tse <- GlobalPatterns + tse2 <- addPrevalence(tse, sort = TRUE) + prev <- getPrevalence(tse, sort = FALSE) + expect_equal(rowData(tse2)[["prevalence"]], prev) + tse2 <- addPrevalence(tse, rank = "Family") + prev <- getPrevalence(tse, rank = "Family") + expect_equal(rowData(tse2)[["prevalence"]], prev) + expect_error(addPrevalence(tse, name = 1)) + expect_error(addPrevalence(tse, name = NULL)) + expect_error(addPrevalence(tse, name = c("asd", "test"))) }) @@ -310,7 +323,7 @@ test_that("subsetByPrevalent", { alias <- subsetByPrevalent(gp_null, detection=5, prevalence = 0.33, rank = "Phylum") alias <- unname(assay(alias, "counts")) expect_equal(alias, pr2) - + # Check that tree subsetting works expect_error(subsetByPrevalent(GlobalPatterns, update.tree = 1)) expect_error(subsetByPrevalent(GlobalPatterns, update.tree = NULL)) @@ -378,7 +391,7 @@ test_that("subsetByRare", { alias <- subsetByRare(gp_null, detection=5, prevalence = 0.33, rank = "Phylum") alias <- unname(assay(alias, "counts")) expect_equal(alias, pr2) - + # Check that tree subsetting works expect_error(subsetByRare(GlobalPatterns, update.tree = 1)) expect_error(subsetByRare(GlobalPatterns, update.tree = NULL)) @@ -440,12 +453,12 @@ test_that("agglomerateByPrevalence", { other_label = "test", update.tree = TRUE) expect_equal(length(rowTree(actual)$tip.label), length(rownames(actual))) - + # Load data from miaTime package skip_if_not(require("miaTime", quietly = TRUE)) data(SilvermanAGutData) se <- SilvermanAGutData - + # checking reference consensus sequence generation actual <- agglomerateByPrevalence(se,"Genus", update.refseq = FALSE) # There should be only one exact match for each sequence @@ -454,7 +467,7 @@ test_that("agglomerateByPrevalence", { expect_true(all(vapply( seqs_test, function(seq) sum(seqs_ref %in% seq) == 1, FUN.VALUE = logical(1) )) ) - + # Merging creates consensus sequences. th <- runif(1, 0, 1) actual <- agglomerateByPrevalence( @@ -470,7 +483,7 @@ test_that("agglomerateByPrevalence", { threshold = th) seqs_test <- seqs_test[ names(seqs_test) %in% feature ] expect_equal(seqs_test, seqs_ref) - + # checking reference consensus sequence generation using 'Genus:Alistipes' actual <- agglomerateByPrevalence(se,"Genus", update.refseq = FALSE) expect_equal(as.character(referenceSeq(actual)[["Alistipes"]]),