Skip to content

Commit

Permalink
Add addPrevalence function (#667)
Browse files Browse the repository at this point in the history
  • Loading branch information
TuomasBorman authored Jan 17, 2025
1 parent 8c658c9 commit a2bc35d
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 22 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mia
Type: Package
Version: 1.15.9
Version: 1.15.10
Authors@R:
c(person(given = "Tuomas", family = "Borman", role = c("aut", "cre"),
email = "[email protected]",
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(addNMF)
export(addNotContaminantQC)
export(addPerSampleDominantFeatures)
export(addPerSampleDominantTaxa)
export(addPrevalence)
export(addRDA)
export(addTaxonomyTree)
export(agglomerateByPrevalence)
Expand Down Expand Up @@ -181,6 +182,7 @@ exportMethods(addNotContaminantQC)
exportMethods(addPERMANOVA)
exportMethods(addPerSampleDominantFeatures)
exportMethods(addPerSampleDominantTaxa)
exportMethods(addPrevalence)
exportMethods(addRDA)
exportMethods(addTaxonomyTree)
exportMethods(agglomerateByPrevalence)
Expand Down
1 change: 1 addition & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,4 @@ Changes in version 1.15.x
+ Added getPERMANOVA and addPERMANOVA functions to calculate PERMANOVA
+ update.tree = TRUE by default
+ Add functions for classifying taxa based on their average abundance
+ Add addPrevalence function that adds results of getPrevalence to rowData
5 changes: 5 additions & 0 deletions R/AllGenerics.R
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,8 @@ setGeneric("getAbundanceClass", signature = "x", function(x, ...)
#' @export
setGeneric("addAbundanceClass", signature = "x", function(x, ...)
standardGeneric("addAbundanceClass"))

#' @rdname getPrevalence
#' @export
setGeneric("addPrevalence", signature = "x", function(x, ...)
standardGeneric("addPrevalence"))
55 changes: 41 additions & 14 deletions R/getPrevalence.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
#' \code{\link{SummarizedExperiment-class}} object.
#'
#' @inheritParams getDissimilarity
#'
#'
#' @param x
#' \code{\link[TreeSummarizedExperiment:TreeSummarizedExperiment-class]{TreeSummarizedExperiment}}.
#'
#'
#' @param assay_name Deprecated. Use \code{assay.type} instead.
#'
#' @param detection \code{Numeric scalar}. Detection threshold for
Expand All @@ -18,7 +18,7 @@
#'
#' @param include.lowest \code{Logical scalar}. Should the lower boundary of the
#' detection and prevalence cutoffs be included? (Default: \code{FALSE})
#'
#'
#' @param include_lowest Deprecated. Use \code{include.lowest} instead.
#'
#' @param sort \code{Logical scalar}. Should the result be sorted by prevalence?
Expand All @@ -29,10 +29,14 @@
#'
#' @param na.rm \code{Logical scalar}. Should NA values be omitted?
#' (Default: \code{TRUE})
#'
#'
#' @param update.tree \code{Logical scalar}. Should
#' \code{rowTree()} also be agglomerated? (Default: \code{TRUE})
#'
#' @param name \code{Character scalar}. Specifies name of column in
#' \code{rowData} where the results will be stored.
#' (Default: \code{"prevalence"})
#'
#' @param ... additional arguments
#' \itemize{
#' \item If \code{!is.null(rank)} arguments are passed on to
Expand All @@ -52,7 +56,7 @@
#' the detection threshold. For \code{SummarizedExperiment} objects, the
#' prevalence is calculated for the selected taxonomic rank, otherwise for the
#' rows. The absolute population prevalence can be obtained by multiplying the
#' prevalence by the number of samples (\code{ncol(x)}).
#' prevalence by the number of samples (\code{ncol(x)}).
#'
#' The core abundance index from \code{getPrevalentAbundance} gives the relative
#' proportion of the core species (in between 0 and 1). The core taxa are
Expand All @@ -72,7 +76,8 @@
#' \itemize{
#' \item \code{getPrevalence} returns a \code{numeric} vector with the
#' names being set to either the row names of \code{x} or the names after
#' agglomeration.
#' agglomeration. \code{addPrevalence} adds these results to
#' \code{rowData(x)}.
#'
#' \item \code{getPrevalentAbundance} returns a \code{numeric} vector with
#' the names corresponding to the column name of \code{x} and include the
Expand Down Expand Up @@ -171,10 +176,32 @@
#'
NULL

#' @rdname getPrevalence
#' @export
setMethod("addPrevalence", signature = c(x = "SummarizedExperiment"),
function(x, name = "prevalence", ...){
if( !.is_a_string(name) ){
stop("'name' must be a single character value.", call. = FALSE)
}
# Agglomerate data if specified
x <- .merge_features(x, ...)
# Sorting is disabled as it messes up the order of taxa. Moreover, we
# do not want to agglomerate the data again.
args <- c(list(x = x), list(...))
args <- args[ !names(args) %in% c("sort", "rank") ]
# Calculate
res <- do.call(getPrevalence, args)
# Add results to rowData
res <- list(res)
x <- .add_values_to_colData(x, res, name, MARGIN = 1L)
return(x)
}
)

#' @rdname getPrevalence
#' @export
setMethod("getPrevalence", signature = c(x = "ANY"), function(
x, detection = 0, include.lowest = include_lowest, include_lowest = FALSE,
x, detection = 0, include.lowest = include_lowest, include_lowest = FALSE,
sort = FALSE, na.rm = TRUE, ...){
# input check
if (!.is_numeric_string(detection)) {
Expand Down Expand Up @@ -313,7 +340,7 @@ NULL
#' @rdname getPrevalence
#' @export
setMethod("getPrevalent", signature = c(x = "ANY"),
function(x, prevalence = 50/100, include.lowest = include_lowest,
function(x, prevalence = 50/100, include.lowest = include_lowest,
include_lowest = FALSE, ...){
.get_prevalent_taxa(x, rank = NULL, prevalence = prevalence,
include.lowest = include.lowest, ...)
Expand Down Expand Up @@ -368,7 +395,7 @@ NULL
#' @rdname getPrevalence
#' @export
setMethod("getRare", signature = c(x = "ANY"),
function(x, prevalence = 50/100, include.lowest = include_lowest,
function(x, prevalence = 50/100, include.lowest = include_lowest,
include_lowest = FALSE, ...){
.get_rare_taxa(x, rank = NULL, prevalence = prevalence,
include.lowest = include.lowest, ...)
Expand Down Expand Up @@ -477,14 +504,14 @@ setMethod("getPrevalentAbundance", signature = c(x = "SummarizedExperiment"),
############################# agglomerateByPrevalence ##########################

#' Agglomerate data based on population prevalence
#'
#'
#' @name agglomerateByPrevalence
#'
#'
#' @inheritParams agglomerateByRank
#'
#'
#' @param other.name \code{Character scalar}. Used as the label for the
#' summary of non-prevalent taxa. (default: \code{"Other"})
#'
#'
#' @param other_label Deprecated. use \code{other.name} instead.
#'
#' @details
Expand Down Expand Up @@ -561,7 +588,7 @@ setMethod("agglomerateByPrevalence", signature = c(x = "SummarizedExperiment"),

#' @rdname agglomerateByPrevalence
#' @export
setMethod("agglomerateByPrevalence",
setMethod("agglomerateByPrevalence",
signature = c(x = "TreeSummarizedExperiment"),
function(x, rank = NULL, other.name = other_label, other_label = "Other",
update.tree = TRUE, ...){
Expand Down
13 changes: 12 additions & 1 deletion man/getPrevalence.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 19 additions & 6 deletions tests/testthat/test-5prevalence.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,19 @@ test_that("getPrevalence", {
res <- getPrevalence(
tse, na.rm = TRUE, rank = "Genus", empty.rm = TRUE)
expect_true( all(res == ref, na.rm = TRUE) )

# Test that results of addPrevalence equals to getPrevalence
# sort should be disabled
tse <- GlobalPatterns
tse2 <- addPrevalence(tse, sort = TRUE)
prev <- getPrevalence(tse, sort = FALSE)
expect_equal(rowData(tse2)[["prevalence"]], prev)
tse2 <- addPrevalence(tse, rank = "Family")
prev <- getPrevalence(tse, rank = "Family")
expect_equal(rowData(tse2)[["prevalence"]], prev)
expect_error(addPrevalence(tse, name = 1))
expect_error(addPrevalence(tse, name = NULL))
expect_error(addPrevalence(tse, name = c("asd", "test")))
})


Expand Down Expand Up @@ -310,7 +323,7 @@ test_that("subsetByPrevalent", {
alias <- subsetByPrevalent(gp_null, detection=5, prevalence = 0.33, rank = "Phylum")
alias <- unname(assay(alias, "counts"))
expect_equal(alias, pr2)

# Check that tree subsetting works
expect_error(subsetByPrevalent(GlobalPatterns, update.tree = 1))
expect_error(subsetByPrevalent(GlobalPatterns, update.tree = NULL))
Expand Down Expand Up @@ -378,7 +391,7 @@ test_that("subsetByRare", {
alias <- subsetByRare(gp_null, detection=5, prevalence = 0.33, rank = "Phylum")
alias <- unname(assay(alias, "counts"))
expect_equal(alias, pr2)

# Check that tree subsetting works
expect_error(subsetByRare(GlobalPatterns, update.tree = 1))
expect_error(subsetByRare(GlobalPatterns, update.tree = NULL))
Expand Down Expand Up @@ -440,12 +453,12 @@ test_that("agglomerateByPrevalence", {
other_label = "test",
update.tree = TRUE)
expect_equal(length(rowTree(actual)$tip.label), length(rownames(actual)))

# Load data from miaTime package
skip_if_not(require("miaTime", quietly = TRUE))
data(SilvermanAGutData)
se <- SilvermanAGutData

# checking reference consensus sequence generation
actual <- agglomerateByPrevalence(se,"Genus", update.refseq = FALSE)
# There should be only one exact match for each sequence
Expand All @@ -454,7 +467,7 @@ test_that("agglomerateByPrevalence", {
expect_true(all(vapply(
seqs_test, function(seq) sum(seqs_ref %in% seq) == 1,
FUN.VALUE = logical(1) )) )

# Merging creates consensus sequences.
th <- runif(1, 0, 1)
actual <- agglomerateByPrevalence(
Expand All @@ -470,7 +483,7 @@ test_that("agglomerateByPrevalence", {
threshold = th)
seqs_test <- seqs_test[ names(seqs_test) %in% feature ]
expect_equal(seqs_test, seqs_ref)

# checking reference consensus sequence generation using 'Genus:Alistipes'
actual <- agglomerateByPrevalence(se,"Genus", update.refseq = FALSE)
expect_equal(as.character(referenceSeq(actual)[["Alistipes"]]),
Expand Down

0 comments on commit a2bc35d

Please sign in to comment.