From 730d63ea37f0fb0e96f828bec8ee6f7fe12b7e67 Mon Sep 17 00:00:00 2001 From: Kasper Skytte Andersen Date: Mon, 23 May 2022 13:06:56 +0200 Subject: [PATCH] Expose some internal functions for general use --- DESCRIPTION | 2 +- NAMESPACE | 5 +++ R/amp_ordinate.R | 4 +-- R/internals.R | 55 +++++++++++++++++++++-------- _pkgdown.yml | 8 +++++ man/aggregate_abund.Rd | 15 ++++++-- man/amp_rarefy.Rd | 7 +++- man/{unifrac.Rd => dist.unifrac.Rd} | 6 ++-- man/filter_species.Rd | 11 ++++-- man/matchOTUs.Rd | 9 +++-- man/normaliseTo100.Rd | 9 +++-- 11 files changed, 98 insertions(+), 33 deletions(-) rename man/{unifrac.Rd => dist.unifrac.Rd} (90%) diff --git a/DESCRIPTION b/DESCRIPTION index 61604022..c7e2297e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: ampvis2 Type: Package Title: Tools for visualising amplicon data Description: ampvis2 is a small set of tools that allows effortless visualisation of amplicon data. -Version: 2.7.25 +Version: 2.7.26 Authors@R: c( person( c("Kasper", "Skytte"), "Andersen", diff --git a/NAMESPACE b/NAMESPACE index ca597366..892eff42 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ S3method(print,coreplot) S3method(print,figcaption) S3method(print,hmfunplot) export("%>%") +export(aggregate_abund) export(amp_alpha_diversity) export(amp_alphadiv) export(amp_boxplot) @@ -29,11 +30,15 @@ export(amp_rank_abundance) export(amp_rankabundance) export(amp_rarecurve) export(amp_rarefaction_curve) +export(amp_rarefy) export(amp_subset_samples) export(amp_subset_taxa) export(amp_time_series) export(amp_timeseries) export(amp_venn) +export(filter_species) +export(matchOTUs) +export(normaliseTo100) import(ggplot2) importFrom(RColorBrewer,brewer.pal) importFrom(ape,drop.tip) diff --git a/R/amp_ordinate.R b/R/amp_ordinate.R index 68e2b3c3..c2a2e0a8 100644 --- a/R/amp_ordinate.R +++ b/R/amp_ordinate.R @@ -350,7 +350,7 @@ amp_ordinate <- function(data, } else if (distmeasure %in% validVegdistMethods) { distmatrix <- 
vegan::vegdist(data$abund, method = distmeasure) } else if (distmeasure == "unifrac") { - distmatrix <- unifrac( + distmatrix <- dist.unifrac( abund = t(data$abund), tree = data$tree, weighted = FALSE, @@ -358,7 +358,7 @@ amp_ordinate <- function(data, num_threads = num_threads ) } else if (distmeasure == "wunifrac") { - distmatrix <- unifrac( + distmatrix <- dist.unifrac( abund = t(data$abund), tree = data$tree, weighted = TRUE, diff --git a/R/internals.R b/R/internals.R index 2779ae01..6c982875 100644 --- a/R/internals.R +++ b/R/internals.R @@ -7,7 +7,12 @@ #' @return An ampvis2 object with rarefied OTU abundances. #' @importFrom vegan rrarefy #' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk} -#' @keywords internal +#' @export +#' @examples +#' data("AalborgWWTPs") +#' AalborgWWTPs +#' rarefied <- amp_rarefy(AalborgWWTPs, 20000) +#' rarefied amp_rarefy <- function(data, rarefy) { ### Data must be in ampvis2 format if (class(data) != "ampvis2") { @@ -198,7 +203,7 @@ extractFunctions <- function(FGList) { #' @return A distance matrix of class \code{dist}. #' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk} #' @keywords internal -unifrac <- function(abund, +dist.unifrac <- function(abund, tree, weighted = FALSE, normalise = TRUE, @@ -359,7 +364,7 @@ getLowestTaxLvl <- function(tax, tax_aggregate = NULL, tax_add = NULL) { } #' @title Aggregate OTUs to a specific taxonomic level -#' @description Calculates the sum of OTUs per taxonomic level +#' @description Sums up all OTU read counts at the chosen taxonomic level. Used internally in many ampvis2 functions, but can also be used separately for custom purposes. #' #' @param abund The OTU abundance table from an ampvis2 object (\code{ampvis2obj$abund}) #' @param tax The OTU abundance table from an ampvis2 object (\code{ampvis2obj$tax}) @@ -371,7 +376,18 @@ getLowestTaxLvl <- function(tax, tax_aggregate = NULL, tax_add = NULL) { #' @importFrom data.table data.table melt #' @return A data.table. 
#' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk} -#' @keywords internal +#' @export +#' @examples +#' data("AalborgWWTPs") +#' aggregated <- aggregate_abund( +#' AalborgWWTPs$abund, +#' AalborgWWTPs$tax, +#' tax_aggregate = "Genus", +#' tax_add = "Phylum", +#' format = "long", +#' calcSums = TRUE +#' ) +#' aggregated aggregate_abund <- function(abund, tax, tax_aggregate = "OTU", @@ -471,8 +487,13 @@ abundAreCounts <- function(data) { #' #' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}. #' -#' @return A modifed ampvis2 object -#' @keywords internal +#' @return A modified ampvis2 object +#' @export +#' @examples +#' data("AalborgWWTPs") +#' AalborgWWTPs +#' normalised <- normaliseTo100(AalborgWWTPs) +#' normalised normaliseTo100 <- function(data) { ### Data must be in ampvis2 format is_ampvis2(data) @@ -495,14 +516,20 @@ normaliseTo100 <- function(data) { return(data) } -#' @title Filter species by a threshold in percent +#' @title Filter OTUs by a threshold in percent +#' @description Removes all OTUs that are not found with a higher relative abundance than the set threshold in percent in at least one sample. #' #' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}. #' @param filter_species Remove low abundant OTU's across all samples below this threshold in percent. 
(\emph{default}: \code{0}) #' #' @importFrom ape drop.tip #' @return An ampvis2 object -#' @keywords internal +#' @export +#' @examples +#' data("AalborgWWTPs") +#' AalborgWWTPs +#' filtered <- filter_species(AalborgWWTPs, filter_species = 0.1) +#' filtered filter_species <- function(data, filter_species = 0) { ### Data must be in ampvis2 format is_ampvis2(data) @@ -555,7 +582,7 @@ filter_species <- function(data, filter_species = 0) { names_stripped <- stringr::str_split(names(data$refseq), ";", simplify = TRUE)[, 1] data$refseq <- data$refseq[names_stripped %in% rownames(data$abund)] } else if (is.null(names(data$refseq))) { - warning("DNA sequences have not been subsetted, could not find the names of the sequences in data$refseq.", call. = FALSE) + warning("DNA sequences have not been filtered, could not find the names of the sequences in data$refseq.", call. = FALSE) } } nOTUsafter <- nrow(data$abund) @@ -655,16 +682,16 @@ as.data.table.DNAbin <- function(x, ...) { dt } -#' @title Rename OTU's by sequence matching with a FASTA file -#' @description Match and rename OTU's in an ampvis2 object by sequence to a FASTA file +#' @title Rename OTUs by exact sequence matches from a FASTA file +#' @description Renames sequences loaded in an ampvis2 object based on exact matches (100\% identity and exact same length) in a FASTA file. This is useful for enabling direct cross-study/cross-dataset comparison of OTU/ASV names. This function is also used internally in \code{amp_merge_ampvis2}. #' #' @param data data (\emph{required}) Data list as loaded with \code{\link{amp_load}}. #' @param fasta Path to a FASTA file or a \code{DNAbin} class object with sequences whose names will be used as OTU names by exact matches (i.e. same length, 100\% sequence identity). (\emph{default:} \code{NULL}) -#' @param unmatched_prefix Prefix used to name any unmatched sequences when \code{refseq_names} is provided. 
An integer counting from 1 will be appended to this prefix, so for example the 123th unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"}) -#' @param rename_unmatched Whether to rename any unmatched sequences or not when \code{refseq_names} is provided. (\emph{default:} \code{TRUE}) +#' @param unmatched_prefix Prefix used to name any unmatched sequences in the FASTA file. An integer counting from 1 will be appended to this prefix, so for example the 123rd unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"}) +#' @param rename_unmatched Whether to rename any unmatched sequences or not. (\emph{default:} \code{TRUE}) #' #' @return An ampvis2 class object -#' @keywords internal +#' @export matchOTUs <- function( data, fasta, diff --git a/_pkgdown.yml b/_pkgdown.yml index 50aa4817..97b9064d 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,6 +44,14 @@ reference: - amp_frequency - amp_otu_network - amp_rank_abundance + - title: Utility functions + desc: Various utility functions that are also used internally in several ampvis2 functions, but may also be useful in other custom scenarios. + contents: + - amp_rarefy + - normaliseTo100 + - filter_species + - aggregate_abund + - matchOTUs - title: Data sets contents: - has_keyword("data") diff --git a/man/aggregate_abund.Rd b/man/aggregate_abund.Rd index a792ac84..26c89f5f 100644 --- a/man/aggregate_abund.Rd +++ b/man/aggregate_abund.Rd @@ -30,9 +30,20 @@ aggregate_abund( A data.table. } \description{ -Calculates the sum of OTUs per taxonomic level +Sums up all OTU read counts at the chosen taxonomic level. Used internally in many ampvis2 functions, but can also be used separately for custom purposes. 
+} +\examples{ +data("AalborgWWTPs") +aggregated <- aggregate_abund( + AalborgWWTPs$abund, + AalborgWWTPs$tax, + tax_aggregate = "Genus", + tax_add = "Phylum", + format = "long", + calcSums = TRUE +) +aggregated } \author{ Kasper Skytte Andersen \email{ksa@bio.aau.dk} } -\keyword{internal} diff --git a/man/amp_rarefy.Rd b/man/amp_rarefy.Rd index 8f9ad15b..785733be 100644 --- a/man/amp_rarefy.Rd +++ b/man/amp_rarefy.Rd @@ -17,7 +17,12 @@ An ampvis2 object with rarefied OTU abundances. \description{ This is just a wrapper of \code{\link[vegan]{rrarefy}} with convenient error messages and adjusted to work with ampvis2 objects. } +\examples{ +data("AalborgWWTPs") +AalborgWWTPs +rarefied <- amp_rarefy(AalborgWWTPs, 20000) +rarefied +} \author{ Kasper Skytte Andersen \email{ksa@bio.aau.dk} } -\keyword{internal} diff --git a/man/unifrac.Rd b/man/dist.unifrac.Rd similarity index 90% rename from man/unifrac.Rd rename to man/dist.unifrac.Rd index f5d82e58..0af70c39 100644 --- a/man/unifrac.Rd +++ b/man/dist.unifrac.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/internals.R -\name{unifrac} -\alias{unifrac} +\name{dist.unifrac} +\alias{dist.unifrac} \title{Calculate weighted or unweighted UniFrac distances. 
Adopted from fastUniFrac() from phyloseq} \usage{ -unifrac(abund, tree, weighted = FALSE, normalise = TRUE, num_threads = 1L) +dist.unifrac(abund, tree, weighted = FALSE, normalise = TRUE, num_threads = 1L) } \arguments{ \item{abund}{Abundance table with OTU counts, in \code{ampvis2} objects it is available with simply data$abund} diff --git a/man/filter_species.Rd b/man/filter_species.Rd index 6d899b6d..71b134f8 100644 --- a/man/filter_species.Rd +++ b/man/filter_species.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/internals.R \name{filter_species} \alias{filter_species} -\title{Filter species by a threshold in percent} +\title{Filter OTUs by a threshold in percent} \usage{ filter_species(data, filter_species = 0) } @@ -15,6 +15,11 @@ filter_species(data, filter_species = 0) An ampvis2 object } \description{ -Filter species by a threshold in percent +Removes all OTUs that are not found with a higher relative abundance than the set threshold in percent in at least one sample. +} +\examples{ +data("AalborgWWTPs") +AalborgWWTPs +filtered <- filter_species(AalborgWWTPs, filter_species = 0.1) +filtered } -\keyword{internal} diff --git a/man/matchOTUs.Rd b/man/matchOTUs.Rd index dcfd9e76..0dcee9a4 100644 --- a/man/matchOTUs.Rd +++ b/man/matchOTUs.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/internals.R \name{matchOTUs} \alias{matchOTUs} -\title{Rename OTU's by sequence matching with a FASTA file} +\title{Rename OTUs by exact sequence matches from a FASTA file} \usage{ matchOTUs(data, fasta, unmatched_prefix = "unmatched", rename_unmatched = TRUE) } @@ -11,14 +11,13 @@ matchOTUs(data, fasta, unmatched_prefix = "unmatched", rename_unmatched = TRUE) \item{fasta}{Path to a FASTA file or a \code{DNAbin} class object with sequences whose names will be used as OTU names by exact matches (i.e. same length, 100\% sequence identity). 
(\emph{default:} \code{NULL})} -\item{unmatched_prefix}{Prefix used to name any unmatched sequences when \code{refseq_names} is provided. An integer counting from 1 will be appended to this prefix, so for example the 123th unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"})} +\item{unmatched_prefix}{Prefix used to name any unmatched sequences in the FASTA file. An integer counting from 1 will be appended to this prefix, so for example the 123rd unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"})} -\item{rename_unmatched}{Whether to rename any unmatched sequences or not when \code{refseq_names} is provided. (\emph{default:} \code{TRUE})} +\item{rename_unmatched}{Whether to rename any unmatched sequences or not. (\emph{default:} \code{TRUE})} } \value{ An ampvis2 class object } \description{ -Match and rename OTU's in an ampvis2 object by sequence to a FASTA file +Renames sequences loaded in an ampvis2 object based on exact matches (100\% identity and exact same length) in a FASTA file. This is useful for enabling direct cross-study/cross-dataset comparison of OTU/ASV names. This function is also used internally in \code{amp_merge_ampvis2}. } -\keyword{internal} diff --git a/man/normaliseTo100.Rd b/man/normaliseTo100.Rd index ced3086c..3d09e33a 100644 --- a/man/normaliseTo100.Rd +++ b/man/normaliseTo100.Rd @@ -10,9 +10,14 @@ normaliseTo100(data) \item{data}{(\emph{required}) Data list as loaded with \code{\link{amp_load}}.} } \value{ -A modifed ampvis2 object +A modified ampvis2 object } \description{ Normalise read counts to 100, i.e. in percent relative abundance per sample } -\keyword{internal} +\examples{ +data("AalborgWWTPs") +AalborgWWTPs +normalised <- normaliseTo100(AalborgWWTPs) +normalised +}