From 730d63ea37f0fb0e96f828bec8ee6f7fe12b7e67 Mon Sep 17 00:00:00 2001
From: Kasper Skytte Andersen <knaldhat@gmail.com>
Date: Mon, 23 May 2022 13:06:56 +0200
Subject: [PATCH] Expose some internal functions for general use

---
 DESCRIPTION                         |  2 +-
 NAMESPACE                           |  5 +++
 R/amp_ordinate.R                    |  4 +--
 R/internals.R                       | 55 +++++++++++++++++++++--------
 _pkgdown.yml                        |  8 +++++
 man/aggregate_abund.Rd              | 15 ++++++--
 man/amp_rarefy.Rd                   |  7 +++-
 man/{unifrac.Rd => dist.unifrac.Rd} |  6 ++--
 man/filter_species.Rd               | 11 ++++--
 man/matchOTUs.Rd                    |  9 +++--
 man/normaliseTo100.Rd               |  9 +++--
 11 files changed, 98 insertions(+), 33 deletions(-)
 rename man/{unifrac.Rd => dist.unifrac.Rd} (90%)

diff --git a/DESCRIPTION b/DESCRIPTION
index 61604022..c7e2297e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: ampvis2
 Type: Package
 Title: Tools for visualising amplicon data
 Description: ampvis2 is a small set of tools that allows effortless visualisation of amplicon data.
-Version: 2.7.25
+Version: 2.7.26
 Authors@R: c(
   person(
     c("Kasper", "Skytte"), "Andersen", 
diff --git a/NAMESPACE b/NAMESPACE
index ca597366..892eff42 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ S3method(print,coreplot)
 S3method(print,figcaption)
 S3method(print,hmfunplot)
 export("%>%")
+export(aggregate_abund)
 export(amp_alpha_diversity)
 export(amp_alphadiv)
 export(amp_boxplot)
@@ -29,11 +30,15 @@ export(amp_rank_abundance)
 export(amp_rankabundance)
 export(amp_rarecurve)
 export(amp_rarefaction_curve)
+export(amp_rarefy)
 export(amp_subset_samples)
 export(amp_subset_taxa)
 export(amp_time_series)
 export(amp_timeseries)
 export(amp_venn)
+export(filter_species)
+export(matchOTUs)
+export(normaliseTo100)
 import(ggplot2)
 importFrom(RColorBrewer,brewer.pal)
 importFrom(ape,drop.tip)
diff --git a/R/amp_ordinate.R b/R/amp_ordinate.R
index 68e2b3c3..c2a2e0a8 100644
--- a/R/amp_ordinate.R
+++ b/R/amp_ordinate.R
@@ -350,7 +350,7 @@ amp_ordinate <- function(data,
     } else if (distmeasure %in% validVegdistMethods) {
       distmatrix <- vegan::vegdist(data$abund, method = distmeasure)
     } else if (distmeasure == "unifrac") {
-      distmatrix <- unifrac(
+      distmatrix <- dist.unifrac(
         abund = t(data$abund),
         tree = data$tree,
         weighted = FALSE,
@@ -358,7 +358,7 @@ amp_ordinate <- function(data,
         num_threads = num_threads
       )
     } else if (distmeasure == "wunifrac") {
-      distmatrix <- unifrac(
+      distmatrix <- dist.unifrac(
         abund = t(data$abund),
         tree = data$tree,
         weighted = TRUE,
diff --git a/R/internals.R b/R/internals.R
index 2779ae01..6c982875 100644
--- a/R/internals.R
+++ b/R/internals.R
@@ -7,7 +7,12 @@
 #' @return An ampvis2 object with rarefied OTU abundances.
 #' @importFrom vegan rrarefy
 #' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
-#' @keywords internal
+#' @export
+#' @examples
+#' data("AalborgWWTPs")
+#' AalborgWWTPs
+#' rarefied <- amp_rarefy(AalborgWWTPs, 20000)
+#' rarefied
 amp_rarefy <- function(data, rarefy) {
   ### Data must be in ampvis2 format
   if (class(data) != "ampvis2") {
@@ -198,7 +203,7 @@ extractFunctions <- function(FGList) {
 #' @return A distance matrix of class \code{dist}.
 #' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
 #' @keywords internal
-unifrac <- function(abund,
+dist.unifrac <- function(abund,
                     tree,
                     weighted = FALSE,
                     normalise = TRUE,
@@ -359,7 +364,7 @@ getLowestTaxLvl <- function(tax, tax_aggregate = NULL, tax_add = NULL) {
 }
 
 #' @title Aggregate OTUs to a specific taxonomic level
-#' @description Calculates the sum of OTUs per taxonomic level
+#' @description Sums up all OTU read counts at the chosen taxonomic level. Used internally in many ampvis2 functions, but can also be used separately for custom purposes.
 #'
 #' @param abund The OTU abundance table from an ampvis2 object (\code{ampvis2obj$abund})
 #' @param tax The OTU abundance table from an ampvis2 object (\code{ampvis2obj$tax})
@@ -371,7 +376,18 @@ getLowestTaxLvl <- function(tax, tax_aggregate = NULL, tax_add = NULL) {
 #' @importFrom data.table data.table melt
 #' @return A data.table.
 #' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
-#' @keywords internal
+#' @export
+#' @examples
+#' data("AalborgWWTPs")
+#' aggregated <- aggregate_abund(
+#'   AalborgWWTPs$abund,
+#'   AalborgWWTPs$tax,
+#'   tax_aggregate = "Genus",
+#'   tax_add = "Phylum",
+#'   format = "long",
+#'   calcSums = TRUE
+#' )
+#' aggregated
 aggregate_abund <- function(abund,
                             tax,
                             tax_aggregate = "OTU",
@@ -471,8 +487,13 @@ abundAreCounts <- function(data) {
 #'
 #' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}.
 #'
-#' @return A modifed ampvis2 object
-#' @keywords internal
+#' @return A modified ampvis2 object
+#' @export
+#' @examples
+#' data("AalborgWWTPs")
+#' AalborgWWTPs
+#' normalised <- normaliseTo100(AalborgWWTPs)
+#' normalised
 normaliseTo100 <- function(data) {
   ### Data must be in ampvis2 format
   is_ampvis2(data)
@@ -495,14 +516,20 @@ normaliseTo100 <- function(data) {
   return(data)
 }
 
-#' @title Filter species by a threshold in percent
+#' @title Filter OTUs by a threshold in percent
+#' @description Removes all OTUs that are not found with a higher relative abundance than the set threshold in percent in at least one sample.
 #'
 #' @param data (\emph{required}) Data list as loaded with \code{\link{amp_load}}.
 #' @param filter_species Remove low abundant OTU's across all samples below this threshold in percent. (\emph{default}: \code{0})
 #'
 #' @importFrom ape drop.tip
 #' @return An ampvis2 object
-#' @keywords internal
+#' @export
+#' @examples
+#' data("AalborgWWTPs")
+#' AalborgWWTPs
+#' filtered <- filter_species(AalborgWWTPs, filter_species = 0.1)
+#' filtered
 filter_species <- function(data, filter_species = 0) {
   ### Data must be in ampvis2 format
   is_ampvis2(data)
@@ -555,7 +582,7 @@ filter_species <- function(data, filter_species = 0) {
           names_stripped <- stringr::str_split(names(data$refseq), ";", simplify = TRUE)[, 1]
           data$refseq <- data$refseq[names_stripped %in% rownames(data$abund)]
         } else if (is.null(names(data$refseq))) {
-          warning("DNA sequences have not been subsetted, could not find the names of the sequences in data$refseq.", call. = FALSE)
+          warning("DNA sequences have not been filtered, could not find the names of the sequences in data$refseq.", call. = FALSE)
         }
       }
       nOTUsafter <- nrow(data$abund)
@@ -655,16 +682,16 @@ as.data.table.DNAbin <- function(x, ...) {
   dt
 }
 
-#' @title Rename OTU's by sequence matching with a FASTA file
-#' @description Match and rename OTU's in an ampvis2 object by sequence to a FASTA file
+#' @title Rename OTUs by exact sequence matches from a FASTA file
+#' @description Renames sequences loaded in an ampvis2 object based on exact matches (100\% identity and exact same length) in a FASTA file. This is useful for enabling direct cross-study/cross-dataset comparison of OTU/ASV names. This function is also used internally in \code{amp_merge_ampvis2}.
 #'
 #' @param data data (\emph{required}) Data list as loaded with \code{\link{amp_load}}.
 #' @param fasta Path to a FASTA file or a \code{DNAbin} class object with sequences whose names will be used as OTU names by exact matches (i.e. same length, 100\% sequence identity). (\emph{default:} \code{NULL})
-#' @param unmatched_prefix Prefix used to name any unmatched sequences when \code{refseq_names} is provided. An integer counting from 1 will be appended to this prefix, so for example the 123th unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"})
-#' @param rename_unmatched Whether to rename any unmatched sequences or not when \code{refseq_names} is provided. (\emph{default:} \code{TRUE})
+#' @param unmatched_prefix Prefix used to name any unmatched sequences in the FASTA file. An integer counting from 1 will be appended to this prefix, so for example the 123rd unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"})
+#' @param rename_unmatched Whether to rename any unmatched sequences or not. (\emph{default:} \code{TRUE})
 #'
 #' @return An ampvis2 class object
-#' @keywords internal
+#' @export
 matchOTUs <- function(
   data,
   fasta,
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 50aa4817..97b9064d 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -44,6 +44,14 @@ reference:
     - amp_frequency
     - amp_otu_network
     - amp_rank_abundance
+  - title: Utility functions
+    desc: Various utility functions that are also used internally in several ampvis2 functions, but may also be useful in other custom scenarios.
+    contents:
+    - amp_rarefy
+    - normaliseTo100
+    - filter_species
+    - aggregate_abund
+    - matchOTUs
   - title: Data sets
     contents:
     - has_keyword("data")
diff --git a/man/aggregate_abund.Rd b/man/aggregate_abund.Rd
index a792ac84..26c89f5f 100644
--- a/man/aggregate_abund.Rd
+++ b/man/aggregate_abund.Rd
@@ -30,9 +30,20 @@ aggregate_abund(
 A data.table.
 }
 \description{
-Calculates the sum of OTUs per taxonomic level
+Sums up all OTU read counts at the chosen taxonomic level. Used internally in many ampvis2 functions, but can also be used separately for custom purposes.
+}
+\examples{
+data("AalborgWWTPs")
+aggregated <- aggregate_abund(
+  AalborgWWTPs$abund,
+  AalborgWWTPs$tax,
+  tax_aggregate = "Genus",
+  tax_add = "Phylum",
+  format = "long",
+  calcSums = TRUE
+)
+aggregated
 }
 \author{
 Kasper Skytte Andersen \email{ksa@bio.aau.dk}
 }
-\keyword{internal}
diff --git a/man/amp_rarefy.Rd b/man/amp_rarefy.Rd
index 8f9ad15b..785733be 100644
--- a/man/amp_rarefy.Rd
+++ b/man/amp_rarefy.Rd
@@ -17,7 +17,12 @@ An ampvis2 object with rarefied OTU abundances.
 \description{
 This is just a wrapper of \code{\link[vegan]{rrarefy}} with convenient error messages and adjusted to work with ampvis2 objects.
 }
+\examples{
+data("AalborgWWTPs")
+AalborgWWTPs
+rarefied <- amp_rarefy(AalborgWWTPs, 20000)
+rarefied
+}
 \author{
 Kasper Skytte Andersen \email{ksa@bio.aau.dk}
 }
-\keyword{internal}
diff --git a/man/unifrac.Rd b/man/dist.unifrac.Rd
similarity index 90%
rename from man/unifrac.Rd
rename to man/dist.unifrac.Rd
index f5d82e58..0af70c39 100644
--- a/man/unifrac.Rd
+++ b/man/dist.unifrac.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/internals.R
-\name{unifrac}
-\alias{unifrac}
+\name{dist.unifrac}
+\alias{dist.unifrac}
 \title{Calculate weighted or unweighted UniFrac distances. Adopted from fastUniFrac() from phyloseq}
 \usage{
-unifrac(abund, tree, weighted = FALSE, normalise = TRUE, num_threads = 1L)
+dist.unifrac(abund, tree, weighted = FALSE, normalise = TRUE, num_threads = 1L)
 }
 \arguments{
 \item{abund}{Abundance table with OTU counts, in \code{ampvis2} objects it is available with simply data$abund}
diff --git a/man/filter_species.Rd b/man/filter_species.Rd
index 6d899b6d..71b134f8 100644
--- a/man/filter_species.Rd
+++ b/man/filter_species.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/internals.R
 \name{filter_species}
 \alias{filter_species}
-\title{Filter species by a threshold in percent}
+\title{Filter OTUs by a threshold in percent}
 \usage{
 filter_species(data, filter_species = 0)
 }
@@ -15,6 +15,11 @@ filter_species(data, filter_species = 0)
 An ampvis2 object
 }
 \description{
-Filter species by a threshold in percent
+Removes all OTUs that are not found with a higher relative abundance than the set threshold in percent in at least one sample.
+}
+\examples{
+data("AalborgWWTPs")
+AalborgWWTPs
+filtered <- filter_species(AalborgWWTPs, filter_species = 0.1)
+filtered
 }
-\keyword{internal}
diff --git a/man/matchOTUs.Rd b/man/matchOTUs.Rd
index dcfd9e76..0dcee9a4 100644
--- a/man/matchOTUs.Rd
+++ b/man/matchOTUs.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/internals.R
 \name{matchOTUs}
 \alias{matchOTUs}
-\title{Rename OTU's by sequence matching with a FASTA file}
+\title{Rename OTUs by exact sequence matches from a FASTA file}
 \usage{
 matchOTUs(data, fasta, unmatched_prefix = "unmatched", rename_unmatched = TRUE)
 }
@@ -11,14 +11,13 @@ matchOTUs(data, fasta, unmatched_prefix = "unmatched", rename_unmatched = TRUE)
 
 \item{fasta}{Path to a FASTA file or a \code{DNAbin} class object with sequences whose names will be used as OTU names by exact matches (i.e. same length, 100\% sequence identity). (\emph{default:} \code{NULL})}
 
-\item{unmatched_prefix}{Prefix used to name any unmatched sequences when \code{refseq_names} is provided. An integer counting from 1 will be appended to this prefix, so for example the 123th unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"})}
+\item{unmatched_prefix}{Prefix used to name any unmatched sequences in the FASTA file. An integer counting from 1 will be appended to this prefix, so for example the 123rd unmatched sequence will be named \code{unmatched123}, and so on. (\emph{default:} \code{"unmatched"})}
 
-\item{rename_unmatched}{Whether to rename any unmatched sequences or not when \code{refseq_names} is provided. (\emph{default:} \code{TRUE})}
+\item{rename_unmatched}{Whether to rename any unmatched sequences or not. (\emph{default:} \code{TRUE})}
 }
 \value{
 An ampvis2 class object
 }
 \description{
-Match and rename OTU's in an ampvis2 object by sequence to a FASTA file
+Renames sequences loaded in an ampvis2 object based on exact matches (100\% identity and exact same length) in a FASTA file. This is useful for enabling direct cross-study/cross-dataset comparison of OTU/ASV names. This function is also used internally in \code{amp_merge_ampvis2}.
 }
-\keyword{internal}
diff --git a/man/normaliseTo100.Rd b/man/normaliseTo100.Rd
index ced3086c..3d09e33a 100644
--- a/man/normaliseTo100.Rd
+++ b/man/normaliseTo100.Rd
@@ -10,9 +10,14 @@ normaliseTo100(data)
 \item{data}{(\emph{required}) Data list as loaded with \code{\link{amp_load}}.}
 }
 \value{
-A modifed ampvis2 object
+A modified ampvis2 object
 }
 \description{
 Normalise read counts to 100, i.e. in percent relative abundance per sample
 }
-\keyword{internal}
+\examples{
+data("AalborgWWTPs")
+AalborgWWTPs
+normalised <- normaliseTo100(AalborgWWTPs)
+normalised
+}