Larger commit, read desc

- Renamed raw to normalise in all plotting functions (note that the meaning is inverted: raw = FALSE corresponds to normalise = TRUE) - added call. = FALSE in all warning and error messages - amp_subset_taxa now shows a short summary of the subset just like amp_subset_samples does - read stats are now stored as an attribute in the ampvis2 object when the data is normalised with either amp_subset_samples or amp_subset_taxa. In this way they will print correctly with print.ampvis2, but manually editing the data will probably cause the stats to be incorrect from now on - both amp_subset_samples and amp_subset_taxa now also check if the data has already been normalised - ampvis2 objects printed to the console now have underlined section titles - added envfit_arrowcolor to amp_ordinate to color the arrows when doing numerical fitting - updated to version 2.3.3 and reran docs
KasperSkytte · Mar 5, 2018 · b38bdaf · b38bdaf
1 parent 66dec69
commit b38bdaf
Show file tree

Hide file tree

Showing 88 changed files with 849 additions and 313 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: ampvis2
 Type: Package
 Title: Tools for visualising amplicon data
 Description: ampvis2 is a small set of tools that allows effortless visualisation of amplicon data.
-Version: 2.3.2
+Version: 2.3.3
 Date: 2017-11-15
 Authors@R: c(person("Mads", "Albertsen", role = c("aut", "cre"), email = "[email protected]"), person(c("Kasper", "Skytte"), "Andersen", role = c("aut"), email = "[email protected]"), person(c("Rasmus", "Hansen"), "Kirkegaard", role = c("ctb"), email = "[email protected]"))
 License: AGPL-3
@@ -33,8 +33,7 @@ Imports:
     lubridate,
     remotes,
     cowplot,
-    readxl
+    readxl,
+    crayon
 Remotes: github::briatte/ggnet
-Suggests: 
-    remotes
 RoxygenNote: 6.0.1
diff --git a/NAMESPACE b/NAMESPACE
@@ -25,6 +25,7 @@ importFrom(ape,pcoa)
 importFrom(ape,read.FASTA)
 importFrom(ape,write.dna)
 importFrom(cowplot,plot_grid)
+importFrom(crayon,underline)
 importFrom(data.table,as.data.table)
 importFrom(data.table,data.table)
 importFrom(data.table,setkey)

diff --git a/R/amp_alphadiv.R b/R/amp_alphadiv.R
@@ -42,15 +42,15 @@ amp_alphadiv <- function (data,
                           rarefy = NULL) {
   ### Data must be in ampvis2 format
   if(class(data) != "ampvis2")
-    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)")
+    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis2 functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)", call. = FALSE)
 
   #check measures
   validMeasures <- c("observed", "shannon", "simpson", "invsimpson")
   if(is.null(measure)) {
     measure <- validMeasures
   } else if(!is.null(measure) & any(!measure %in% validMeasures)) {
     measure <- measure %>% tolower()
-    warning("Some or none of the provided measures were not recognised, calculating all. Valid options are:\n", paste0(validMeasures, collapse = ", "))
+    warning("Some or none of the provided measures were not recognised, calculating all. Valid options are:\n", paste0(validMeasures, collapse = ", "), call. = FALSE)
     measure <- validMeasures
   }
 
@@ -65,18 +65,18 @@ amp_alphadiv <- function (data,
 
   if(!is.null(rarefy) & is.numeric(rarefy)){
     if(rarefy > max(results$Reads) ) {
-      stop("The chosen rarefy size is larger than the largest amount of reads in any sample (", as.character(max(results$Reads)), ").")
+      stop("The chosen rarefy size is larger than the largest amount of reads in any sample (", as.character(max(results$Reads)), ").", call. = FALSE)
     } else if (rarefy < min(results$Reads)) {
       abund <- suppressWarnings(vegan::rrarefy(abund, sample = rarefy)) %>% as.data.frame()
-      warning("The chosen rarefy size (", as.character(rarefy), ") is smaller than the smallest amount of reads in any sample (", as.character(min(colSums(data$abund))), ").")
+      warning("The chosen rarefy size (", as.character(rarefy), ") is smaller than the smallest amount of reads in any sample (", as.character(min(colSums(data$abund))), ").", call. = FALSE)
     } else {
       abund <- suppressWarnings(vegan::rrarefy(abund, sample = rarefy)) %>% as.data.frame()
       if (min(results$Reads) < rarefy) {
         message("The following samples have not been rarefied (less than ", as.character(rarefy), " reads):\n", paste(rownames(data$metadata[which(results$Reads < rarefy),]), collapse = ", "))
       }
     }
   } else if(!is.null(rarefy) & !is.numeric(rarefy)) {
-    stop("Argument rarefy must be numerical.")
+    stop("Argument rarefy must be numerical.", call. = FALSE)
   }
 
   #warning from phyloseq::estimate_richness
@@ -85,7 +85,7 @@ amp_alphadiv <- function (data,
             "any singletons. This is highly suspicious. Results of richness\n", 
             "estimates (for example) are probably unreliable, or wrong, if you have already\n", 
             "trimmed low-abundance taxa from the data.\n", "\n", 
-            "We recommend that you find the un-trimmed data and retry.")
+            "We recommend that you find the un-trimmed data and retry.", call. = FALSE)
   }
 
   if(any("observed" %in% measure) | is.null(measure)) {

diff --git a/R/amp_boxplot.R b/R/amp_boxplot.R
@@ -24,7 +24,7 @@
 #' @param point_size The size of points. (\emph{default:} \code{1})
 #' @param sort_by Sort the boxplots by \code{"median"}, \code{"mean"} or \code{"total"}. (\emph{default:} \code{"median"})
 #' @param plot_type Plot type. \code{"boxplot"} or \code{"point"}. (\emph{default:} \code{"boxplot"})
-#' @param raw (\emph{logical}) Display raw input instead of converting to percentages. (\emph{default:} \code{FALSE})
+#' @param normalise (\emph{logical}) Transform the OTU read counts to be in percent per sample. (\emph{default:} \code{TRUE})
 #' @param detailed_output (\emph{logical}) Return additional details or not. If \code{TRUE}, it is recommended to save to an object and then access the additional data by \code{View(object$data)}. (\emph{default:} \code{FALSE})
 #' 
 #' @return A ggplot2 object. If \code{detailed_output = TRUE} a list with a ggplot2 object and additional data.
@@ -65,20 +65,20 @@ amp_boxplot <- function(data,
                         plot_flip = FALSE, 
                         plot_log = FALSE, 
                         adjust_zero = NULL,
-                        raw = FALSE,
+                        normalise = TRUE,
                         detailed_output = FALSE){
 
   ### Data must be in ampvis2 format
   if(class(data) != "ampvis2")
-    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)")
+    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis2 functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)", call. = FALSE)
 
   ## Clean up the taxonomy
   data <- amp_rename(data = data, tax_class = tax_class, tax_empty = tax_empty, tax_level = tax_aggregate)
 
   #tax_add and tax_aggregate can't be the same
   if(!is.null(tax_aggregate) & !is.null(tax_add)) {
     if(tax_aggregate == tax_add) {
-      stop("tax_aggregate and tax_add cannot be the same")
+      stop("tax_aggregate and tax_add cannot be the same", call. = FALSE)
     }
   }
 
@@ -87,7 +87,9 @@ amp_boxplot <- function(data,
   tax <- data[["tax"]]
   metadata <- data[["metadata"]]
 
-  if (raw == FALSE){
+  if (isTRUE(normalise)){
+    if(isTRUE(attributes(data)$normalised))
+      warning("The data has already been normalised by either amp_subset_samples or amp_subset_taxa. Setting normalise = TRUE (the default) will normalise the data again and the relative abundance information about the original data of which the provided data is a subset will be lost.", call. = FALSE)
     #calculate sample percentages, skip columns with 0 sum to avoid NaN's
     abund[,which(colSums(abund) != 0)] <- as.data.frame(apply(abund[,which(colSums(abund) != 0), drop = FALSE], 2, function(x) x/sum(x)*100))
   }

diff --git a/R/amp_core.R b/R/amp_core.R
@@ -16,7 +16,7 @@
 #' @param tax_class Converts a specific phylum to class level instead, e.g. \code{"p__Proteobacteria"}.
 #' @param abund_thrh Threshold in percent for defining "abundant"/"core" taxa. (\emph{default:} \code{0.1})
 #' @param plotly (\emph{logical}) Returns an interactive plot instead. (\emph{default:} \code{FALSE})
-#' @param raw (\emph{logical}) Display raw input instead of converting to percentages. (\emph{default:} \code{FALSE})
+#' @param normalise (\emph{logical}) Transform the OTU read counts to be in percent per sample. (\emph{default:} \code{TRUE})
 #' @param detailed_output (\emph{logical}) Return additional details or not. If \code{TRUE}, it is recommended to save to an object and then access the additional data by \code{View(object$data)}. (\emph{default:} \code{FALSE})
 #' 
 #' @return A ggplot2 object. If \code{detailed_output = TRUE} a list with a ggplot2 object and additional data.
@@ -54,12 +54,12 @@ amp_core <- function(data,
                      tax_class = NULL,
                      tax_empty = "best", 
                      plotly = FALSE, 
-                     raw = FALSE,
+                     normalise = TRUE,
                      detailed_output = FALSE){
 
   ### Data must be in ampvis2 format
   if(class(data) != "ampvis2")
-    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)")
+    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis2 functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)", call. = FALSE)
 
   ## Clean up the taxonomy
   data <- amp_rename(data = data, tax_class = tax_class, tax_empty = tax_empty, tax_level = tax_aggregate)
@@ -69,7 +69,9 @@ amp_core <- function(data,
   tax <- data[["tax"]]
   metadata <- data[["metadata"]]
 
-  if (raw == F){
+  if (isTRUE(normalise)){
+    if(isTRUE(attributes(data)$normalised))
+      warning("The data has already been normalised by either amp_subset_samples or amp_subset_taxa. Setting normalise = TRUE (the default) will normalise the data again and the relative abundance information about the original data of which the provided data is a subset will be lost.", call. = FALSE)
     #calculate sample percentages, skip columns with 0 sum to avoid NaN's
     abund[,which(colSums(abund) != 0)] <- as.data.frame(apply(abund[,which(colSums(abund) != 0), drop = FALSE], 2, function(x) x/sum(x)*100))
   }

diff --git a/R/amp_export_fasta.R b/R/amp_export_fasta.R
@@ -32,16 +32,16 @@ amp_export_fasta <- function(data,
 
   ### Data must be in ampvis2 format
   if(class(data) != "ampvis2")
-    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)")
+    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis2 functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)", call. = FALSE)
 
   ### Reference sequences must be there!
   if(is.null(data$refseq)) {
-    stop("No \"refseq\" element in the provided data.")
+    stop("No \"refseq\" element in the provided data.", call. = FALSE)
   }
 
   ### Check if refseq data is in the right format
   if(!is.null(data$refseq) & !class(data$refseq) == "DNAbin") {
-    stop("The refseq element is not of class \"DNAbin\". The reference sequences must be loaded with ape::read.dna().")
+    stop("The refseq element is not of class \"DNAbin\". The reference sequences must be loaded with ape::read.dna().", call. = FALSE)
   }
 
   t <- data[["refseq"]]

diff --git a/R/amp_export_otutable.R b/R/amp_export_otutable.R
@@ -10,7 +10,7 @@
 #' @param sep Separator passed directly to \code{\link{write.table}}. (\emph{default:} \code{"\t"})
 #' @param id Name the samples using a variable in the metadata.
 #' @param sort_samples Vector to sort the samples by.
-#' @param raw (\emph{logical}) Use raw counts instead of percentages. (\emph{default:} \code{TRUE})
+#' @param normalise (\emph{logical}) Transform the OTU read counts to be in percent per sample. (\emph{default:} \code{FALSE})
 #' @param ... Additional arguments passed to \code{\link{write.table}} other than \code{sep} and \code{row.names}.
 #' 
 #' @export
@@ -37,34 +37,35 @@ amp_export_otutable <- function(data,
                                 sep = "\t",
                                 id = NULL, 
                                 sort_samples = NULL, 
-                                raw = TRUE,
+                                normalise = FALSE,
                                 ...){
 
   ### Data must be in ampvis2 format
   if(class(data) != "ampvis2")
-    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)")
+    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis2 functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)", call. = FALSE)
 
   abund <- data[["abund"]]
   tax <- data[["tax"]]
   metadata <- data[["metadata"]]
 
-  if (raw == F){
+  if (isTRUE(normalise)){
+    if(isTRUE(attributes(data)$normalised))
+      warning("The data has already been normalised by either amp_subset_samples or amp_subset_taxa. Setting normalise = TRUE (the default) will normalise the data again and the relative abundance information about the original data of which the provided data is a subset will be lost.", call. = FALSE)
     #calculate sample percentages, skip columns with 0 sum to avoid NaN's
     abund[,which(colSums(abund) != 0)] <- as.data.frame(apply(abund[,which(colSums(abund) != 0), drop = FALSE], 2, function(x) x/sum(x)*100))
     rownames(abund) <- rownames(data[["abund"]])
   }
 
   if(!is.null(id)){
-
     ## Test if the ID exists in the metadata
     if( !(id %in% colnames(metadata)) ){
       ametadata <- paste(colnames(metadata), collapse = ", ")
-      stop(paste(id, "not found in metadata.\n\nAvailable metadata is: ", ametadata))
+      stop(paste(id, "not found in metadata.\n\nAvailable metadata is: ", ametadata), call. = FALSE)
     } 
 
     ## Test if the ID is unique for each sample
     if( length(unique(metadata[,id])) != length(colnames(abund)) ){
-      stop(paste(id, "is not unique for each sample"))
+      stop(paste(id, "is not unique for each sample"), call. = FALSE)
     } 
 
     ## Re-arrange after coloumns after metadata
@@ -79,7 +80,7 @@ amp_export_otutable <- function(data,
 
     ## Test if the ID is unique for each sample
     if( length(sort_samples) != length(colnames(abund)) ){
-      stop(paste("`sort_samples` does not match `id`"))
+      stop(paste("`sort_samples` does not match `id`"), call. = FALSE)
     } 
 
     abund <- abund[,sort_samples]

diff --git a/R/amp_frequency.R b/R/amp_frequency.R
@@ -15,7 +15,7 @@
 #'    }
 #' @param tax_class Converts a specific phylum to class level instead, e.g. \code{"p__Proteobacteria"}.
 #' @param weight (\emph{logical}) Weight the frequency by abundance. (\emph{default:} \code{TRUE})
-#' @param raw (\emph{logical}) Display raw input instead of converting to percentages. (\emph{default:} \code{FALSE})
+#' @param normalise (\emph{logical}) Transform the OTU read counts to be in percent per sample. (\emph{default:} \code{TRUE})
 #' @param detailed_output (\emph{logical}) Return additional details or not. If \code{TRUE}, it is recommended to save to an object and then access the additional data by \code{View(object$data)}. (\emph{default:} \code{FALSE})
 #' 
 #' @return A ggplot2 object. If \code{detailed_output = TRUE} a list with a ggplot2 object and additional data.
@@ -47,12 +47,12 @@ amp_frequency <- function(data,
                           tax_empty = "best",
                           tax_aggregate = "OTU",
                           weight = TRUE, 
-                          raw = FALSE,
+                          normalise = TRUE,
                           detailed_output = FALSE){
 
   ### Data must be in ampvis2 format
   if(class(data) != "ampvis2")
-    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)")
+    stop("The provided data is not in ampvis2 format. Use amp_load() to load your data before using ampvis2 functions. (Or class(data) <- \"ampvis2\", if you know what you are doing.)", call. = FALSE)
 
   ## Clean up the taxonomy
   data <- amp_rename(data = data, tax_class = tax_class, tax_empty = tax_empty, tax_level = tax_aggregate)
@@ -62,7 +62,9 @@ amp_frequency <- function(data,
   tax <- data[["tax"]]
   metadata <- data[["metadata"]]
 
-  if (raw == FALSE){
+  if (isTRUE(normalise)){
+    if(isTRUE(attributes(data)$normalised))
+      warning("The data has already been normalised by either amp_subset_samples or amp_subset_taxa. Setting normalise = TRUE (the default) will normalise the data again and the relative abundance information about the original data of which the provided data is a subset will be lost.", call. = FALSE)
     #calculate sample percentages, skip columns with 0 sum to avoid NaN's
     abund[,which(colSums(abund) != 0)] <- as.data.frame(apply(abund[,which(colSums(abund) != 0), drop = FALSE], 2, function(x) x/sum(x)*100))
   }