Skip to content

Commit

Permalink
fix #152+refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
Kasper Skytte Andersen committed May 27, 2023
1 parent 31fdd72 commit c93d28d
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 68 deletions.
112 changes: 47 additions & 65 deletions R/amp_boxplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#' @param plot_log (\emph{logical}) Log10-scale the plot. (\emph{default:} \code{FALSE})
#' @param adjust_zero Keep abundances of 0 in the calculation of medians by adding this value. (\emph{default:} \code{NULL})
#' @param point_size The size of points. (\emph{default:} \code{1})
#' @param sort_by Sort the boxplots by \code{"median"}, \code{"mean"} or \code{"total"}. (\emph{default:} \code{"median"})
#' @param sort_by Function (or name of a function) used to rank taxa when sorting by abundance, e.g. \code{mean}, \code{median}, or \code{sum}. (\emph{default:} \code{median})
#' @param plot_type Plot type. \code{"boxplot"} or \code{"point"}. (\emph{default:} \code{"boxplot"})
#' @param normalise (\emph{logical}) Transform the OTU read counts to be in percent per sample. (\emph{default:} \code{TRUE})
#'
Expand Down Expand Up @@ -50,8 +50,8 @@
#' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
#' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com}
amp_boxplot <- function(data,
group_by = "Sample",
sort_by = "median",
group_by = NULL,
sort_by = median,
plot_type = "boxplot",
point_size = 1,
tax_aggregate = "Genus",
Expand All @@ -74,87 +74,69 @@ amp_boxplot <- function(data,
data = data,
tax_class = tax_class,
tax_empty = tax_empty,
tax_level = lowest_taxlevel
tax_level = tax_aggregate
)

# normalise counts
if (isTRUE(normalise)) {
data <- normaliseTo100(data)
}

# Aggregate to a specific taxonomic level
abund3 <- aggregate_abund(
# Group by sample if group_by is NULL, always coerce to factor
sampleIDvarname <- colnames(data$metadata)[1] # also used later
if(is.null(group_by)) {
group_by <- sampleIDvarname
}
data$metadata[group_by] <- lapply(data$metadata[group_by], factor)

# Aggregate to a specific taxonomic level and merge with chosen metadata group_by var(s)
abund5 <- aggregate_abund(
abund = data$abund,
tax = data$tax,
tax_aggregate = tax_aggregate,
tax_add = tax_add,
calcSums = TRUE,
format = "long"
) %>%
as.data.frame()

## Add group information
suppressWarnings(
if (group_by != "Sample") {
if (length(group_by) > 1) {
grp <- data.frame(
Sample = rownames(data$metadata),
.Group = apply(
data$metadata[, group_by],
1,
paste,
collapse = " "
)
as.data.frame() %>%
merge(
data.frame(
Sample = data$metadata[[1]],
.Group = apply(
data$metadata[, group_by, drop = FALSE],
1,
paste,
collapse = " "
)
} else {
grp <- data.frame(
Sample = rownames(data$metadata),
.Group = data$metadata[, group_by]
)
}
abund3$.Group <- grp$.Group[match(abund3$Sample, grp$Sample)]
abund5 <- abund3
} else {
abund5 <- data.frame(abund3, .Group = abund3$Sample)
}
)

## Find the x most abundant levels and sort
TotalCounts <- group_by(abund5, Display) %>%
summarise(Median = median(Abundance), Total = sum(Abundance), Mean = mean(Abundance))
if (sort_by == "median") {
TotalCounts %<>% arrange(desc(Median)) %>% as.data.frame()
}
if (sort_by == "mean") {
TotalCounts %<>% arrange(desc(Mean)) %>% as.data.frame()
}
if (sort_by == "total") {
TotalCounts %<>% arrange(desc(Total)) %>% as.data.frame()
}

),
by = "Sample"
)

## Sort by chosen measure (median/mean/sum etc)
TotalCounts <- abund5 %>%
group_by(Display) %>%
summarise(measure = match.fun(sort_by)(Sum)) %>%
arrange(desc(measure))
abund5$Display <- factor(abund5$Display, levels = rev(TotalCounts$Display))

## Subset to the x most abundant levels
## Subset to X most abundant levels
if (is.numeric(tax_show)) {
if (tax_show > nrow(TotalCounts)) {
warning(paste0("There are only ", nrow(TotalCounts), " taxa, showing all"), call. = FALSE)
tax_show <- nrow(TotalCounts)
}
abund7 <- subset(abund5, abund5$Display %in% TotalCounts[1:tax_show, "Display"])
}
## Subset to a list of level names
if (!is.numeric(tax_show)) {
if (length(tax_show) > 1) {
abund7 <- subset(abund5, as.character(abund5$Display) %in% tax_show)
}
if ((length(tax_show) == 1) && (tax_show != "all")) {
abund7 <- subset(abund5, as.character(abund5$Display) %in% tax_show)
}
### Or just show all
if ((length(tax_show) == 1) && (tax_show == "all")) {
tax_show <- nrow(TotalCounts)
abund7 <- subset(abund5, abund5$Display %in% TotalCounts[1:tax_show, "Display"])
abund7 <- filter(abund5, Display %in% unique(TotalCounts$Display)[1:tax_show])
} else if (!is.numeric(tax_show)) {
tax_show <- as.character(tax_show)
if (all(tolower(tax_show) == "all")) {
abund7 <- abund5
} else {
abund7 <- filter(abund5, Display %in% tax_show)
}
}

# filter returns a tibble in older versions
abund7 <- as.data.frame(abund7)

## Add a small constant to handle ggplot2 removal of 0 values in log scaled plots
if (!is.null(adjust_zero)) {
Expand All @@ -168,10 +150,10 @@ amp_boxplot <- function(data,
}

## plot the data
if (group_by == "Sample") {
if (group_by == sampleIDvarname) {
p <- ggplot(abund7, aes(x = Display, y = Abundance))
}
if (group_by != "Sample") {
if (group_by != sampleIDvarname) {
if (!is.null(order_group)) {
abund7$.Group <- factor(abund7$.Group, levels = rev(order_group))
}
Expand All @@ -191,7 +173,7 @@ amp_boxplot <- function(data,
isFALSE(normalise)) {
p <- p + ylab("Read counts")
} else {
p <- p + ylab("Read Abundance (%)")
p <- p + ylab("Relative Abundance (%)")
}

if (plot_flip == F) {
Expand Down
6 changes: 3 additions & 3 deletions man/amp_boxplot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c93d28d

Please sign in to comment.