Skip to content

Commit

Permalink
Merge branch 'dev' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
adrientaudiere authored Jan 22, 2024
2 parents d2d17ec + d0b7c45 commit c4923a7
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 25 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- Add functions `get_funguild_db()` and `funguild_assign()` from the [FUNGuildR](https://github.com/brendanf/FUNGuildR/) package into MiscMetabar to decrease package dependencies
- Remove all dependencies from packages not available on CRAN or Bioconductor. Improve code using `goodpractice::gp`() and `devtools::check()` function
- Add messages in various cases (NA in samples data, low number of sequences in samples, low number of sequences by taxa) when using `verify_pq()` with args `verbose=TRUE`
- Fix a bug in `mutitax_bar_pq()` when using `nb_seq = FALSE`

# MiscMetabar 0.52

Expand Down
78 changes: 56 additions & 22 deletions R/plot_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,9 @@ venn_pq <-
#'
#' Note that you can use ggplot2 function to customize the plot
#' for ex. `+ scale_fill_distiller(palette = "BuPu", direction = 1)`
#' and `+ scale_x_continuous(expand = expansion(mult = 0.5))`. See
#' examples.
#'
#' @inheritParams clean_pq
#' @param fact (required): Name of the factor to cluster samples by modalities.
#' Need to be in \code{physeq@sam_data}.
Expand Down Expand Up @@ -991,9 +994,10 @@ venn_pq <-
#' data_fungi@sam_data$Height %in% c("Low", "High"))
#' ggvenn_pq(data_fungi2, fact = "Height")
#'
#' ggvenn_pq(data_fungi, fact = "Height", add_nb_sequences = TRUE, set_size = 4)
#' ggvenn_pq(data_fungi, fact = "Height", add_nb_sequences = TRUE, set_size = 4)
#' ggvenn_pq(data_fungi, fact = "Height", rarefy_before_merging = TRUE)
#' ggvenn_pq(data_fungi, fact = "Height", rarefy_after_merging = TRUE)
#' ggvenn_pq(data_fungi, fact = "Height", rarefy_after_merging = TRUE) +
#' scale_x_continuous(expand = expansion(mult = 0.5))
#'
#' @export
#' @author Adrien Taudière
Expand Down Expand Up @@ -2352,12 +2356,13 @@ plot_tax_pq <-


################################################################################
#' Plot taxonomic distribution across 3 levels
#' Plot taxonomic distribution across 3 taxonomic levels and optionally
#' one sample factor
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Note that lvl3need to be nested in lvl2 which need to be nested
#' Note that lvl3 need to be nested in lvl2 which need to be nested
#' in lvl1
#'
#' @inheritParams clean_pq
Expand All @@ -2384,33 +2389,44 @@ plot_tax_pq <-
#' multitax_bar_pq(data_fungi_sp_known, "Phylum", "Class", "Order",
#' nb_seq = FALSE, log10trans = FALSE
#' )
multitax_bar_pq <- function(physeq,
lvl1,
lvl2,
lvl3,
fact = NULL,
nb_seq = TRUE,
log10trans = TRUE) {
if (!nb_seq) {
physeq <- as_binary_otu_table(physeq)
}
psm <- psmelt(physeq) %>% filter(!is.na(.data[[lvl1]]))
psm <- psm %>%
multitax_bar_pq <- function(
physeq,
lvl1,
lvl2,
lvl3,
fact = NULL,
nb_seq = TRUE,
log10trans = TRUE) {
psm_1 <- psmelt(physeq) %>%
filter(Abundance > 0) %>%
filter(!is.na(.data[[lvl1]])) %>%
filter(!is.na(.data[[lvl3]])) %>%
filter(!is.na(.data[[lvl2]]))

if (is.null(fact)) {
psm_2 <- psm_1 %>%
group_by(OTU) %>%
summarise(Abundance = sum(Abundance))

psm <- inner_join(psm_2, psm_1[,c("OTU", lvl1, lvl2, lvl3)],
by = join_by("OTU" == "OTU"), multiple =
"first")

if (!nb_seq) {
psm$Abundance = 1
}

data_gg <- tibble(
"Abundance" = tapply(psm$Abundance, psm[[lvl3]], sum),
"LVL1" = tapply(psm[[lvl1]], psm[[lvl3]], unique),
"LVL2" = tapply(psm[[lvl2]], psm[[lvl3]], unique),
"LVL3" = tapply(psm[[lvl3]], psm[[lvl3]], unique)
)

if (log10trans) {
data_gg$Abundance <- log10(data_gg$Abundance)
}

p <- ggplot(data_gg, aes(
x = Abundance,
fill = LVL1,
Expand All @@ -2421,18 +2437,32 @@ multitax_bar_pq <- function(physeq,
theme(strip.text.y.right = element_text(angle = 0)) +
theme(legend.position = "none")
} else {

psm_2 <- psm %>%
group_by(OTU,.data[[fact]]) %>%
summarise(Abundance = sum(Abundance)) %>%
filter(Abundance > 0)

psm <- inner_join(psm_2, psm_1[,c("OTU", lvl1, lvl2, lvl3, fact)],
by = join_by("OTU" == "OTU"), multiple =
"first")

if (!nb_seq) {
psm$Abundance = 1
}

data_gg <- tibble(
"Abundance" = tapply(psm$Abundance, paste(psm[[fact]], psm[[lvl3]]), sum),
"FACT" = tapply(psm[[fact]], paste(psm[[fact]], psm[[lvl3]]), unique),
"LVL1" = tapply(psm[[lvl1]], paste(psm[[fact]], psm[[lvl3]]), unique),
"LVL2" = tapply(psm[[lvl2]], paste(psm[[fact]], psm[[lvl3]]), unique),
"LVL3" = tapply(psm[[lvl3]], paste(psm[[fact]], psm[[lvl3]]), unique)
)

if (log10trans) {
data_gg$Abundance <- log10(data_gg$Abundance)
}

p <- ggplot(data_gg, aes(
x = Abundance,
fill = LVL1,
Expand Down Expand Up @@ -2667,14 +2697,17 @@ SRS_curve_pq <- function(physeq, clean_pq = FALSE, ...) {
#' @author Adrien Taudière
#'
#'
iNEXT_pq <- function(physeq, merge_sample_by = NULL, ...) {
iNEXT_pq <- function(physeq,
merge_sample_by = NULL,
...) {
if (!is.null(merge_sample_by)) {
physeq <- merge_samples2(physeq, merge_sample_by)
physeq <- clean_pq(physeq, force_taxa_as_columns = TRUE)
}

df <- data.frame(t(as.matrix(unclass(physeq@otu_table))))
res_iNEXT <- iNEXT::iNEXT(df, ...)
return(res_iNEXT)
}
################################################################################

Expand Down Expand Up @@ -2937,7 +2970,8 @@ upset_test_pq <-

psm2 <- data.frame(lapply(psm, function(col) {
tapply(col, paste0(psm$OTU), function(vec) {
diff_fct_diff_class(vec, numeric_fonction = numeric_fonction, na.rm = TRUE)
diff_fct_diff_class(vec, numeric_fonction = numeric_fonction,
na.rm = TRUE)
})
})) %>% arrange(., desc(Abundance))

Expand Down
6 changes: 3 additions & 3 deletions vignettes/Reclustering.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,11 @@ library(MiscMetabar)

# Re-clustering ASVs

**ASV** (stands for *Amplicon Sequence Variant*; also called **ESV** for Exact Amplicon Variant) is a DNA sequence obtained from high-throughput analysis of marker genes. **OTU* are a group of closely related individuals created by clustering sequences based on a threshold of similarity. An ASV is a special case of an OTU with a similarity threshold of 100%.
**ASV** (stands for *Amplicon Sequence Variant*; also called **ESV** for Exact Amplicon Variant) is a DNA sequence obtained from high-throughput analysis of marker genes. **OTU** are a group of closely related individuals created by clustering sequences based on a threshold of similarity. An ASV is a special case of an OTU with a similarity threshold of 100%. A third concept is the zero-radius OTU **zOTU** [@edgar2016] which is the same concept than ASV but compute with other softwares than [dada](https://github.com/benjjneb/dada2) (e.g. [vsearch](https://github.com/torognes/vsearch)).

The choice between ASV and OTU is important because they lead to different results [@joos2020], Box 2 in [@tedersoo2022]). Most articles recommend making a choice depending on the question. For example, ASV may be better than OTU for describing a group of very closely related species. In addition, ASV are comparable across different datasets (obtained using identical marker genes). On the other hand, [@tedersoo2022] report that ASV approaches overestimate the richness of common fungal species (due to haplotype variation), but underestimate the richness of rare species. They therefore recommend the use of OTUs in metabarcoding analyses of fungal communities. Finally, [@kauserud2023] argues that the ASV term falls within the original OTU term and recommends adopting only the OTU terms, but with a concise and clear report on how the OTUs were generated.
The choice between ASV and OTU is important because they lead to different results (@joos2020, Box 2 in @tedersoo2022, @chiarello2022). Most articles recommend making a choice depending on the question. For example, ASV may be better than OTU for describing a group of very closely related species. In addition, ASV are comparable across different datasets (obtained using identical marker genes). On the other hand, [@tedersoo2022] report that ASV approaches overestimate the richness of common fungal species (due to haplotype variation), but underestimate the richness of rare species. They therefore recommend the use of OTUs in metabarcoding analyses of fungal communities. Finally, [@kauserud2023] argues that the ASV term falls within the original OTU term and recommends adopting only the OTU terms, but with a concise and clear report on how the OTUs were generated.

Recent articles ([@forster2019; @antich2021]) propose to use both approach together. They recommend (i) using ASV to denoise the dataset and (ii) for some questions, clustering the ASV sequences into OTUs. This is the goal of the function `asv2otu()`, using either the `DECIPHER::Clusterize` function from R or the [vsearch](https://github.com/torognes/vsearch) software.
Recent articles [@forster2019; @antich2021] propose to use both approach together. They recommend (i) using ASV to denoise the dataset and (ii) for some questions, clustering the ASV sequences into OTUs. This is the goal of the function `asv2otu()`, using either the `DECIPHER::Clusterize` function from R or the [vsearch](https://github.com/torognes/vsearch) software.

## Using decipher or Vsearch algorithm
```{r}
Expand Down
33 changes: 33 additions & 0 deletions vignettes/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,39 @@ @article{McCauley2023
url = {https://app.dimensions.ai/details/publication/pub.1151239188}
}

@article{chiarello2022,
title={Ranking the biases: The choice of OTUs vs. ASVs in 16S rRNA amplicon data analysis has stronger effects on diversity measures than rarefaction and OTU identity threshold},
author={Chiarello, Marl{\`e}ne and McCauley, Mark and Vill{\'e}ger, S{\'e}bastien and Jackson, Colin R},
journal={PLoS One},
volume={17},
number={2},
pages={e0264443},
year={2022},
publisher={Public Library of Science San Francisco, CA USA},
doi={10.1371/journal.pone.0264443}
}

@article{edgar2016,
title={UNOISE2: improved error-correction for Illumina 16S and ITS amplicon sequencing},
author={Edgar, Robert C},
journal={BioRxiv},
pages={081257},
year={2016},
publisher={Cold Spring Harbor Laboratory}
}

@article{segata2011,
title={Metagenomic biomarker discovery and explanation},
author={Segata, Nicola and Izard, Jacques and Waldron, Levi and Gevers, Dirk and Miropolsky, Larisa and Garrett, Wendy S and Huttenhower, Curtis},
journal={Genome biology},
volume={12},
pages={1--18},
year={2011},
publisher={Springer},
doi={10.1186/gb-2011-12-6-r60}
}


@software{mclaren2020,
author = {Michael McLaren},
title = {mikemc/speedyseq: speedyseq v0.2.0},
Expand Down

0 comments on commit c4923a7

Please sign in to comment.