From 5b5b3edac8affb932191ee7c79cf64c79570c03c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Taudi=C3=A8re?= Date: Thu, 28 Nov 2024 05:57:36 +0100 Subject: [PATCH 1/3] Ammend v0.10.3 --- .Rbuildignore | 1 + DESCRIPTION | 2 +- NEWS.md | 6 ++++-- R/MiscMetabar-package.R | 3 ++- R/dada_phyloseq.R | 9 +++++---- man/lulu_pq.Rd | 4 ++-- man/mumu_pq.Rd | 4 ++-- man/track_wkflow.Rd | 2 ++ tests/testthat/test_figures_taxo.R | 30 +++++++++++++++--------------- 9 files changed, 34 insertions(+), 27 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 234938ad..425bc4f4 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,3 +15,4 @@ ^citation.cff$ ^vignettes/articles$ ^cran-comments\.md$ +^.lintr diff --git a/DESCRIPTION b/DESCRIPTION index 8d736667..f5d94f0f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MiscMetabar Type: Package Title: Miscellaneous Functions for Metabarcoding Analysis -Version: 0.10.3 +Version: 0.10.4 Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182")) Description: Facilitate the description, transformation, exploration, and reproducibility of metabarcoding analyses. 'MiscMetabar' is mainly built on top of the 'phyloseq', 'dada2' and 'targets' R packages. It helps to build reproducible and robust bioinformatics pipelines in R. 'MiscMetabar' makes ecological analysis of alpha and beta-diversity easier, more reproducible and more powerful by integrating a large number of tools. Important features are described in Taudière A. (2023) . diff --git a/NEWS.md b/NEWS.md index 7df9df5f..ac86e405 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,12 @@ -# MiscMetabar 0.10.3 (in development) +# MiscMetabar 0.10.4 (in development) + +# MiscMetabar 0.10.3 - Add params `type`, `na_remove` and `verbose` to `ggvenn_pq()`. The type = "nb_seq" allow to plot Venn diagram with the number of shared sequences instead of shared ASV. - Add automatic report in json for the function `cutadapt_remove_primers()`. - Add param `verbose` to `track_wkflow()` and improve examples for `track_wkflow()` and `list_fastq_files` -# MiscMetabar 0.10.2 (in development) +# MiscMetabar 0.10.2 - Improve code thanks to {lintr} package - Add option `return_file_path` to `cutadapt_remove_primers()` in order to facilitate targets pipeline diff --git a/R/MiscMetabar-package.R b/R/MiscMetabar-package.R index 975106d3..c33615c6 100644 --- a/R/MiscMetabar-package.R +++ b/R/MiscMetabar-package.R @@ -25,7 +25,8 @@ if (getRversion() >= "2.15.1") { "ymin", ".group", "archetype", "nOTUid", "taxon", "total", "chim_rm", "condition", "physeq", "seq_tab_Pairs", "nb_samp", "silent", "X1_lim1", "X1_lim2", "aicc", "variable", "pos_letters", "alluvium", - "na_remove", "stratum", "to_lodes_form" + "na_remove", "stratum", "to_lodes_form", "clean_fastq", "clean_sam", + "samples_names_common" )) } diff --git a/R/dada_phyloseq.R b/R/dada_phyloseq.R index 263b2cf4..d3c4a152 100644 --- a/R/dada_phyloseq.R +++ b/R/dada_phyloseq.R @@ -226,6 +226,7 @@ clean_pq <- function(physeq, #' @param taxonomy_rank A vector of int. Define the column number of #' taxonomic rank `in physeq@tax_table` to compute the number of unique value. #' Default is NULL and do not compute values for any taxonomic rank +#' @param verbose (logical) If true, print some additional messages. #' @param ... Other arguments passed on to [clean_pq()] function. #' #' @return The number of sequences, clusters (e.g. OTUs, ASVs) and samples for @@ -1032,8 +1033,8 @@ read_pq <- function(path = NULL, #' Set to number of cpus/processors to use for the clustering #' @param id (default: 0.84) id for --usearch_global. #' @param vsearchpath (default: vsearch) path to vsearch. -#' @param verbose (logical) if true, print some additional messages. -#' @param clean_pq (logical) if true, empty samples and empty ASV are discarded +#' @param verbose (logical) If true, print some additional messages. +#' @param clean_pq (logical) If true, empty samples and empty ASV are discarded #' before clustering. #' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files #' @param ... Others args for function [lulu()] @@ -1188,8 +1189,8 @@ lulu_pq <- function(physeq, #' @param vsearchpath (default: vsearch) path to vsearch. #' @param mumupath path to mumu. See [mumu](https://github.com/frederic-mahe/mumu) #' for installation instruction -#' @param verbose (logical) if true, print some additional messages. -#' @param clean_pq (logical) if true, empty samples and empty ASV are discarded +#' @param verbose (logical) If true, print some additional messages. +#' @param clean_pq (logical) If true, empty samples and empty ASV are discarded #' before clustering. #' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files #' @return a list of for object diff --git a/man/lulu_pq.Rd b/man/lulu_pq.Rd index c5dd78bf..0ea988fe 100644 --- a/man/lulu_pq.Rd +++ b/man/lulu_pq.Rd @@ -26,9 +26,9 @@ Set to number of cpus/processors to use for the clustering} \item{vsearchpath}{(default: vsearch) path to vsearch.} -\item{verbose}{(logical) if true, print some additional messages.} +\item{verbose}{(logical) If true, print some additional messages.} -\item{clean_pq}{(logical) if true, empty samples and empty ASV are discarded +\item{clean_pq}{(logical) If true, empty samples and empty ASV are discarded before clustering.} \item{keep_temporary_files}{(logical, default: FALSE) Do we keep temporary files} diff --git a/man/mumu_pq.Rd b/man/mumu_pq.Rd index 20a4df46..544f8671 100644 --- a/man/mumu_pq.Rd +++ b/man/mumu_pq.Rd @@ -29,9 +29,9 @@ Set to number of cpus/processors to use for the clustering} \item{mumupath}{path to mumu. See \href{https://github.com/frederic-mahe/mumu}{mumu} for installation instruction} -\item{verbose}{(logical) if true, print some additional messages.} +\item{verbose}{(logical) If true, print some additional messages.} -\item{clean_pq}{(logical) if true, empty samples and empty ASV are discarded +\item{clean_pq}{(logical) If true, empty samples and empty ASV are discarded before clustering.} \item{keep_temporary_files}{(logical, default: FALSE) Do we keep temporary files} diff --git a/man/track_wkflow.Rd b/man/track_wkflow.Rd index 688a547d..89fb8f92 100644 --- a/man/track_wkflow.Rd +++ b/man/track_wkflow.Rd @@ -27,6 +27,8 @@ before clustering.} taxonomic rank \verb{in physeq@tax_table} to compute the number of unique value. Default is NULL and do not compute values for any taxonomic rank} +\item{verbose}{(logical) If true, print some additional messages.} + \item{...}{Other arguments passed on to \code{\link[=clean_pq]{clean_pq()}} function.} } \value{ diff --git a/tests/testthat/test_figures_taxo.R b/tests/testthat/test_figures_taxo.R index 6fd11791..d899f3d1 100644 --- a/tests/testthat/test_figures_taxo.R +++ b/tests/testthat/test_figures_taxo.R @@ -10,21 +10,21 @@ data_fungi_2trees <- GP_archae <- subset_taxa(GlobalPatterns, GlobalPatterns@tax_table[, 1] == "Archaea") -test_that("rotl_pq works with data_fungi dataset", { - skip_on_os("windows") - skip_on_cran() - library("rotl") - expect_s3_class(suppressWarnings(tr <- - rotl_pq(data_fungi, species_colnames = "Genus_species")), "phylo") - expect_s3_class(suppressWarnings( - rotl_pq( - data_fungi, - species_colnames = "Genus_species", - context_name = "Ascomycetes" - ) - ), "phylo") - expect_silent(plot(tr)) -}) +# test_that("rotl_pq works with data_fungi dataset", { +# skip_on_os("windows") +# skip_on_cran() +# library("rotl") +# expect_s3_class(suppressWarnings(tr <- +# rotl_pq(data_fungi, species_colnames = "Genus_species")), "phylo") +# expect_s3_class(suppressWarnings( +# rotl_pq( +# data_fungi, +# species_colnames = "Genus_species", +# context_name = "Ascomycetes" +# ) +# ), "phylo") +# expect_silent(plot(tr)) +# }) # test_that("heat_tree_pq works with data_fungi dataset", { # skip_on_cran() From d13f04ef5fb42c9868bcb096e88895a03cf1e897 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrien=20Taudi=C3=A8re?= Date: Thu, 28 Nov 2024 10:59:41 +0100 Subject: [PATCH 2/3] v 0.10.4 --- .github/workflows/test-coverage.yaml | 6 +- DESCRIPTION | 1 + NAMESPACE | 2 + NEWS.md | 6 +- R/MiscMetabar-package.R | 2 +- R/plot_functions.R | 271 +++++++++++++++++- _pkgdown.yml | 6 + docs/404.html | 2 +- docs/CODE_OF_CONDUCT.html | 2 +- docs/CONTRIBUTING.html | 2 +- docs/LICENSE.html | 2 +- docs/articles/MiscMetabar.html | 4 +- docs/articles/Reclustering.html | 6 +- docs/articles/Rules.html | 2 +- docs/articles/alpha-div.html | 4 +- docs/articles/beta-div.html | 4 +- docs/articles/explore_data.html | 2 +- docs/articles/fastq_quality_check.html | 2 +- docs/articles/filter.html | 4 +- docs/articles/import_export_track.html | 4 +- docs/articles/index.html | 2 +- docs/articles/states_of_fields_in_R.html | 2 +- docs/articles/tengeler.html | 4 +- docs/articles/tree_visualization.html | 4 +- docs/authors.html | 6 +- docs/index.html | 8 +- docs/news/index.html | 13 +- docs/pkgdown.yml | 2 +- docs/reference/LCBD_pq.html | 2 +- docs/reference/MiscMetabar-deprecated.html | 2 +- docs/reference/MiscMetabar-package.html | 2 +- docs/reference/SRS_curve_pq-1.png | Bin 74590 -> 74026 bytes docs/reference/SRS_curve_pq.html | 2 +- docs/reference/Tengeler2020_pq.html | 2 +- docs/reference/accu_plot-1.png | Bin 153682 -> 150818 bytes docs/reference/accu_plot-2.png | Bin 116850 -> 117195 bytes docs/reference/accu_plot.html | 4 +- .../accu_plot_balanced_modality.html | 2 +- docs/reference/accu_samp_threshold.html | 2 +- docs/reference/add_blast_info.html | 2 +- docs/reference/add_dna_to_phyloseq.html | 2 +- docs/reference/add_funguild_info.html | 2 +- docs/reference/add_info_to_sam_data.html | 2 +- docs/reference/add_new_taxonomy_pq.html | 2 +- docs/reference/adonis_pq.html | 2 +- docs/reference/adonis_rarperm_pq.html | 2 +- docs/reference/all_object_size.html | 2 +- docs/reference/ancombc_pq.html | 2 +- docs/reference/are_modality_even_depth.html | 2 +- docs/reference/as_binary_otu_table.html | 2 +- docs/reference/biplot_pq.html | 2 +- docs/reference/blast_pq.html | 2 +- docs/reference/blast_to_derep.html | 2 +- docs/reference/blast_to_phyloseq.html | 2 +- docs/reference/build_phytree_pq.html | 8 +- docs/reference/chimera_detection_vs.html | 62 ++-- docs/reference/chimera_removal_vs.html | 2 +- docs/reference/circle_pq.html | 2 +- docs/reference/clean_pq.html | 2 +- docs/reference/compare_pairs_pq.html | 2 +- docs/reference/count_seq.html | 2 +- docs/reference/cutadapt_remove_primers.html | 2 +- docs/reference/data_fungi.html | 2 +- docs/reference/data_fungi_mini.html | 2 +- docs/reference/data_fungi_sp_known.html | 2 +- docs/reference/diff_fct_diff_class.html | 2 +- docs/reference/dist_bycol.html | 2 +- docs/reference/dist_pos_control.html | 2 +- docs/reference/distri_1_taxa.html | 2 +- docs/reference/fac2col.html | 2 +- docs/reference/filter_asv_blast.html | 2 +- docs/reference/filter_trim.html | 2 +- docs/reference/formattable_pq.html | 2 +- docs/reference/funguild_assign.html | 2 +- docs/reference/funky_color.html | 2 +- docs/reference/get_file_extension.html | 2 +- docs/reference/get_funguild_db.html | 2 +- docs/reference/ggaluv_pq.html | 2 +- docs/reference/ggbetween_pq.html | 2 +- docs/reference/ggscatt_pq.html | 2 +- docs/reference/ggvenn_pq.html | 4 +- docs/reference/glmutli_pq.html | 2 +- docs/reference/graph_test_pq.html | 2 +- docs/reference/hill_pq.html | 2 +- docs/reference/hill_test_rarperm_pq.html | 2 +- docs/reference/hill_tuckey_pq.html | 2 +- docs/reference/iNEXT_pq.html | 2 +- docs/reference/index.html | 31 +- docs/reference/is_cutadapt_installed.html | 4 +- docs/reference/is_falco_installed.html | 2 +- docs/reference/is_krona_installed.html | 2 +- docs/reference/is_mumu_installed.html | 2 +- docs/reference/is_swarm_installed.html | 2 +- docs/reference/is_vsearch_installed.html | 2 +- docs/reference/krona.html | 2 +- docs/reference/list_fastq_files.html | 12 +- docs/reference/lulu.html | 2 +- docs/reference/lulu_pq.html | 11 +- docs/reference/merge_krona.html | 2 +- docs/reference/merge_samples2.html | 2 +- docs/reference/merge_taxa_vec.html | 2 +- docs/reference/multi_biplot_pq.html | 2 +- docs/reference/multipatt_pq.html | 2 +- docs/reference/multiplot.html | 2 +- docs/reference/multitax_bar_pq.html | 2 +- docs/reference/mumu_pq.html | 6 +- docs/reference/normalize_prop_pq.html | 2 +- docs/reference/perc.html | 2 +- docs/reference/phyloseq_to_edgeR.html | 2 +- docs/reference/physeq_or_string_to_dna.html | 40 +-- docs/reference/plot_LCBD_pq.html | 2 +- docs/reference/plot_SCBD_pq.html | 2 +- docs/reference/plot_ancombc_pq.html | 2 +- docs/reference/plot_deseq2_pq.html | 2 +- docs/reference/plot_edgeR_pq.html | 2 +- docs/reference/plot_guild_pq.html | 2 +- docs/reference/plot_mt.html | 2 +- docs/reference/plot_tax_pq.html | 2 +- docs/reference/plot_tsne_pq.html | 2 +- docs/reference/plot_var_part_pq.html | 2 +- docs/reference/postcluster_pq.html | 6 +- docs/reference/psmelt_samples_pq.html | 2 +- .../rarefy_sample_count_by_modality.html | 2 +- docs/reference/read_pq.html | 2 +- docs/reference/rename_samples.html | 2 +- docs/reference/rename_samples_otu_table.html | 2 +- docs/reference/reorder_taxa_pq.html | 2 +- docs/reference/ridges_pq.html | 2 +- docs/reference/rotl_pq.html | 13 +- docs/reference/sam_data_matching_names.html | 2 +- .../reference/sample_data_with_new_names.html | 2 +- docs/reference/sankey_pq.html | 2 +- docs/reference/save_pq.html | 2 +- docs/reference/search_exact_seq_pq.html | 2 +- docs/reference/select_one_sample.html | 2 +- docs/reference/select_taxa-methods.html | 2 +- docs/reference/signif_ancombc.html | 2 +- docs/reference/simplify_taxo.html | 2 +- docs/reference/subsample_fastq.html | 2 +- docs/reference/subset_samples_pq.html | 2 +- docs/reference/subset_taxa_pq.html | 2 +- docs/reference/subset_taxa_tax_control.html | 2 +- docs/reference/summary_plot_pq.html | 2 +- docs/reference/swarm_clustering.html | 2 +- docs/reference/tax_bar_pq.html | 2 +- docs/reference/tax_datatable.html | 2 +- docs/reference/taxa_as_columns.html | 2 +- docs/reference/taxa_as_rows.html | 2 +- docs/reference/taxa_only_in_one_level.html | 2 +- docs/reference/tbl_sum_samdata.html | 2 +- docs/reference/track_wkflow.html | 6 +- docs/reference/track_wkflow_samples.html | 2 +- docs/reference/transp.html | 2 +- docs/reference/treemap_pq.html | 2 +- docs/reference/tsne_pq.html | 2 +- docs/reference/unique_or_na.html | 2 +- docs/reference/upset_pq.html | 2 +- docs/reference/upset_test_pq.html | 2 +- docs/reference/var_par_pq.html | 2 +- docs/reference/var_par_rarperm_pq.html | 2 +- docs/reference/venn_pq.html | 2 +- docs/reference/verify_pq.html | 2 +- docs/reference/vs_search_global.html | 2 +- docs/reference/vsearch_clustering.html | 2 +- docs/reference/write_pq.html | 2 +- docs/search.json | 2 +- docs/sitemap.xml | 2 + man/ggvenn_pq.Rd | 2 +- 168 files changed, 580 insertions(+), 250 deletions(-) diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index d78a35fb..6659b8ca 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -36,7 +36,7 @@ jobs: && cd ./mumu/ \ && make \ && make check \ - && sudo make install + && sudo make install - name: Install swarm run: | @@ -67,11 +67,11 @@ jobs: - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package - name: Upload coverage reports to Codecov with GitHub Action uses: codecov/codecov-action@v4.2.0 env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} \ No newline at end of file + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/DESCRIPTION b/DESCRIPTION index f5d94f0f..b6678523 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -44,6 +44,7 @@ Suggests: httr, iNEXT, indicspecies, + IRanges, jsonlite, knitr, magrittr, diff --git a/NAMESPACE b/NAMESPACE index f9e6345d..6b760058 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -94,6 +94,8 @@ export(plot_edgeR_phyloseq) export(plot_edgeR_pq) export(plot_guild_pq) export(plot_mt) +export(plot_refseq_extremity_pq) +export(plot_refseq_pq) export(plot_tax_pq) export(plot_tsne_pq) export(plot_var_part_pq) diff --git a/NEWS.md b/NEWS.md index ac86e405..046f2b53 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -# MiscMetabar 0.10.4 (in development) +# MiscMetabar 0.10.5 (in development) + +# MiscMetabar 0.10.4 + +- Add functions `plot_refseq_pq()` and `plot_refseq_extremity_pq()` to plot the proportion of each nucleotide and the diversity of nucleotides from `@refseq` of a phyloseq object. # MiscMetabar 0.10.3 diff --git a/R/MiscMetabar-package.R b/R/MiscMetabar-package.R index c33615c6..385cecd3 100644 --- a/R/MiscMetabar-package.R +++ b/R/MiscMetabar-package.R @@ -26,7 +26,7 @@ if (getRversion() >= "2.15.1") { "chim_rm", "condition", "physeq", "seq_tab_Pairs", "nb_samp", "silent", "X1_lim1", "X1_lim2", "aicc", "variable", "pos_letters", "alluvium", "na_remove", "stratum", "to_lodes_form", "clean_fastq", "clean_sam", - "samples_names_common" + "samples_names_common", "seq_id" )) } diff --git a/R/plot_functions.R b/R/plot_functions.R index a8524cda..530c5527 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -1255,7 +1255,7 @@ venn_pq <- #' data = ggVennDiagram::venn_setedge(res_venn), #' show.legend = FALSE, linewidth = 2 #' ) + -#' scale_color_manual(values = c("red", "red","blue")) + +#' scale_color_manual(values = c("red", "red", "blue")) + #' # 3. set label layer #' geom_text(aes(X, Y, label = name), #' data = ggVennDiagram::venn_setlabel(res_venn) @@ -1343,7 +1343,7 @@ ggvenn_pq <- function(physeq = NULL, nb_seq <- c(nb_seq, sum(physeq@otu_table[physeq@sam_data[[fact]] == f, ], na.rm = TRUE)) - if(type == "nb_seq") { + if (type == "nb_seq") { res[[f]] <- unlist(sapply(res[[f]], function(x) { paste0(x, "_", seq(1, taxa_sums(physeq)[[x]])) })) @@ -4264,3 +4264,270 @@ ggaluv_pq <- function(physeq, return(p) } ################################################################################ + + + +################################################################################ +#' Plot the nucleotide proportion at both extremity of the sequences +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' It is a useful function to check for the absence of unwanted patterns caused +#' for example by Illumina adaptator or bad removal of primers. +#' +#' If `hill_scale` is not null, Hill diversity number are used to represent the distribution +#' of the diversity (equitability) along the sequences. +#' +#' @inheritParams clean_pq +#' @param first_n (int, default 10) The number of nucleotides to plot the 5' extremity. +#' @param last_n (int, default 10) The number of nucleotides to plot the 3' extremity. +#' @param hill_scales (vector) A vector defining the Hill number wanted. Set to NULL if +#' you don't want to plot Hill diversity metrics. +#' @param min_width (int, default 0) Select only the sequences from physeq@refseq with using a +#' minimum length threshold. If `first_n` is superior to the minimum length of the +#' references sequences, you must use min_width to filter out the narrower sequences +#' @return A list of 4 objects +#' - p_start and p_last are the ggplot object representing respectively the start and +#' the end of the sequences. +#' - df_start and df_last are the data.frame corresponding to the ggplot object. +#' @export +#' @author Adrien Taudière +#' @examples +#' res1 <- plot_refseq_extremity_pq(data_fungi) +#' names(res1) +#' res1$plot_start +#' res1$plot_last +#' +#' res2 <- plot_refseq_extremity_pq(data_fungi, first_n = 200, last_n = 100) +#' res2$plot_start +#' res2$plot_last +#' +#' plot_refseq_extremity_pq(data_fungi, +#' first_n = 400, +#' min_width = 400, +#' hill_scales = NULL +#' )$plot_start + +#' geom_line(aes(y = value, x = seq_id, color = name), alpha = 0.4, linewidth = 0.2) +#' +#' plot_refseq_extremity_pq(data_fungi, +#' first_n = NULL, +#' last_n = 400, +#' min_width = 400, +#' hill_scales = c(3) +#' )$plot_last +plot_refseq_extremity_pq <- function( + physeq, + first_n = 10, + last_n = 10, + hill_scales = c(1, 2), + min_width = 0) { + + if (min_width > 0) { + cond <- Biostrings::width(physeq@refseq) > min_width + names(cond) <- taxa_names(physeq) + physeq <- clean_pq(subset_taxa_pq(physeq, cond)) + } + + if (!is.null(first_n)) { + end_n <- Biostrings::width(physeq@refseq) + end_n[end_n < first_n] <- first_n + + letters_sequences <- + c(strsplit(as.vector(IRanges::narrow(physeq@refseq, end = end_n)), + split = "" + )) + letters_sequences <- lapply(letters_sequences, `length<-`, + max(lengths(letters_sequences))) + + tib_interm <- t(data.frame(letters_sequences)) + nucleotide_first_interm <- data.frame( + "nb_A" = colSums(tib_interm == "A", na.rm = T) / nrow(tib_interm), + "nb_C" = colSums(tib_interm == "C", na.rm = T) / nrow(tib_interm), + "nb_G" = colSums(tib_interm == "G", na.rm = T) / nrow(tib_interm), + "nb_T" = colSums(tib_interm == "T", na.rm = T) / nrow(tib_interm), + "seq_id" = 1:ncol(tib_interm) + ) |> + rowwise() |> + mutate("max_letter_prob" = max(across(starts_with("nb_")))) + + nucleotide_first <- nucleotide_first_interm |> + tidyr::pivot_longer(cols = starts_with("nb_")) + + + p_start <- ggplot(nucleotide_first) + + geom_point(aes(x = seq_id, y = value, color = name)) + + xlim(c(0, first_n)) + + labs(subtitle = paste0( + "Proportion of nucleotide along the ", + first_n, + " first nucleotides \n in sequences of ", + ntaxa(physeq), + " taxa representing ", + sum(physeq@otu_table), + " sequences in ", + nsamples(physeq), + " samples." + )) + + if (!is.null(hill_scales)) { + renyi_nucleotide <- + vegan::renyi(nucleotide_first_interm[, c("nb_A", "nb_C", "nb_G", "nb_T")], + scales = hill_scales) + + if (inherits(renyi_nucleotide, "numeric")) { + renyi_nucleotide <- data.frame(renyi_nucleotide) + colnames(renyi_nucleotide) <- hill_scales + } + if (sum(hill_scales == 0) > 0) { + renyi_nucleotide <- renyi_nucleotide |> + rename("Hill 0 (Shannon)" = "0") + } + if (sum(hill_scales == 1) > 0) { + renyi_nucleotide <- renyi_nucleotide |> + rename("Hill 1 (Simpson)" = "1") + } + if (sum(hill_scales == 2) > 0) { + renyi_nucleotide <- renyi_nucleotide |> + rename("Hill 2 (Shannon)" = "2") + } + + renyi_nucleotide <- renyi_nucleotide |> + tidyr::pivot_longer(cols = everything()) + + renyi_nucleotide$seq_id <- + sort(rep(1:ncol(tib_interm), + times = nrow(renyi_nucleotide) / ncol(tib_interm))) + + p_start <- p_start + + geom_line(data = renyi_nucleotide, aes(x = seq_id, y = value, color = name)) + } + } else { + p_start <- NULL + nucleotide_first_interm <- NULL + } + + if (!is.null(last_n)) { + tib_interm_last <- t(data.frame(letters = c( + strsplit(as.vector( + IRanges::narrow(physeq@refseq, + start = Biostrings::width(physeq@refseq) - last_n)), + split = "") + ))) + + nucleotide_last_interm <- data.frame( + "nb_A" = colSums(tib_interm_last == "A") / nrow(tib_interm_last), + "nb_C" = colSums(tib_interm_last == "C") / nrow(tib_interm_last), + "nb_G" = colSums(tib_interm_last == "G") / nrow(tib_interm_last), + "nb_T" = colSums(tib_interm_last == "T") / nrow(tib_interm_last), + "seq_id" = 1:ncol(tib_interm_last) + ) + + nucleotide_last <- nucleotide_last_interm |> + tidyr::pivot_longer(cols = starts_with("nb_")) + + p_last <- ggplot(nucleotide_last) + + geom_point(aes(x = seq_id, y = value, color = name)) + + labs(subtitle = paste0( + "Proportion of nucleotide along the ", + last_n, + " last nucleotides \n in sequences of ", + ntaxa(physeq), + " taxa representing ", + sum(physeq@otu_table), + " sequences in ", + nsamples(physeq), + " samples." + )) + + if (!is.null(hill_scales)) { + renyi_nucleotide <- + vegan::renyi(nucleotide_last_interm[, c("nb_A", "nb_C", "nb_G", "nb_T")], + scales = hill_scales + ) + + if (inherits(renyi_nucleotide, "numeric")) { + renyi_nucleotide <- data.frame(renyi_nucleotide) + colnames(renyi_nucleotide) <- hill_scales + } + if (sum(hill_scales == 0) > 0) { + renyi_nucleotide <- renyi_nucleotide |> + rename("Hill 0 (Shannon)" = "0") + } + if (sum(hill_scales == 1) > 0) { + renyi_nucleotide <- renyi_nucleotide |> + rename("Hill 1 (Simpson)" = "1") + } + if (sum(hill_scales == 2) > 0) { + renyi_nucleotide <- renyi_nucleotide |> + rename("Hill 2 (Shannon)" = "2") + } + + renyi_nucleotide <- renyi_nucleotide |> + tidyr::pivot_longer(cols = everything()) + + renyi_nucleotide$seq_id <- + sort(rep(1:ncol(tib_interm_last), + times = nrow(renyi_nucleotide) / ncol(tib_interm_last) + )) + + p_last <- p_last + + geom_line(data = renyi_nucleotide, aes( + x = seq_id, + y = value, + color = name + )) + } + } else { + p_last <- NULL + nucleotide_last_interm <- NULL + } + + return(list( + "plot_start" = p_start, + "plot_last" = p_last, + "df_start" = nucleotide_first_interm, + "df_end" = nucleotide_last_interm + )) +} +################################################################################ + + + + + +################################################################################ +#' Plot the nucleotide proportion of references sequences +#' +#' @description +#' +#' +#' lifecycle-experimental +#' +#' It is a wrapper of the function `plot_refseq_extremity_pq()`. See +#' ?plot_refseq_extremity_pq for more examples. +#' +#' If `hill_scale` is not null, Hill diversity number are used to represent the distribution +#' of the diversity (equitability) along the sequences. +#' +#' @inheritParams clean_pq +#' @param first_n (int, default 10) The number of nucleotides to plot the 5' extremity. +#' @param last_n (int, default 10) The number of nucleotides to plot the 3' extremity. +#' @param hill_scales (vector) A vector defining the Hill number wanted. Set to NULL if +#' you don't want to plot Hill diversity metrics. +#' @param min_width (int, default 0) Select only the sequences from physeq@refseq with using a +#' minimum length threshold. If `first_n` is superior to the minimum length of the +#' references sequences, you must use min_width to filter out the narrower sequences +#' @return A ggplot2 object +#' @export +#' @author Adrien Taudière +#' @examples +#' plot_refseq_pq(data_fungi) +#' plot_refseq_pq(data_fungi, hill_scales = c(2), first_n = 300) +#' +plot_refseq_pq <- function(physeq, hill_scales = NULL, first_n = min(Biostrings::width(physeq@refseq)), last_n = NULL, min_width = first_n) { + res <- plot_refseq_extremity_pq(physeq = physeq, hill_scales = hill_scales, first_n = first_n, last_n = last_n, min_width = min_width) + return(res$plot_start) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 69b483c1..3c83eaf8 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -114,6 +114,12 @@ reference: contents: - build_phytree_pq +- subtitle: References sequences + contents: + - plot_refseq_extremity_pq + - plot_refseq_pq + - add_dna_to_phyloseq + - subtitle: Others contents: - add_dna_to_phyloseq diff --git a/docs/404.html b/docs/404.html index be9a96d5..0e754e30 100644 --- a/docs/404.html +++ b/docs/404.html @@ -27,7 +27,7 @@ MiscMetabar - 0.10.3 + 0.10.4