diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 78690fa1..a860475a 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -47,6 +47,6 @@ jobs: with: upload-snapshots: true #Character vector of arguments to pass to ⁠R CMD check⁠. - args: 'c("--as-cran", "--no-tests", "--no-vignettes", "--no-manual", "--no-build-vignettes", "--ignore-vignettes")' + args: 'c("--as-cran", "--no-tests", "--no-vignettes", "--no-manual", "--no-build-vignettes", "--ignore-vignettes", "--no-examples")' #Character vector of arguments to pass to ⁠R CMD build. build_args: 'c("--no-build-vignettes", "--no-manual")' diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 932412b7..d63a8441 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -43,6 +43,7 @@ jobs: git clone https://github.com/torognes/swarm.git \ && cd swarm/ \ && make + sudo cp ../swarm /usr/bin/swarm - name: Install cutadapt run: | diff --git a/DESCRIPTION b/DESCRIPTION index a3762c36..7a6274c8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MiscMetabar Type: Package Title: Miscellaneous Functions for Metabarcoding Analysis -Version: 0.7.1 +Version: 0.7.2 Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182")) Description: The MiscMetabar package aims to facilitate the description, transformation, exploration, and reproducibility of metabarcoding analysis. Mainly build on the top of phyloseq, dada2 R packages. MiscMetabar help to build reproducible and robust bioinformatic pipeline in R. MiscMetabar make ecological analysis of alpha and beta-diversity simple and powerfull by integrating a large number of analysis, some of them from other R packages. diff --git a/NEWS.md b/NEWS.md index 6c634af2..fc971f7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,7 +32,7 @@ - Add functions `get_funguild_db()` and `funguild_assign()` from the [FUNGuildR](https://github.com/brendanf/FUNGuildR/) package into MiscMetabar to decrease package dependencies - Remove all dependencies from packages not available on CRAN or Bioconductor. Improve code using `goodpractice::gp`() and `devtools::check()` function - Add messages in various cases (NA in samples data, low number of sequences in samples, low number of sequences by taxa) when using `verify_pq()` with args `verbose=TRUE` -- Fix a bug in `mutitax_bar_pq()` when using `nb_seq = FALSE` +- Fix a bug in `multitax_bar_pq()` when using `nb_seq = FALSE` # MiscMetabar 0.52 diff --git a/R/beta_div_test.R b/R/beta_div_test.R index 0301cc7a..d57e6e74 100644 --- a/R/beta_div_test.R +++ b/R/beta_div_test.R @@ -589,6 +589,16 @@ multipatt_pq <- function(physeq, #' ), color = "darkgrey") + #' geom_point() #' +#' res_time <- ancombc_pq( +#' subset_taxa_pq( +#' data_fungi_sp_known, +#' taxa_sums(data_fungi_sp_known) > 5000 +#' ), +#' fact = "Time", +#' levels_fact = c("0", "15"), +#' tax_level = "Family", +#' verbose = TRUE +#' ) #' @author Adrien Taudière #' @details #' This function is mainly a wrapper of the work of others. diff --git a/R/blast.R b/R/blast.R index 0c2ed144..0bb919fb 100644 --- a/R/blast.R +++ b/R/blast.R @@ -45,7 +45,7 @@ #' #' @examples #' \dontrun{ -#' # data(data_fungi) +#' #' blastpath <- "...YOUR_PATH_TO_BLAST..." #' blast_to_phyloseq(data_fungi, #' seq2search = system.file("extdata", "ex.fasta", diff --git a/R/controls.R b/R/controls.R index 1437efe4..822bb51a 100644 --- a/R/controls.R +++ b/R/controls.R @@ -145,7 +145,7 @@ dist_pos_control <- function(physeq, samples_names, method = "bray") { #' @export #' #' @examples -#' # data(data_fungi) +#' #' #' subset_taxa_tax_control(data_fungi, #' as.numeric(data_fungi@otu_table[, 300]), diff --git a/R/dada_phyloseq.R b/R/dada_phyloseq.R index f2d9f2f5..88a41266 100644 --- a/R/dada_phyloseq.R +++ b/R/dada_phyloseq.R @@ -437,7 +437,7 @@ track_wkflow <- function(list_of_objects, #' @author Adrien Taudière #' #' @examples -#' # data(data_fungi) +#' #' tree_A10_005 <- subset_samples(data_fungi, Tree_name == "A10-005") #' track_wkflow_samples(tree_A10_005) track_wkflow_samples <- function(list_pq_obj, ...) { @@ -490,18 +490,27 @@ track_wkflow_samples <- function(list_pq_obj, ...) { #' @param tax_adjust (Default 0) See the man page #' of [merge_taxa_vec()] for more details. #' To conserved the taxonomic rank of the most abundant ASV, -#' set tax_adjust to 0 (default) +#' set tax_adjust to 0 (default). For the moment only tax_adjust = 0 is +#' robust #' @param vsearch_cluster_method (default: "--cluster_size) See other possible #' methods in the [vsearch pdf manual](https://github.com/torognes/vsearch/releases/download/v2.23.0/vsearch_manual.pdf) (e.g. `--cluster_size` or `--cluster_smallmem`) #' - `--cluster_fast` : Clusterize the fasta sequences in filename, automatically sort by decreasing sequence length beforehand. #' - `--cluster_size` : Clusterize the fasta sequences in filename, automatically sort by decreasing sequence abundance beforehand. -#' - `--cluster_smallmem` : Clusterize the fasta sequences in filename without automatically modifying their order beforehand. Sequence are expected to be sorted by decreasing sequence length, unless *--usersort* is used +#' - `--cluster_smallmem` : Clusterize the fasta sequences in filename without automatically modifying their order beforehand. Sequence are expected to be sorted by decreasing sequence length, unless *--usersort* is used. +#' In that case you may set `vsearch_args` to vsearch_args = "--strand both --usersort" #' @param vsearch_args (default : "--strand both") a one length character element defining other parameters to #' passed on to vsearch. #' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files #' - temp.fasta (refseq in fasta or dna_seq sequences) #' - cluster.fasta (centroid if method = "vsearch") #' - temp.uc (clusters if method = "vsearch") +#' @param swarmpath (default: swarm) path to swarm +#' @param d (default: 1) maximum number of differences allowed between two +#' amplicons, meaning that two amplicons will be grouped if they have `d` +#' (or less) differences +#' @param swarm_args (default : "--fastidious") a one length character +#' element defining other parameters to passed on to swarm See other possible +#' methods in the [SWARM pdf manual](https://github.com/torognes/swarm/blob/master/man/swarm_manual.pdf) #' @param ... Others arguments passed on to [DECIPHER::Clusterize()] #' @details This function use the `merge_taxa_vec` function to #' merge taxa into clusters. By default tax_adjust = 0. See the man page @@ -529,6 +538,9 @@ asv2otu <- function(physeq = NULL, vsearch_cluster_method = "--cluster_size", vsearch_args = "--strand both", keep_temporary_files = FALSE, + swarmpath = "swarm", + d = 1, + swarm_args = "--fastidious", ...) { if (inherits(physeq, "phyloseq")) { verify_pq(physeq) @@ -548,10 +560,6 @@ asv2otu <- function(physeq = NULL, ) } - if (!method %in% c("clusterize", "vsearch", "swarm")) { - stop("Method allows 2 values only : `clusterize`, `vsearch` or `swarm`") - } - if (method == "clusterize") { ## Find clusters of ASVs to form the new OTUs clusters <- DECIPHER::Clusterize(dna, @@ -588,136 +596,25 @@ asv2otu <- function(physeq = NULL, keep_temporary_files = keep_temporary_files ) } else if (method == "swarm") { - - } - return(new_obj) -} -################################################################################ - -################################################################################ -#' Search for a list of sequence in a fasta file against physeq reference -#' sequences using [vsearch](https://github.com/torognes/vsearch) -#' -#' `r lifecycle::badge("maturing")` -#' -#' @inheritParams clean_pq -#' @param seq2search (required if path_to_fasta is NULL) Either (i) a DNAstringSet object -#' or (ii) a character vector that will be convert to DNAstringSet using -#' [Biostrings::DNAStringSet()] -#' @param path_to_fasta (required if seq2search is NULL) a path to fasta file if seq2search is est to NULL. -#' @param vsearchpath (default: vsearch) path to vsearch -#' @param id (default: 0.8) id for the option `--usearch_global` of the vsearch software -#' @param iddef (default: 0) iddef for the option `--usearch_global` of the vsearch software -#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files -#' - temp.fasta (refseq in fasta) -#' - cluster.fasta (centroid) -#' - temp.uc (clusters) -#' @examples -#' \dontrun{ -#' file_dna <- tempfile("dna.fa") -#' seqinr::write.fasta("GCCCATTAGTATTCTAGTGGGCATGCCTGTTCGAGCGTCATTTTCA -#' ACCCTCAAGCCCCTTATTGCTTGGTGTTGGGAGTTTAGCTGGCTTTATAGCGGTTAACTCCCTAAATATACTGGCG", -#' file = file_dna, names = "seq1" -#' ) -#' # data(data_fungi) -#' res <- vs_search_global(data_fungi, file_dna) -#' unlink(file_dna) -#' -#' res[res$identity != "*", ] -#' -#' clean_pq(subset_taxa(data_fungi, res$identity != "*")) -#' } -#' @return A dataframe with uc results (invisible) -#' @export -#' @details -#' This function is mainly a wrapper of the work of others. -#' Please cite [vsearch](https://github.com/torognes/vsearch). -#' @author Adrien Taudière - -vs_search_global <- function(physeq, - seq2search = NULL, - path_to_fasta = NULL, - vsearchpath = "vsearch", - id = 0.8, - iddef = 0, - keep_temporary_files = FALSE) { - verify_pq(physeq) - dna <- Biostrings::DNAStringSet(physeq@refseq) - Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "temp.fasta")) - - if (is.null(seq2search) && is.null(path_to_fasta)) { - stop("You must fill either seq2search or path_to_fasta argument.") - } - - if (!is.null(seq2search) && !is.null(path_to_fasta)) { - stop("You must set either seq2search or path_to_fasta but not both.") - } - if (!is.null(seq2search)) { - if (inherits(seq2search, "character")) { - seq2search <- Biostrings::DNAStringSet(seq2search) - } - Biostrings::writeXStringSet(seq2search, paste0(tempdir(), "seq2search.fasta")) - seq2search <- paste0(tempdir(), "seq2search.fasta") - } else if (!is.null(path_to_fasta)) { - dna <- Biostrings::readDNAStringSet(path_to_fasta) - Biostrings::writeXStringSet(dna, paste0(tempdir(), "seq2search.fasta")) - seq2search <- paste0(tempdir(), "seq2search.fasta") - } - - system2( - vsearchpath, - paste( - " --usearch_global ", - paste0(tempdir(), "/", "temp.fasta"), - " --db ", - seq2search, - " --uc ", - paste0(tempdir(), "/", "temp.uc"), - " --id ", - id, - " --uc_allhits", - " --strand both", - " --iddef ", - iddef, - sep = "" + new_obj <- swarm_clustering( + physeq = physeq, + dna_seq = dna_seq, + d = d, + swarmpath = swarmpath, + vsearch_path = vsearchpath, + nproc = nproc, + swarm_args = swarm_args, + tax_adjust = tax_adjust, + keep_temporary_files = keep_temporary_files ) - ) - - pack_clusts <- - utils::read.table(paste0(tempdir(), "/", "temp.uc"), sep = "\t") - colnames(pack_clusts) <- c( - "type", - "cluster", - "width", - "identity", - "strand", - "6", - "7", - "cigarAlignment", - "query", - "target" - ) - - if (!keep_temporary_files) { - if (file.exists(paste0(tempdir(), "temp.fasta"))) { - unlink(paste0(tempdir(), "temp.fasta")) - } - if (file.exists(paste0(tempdir(), "temp.uc"))) { - unlink(paste0(tempdir(), "temp.uc")) - } - if (file.exists(paste0(tempdir(), "seq2search.fasta"))) { - unlink(paste0(tempdir(), "seq2search.fasta")) - } } else { - message(paste0("Temporary files are located at ", tempdir())) + stop("Method allows 2 values only : `clusterize`, `vsearch` or `swarm`") } - - return(invisible(pack_clusts)) + return(new_obj) } ################################################################################ - ################################################################################ #' Save phyloseq object in the form of multiple csv tables. #' @@ -753,7 +650,7 @@ vs_search_global <- function(physeq, #' @export #' @author Adrien Taudière #' @examples -#' # data(data_fungi) +#' #' # write_pq(data_fungi, path = "phyloseq") #' # write_pq(data_fungi, path = "phyloseq", one_file = TRUE) #' @seealso [MiscMetabar::save_pq()] @@ -960,7 +857,7 @@ write_pq <- function(physeq, #' @author Adrien Taudière #' @examples #' \dontrun{ -#' # data(data_fungi) +#' #' save_pq(data_fungi, path = "phyloseq") #' } #' @seealso [MiscMetabar::write_pq()] @@ -1478,7 +1375,7 @@ verify_pq <- function( #' the number of samples #' #' @examples -#' # data(data_fungi) +#' #' cond_samp <- grepl("A1", data_fungi@sam_data[["Sample_names"]]) #' subset_samples_pq(data_fungi, cond_samp) #' @@ -1532,7 +1429,7 @@ subset_samples_pq <- function(physeq, condition) { #' of this function if you use this parameter. No effect if the condition #' is of class `tax_table`. #' @examples -#' # data(data_fungi) +#' #' subset_taxa_pq(data_fungi, data_fungi@tax_table[, "Phylum"] == "Ascomycota") #' #' cond_taxa <- grepl("Endophyte", data_fungi@tax_table[, "Guild"]) @@ -1637,7 +1534,7 @@ subset_taxa_pq <- function(physeq, #' @author Adrien Taudière #' #' @examples -#' # data(data_fungi) +#' #' A8_005 <- select_one_sample(data_fungi, "A8-005_S4_MERGED.fastq.gz") #' A8_005 select_one_sample <- function(physeq, sam_name, silent = FALSE) { @@ -1783,7 +1680,7 @@ tbl_sum_samdata <- function(physeq, remove_col_unique_value = TRUE, ...) { #' @export #' @author Adrien Taudière #' @examples -#' # data(data_fungi) +#' #' df <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 5000) #' \dontrun{ #' df <- add_funguild_info(df, @@ -1852,7 +1749,7 @@ add_funguild_info <- function(physeq, #' @author Adrien Taudière #' @examples #' \dontrun{ -#' # data(data_fungi) +#' #' df <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 5000) #' df <- add_funguild_info(df, #' taxLevels = c( @@ -2015,7 +1912,7 @@ plot_guild_pq <- #' use this function. #' @examples #' library("phangorn") -#' # data(data_fungi) +#' #' df <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 9000) #' df_tree <- build_phytree_pq(df, nb_bootstrap = 5) #' plot(df_tree$UPGMA) @@ -2152,7 +2049,7 @@ build_phytree_pq <- function(physeq, #' @author Adrien Taudière #' @importFrom stats kruskal.test #' @examples -#' # data(data_fungi) +#' #' are_modality_even_depth(data_fungi, "Time")$p.value #' are_modality_even_depth(rarefy_even_depth(data_fungi), "Time")$p.value #' are_modality_even_depth(data_fungi, "Height", boxplot = TRUE) @@ -2190,7 +2087,7 @@ are_modality_even_depth <- function(physeq, fact, boxplot = FALSE) { #' @export #' @author Adrien Taudière #' @examples -#' # data(data_fungi) +#' #' data_fungi_ordered_by_genus <- reorder_taxa_pq( #' data_fungi, #' taxa_names(data_fungi)[order(as.vector(data_fungi@tax_table[, "Genus"]))] @@ -2262,7 +2159,7 @@ reorder_taxa_pq <- function(physeq, names_ordered, remove_phy_tree = FALSE) { #' @export #' #' @examples -#' # data(data_fungi) +#' #' data_fungi <- add_info_to_sam_data(data_fungi) #' boxplot(data_fungi@sam_data$nb_otu ~ data_fungi@sam_data$Time) #' @@ -2301,214 +2198,6 @@ add_info_to_sam_data <- function(physeq, ################################################################################ - -############################################################################### -#' Re-cluster sequences of an object of class `physeq` -#' or cluster a list of DNA sequences using SWARM -#' -#' @description -#' `r lifecycle::badge("maturing")` -#' -#' @inheritParams clean_pq -#' @param dna_seq NOT WORKING FOR THE MOMENT -#' You may directly use a character vector of DNA sequences -#' in place of physeq args. When physeq is set, dna sequences take the value of -#' `physeq@refseq` -#' @param d (default: 1) maximum number of differences allowed between two -#' amplicons, meaning that two amplicons will be grouped if they have `d` -#' (or less) differences -#' @param swarmpath (default: swarm) path to swarm -#' @param vsearch_path (default: vsearch) path to vsearch, used only if physeq -#' is NULL and dna_seq is provided. -#' @param nproc (default: 1) -#' Set to number of cpus/processors to use for the clustering -#' @param swarm_args (default : "--fastidious") a one length character -#' element defining other parameters to passed on to swarm See other possible -#' methods in the [SWARM pdf manual](https://github.com/torognes/swarm/blob/master/man/swarm_manual.pdf) -#' @param tax_adjust (Default 0) See the man page -#' of [merge_taxa_vec()] for more details. -#' To conserved the taxonomic rank of the most abundant ASV, -#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary -#' files ? -#' - temp.fasta (refseq in fasta or dna_seq sequences) -#' - temp_output (classical output of SWARM) -#' - temp_uclust (clusters output of SWARM) -#' @details This function use the `merge_taxa_vec` function to -#' merge taxa into clusters. By default tax_adjust = 0. See the man page -#' of [merge_taxa_vec()]. -#' @return A new object of class `physeq` or a list of cluster if dna_seq -#' args was used. -#' -#' @references -#' SWARM can be downloaded from -#' \url{https://github.com/torognes/swarm/}. -#' -#' @export -#' @examples -#' \dontrun{ -#' # data(data_fungi) -#' summary_plot_pq(data_fungi) -#' # Change with your PATH -#' path_to_swarm <- "/home/adrien/miniconda3/bin/swarm" -#' -#' data_fungi_swarm <- swarm_clustering(data_fungi, swarmpath = path_to_swarm) -#' summary_plot_pq(data_fungi_swarm) -#' -#' sequences_ex <- c( -#' "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTAATAACGAATTCATTGAATCA", -#' "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTACCTGGTAAGGCCCACTT", -#' "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTACCTGGTAGAGGTG", -#' "TACCTATGTTGCCTTGGCGGCTAAACCTACC", -#' "CGGGATTTGATGGCGAATTACCTGGTATTTTAGCCCACTTACCCGGTACCATGAGGTG", -#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACCTGG", -#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG", -#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG", -#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG" -#' ) -#' -#' sequences_ex_swarm <- swarm_clustering( -#' dna_seq = sequences_ex, -#' swarmpath = path_to_swarm -#' ) -#' } -#' @seealso [asv2otu()], [vsearch_clustering()] -#' @references -#' SWARM can be downloaded from -#' \url{https://github.com/torognes/swarm}. -#' More information in the associated publications -#' \url{https://doi.org/10.1093/bioinformatics/btab493} and -#' \url{https://doi.org/10.7717%2Fpeerj.593}. -#' @details -#' This function is mainly a wrapper of the work of others. -#' Please cite [SWARM](https://github.com/torognes/swarm). - -swarm_clustering <- function(physeq = NULL, - dna_seq = NULL, - d = 1, - swarmpath = "swarm", - vsearch_path = "vsearch", - nproc = 1, - swarm_args = "--fastidious", - tax_adjust = 0, - keep_temporary_files = FALSE) { - dna <- physeq_or_string_to_dna( - physeq = physeq, - dna_seq = dna_seq - ) - - if (!is.null(physeq)) { - nseq <- taxa_sums(physeq) - nseq <- nseq[match(names(nseq), names(dna))] - names(dna) <- paste0(names(dna), "_", nseq) - Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "temp.fasta")) - system2( - swarmpath, - paste0( - paste0(tempdir(), "/", "temp.fasta"), - " -o ", - paste0(tempdir(), "/", "temp_output"), - " ", - " -u ", - paste0(tempdir(), "/", "temp_uclust"), - " -t ", - nproc, - " ", - swarm_args - ), - stdout = TRUE, - stderr = TRUE - ) - } else { - Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "amplicons.fasta")) - system2( - vsearch_path, - paste0( - " --derep_fulllength ", - paste0(tempdir(), "/", "amplicons.fasta"), - " --sizeout --relabel_sha1 --fasta_width 0 --output ", - paste0(tempdir(), "/", "temp.fasta") - ), - stdout = TRUE, - stderr = TRUE - ) - system2( - swarmpath, - paste0( - paste0(tempdir(), "/", "temp.fasta"), - " -o ", - paste0(tempdir(), "/", "temp_output"), - " ", - " -u ", - paste0(tempdir(), "/", "temp_uclust"), - " -z ", - " -t ", - nproc, - " ", - swarm_args - ), - stdout = TRUE, - stderr = TRUE - ) - } - - pack_clusts <- - utils::read.table(paste0(tempdir(), "/", "temp_uclust"), sep = "\t") - colnames(pack_clusts) <- - c( - "type", - "cluster", - "width", - "identity", - "strand", - "6", - "7", - "cigarAlignment", - "query", - "target" - ) - - if (inherits(physeq, "phyloseq")) { - clusters <- pack_clusts$cluster[pack_clusts$type != "C"] - names(clusters) <- - sub("_.*$", "", pack_clusts$query[pack_clusts$type != "C"]) - - clusters <- clusters[match(taxa_names(physeq), names(clusters))] - - new_obj <- - merge_taxa_vec(physeq, - clusters, - tax_adjust = tax_adjust - ) - } else if (inherits(dna_seq, "character")) { - new_obj <- pack_clusts - } else { - stop( - "You must set the args physeq (object of class phyloseq) or - dna_seq (character vector)." - ) - } - - if (file.exists(paste0(tempdir(), "/", "temp.fasta")) && - !keep_temporary_files) { - unlink(paste0(tempdir(), "/", "temp.fasta")) - } - if (file.exists(paste0(tempdir(), "/", "temp_output")) && - !keep_temporary_files) { - unlink(paste0(tempdir(), "/", "temp_output")) - } - if (file.exists(paste0(tempdir(), "/", "temp_uclust")) && - !keep_temporary_files) { - unlink(paste0(tempdir(), "/", "temp_uclust")) - } - if (file.exists(paste0(tempdir(), "/", "amplicon.fasta")) && - !keep_temporary_files) { - unlink(paste0(tempdir(), "/", "amplicon.fasta")) - } - return(new_obj) -} -############################################################################### - - ############################################################################### #' Return a DNAStringSet object from either a character vector of DNA sequences #' or the `refseq` slot of a phyloseq-class object @@ -2529,7 +2218,7 @@ swarm_clustering <- function(physeq = NULL, #' @export #' #' @examples -#' # data(data_fungi) +#' #' dna <- physeq_or_string_to_dna(data_fungi) #' dna #' @@ -2573,147 +2262,6 @@ physeq_or_string_to_dna <- function(physeq = NULL, ############################################################################### -############################################################################### -#' Recluster sequences of an object of class `physeq` -#' or cluster a list of DNA sequences using vsearch software -#' -#' @description -#' `r lifecycle::badge("maturing")` -#' -#' @inheritParams clean_pq -#' @param dna_seq You may directly use a character vector of DNA sequences -#' in place of physeq args. When physeq is set, dna sequences take the value of -#' `physeq@refseq` -#' @param nproc (default: 1) -#' Set to number of cpus/processors to use for the clustering -#' @param id (default: 0.97) level of identity to cluster -#' @param vsearchpath (default: vsearch) path to vsearch -#' @param tax_adjust (Default 0) See the man page -#' of [merge_taxa_vec()] for more details. -#' To conserved the taxonomic rank of the most abundant ASV, -#' set tax_adjust to 0 (default) -#' @param vsearch_cluster_method (default: "--cluster_size) See other possible -#' methods in the [vsearch pdf manual](https://github.com/torognes/vsearch/releases/download/v2.23.0/vsearch_manual.pdf) (e.g. `--cluster_size` or `--cluster_smallmem`) -#' - `--cluster_fast` : Clusterize the fasta sequences in filename, automatically sort by decreasing sequence length beforehand. -#' - `--cluster_size` : Clusterize the fasta sequences in filename, automatically sort by decreasing sequence abundance beforehand. -#' - `--cluster_smallmem` : Clusterize the fasta sequences in filename without automatically modifying their order beforehand. Sequence are expected to be sorted by decreasing sequence length, unless *--usersort* is used -#' @param vsearch_args (default : "--strand both") a one length character element defining other parameters to -#' passed on to vsearch. -#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files ? -#' - temp.fasta (refseq in fasta or dna_seq sequences) -#' - cluster.fasta (centroid if method = "vsearch") -#' - temp.uc (clusters if method = "vsearch") -#' -#' @seealso [asv2otu()], [swarm_clustering()] -#' @details This function use the [merge_taxa_vec()] function to -#' merge taxa into clusters. By default tax_adjust = 0. See the man page -#' of [merge_taxa_vec()]. -#' -#' @return A new object of class `physeq` or a list of cluster if dna_seq -#' args was used. -#' -#' @references -#' VSEARCH can be downloaded from -#' \url{https://github.com/torognes/vsearch}. -#' More information in the associated publication -#' \url{https://pubmed.ncbi.nlm.nih.gov/27781170}. -#' @export -#' @author Adrien Taudière -#' -#' @examples -#' # data(data_fungi) -#' summary_plot_pq(data_fungi) -#' d_vs <- vsearch_clustering(data_fungi) -#' summary_plot_pq(d_vs) -#' @details -#' This function is mainly a wrapper of the work of others. -#' Please cite [vsearch](https://github.com/torognes/vsearch). -vsearch_clustering <- function(physeq = NULL, - dna_seq = NULL, - nproc = 1, - id = 0.97, - vsearchpath = "vsearch", - tax_adjust = 0, - vsearch_cluster_method = "--cluster_size", - vsearch_args = "--strand both", - keep_temporary_files = FALSE) { - dna <- physeq_or_string_to_dna(physeq = physeq, dna_seq = dna_seq) - - Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "temp.fasta")) - - system2( - vsearchpath, - paste0( - paste0( - " ", - vsearch_cluster_method, - " ", - paste0(tempdir(), "/", "temp.fasta"), - " ", - vsearch_args - ), - " -id ", - id, - " --centroids ", - paste0(tempdir(), "/", "cluster.fasta"), - " --uc ", - paste0(tempdir(), "/", "temp.uc") - ), - stdout = TRUE, - stderr = TRUE - ) - - pack_clusts <- - utils::read.table(paste0(tempdir(), "/", "temp.uc"), sep = "\t") - colnames(pack_clusts) <- - c( - "type", - "cluster", - "width", - "identity", - "strand", - "6", - "7", - "cigarAlignment", - "query", - "target" - ) - - clusters <- pack_clusts$cluster[pack_clusts$type != "C"] - names(clusters) <- pack_clusts$query[pack_clusts$type != "C"] - clusters <- clusters[match(taxa_names(physeq), names(clusters))] - - if (inherits(physeq, "phyloseq")) { - new_obj <- - merge_taxa_vec(physeq, - clusters, - tax_adjust = tax_adjust - ) - } else if (inherits(dna_seq, "character")) { - new_obj <- pack_clusts - } else { - stop( - "You must set the args physeq (object of class phyloseq) or dna_seq (character vector)." - ) - } - - if (file.exists(paste0(tempdir(), "/", "temp.fasta")) && - !keep_temporary_files) { - unlink(paste0(tempdir(), "/", "temp.fasta")) - } - if (file.exists(paste0(tempdir(), "/", "cluster.fasta")) && - !keep_temporary_files) { - unlink(paste0(tempdir(), "/", "cluster.fasta")) - } - if (file.exists(paste0(tempdir(), "/", "temp.uc")) && - !keep_temporary_files) { - unlink(paste0(tempdir(), "/", "temp.uc")) - } - return(new_obj) -} -############################################################################### - - ################################################################################ #' Remove primers using [cutadapt](https://github.com/marcelm/cutadapt/) diff --git a/R/plot_functions.R b/R/plot_functions.R index 62fe7acd..ba303903 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -10,7 +10,7 @@ #' @param taxa (default: "Species") The taxonomic level you choose for x-positioning. #' @author Adrien Taudière #' @examples -#' # data(data_fungi) +#' #' # Filter samples that don't have Enterotype #' data_fungi <- subset_samples(data_fungi, !is.na(Time)) #' res <- mt(data_fungi, "Time", method = "fdr", test = "f", B = 300) @@ -978,7 +978,7 @@ venn_pq <- #' of plots. #' @seealso [upset_pq()] #' @examples -#' # data(data_fungi) +#' #' ggvenn_pq(data_fungi, fact = "Height") #' ggvenn_pq(data_fungi, fact = "Height") + #' ggplot2::scale_fill_distiller(palette = "BuPu", direction = 1) @@ -1213,7 +1213,7 @@ multiplot <- #' @export #' @author Adrien Taudière #' @examples -#' # data(data_fungi) +#' #' p <- hill_pq(data_fungi, "Height") #' p_h1 <- p[[1]] + theme(legend.position = "none") #' p_h2 <- p[[2]] + theme(legend.position = "none") @@ -1440,7 +1440,6 @@ hill_pq <- #' #' @export #' @examples -#' data(data_fungi) #' p <- ggbetween_pq(data_fungi, variable = "Time", p.adjust.method = "BH") #' p[[1]] #' ggbetween_pq(data_fungi, variable = "Height", one_plot = TRUE) @@ -1513,7 +1512,7 @@ ggbetween_pq <- function(physeq, variable, one_plot = FALSE, rarefy_by_sample = #' @param clean_pq (logical): Does the phyloseq #' object is cleaned using the [clean_pq()] function? #' @examples -#' # data(data_fungi) +#' #' summary_plot_pq(data_fungi) #' summary_plot_pq(data_fungi, add_info = FALSE) + scale_fill_viridis_d() #' @return A ggplot2 object @@ -1861,7 +1860,7 @@ heat_tree_pq <- function(physeq, taxonomic_level = NULL, ...) { #' @return A plot #' #' @examples -#' # data(data_fungi) +#' #' data_fungi_2Height <- subset_samples(data_fungi, Height %in% c("Low", "High")) #' biplot_pq(data_fungi_2Height, "Height", merge_sample_by = "Height") #' @export @@ -2113,7 +2112,7 @@ biplot_pq <- function(physeq, #' @export #' #' @examples -#' # data(data_fungi) +#' #' data_fungi_abun <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 10000) #' p <- multi_biplot_pq(data_fungi_abun, "Height") #' lapply(p, print) @@ -2496,7 +2495,7 @@ multitax_bar_pq <- function(physeq, #' @export #' #' @examples -#' # data(data_fungi) +#' #' res_tsne <- tsne_pq(data_fungi) tsne_pq <- function(physeq, @@ -2747,7 +2746,7 @@ iNEXT_pq <- function(physeq, #' #' @seealso [ggvenn_pq()] #' @examples -#' # data(data_fungi) +#' #' upset_pq(data_fungi, fact = "Height", width_ratio = 0.2) #' upset_pq(data_fungi, #' fact = "Height", width_ratio = 0.2, @@ -3016,7 +3015,7 @@ upset_test_pq <- #' @export #' #' @examples -#' # data(data_fungi) +#' #' diff_fct_diff_class( #' data_fungi@sam_data$Sample_id, #' numeric_fonction = sum, @@ -3133,7 +3132,7 @@ diff_fct_diff_class <- #' @export #' #' @examples -#' # data(data_fungi) +#' #' data_fungi_ab <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 10000) #' tax_bar_pq(data_fungi_ab) + theme(legend.position = "none") #' tax_bar_pq(data_fungi_ab, taxa = "Class") @@ -3183,8 +3182,7 @@ tax_bar_pq <- function(physeq, fact = "Sample", taxa = "Order", percent_bar = FA #' @export #' @author Adrien Taudière #' @examples -#' data(data_fungi) -#' data(data_fungi_sp_known) +#' #' ridges_pq(data_fungi, "Time", alpha = 0.5, log10trans = FALSE) + xlim(c(0, 1000)) #' ridges_pq(data_fungi, "Time", alpha = 0.5) #' ridges_pq( diff --git a/R/table_functions.R b/R/table_functions.R index 430bda31..c432708e 100644 --- a/R/table_functions.R +++ b/R/table_functions.R @@ -114,7 +114,7 @@ tax_datatable <- function(physeq, #' @importFrom rlang .data #' @export #' @examples -#' # data(data_fungi) +#' #' data_fungi_low_high <- subset_samples(data_fungi, Height %in% c("Low", "High")) #' compare_pairs_pq(data_fungi_low_high, bifactor = "Height", merge_sample_by = "Height") #' compare_pairs_pq(data_fungi_low_high, diff --git a/R/targets_misc.R b/R/targets_misc.R index c9160d9d..ca4f34be 100644 --- a/R/targets_misc.R +++ b/R/targets_misc.R @@ -63,7 +63,7 @@ list_fastq_files <- #' @author Adrien Taudière #' #' @examples -#' # data(data_fungi) +#' #' rename_samples_otu_table(data_fungi, as.character(seq_along(sample_names(data_fungi)))) #' rename_samples_otu_table <- function(physeq, names_of_samples) { diff --git a/R/vsearch.R b/R/vsearch.R index 2ea3707a..ca210ed5 100644 --- a/R/vsearch.R +++ b/R/vsearch.R @@ -1,3 +1,477 @@ + +################################################################################ +#' Search for a list of sequence in a fasta file against physeq reference +#' sequences using [vsearch](https://github.com/torognes/vsearch) +#' +#' `r lifecycle::badge("maturing")` +#' +#' @inheritParams clean_pq +#' @param seq2search (required if path_to_fasta is NULL) Either (i) a DNAstringSet object +#' or (ii) a character vector that will be convert to DNAstringSet using +#' [Biostrings::DNAStringSet()] +#' @param path_to_fasta (required if seq2search is NULL) a path to fasta file if seq2search is est to NULL. +#' @param vsearchpath (default: vsearch) path to vsearch +#' @param id (default: 0.8) id for the option `--usearch_global` of the vsearch software +#' @param iddef (default: 0) iddef for the option `--usearch_global` of the vsearch software +#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files +#' - temp.fasta (refseq in fasta) +#' - cluster.fasta (centroid) +#' - temp.uc (clusters) +#' @examples +#' \dontrun{ +#' file_dna <- tempfile("dna.fa") +#' seqinr::write.fasta("GCCCATTAGTATTCTAGTGGGCATGCCTGTTCGAGCGTCATTTTCA +#' ACCCTCAAGCCCCTTATTGCTTGGTGTTGGGAGTTTAGCTGGCTTTATAGCGGTTAACTCCCTAAATATACTGGCG", +#' file = file_dna, names = "seq1" +#' ) +#' +#' res <- vs_search_global(data_fungi, file_dna) +#' unlink(file_dna) +#' +#' res[res$identity != "*", ] +#' +#' clean_pq(subset_taxa(data_fungi, res$identity != "*")) +#' } +#' @return A dataframe with uc results (invisible) +#' @export +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please cite [vsearch](https://github.com/torognes/vsearch). +#' @author Adrien Taudière + +vs_search_global <- function(physeq, + seq2search = NULL, + path_to_fasta = NULL, + vsearchpath = "vsearch", + id = 0.8, + iddef = 0, + keep_temporary_files = FALSE) { + verify_pq(physeq) + dna <- Biostrings::DNAStringSet(physeq@refseq) + Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "temp.fasta")) + + if (is.null(seq2search) && is.null(path_to_fasta)) { + stop("You must fill either seq2search or path_to_fasta argument.") + } + + if (!is.null(seq2search) && !is.null(path_to_fasta)) { + stop("You must set either seq2search or path_to_fasta but not both.") + } + if (!is.null(seq2search)) { + if (inherits(seq2search, "character")) { + seq2search <- Biostrings::DNAStringSet(seq2search) + } + Biostrings::writeXStringSet(seq2search, paste0(tempdir(), "seq2search.fasta")) + seq2search <- paste0(tempdir(), "seq2search.fasta") + } else if (!is.null(path_to_fasta)) { + dna <- Biostrings::readDNAStringSet(path_to_fasta) + Biostrings::writeXStringSet(dna, paste0(tempdir(), "seq2search.fasta")) + seq2search <- paste0(tempdir(), "seq2search.fasta") + } + + system2( + vsearchpath, + paste( + " --usearch_global ", + paste0(tempdir(), "/", "temp.fasta"), + " --db ", + seq2search, + " --uc ", + paste0(tempdir(), "/", "temp.uc"), + " --id ", + id, + " --uc_allhits", + " --strand both", + " --iddef ", + iddef, + sep = "" + ) + ) + + pack_clusts <- + utils::read.table(paste0(tempdir(), "/", "temp.uc"), sep = "\t") + colnames(pack_clusts) <- c( + "type", + "cluster", + "width", + "identity", + "strand", + "6", + "7", + "cigarAlignment", + "query", + "target" + ) + + if (!keep_temporary_files) { + if (file.exists(paste0(tempdir(), "temp.fasta"))) { + unlink(paste0(tempdir(), "temp.fasta")) + } + if (file.exists(paste0(tempdir(), "temp.uc"))) { + unlink(paste0(tempdir(), "temp.uc")) + } + if (file.exists(paste0(tempdir(), "seq2search.fasta"))) { + unlink(paste0(tempdir(), "seq2search.fasta")) + } + } else { + message(paste0("Temporary files are located at ", tempdir())) + } + + return(invisible(pack_clusts)) +} +################################################################################ + + + +############################################################################### +#' Re-cluster sequences of an object of class `physeq` +#' or cluster a list of DNA sequences using SWARM +#' +#' @description +#' `r lifecycle::badge("maturing")` +#' +#' @inheritParams clean_pq +#' @param dna_seq NOT WORKING FOR THE MOMENT +#' You may directly use a character vector of DNA sequences +#' in place of physeq args. When physeq is set, dna sequences take the value of +#' `physeq@refseq` +#' @param d (default: 1) maximum number of differences allowed between two +#' amplicons, meaning that two amplicons will be grouped if they have `d` +#' (or less) differences +#' @param swarmpath (default: swarm) path to swarm +#' @param vsearch_path (default: vsearch) path to vsearch, used only if physeq +#' is NULL and dna_seq is provided. +#' @param nproc (default: 1) +#' Set to number of cpus/processors to use for the clustering +#' @param swarm_args (default : "--fastidious") a one length character +#' element defining other parameters to passed on to swarm See other possible +#' methods in the [SWARM pdf manual](https://github.com/torognes/swarm/blob/master/man/swarm_manual.pdf) +#' @param tax_adjust (Default 0) See the man page +#' of [merge_taxa_vec()] for more details. +#' To conserved the taxonomic rank of the most abundant ASV, +#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary +#' files ? +#' - temp.fasta (refseq in fasta or dna_seq sequences) +#' - temp_output (classical output of SWARM) +#' - temp_uclust (clusters output of SWARM) +#' @details This function use the `merge_taxa_vec` function to +#' merge taxa into clusters. By default tax_adjust = 0. See the man page +#' of [merge_taxa_vec()]. +#' @return A new object of class `physeq` or a list of cluster if dna_seq +#' args was used. +#' +#' @references +#' SWARM can be downloaded from +#' \url{https://github.com/torognes/swarm/}. +#' +#' @export +#' @examples +#' \dontrun{ +#' +#' summary_plot_pq(data_fungi) +#' system2("swarm", "-h") +#' +#' data_fungi_swarm <- swarm_clustering(data_fungi) +#' summary_plot_pq(data_fungi_swarm) +#' +#' sequences_ex <- c( +#' "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTAATAACGAATTCATTGAATCA", +#' "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTACCTGGTAAGGCCCACTT", +#' "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTACCTGGTAGAGGTG", +#' "TACCTATGTTGCCTTGGCGGCTAAACCTACC", +#' "CGGGATTTGATGGCGAATTACCTGGTATTTTAGCCCACTTACCCGGTACCATGAGGTG", +#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACCTGG", +#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG", +#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG", +#' "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG" +#' ) +#' +#' sequences_ex_swarm <- swarm_clustering( +#' dna_seq = sequences_ex +#' ) +#' } +#' @seealso [asv2otu()], [vsearch_clustering()] +#' @references +#' SWARM can be downloaded from +#' \url{https://github.com/torognes/swarm}. +#' More information in the associated publications +#' \url{https://doi.org/10.1093/bioinformatics/btab493} and +#' \url{https://doi.org/10.7717%2Fpeerj.593}. +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please cite [SWARM](https://github.com/torognes/swarm). + +swarm_clustering <- function(physeq = NULL, + dna_seq = NULL, + d = 1, + swarmpath = "swarm", + vsearch_path = "vsearch", + nproc = 1, + swarm_args = "--fastidious", + tax_adjust = 0, + keep_temporary_files = FALSE) { + dna <- physeq_or_string_to_dna( + physeq = physeq, + dna_seq = dna_seq + ) + + if (!is.null(physeq)) { + nseq <- taxa_sums(physeq) + nseq <- nseq[match(names(nseq), names(dna))] + names(dna) <- paste0(names(dna), "_", nseq) + Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "temp.fasta")) + system2( + swarmpath, + paste0( + paste0(tempdir(), "/", "temp.fasta"), + " -o ", + paste0(tempdir(), "/", "temp_output"), + " ", + " -u ", + paste0(tempdir(), "/", "temp_uclust"), + " -t ", + nproc, + " ", + swarm_args + ), + stdout = TRUE, + stderr = TRUE + ) + } else { + Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "amplicons.fasta")) + system2( + vsearch_path, + paste0( + " --derep_fulllength ", + paste0(tempdir(), "/", "amplicons.fasta"), + " --sizeout --relabel_sha1 --fasta_width 0 --output ", + paste0(tempdir(), "/", "temp.fasta") + ), + stdout = TRUE, + stderr = TRUE + ) + system2( + swarmpath, + paste0( + paste0(tempdir(), "/", "temp.fasta"), + " -o ", + paste0(tempdir(), "/", "temp_output"), + " ", + " -u ", + paste0(tempdir(), "/", "temp_uclust"), + " -z ", + " -t ", + nproc, + " ", + swarm_args + ), + stdout = TRUE, + stderr = TRUE + ) + } + + pack_clusts <- + utils::read.table(paste0(tempdir(), "/", "temp_uclust"), sep = "\t") + colnames(pack_clusts) <- + c( + "type", + "cluster", + "width", + "identity", + "strand", + "6", + "7", + "cigarAlignment", + "query", + "target" + ) + + if (inherits(physeq, "phyloseq")) { + clusters <- pack_clusts$cluster[pack_clusts$type != "C"] + names(clusters) <- + sub("_.*$", "", pack_clusts$query[pack_clusts$type != "C"]) + + clusters <- clusters[match(taxa_names(physeq), names(clusters))] + + new_obj <- + merge_taxa_vec(physeq, + clusters, + tax_adjust = tax_adjust + ) + } else if (inherits(dna_seq, "character")) { + new_obj <- pack_clusts + } else { + stop( + "You must set the args physeq (object of class phyloseq) or + dna_seq (character vector)." + ) + } + + if (file.exists(paste0(tempdir(), "/", "temp.fasta")) && + !keep_temporary_files) { + unlink(paste0(tempdir(), "/", "temp.fasta")) + } + if (file.exists(paste0(tempdir(), "/", "temp_output")) && + !keep_temporary_files) { + unlink(paste0(tempdir(), "/", "temp_output")) + } + if (file.exists(paste0(tempdir(), "/", "temp_uclust")) && + !keep_temporary_files) { + unlink(paste0(tempdir(), "/", "temp_uclust")) + } + if (file.exists(paste0(tempdir(), "/", "amplicon.fasta")) && + !keep_temporary_files) { + unlink(paste0(tempdir(), "/", "amplicon.fasta")) + } + return(new_obj) +} +############################################################################### + + + +############################################################################### +#' Recluster sequences of an object of class `physeq` +#' or cluster a list of DNA sequences using vsearch software +#' +#' @description +#' `r lifecycle::badge("maturing")` +#' +#' @inheritParams clean_pq +#' @param dna_seq You may directly use a character vector of DNA sequences +#' in place of physeq args. When physeq is set, dna sequences take the value of +#' `physeq@refseq` +#' @param nproc (default: 1) +#' Set to number of cpus/processors to use for the clustering +#' @param id (default: 0.97) level of identity to cluster +#' @param vsearchpath (default: vsearch) path to vsearch +#' @param tax_adjust (Default 0) See the man page +#' of [merge_taxa_vec()] for more details. +#' To conserved the taxonomic rank of the most abundant ASV, +#' set tax_adjust to 0 (default). For the moment only tax_adjust = 0 is +#' robust +#' @param vsearch_cluster_method (default: "--cluster_size) See other possible +#' methods in the [vsearch pdf manual](https://github.com/torognes/vsearch/releases/download/v2.23.0/vsearch_manual.pdf) (e.g. `--cluster_size` or `--cluster_smallmem`) +#' - `--cluster_fast` : Clusterize the fasta sequences in filename, automatically sort by decreasing sequence length beforehand. +#' - `--cluster_size` : Clusterize the fasta sequences in filename, automatically sort by decreasing sequence abundance beforehand. +#' - `--cluster_smallmem` : Clusterize the fasta sequences in filename without automatically modifying their order beforehand. Sequence are expected to be sorted by decreasing sequence length, unless *--usersort* is used. +#' In that case you may set `vsearch_args` to vsearch_args = "--strand both --usersort" +#' @param vsearch_args (default : "--strand both") a one length character element defining other parameters to +#' passed on to vsearch. +#' @param keep_temporary_files (logical, default: FALSE) Do we keep temporary files ? +#' - temp.fasta (refseq in fasta or dna_seq sequences) +#' - cluster.fasta (centroid if method = "vsearch") +#' - temp.uc (clusters if method = "vsearch") +#' +#' @seealso [asv2otu()], [swarm_clustering()] +#' @details This function use the [merge_taxa_vec()] function to +#' merge taxa into clusters. By default tax_adjust = 0. See the man page +#' of [merge_taxa_vec()]. +#' +#' @return A new object of class `physeq` or a list of cluster if dna_seq +#' args was used. +#' +#' @references +#' VSEARCH can be downloaded from +#' \url{https://github.com/torognes/vsearch}. +#' More information in the associated publication +#' \url{https://pubmed.ncbi.nlm.nih.gov/27781170}. +#' @export +#' @author Adrien Taudière +#' +#' @examples +#' +#' summary_plot_pq(data_fungi) +#' d_vs <- vsearch_clustering(data_fungi) +#' summary_plot_pq(d_vs) +#' @details +#' This function is mainly a wrapper of the work of others. +#' Please cite [vsearch](https://github.com/torognes/vsearch). +vsearch_clustering <- function(physeq = NULL, + dna_seq = NULL, + nproc = 1, + id = 0.97, + vsearchpath = "vsearch", + tax_adjust = 0, + vsearch_cluster_method = "--cluster_size", + vsearch_args = "--strand both", + keep_temporary_files = FALSE) { + dna <- physeq_or_string_to_dna(physeq = physeq, dna_seq = dna_seq) + + Biostrings::writeXStringSet(dna, paste0(tempdir(), "/", "temp.fasta")) + + system2( + vsearchpath, + paste0( + paste0( + " ", + vsearch_cluster_method, + " ", + paste0(tempdir(), "/", "temp.fasta"), + " ", + vsearch_args + ), + " -id ", + id, + " --centroids ", + paste0(tempdir(), "/", "cluster.fasta"), + " --uc ", + paste0(tempdir(), "/", "temp.uc") + ), + stdout = TRUE, + stderr = TRUE + ) + + pack_clusts <- + utils::read.table(paste0(tempdir(), "/", "temp.uc"), sep = "\t") + colnames(pack_clusts) <- + c( + "type", + "cluster", + "width", + "identity", + "strand", + "6", + "7", + "cigarAlignment", + "query", + "target" + ) + + clusters <- pack_clusts$cluster[pack_clusts$type != "C"] + names(clusters) <- pack_clusts$query[pack_clusts$type != "C"] + clusters <- clusters[match(taxa_names(physeq), names(clusters))] + + if (inherits(physeq, "phyloseq")) { + new_obj <- + merge_taxa_vec(physeq, + clusters, + tax_adjust = tax_adjust + ) + } else if (inherits(dna_seq, "character")) { + new_obj <- pack_clusts + } else { + stop( + "You must set the args physeq (object of class phyloseq) or dna_seq (character vector)." + ) + } + + if (file.exists(paste0(tempdir(), "/", "temp.fasta")) && + !keep_temporary_files) { + unlink(paste0(tempdir(), "/", "temp.fasta")) + } + if (file.exists(paste0(tempdir(), "/", "cluster.fasta")) && + !keep_temporary_files) { + unlink(paste0(tempdir(), "/", "cluster.fasta")) + } + if (file.exists(paste0(tempdir(), "/", "temp.uc")) && + !keep_temporary_files) { + unlink(paste0(tempdir(), "/", "temp.uc")) + } + return(new_obj) +} +############################################################################### + + ################################################################################ #' Search for a list of sequence in an object to remove chimera taxa #' using [vsearch](https://github.com/torognes/vsearch) @@ -32,16 +506,17 @@ #' @export #' #' @examples -#' # data(data_fungi) -#' data_fungi_nochim <- chimera_removal_vs(data_fungi) -#' data_fungi_nochim_16 <- chimera_removal_vs(data_fungi, -#' abskew = 16, -#' min_seq_length = 10 -#' ) -#' data_fungi_nochim2 <- -#' chimera_removal_vs(data_fungi, type = "Select_only_non_chim") -#' data_fungi_chimera <- -#' chimera_removal_vs(data_fungi, type = "Select_only_chim") +#' \dontrun{ +#' data_fungi_nochim <- chimera_removal_vs(data_fungi) +#' data_fungi_nochim_16 <- chimera_removal_vs(data_fungi, +#' abskew = 16, +#' min_seq_length = 10 +#' ) +#' data_fungi_nochim2 <- +#' chimera_removal_vs(data_fungi, type = "Select_only_non_chim") +#' data_fungi_chimera <- +#' chimera_removal_vs(data_fungi, type = "Select_only_chim") +#' } #' @author Adrien Taudière #' @details #' This function is mainly a wrapper of the work of others. @@ -154,11 +629,12 @@ chimera_removal_vs <- #' @export #' #' @examples -#' # data(data_fungi) -#' chimera_detection_vs( -#' seq2search = data_fungi@refseq, -#' nb_seq = taxa_sums(data_fungi) -#' ) +#' \dontrun{ +#' chimera_detection_vs( +#' seq2search = data_fungi@refseq, +#' nb_seq = taxa_sums(data_fungi) +#' ) +#' } #' @author Adrien Taudière #' @details #' This function is mainly a wrapper of the work of others. diff --git a/man/add_funguild_info.Rd b/man/add_funguild_info.Rd index 620985ad..3cf4242b 100644 --- a/man/add_funguild_info.Rd +++ b/man/add_funguild_info.Rd @@ -31,7 +31,7 @@ Please make a reference to \code{FUNGuildR} package and the associate use this function. } \examples{ -# data(data_fungi) + df <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 5000) \dontrun{ df <- add_funguild_info(df, diff --git a/man/add_info_to_sam_data.Rd b/man/add_info_to_sam_data.Rd index 94fcbe15..51264e99 100644 --- a/man/add_info_to_sam_data.Rd +++ b/man/add_info_to_sam_data.Rd @@ -34,7 +34,7 @@ Warning: The value nb_seq and nb_otu may be outdated if you transform your phyloseq object, e.g. using the \code{\link[=subset_taxa_pq]{subset_taxa_pq()}} function } \examples{ -# data(data_fungi) + data_fungi <- add_info_to_sam_data(data_fungi) boxplot(data_fungi@sam_data$nb_otu ~ data_fungi@sam_data$Time) diff --git a/man/are_modality_even_depth.Rd b/man/are_modality_even_depth.Rd index 50b358cd..0e2ab7c1 100644 --- a/man/are_modality_even_depth.Rd +++ b/man/are_modality_even_depth.Rd @@ -29,7 +29,7 @@ This function apply a Kruskal-Wallis rank sum test to the number of sequences per samples in function of the factor \code{fact}. } \examples{ -# data(data_fungi) + are_modality_even_depth(data_fungi, "Time")$p.value are_modality_even_depth(rarefy_even_depth(data_fungi), "Time")$p.value are_modality_even_depth(data_fungi, "Height", boxplot = TRUE) diff --git a/man/biplot_pq.Rd b/man/biplot_pq.Rd index 07026ee5..471f0eee 100644 --- a/man/biplot_pq.Rd +++ b/man/biplot_pq.Rd @@ -98,7 +98,7 @@ A plot \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} } \examples{ -# data(data_fungi) + data_fungi_2Height <- subset_samples(data_fungi, Height \%in\% c("Low", "High")) biplot_pq(data_fungi_2Height, "Height", merge_sample_by = "Height") } diff --git a/man/blast_to_phyloseq.Rd b/man/blast_to_phyloseq.Rd index 7bda4413..0755c137 100644 --- a/man/blast_to_phyloseq.Rd +++ b/man/blast_to_phyloseq.Rd @@ -79,7 +79,7 @@ the blast table } \examples{ \dontrun{ -# data(data_fungi) + blastpath <- "...YOUR_PATH_TO_BLAST..." blast_to_phyloseq(data_fungi, seq2search = system.file("extdata", "ex.fasta", diff --git a/man/build_phytree_pq.Rd b/man/build_phytree_pq.Rd index 711ee94f..bacbdb39 100644 --- a/man/build_phytree_pq.Rd +++ b/man/build_phytree_pq.Rd @@ -74,7 +74,7 @@ use this function. } \examples{ library("phangorn") -# data(data_fungi) + df <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 9000) df_tree <- build_phytree_pq(df, nb_bootstrap = 5) plot(df_tree$UPGMA) diff --git a/man/chimera_detection_vs.Rd b/man/chimera_detection_vs.Rd index e8266c22..51b13464 100644 --- a/man/chimera_detection_vs.Rd +++ b/man/chimera_detection_vs.Rd @@ -57,7 +57,7 @@ This function is mainly a wrapper of the work of others. Please make \href{https://github.com/torognes/vsearch}{vsearch}. } \examples{ -# data(data_fungi) + chimera_detection_vs( seq2search = data_fungi@refseq, nb_seq = taxa_sums(data_fungi) diff --git a/man/chimera_removal_vs.Rd b/man/chimera_removal_vs.Rd index 87efc694..3fee960a 100644 --- a/man/chimera_removal_vs.Rd +++ b/man/chimera_removal_vs.Rd @@ -45,7 +45,7 @@ This function is mainly a wrapper of the work of others. Please make \href{https://github.com/torognes/vsearch}{vsearch}. } \examples{ -# data(data_fungi) + data_fungi_nochim <- chimera_removal_vs(data_fungi) data_fungi_nochim_16 <- chimera_removal_vs(data_fungi, abskew = 16, diff --git a/man/compare_pairs_pq.Rd b/man/compare_pairs_pq.Rd index fafd7eac..64fa78bf 100644 --- a/man/compare_pairs_pq.Rd +++ b/man/compare_pairs_pq.Rd @@ -48,7 +48,7 @@ and diversity \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} #' For the moment refseq slot need to be not Null. } \examples{ -# data(data_fungi) + data_fungi_low_high <- subset_samples(data_fungi, Height \%in\% c("Low", "High")) compare_pairs_pq(data_fungi_low_high, bifactor = "Height", merge_sample_by = "Height") compare_pairs_pq(data_fungi_low_high, diff --git a/man/diff_fct_diff_class.Rd b/man/diff_fct_diff_class.Rd index 028cf36e..aa1a13bb 100644 --- a/man/diff_fct_diff_class.Rd +++ b/man/diff_fct_diff_class.Rd @@ -41,7 +41,7 @@ a single value Mainly an internal function useful in "lapply(..., tapply)" methods } \examples{ -# data(data_fungi) + diff_fct_diff_class( data_fungi@sam_data$Sample_id, numeric_fonction = sum, diff --git a/man/ggvenn_pq.Rd b/man/ggvenn_pq.Rd index ffeb4d56..a41e5fd1 100644 --- a/man/ggvenn_pq.Rd +++ b/man/ggvenn_pq.Rd @@ -66,7 +66,7 @@ and \code{+ scale_x_continuous(expand = expansion(mult = 0.5))}. See examples. } \examples{ -# data(data_fungi) + ggvenn_pq(data_fungi, fact = "Height") ggvenn_pq(data_fungi, fact = "Height") + ggplot2::scale_fill_distiller(palette = "BuPu", direction = 1) diff --git a/man/hill_pq.Rd b/man/hill_pq.Rd index 68739cd4..c22202af 100644 --- a/man/hill_pq.Rd +++ b/man/hill_pq.Rd @@ -64,7 +64,7 @@ Note that this function use a sqrt of the read numbers in the linear model in order to correct for uneven sampling depth. } \examples{ -# data(data_fungi) + p <- hill_pq(data_fungi, "Height") p_h1 <- p[[1]] + theme(legend.position = "none") p_h2 <- p[[2]] + theme(legend.position = "none") diff --git a/man/multi_biplot_pq.Rd b/man/multi_biplot_pq.Rd index fd5e550d..b7cd4319 100644 --- a/man/multi_biplot_pq.Rd +++ b/man/multi_biplot_pq.Rd @@ -32,7 +32,7 @@ This allow to plot all the possible \code{\link[=biplot_pq]{biplot_pq()}} combin using one factor. } \examples{ -# data(data_fungi) + data_fungi_abun <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 10000) p <- multi_biplot_pq(data_fungi_abun, "Height") lapply(p, print) diff --git a/man/physeq_or_string_to_dna.Rd b/man/physeq_or_string_to_dna.Rd index 4948e3ae..2d5b3433 100644 --- a/man/physeq_or_string_to_dna.Rd +++ b/man/physeq_or_string_to_dna.Rd @@ -26,7 +26,7 @@ Internally used in \code{\link[=vsearch_clustering]{vsearch_clustering()}}, \cod \code{\link[=asv2otu]{asv2otu()}}. } \examples{ -# data(data_fungi) + dna <- physeq_or_string_to_dna(data_fungi) dna diff --git a/man/plot_guild_pq.Rd b/man/plot_guild_pq.Rd index 16cfc6df..ca7aacaa 100644 --- a/man/plot_guild_pq.Rd +++ b/man/plot_guild_pq.Rd @@ -28,7 +28,7 @@ A ggplot2 object } \examples{ \dontrun{ -# data(data_fungi) + df <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 5000) df <- add_funguild_info(df, taxLevels = c( diff --git a/man/plot_mt.Rd b/man/plot_mt.Rd index f60423df..7aba0316 100644 --- a/man/plot_mt.Rd +++ b/man/plot_mt.Rd @@ -22,7 +22,7 @@ a \code{\link{ggplot}}2 plot of result of a mt test \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} } \examples{ -# data(data_fungi) + # Filter samples that don't have Enterotype data_fungi <- subset_samples(data_fungi, !is.na(Time)) res <- mt(data_fungi, "Time", method = "fdr", test = "f", B = 300) diff --git a/man/rename_samples_otu_table.Rd b/man/rename_samples_otu_table.Rd index ea64e185..10cb99e4 100644 --- a/man/rename_samples_otu_table.Rd +++ b/man/rename_samples_otu_table.Rd @@ -19,7 +19,7 @@ the matrix with new colnames (or rownames if \code{taxa_are_rows} is true) \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \examples{ -# data(data_fungi) + rename_samples_otu_table(data_fungi, as.character(seq_along(sample_names(data_fungi)))) } diff --git a/man/reorder_taxa_pq.Rd b/man/reorder_taxa_pq.Rd index 67c160a5..bf0992d4 100644 --- a/man/reorder_taxa_pq.Rd +++ b/man/reorder_taxa_pq.Rd @@ -26,7 +26,7 @@ Note that the taxa order in a physeq object with a tree is locked by the order of leaf in the phylogenetic tree. } \examples{ -# data(data_fungi) + data_fungi_ordered_by_genus <- reorder_taxa_pq( data_fungi, taxa_names(data_fungi)[order(as.vector(data_fungi@tax_table[, "Genus"]))] diff --git a/man/ridges_pq.Rd b/man/ridges_pq.Rd index 83818aff..334f6eda 100644 --- a/man/ridges_pq.Rd +++ b/man/ridges_pq.Rd @@ -31,8 +31,7 @@ taxonomic groups \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \examples{ -data(data_fungi) -data(data_fungi_sp_known) + ridges_pq(data_fungi, "Time", alpha = 0.5, log10trans = FALSE) + xlim(c(0, 1000)) ridges_pq(data_fungi, "Time", alpha = 0.5) ridges_pq( diff --git a/man/save_pq.Rd b/man/save_pq.Rd index 6f1ffbd0..185caa90 100644 --- a/man/save_pq.Rd +++ b/man/save_pq.Rd @@ -34,7 +34,7 @@ Write : } \examples{ \dontrun{ -# data(data_fungi) + save_pq(data_fungi, path = "phyloseq") } } diff --git a/man/select_one_sample.Rd b/man/select_one_sample.Rd index c433dbf4..783771d6 100644 --- a/man/select_one_sample.Rd +++ b/man/select_one_sample.Rd @@ -23,7 +23,7 @@ a physeq object with one sample Mostly for internal used, for example in function \code{\link[=track_wkflow_samples]{track_wkflow_samples()}}. } \examples{ -# data(data_fungi) + A8_005 <- select_one_sample(data_fungi, "A8-005_S4_MERGED.fastq.gz") A8_005 } diff --git a/man/subset_samples_pq.Rd b/man/subset_samples_pq.Rd index ed7a0961..3c7acc35 100644 --- a/man/subset_samples_pq.Rd +++ b/man/subset_samples_pq.Rd @@ -33,7 +33,7 @@ This function is robust when you use the sam_data slot of the phyloseq object used in physeq (see examples) } \examples{ -# data(data_fungi) + cond_samp <- grepl("A1", data_fungi@sam_data[["Sample_names"]]) subset_samples_pq(data_fungi, cond_samp) diff --git a/man/subset_taxa_pq.Rd b/man/subset_taxa_pq.Rd index 64ef0481..237a27c2 100644 --- a/man/subset_taxa_pq.Rd +++ b/man/subset_taxa_pq.Rd @@ -43,7 +43,7 @@ The main objective of this function is to complete the subset_taxa using a named boolean vector. Names must match taxa_names. } \examples{ -# data(data_fungi) + subset_taxa_pq(data_fungi, data_fungi@tax_table[, "Phylum"] == "Ascomycota") cond_taxa <- grepl("Endophyte", data_fungi@tax_table[, "Guild"]) diff --git a/man/subset_taxa_tax_control.Rd b/man/subset_taxa_tax_control.Rd index 417a84ae..3d1e105d 100644 --- a/man/subset_taxa_tax_control.Rd +++ b/man/subset_taxa_tax_control.Rd @@ -41,7 +41,7 @@ A new \code{\link{phyloseq-class}} object. \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \examples{ -# data(data_fungi) + subset_taxa_tax_control(data_fungi, as.numeric(data_fungi@otu_table[, 300]), diff --git a/man/summary_plot_pq.Rd b/man/summary_plot_pq.Rd index b1996c5e..41c08302 100644 --- a/man/summary_plot_pq.Rd +++ b/man/summary_plot_pq.Rd @@ -32,7 +32,7 @@ A ggplot2 object \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} } \examples{ -# data(data_fungi) + summary_plot_pq(data_fungi) summary_plot_pq(data_fungi, add_info = FALSE) + scale_fill_viridis_d() } diff --git a/man/swarm_clustering.Rd b/man/swarm_clustering.Rd index 76411de3..d15d06d5 100644 --- a/man/swarm_clustering.Rd +++ b/man/swarm_clustering.Rd @@ -71,7 +71,7 @@ Please cite \href{https://github.com/torognes/swarm}{SWARM}. } \examples{ \dontrun{ -# data(data_fungi) + summary_plot_pq(data_fungi) # Change with your PATH path_to_swarm <- "/home/adrien/miniconda3/bin/swarm" diff --git a/man/tax_bar_pq.Rd b/man/tax_bar_pq.Rd index acc435bb..ab2a4165 100644 --- a/man/tax_bar_pq.Rd +++ b/man/tax_bar_pq.Rd @@ -37,7 +37,7 @@ taxonomic groups \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} } \examples{ -# data(data_fungi) + data_fungi_ab <- subset_taxa_pq(data_fungi, taxa_sums(data_fungi) > 10000) tax_bar_pq(data_fungi_ab) + theme(legend.position = "none") tax_bar_pq(data_fungi_ab, taxa = "Class") diff --git a/man/track_wkflow_samples.Rd b/man/track_wkflow_samples.Rd index 6c7ee88b..41b984d2 100644 --- a/man/track_wkflow_samples.Rd +++ b/man/track_wkflow_samples.Rd @@ -23,7 +23,7 @@ Contrary to \code{\link[=track_wkflow]{track_wkflow()}}, only phyloseq object ar More information are available in the manual of the function \code{\link[=track_wkflow]{track_wkflow()}} } \examples{ -# data(data_fungi) + tree_A10_005 <- subset_samples(data_fungi, Tree_name == "A10-005") track_wkflow_samples(tree_A10_005) } diff --git a/man/tsne_pq.Rd b/man/tsne_pq.Rd index 21e5b6a2..9196cc99 100644 --- a/man/tsne_pq.Rd +++ b/man/tsne_pq.Rd @@ -28,6 +28,6 @@ See ?Rtsne::Rtsne() for more information Compute tSNE position of samples from a phyloseq object } \examples{ -# data(data_fungi) + res_tsne <- tsne_pq(data_fungi) } diff --git a/man/upset_pq.Rd b/man/upset_pq.Rd index 726a8c8d..d1417464 100644 --- a/man/upset_pq.Rd +++ b/man/upset_pq.Rd @@ -50,7 +50,7 @@ A \code{\link{ggplot}}2 plot Alternative to venn plot. } \examples{ -# data(data_fungi) + upset_pq(data_fungi, fact = "Height", width_ratio = 0.2) upset_pq(data_fungi, fact = "Height", width_ratio = 0.2, diff --git a/man/vs_search_global.Rd b/man/vs_search_global.Rd index 30075929..bdbbc900 100644 --- a/man/vs_search_global.Rd +++ b/man/vs_search_global.Rd @@ -55,7 +55,7 @@ seqinr::write.fasta("GCCCATTAGTATTCTAGTGGGCATGCCTGTTCGAGCGTCATTTTCA ACCCTCAAGCCCCTTATTGCTTGGTGTTGGGAGTTTAGCTGGCTTTATAGCGGTTAACTCCCTAAATATACTGGCG", file = file_dna, names = "seq1" ) -# data(data_fungi) + res <- vs_search_global(data_fungi, file_dna) unlink(file_dna) diff --git a/man/vsearch_clustering.Rd b/man/vsearch_clustering.Rd index 8f66b4d7..aa9537c6 100644 --- a/man/vsearch_clustering.Rd +++ b/man/vsearch_clustering.Rd @@ -71,7 +71,7 @@ This function is mainly a wrapper of the work of others. Please cite \href{https://github.com/torognes/vsearch}{vsearch}. } \examples{ -# data(data_fungi) + summary_plot_pq(data_fungi) d_vs <- vsearch_clustering(data_fungi) summary_plot_pq(d_vs) diff --git a/man/write_pq.Rd b/man/write_pq.Rd index 64304288..436e0252 100644 --- a/man/write_pq.Rd +++ b/man/write_pq.Rd @@ -84,7 +84,7 @@ and if present a phy_tree in Newick format \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} } \examples{ -# data(data_fungi) + # write_pq(data_fungi, path = "phyloseq") # write_pq(data_fungi, path = "phyloseq", one_file = TRUE) } diff --git a/tests/testthat/test_cutadapt.R b/tests/testthat/test_cutadapt.R new file mode 100644 index 00000000..e995227e --- /dev/null +++ b/tests/testthat/test_cutadapt.R @@ -0,0 +1,62 @@ +suppressWarnings(swarm_error_or_not <- + try(system("cutadapt 2>&1", intern = TRUE), silent = TRUE)) + +if (class(swarm_error_or_not) == "try-error") { + message( + "cutadapt_remove_primers() can't be + tested when cutadapt is not installed" + ) +} else { + test_that("cutadapt_remove_primers works fine", { + expect_silent(suppressMessages( + res_cut <- cutadapt_remove_primers("inst/extdata", "TTC", "GAA", + folder_output = paste0(tempdir(), "/output_cutadapt") + ))) + expect_type(res_cut, "list") + expect_equal(length(res_cut), 1) + expect_type(res_cut[[1]], "character") + + expect_equal(length(list.files(paste0(tempdir(), "/output_cutadapt"))), 2) + + expect_silent( + suppressMessages(res_cut2 <- cutadapt_remove_primers( + system.file("extdata", + package = "dada2" + ), + pattern_R1 = "F.fastq.gz", + pattern_R2 = "R.fastq.gz", + primer_fw = "TTC", + primer_rev = "GAA", + folder_output = tempdir() + )) + ) + + expect_type(res_cut2, "list") + expect_equal(length(res_cut2), 2) + + expect_type(res_cut2[[1]], "character") + + expect_equal(length(list.files(paste0(tempdir(), "/output_cutadapt"))), 2) + + expect_silent( + suppressMessages(res_cut3 <- cutadapt_remove_primers( + system.file("extdata", + package = "dada2" + ), + pattern_R1 = "F.fastq.gz", + primer_fw = "TTC", + folder_output = tempdir(), + cmd_is_run = FALSE + ) + )) + + expect_type(res_cut3, "list") + expect_equal(length(res_cut3), 2) + expect_type(res_cut3[[1]], "character") + + expect_equal(length(list.files(paste0(tempdir(), "/output_cutadapt"))), 2) + }) + + unlink(tempdir(), recursive = TRUE) + +} diff --git a/tests/testthat/test_figures_alpha_div.R b/tests/testthat/test_figures_alpha_div.R index 0cbc4ced..52ecadbf 100644 --- a/tests/testthat/test_figures_alpha_div.R +++ b/tests/testthat/test_figures_alpha_div.R @@ -70,7 +70,7 @@ test_that("hill_pq works with GP dataset", { test_that("iNEXT_pq works with data_fungi dataset", { library("iNEXT") expect_s3_class( - res_iNEXT <- iNEXT_pq( + suppressWarnings(res_iNEXT <- iNEXT_pq( data_fungi_sp_known, merge_sample_by = "Height", q = 1, @@ -78,7 +78,7 @@ test_that("iNEXT_pq works with data_fungi dataset", { nboot = 5 ), "iNEXT" - ) + )) expect_s3_class(ggiNEXT(res_iNEXT), "ggplot") expect_s3_class(ggiNEXT(res_iNEXT, type = 2), "ggplot") expect_s3_class(ggiNEXT(res_iNEXT, type = 3), "ggplot") diff --git a/tests/testthat/test_figures_beta_div.R b/tests/testthat/test_figures_beta_div.R index 5d30396f..7000b8a4 100644 --- a/tests/testthat/test_figures_beta_div.R +++ b/tests/testthat/test_figures_beta_div.R @@ -378,3 +378,32 @@ test_that("multipatt_pq works with data_fungi dataset", { ) expect_error(multipatt_pq(data_fungi, fact = "Time")) }) + +test_that("multipatt_pq works with data_fungi dataset", { + expect_type(suppressMessages(suppressWarnings(res_height <- ancombc_pq( + subset_taxa_pq( + data_fungi_sp_known, + taxa_sums(data_fungi_sp_known) > 5000 + ), + fact = "Height", + levels_fact = c("Low", "High"), + verbose = TRUE + ))), "list") + + expect_s3_class(res_height$bias_correct_log_table, "data.frame") + expect_equal(dim(res_height$res), c(6, 15)) + + expect_type(suppressMessages(suppressWarnings(res_time <- ancombc_pq( + subset_taxa_pq( + data_fungi_sp_known, + taxa_sums(data_fungi_sp_known) > 5000 + ), + fact = "Time", + levels_fact = c("0", "15"), + tax_level = "Family", + verbose = TRUE + ))), "list") + + expect_s3_class(res_time$ss_tab, "data.frame") + expect_equal(dim(res_time$res), c(12, 15)) +}) diff --git a/tests/testthat/test_phyloseq_class.R b/tests/testthat/test_phyloseq_class.R index e6f87eb7..e7547b6b 100644 --- a/tests/testthat/test_phyloseq_class.R +++ b/tests/testthat/test_phyloseq_class.R @@ -8,13 +8,15 @@ sequences_ex <- c( ) test_that("asv2otu works fine with Clusterize method", { expect_s4_class(data_fungi_sp_known, "phyloseq") - expect_s4_class(asv2otu(data_fungi_sp_known), "phyloseq") + expect_s4_class(suppressWarnings(asv2otu(data_fungi_sp_known)), "phyloseq") + expect_s4_class(suppressWarnings(asv2otu(data_fungi_sp_known, + method = "longest")), "phyloseq") expect_error(asv2otu(enterotype, dna_seq = sequences_ex)) expect_error(asv2otu(enterotype, method = "vsearch")) expect_error(asv2otu(enterotype)) expect_error(asv2otu()) - expect_error(asv2otu(method = "VsaerCh")) + expect_error(asv2otu(data_fungi_sp_known, method = "VsaerCh")) expect_error(asv2otu(data_fungi_sp_known, dna_seq = sequences_ex)) }) diff --git a/tests/testthat/test_swarm.R b/tests/testthat/test_swarm.R new file mode 100644 index 00000000..d4d2980b --- /dev/null +++ b/tests/testthat/test_swarm.R @@ -0,0 +1,50 @@ +suppressWarnings(swarm_error_or_not <- + try(system("swarm -h 2>&1", intern = TRUE), silent = TRUE)) + +if (class(swarm_error_or_not) == "try-error") { + message( + "swarm_clustering() and asv2otu(..., method=swarm) can't be + tested when swarm is not installed" + ) +} else { + + sequences_ex <- c( + "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTAATAACGAATTCATTGAATCA", + "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTACCTGGTAAGGCCCACTT", + "TACCTATGTTGCCTTGGCGGCTAAACCTACCCGGGATTTGATGGGGCGAATTACCTGGTAGAGGTG", + "TACCTATGTTGCCTTGGCGGCTAAACCTACC", + "CGGGATTTGATGGCGAATTACCTGGTATTTTAGCCCACTTACCCGGTACCATGAGGTG", + "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACCTGG", + "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG", + "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG", + "GCGGCTAAACCTACCCGGGATTTGATGGCGAATTACAAAG" + ) + + test_that("swarm_clustering works fine with phyloseq object", { + expect_s4_class(data_fungi_swarm <- swarm_clustering(data_fungi), "phyloseq") + expect_equal(ntaxa(data_fungi_swarm), 1301) + }) + + test_that("swarm_clustering works fine with dna sequences vector", { + expect_s3_class(sequences_ex_swarm <- swarm_clustering(dna_seq = sequences_ex), "data.frame") + expect_equal(dim(sequences_ex_swarm), c(12, 10)) + }) + + test_that("asv2otu works fine with swarm method", { + expect_s4_class( + d_swarm <- + asv2otu(data_fungi_sp_known, method = "swarm"), + "phyloseq" + ) + + expect_equal(ntaxa(d_swarm), 600) + expect_true(nsamples(d_swarm) == nsamples(data_fungi_sp_known)) + + expect_s3_class( + seq_swarm <- + asv2otu(dna_seq = sequences_ex, method = "swarm"), + "data.frame" + ) + expect_equal(dim(seq_swarm), c(12, 10)) + }) +} diff --git a/tests/testthat/test_targets.R b/tests/testthat/test_targets.R index 27801f60..ba55bf37 100644 --- a/tests/testthat/test_targets.R +++ b/tests/testthat/test_targets.R @@ -151,6 +151,7 @@ test_that("subsample_fastq function works fine", { }) test_that("sample_data_with_new_names function works fine", { + sam_file <- "inst/extdata/sam_data.csv" expect_silent(newdf <- sample_data_with_new_names(sam_file, paste0("Samples_", seq(1, 185)))) expect_equal(dim(newdf)[1], 185) expect_equal(dim(newdf)[2], 7) @@ -167,7 +168,7 @@ test_that("sample_data_with_new_names function works fine", { system.file("extdata", "sam2R.fastq.gz", package = "dada2") ) expect_silent(filt_fastq_fw <- filter_trim(testFastqs_fw, output_fw = tempdir())) - expect_equal(length(derepFastq(filt_fastq_fw[1])), 4) + expect_equal(length(derepFastq(filt_fastq_fw[1])), 2) expect_silent(filt_fastq_pe <- filter_trim(testFastqs_fw, testFastqs_rev, output_fw = tempdir("fw"), @@ -188,4 +189,4 @@ test_that("add_info_to_sam_data function works fine with data_fungi", { expect_equal(dim(data_fungi2@sam_data)[2], 11) expect_equal(length(data_fungi2@sam_data$nb_seq), 185) expect_equal(length(data_fungi2@sam_data$nb_otu), 185) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test_vsearch.R b/tests/testthat/test_vsearch.R index 07e1d8a1..7a863734 100644 --- a/tests/testthat/test_vsearch.R +++ b/tests/testthat/test_vsearch.R @@ -6,31 +6,125 @@ sequences_ex <- c( data("data_fungi") df_basidio <- subset_taxa(data_fungi, Phylum == "Basidiomycota") -df_basidio <- subset_taxa_pq(df_basidio, colSums(df_basidio@otu_table) > 1000) +df_basidio <- + subset_taxa_pq(df_basidio, colSums(df_basidio@otu_table) > 1000) # path_db <- "inst/extdata/100_sp_UNITE_sh_general_release_dynamic.fasta" -suppressWarnings(vsearch_error_or_not <- try(system("vsearch 2>&1", intern = TRUE), silent = TRUE)) +suppressWarnings(vsearch_error_or_not <- + try(system("vsearch 2>&1", intern = TRUE), silent = TRUE)) if (class(vsearch_error_or_not) == "try-error") { - message("vs_search_global() and asv2otu(..., method=vsearch) can't be tested when vsearch is not installed") + message( + "vs_search_global() and asv2otu(..., method=vsearch) can't be tested when vsearch is not installed" + ) } else { test_that("asv2otu works fine with vsearch method", { - expect_s4_class(d_vs <- asv2otu(data_fungi_sp_known, method = "vsearch"), "phyloseq") - expect_s4_class(d_fast <- asv2otu(data_fungi_sp_known, - method = "vsearch", - vsearch_cluster_method = "--cluster_fast" - ), "phyloseq") - expect_s3_class(asv2otu(dna_seq = sequences_ex, method = "vsearch"), "data.frame") + expect_s4_class( + d_vs <- + asv2otu(data_fungi_sp_known, method = "vsearch"), + "phyloseq" + ) + expect_s4_class( + d_fast <- asv2otu( + data_fungi_sp_known, + method = "vsearch", + vsearch_cluster_method = "--cluster_fast" + ), + "phyloseq" + ) + expect_s3_class( + asv2otu(dna_seq = sequences_ex, method = "vsearch"), + "data.frame" + ) expect_true(sum(!d_fast@refseq == d_vs@refseq) > 0) expect_equal(sum(dim(d_vs@otu_table) == dim(d_fast@otu_table)), 2) }) test_that("vs_search_global works fine with vsearch method", { - expect_s3_class(res <- vs_search_global(data_fungi, - path_to_fasta = "inst/extdata/ex_little.fasta" - ), "data.frame") + expect_s3_class( + res <- vs_search_global(data_fungi, + path_to_fasta = "inst/extdata/ex_little.fasta" + ), + "data.frame" + ) expect_equal(dim(res), c(1420, 10)) - expect_s3_class(res <- vs_search_global(data_fungi, sequences_ex), "data.frame") - expect_s3_class(res <- vs_search_global(data_fungi, Biostrings::DNAStringSet(sequences_ex)), "data.frame") + expect_s3_class( + res <- + vs_search_global(data_fungi, sequences_ex), + "data.frame" + ) + expect_s3_class( + res <- + vs_search_global(data_fungi, Biostrings::DNAStringSet(sequences_ex)), + "data.frame" + ) + }) + + test_that("chimera_detection_vs works fine", { + expect_type( + chimera_fungi <- chimera_detection_vs( + seq2search = data_fungi@refseq, + nb_seq = taxa_sums(data_fungi) + ), + "list" + ) + expect_s4_class(chimera_fungi$non_chimera, "AAStringSet") + + expect_equal(length(chimera_fungi$non_chimera), 1051) + expect_equal(length(chimera_fungi$chimera), 242) + expect_equal(length(chimera_fungi$borderline), 127) + }) + + test_that("chimera_detection_vs works fine", { + expect_s4_class( + data_fungi_nochim <- + chimera_removal_vs(data_fungi), + "phyloseq" + ) + expect_equal(ntaxa(data_fungi_nochim), 1178) + expect_s4_class( + data_fungi_nochim_16 <- chimera_removal_vs(data_fungi, + abskew = 16, + min_seq_length = 10 + ), + "phyloseq" + ) + expect_equal(ntaxa(data_fungi_nochim_16), 1259) + expect_s4_class( + data_fungi_nochim2 <- + chimera_removal_vs(data_fungi, type = "Select_only_non_chim"), + "phyloseq" + ) + expect_equal(ntaxa(data_fungi_nochim2), 1051) + + expect_s4_class( + data_fungi_chimera <- + chimera_removal_vs(data_fungi, type = "Select_only_chim"), + "phyloseq" + ) + expect_equal(ntaxa(data_fungi_chimera), 242) + }) + + + test_that("vsearch_clustering works fine", { + expect_s4_class(d_vs1 <- vsearch_clustering(data_fungi), "phyloseq") + expect_equal(ntaxa(d_vs1), 701) + + expect_s4_class(d_vs2 <- vsearch_clustering(data_fungi, + id = 0.98, + vsearch_cluster_method = "--cluster_size" + ), "phyloseq") + expect_equal(ntaxa(d_vs2), 817) + + expect_s4_class(d_vs3 <- vsearch_clustering(data_fungi, + id = 0.98, + vsearch_cluster_method = "--cluster_smallmem", + vsearch_args = "--strand both --usersort" + ), "phyloseq") + + + expect_type(seq_clustered <- vsearch_clustering(dna_seq = sequences_ex), + "list") + expect_equal(dim(seq_clustered), c(4, 10)) }) } diff --git a/vignettes/articles/essai_article.Rmd b/vignettes/articles/essai_article.Rmd deleted file mode 100644 index 289a5796..00000000 --- a/vignettes/articles/essai_article.Rmd +++ /dev/null @@ -1,14 +0,0 @@ ---- -title: "essai_article" ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -library(MiscMetabar) -``` diff --git a/vignettes/articles/explore_data.Rmd b/vignettes/articles/explore_data.Rmd index f3e67083..82abc8dc 100644 --- a/vignettes/articles/explore_data.Rmd +++ b/vignettes/articles/explore_data.Rmd @@ -10,7 +10,8 @@ vignette: > ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, - comment = "#>" + comment = "#>", + message = FALSE ) ``` @@ -99,3 +100,9 @@ heatmap(fungi_order@otu_table) + +# Session information + +```{r} +sessionInfo() +```