diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..02882b57 --- /dev/null +++ b/.lintr @@ -0,0 +1,18 @@ +linters: linters_with_tags( + tags = c("default", "common_mistakes", "package_development", "consistency", "correctness", "robustness"), + undesirable_operator_linter = NULL, + line_length_linter(120), + keyword_quote_linter = NULL, + condition_message_linter = NULL, + paste_linter = NULL, + implicit_integer_linter = NULL, + packages = "lintr", + exclude_tags = "deprecated" + ) +encoding: "UTF-8" + +exclusions: list( + # excluded from all lints: + "tests/testthat.R" + ) + diff --git a/DESCRIPTION b/DESCRIPTION index 5e4a2487..93f8f4f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MiscMetabar Type: Package Title: Miscellaneous Functions for Metabarcoding Analysis -Version: 0.10.1 +Version: 0.10.2 Authors@R: person("Adrien", "Taudière", email = "adrien.taudiere@zaclys.net", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0003-1088-1182")) Description: Facilitate the description, transformation, exploration, and reproducibility of metabarcoding analyses. 'MiscMetabar' is mainly built on top of the 'phyloseq', 'dada2' and 'targets' R packages. It helps to build reproducible and robust bioinformatics pipelines in R. 'MiscMetabar' makes ecological analysis of alpha and beta-diversity easier, more reproducible and more powerful by integrating a large number of tools. Important features are described in Taudière A. (2023) . diff --git a/NEWS.md b/NEWS.md index a544438f..12620449 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ -# MiscMetabar 0.10.1 (in development) +# MiscMetabar 0.10.2 (in development) + +- Improve code thanks to {lintr} package +# MiscMetabar 0.10.1 + +> CRAN 2024-09-10 - Delete function `heat_tree_pq()` because {metacoder} package is archived from CRAN. diff --git a/R/alpha_div_test.R b/R/alpha_div_test.R index 85aff0c6..aa4495d2 100644 --- a/R/alpha_div_test.R +++ b/R/alpha_div_test.R @@ -302,7 +302,9 @@ hill_test_rarperm_pq <- function(physeq, #' Package leaps must then be loaded, and this can only be applied to linear models #' with covariates and no interactions. If "d", a simple summary of the candidate set #' is printed, including the number of candidate models. -#' @param crit The Information Criterion to be used. Default is the small-sample corrected AIC (aicc). This should be a function that accepts a fitted model as first argument. Other provided functions are the classic AIC, the Bayes IC (bic), and QAIC/QAICc (qaic and qaicc). +#' @param crit The Information Criterion to be used. Default is the small-sample corrected AIC (aicc). +#' This should be a function that accepts a fitted model as first argument. +#' Other provided functions are the classic AIC, the Bayes IC (bic), and QAIC/QAICc (qaic and qaicc). #' @param ... Other arguments passed on to [glmulti::glmulti()] function #' #' @return A data.frame summarizing the glmulti results with columns @@ -380,7 +382,7 @@ glmutli_pq <- if (fitfunction == "lm") { test <- vector("list", nrow(top_glmulti)) R2__h0 <- NULL - for (i in 1:nrow(top_glmulti)) { + for (i in seq_along(nrow(top_glmulti))) { test[[i]] <- summary(res_glmulti@objects[[i]]) R2__h0[i] <- test[[i]]$adj.r.squared } diff --git a/R/beta_div_test.R b/R/beta_div_test.R index 6e556aa0..c49de7bf 100644 --- a/R/beta_div_test.R +++ b/R/beta_div_test.R @@ -75,9 +75,7 @@ graph_test_pq <- function(physeq, nperm = nperm, ... ) - if (!return_plot) { - return(res_graph_test) - } else { + if (return_plot) { p <- phyloseqGraphTest::plot_test_network(res_graph_test) + labs( title = title, @@ -90,6 +88,8 @@ graph_test_pq <- function(physeq, ) ) return(p) + } else { + return(res_graph_test) } } ################################################################################ @@ -831,11 +831,8 @@ signif_ancombc <- function(ancombc_res, clnames <- colnames(signif_ancombc_res) name_modality <- - gsub( - "passed_ss", "", - clnames[grepl("passed_ss", clnames) & - !grepl("Intercept", clnames)] - ) + gsub("passed_ss", "", clnames[grepl("passed_ss", clnames) & + !grepl("Intercept", clnames)]) if (filter_passed) { signif_ancombc_res <- signif_ancombc_res %>% @@ -1356,13 +1353,6 @@ var_par_rarperm_pq <- ) } - if (dist_method %in% c("robust.aitchison", "aitchison")) { - dist_physeq <- - vegdist(as(otu_table(physeq, taxa_are_rows = FALSE), "matrix"), method = dist_method) - } else { - dist_physeq <- phyloseq::distance(physeq, method = dist_method) - } - res_perm <- vector("list", nperm) for (i in 1:nperm) { res_perm[[i]] <- diff --git a/R/blast.R b/R/blast.R index 57adcc3d..6b20fbce 100644 --- a/R/blast.R +++ b/R/blast.R @@ -115,12 +115,12 @@ blast_to_phyloseq <- function(physeq, blast_tab_OK <- FALSE } - if (!keep_temporary_files) { + if (keep_temporary_files) { + message(paste0("Temporary files are located at ", tempdir())) + } else { unlink(paste0(tempdir(), "/", "blast_result.txt")) unlink(list.files(tempdir(), pattern = "dbase")) unlink(paste0(tempdir(), "/", "db.fasta")) - } else { - message(paste0("Temporary files are located at ", tempdir())) } if (!blast_tab_OK) { @@ -292,12 +292,12 @@ blast_pq <- function(physeq, blast_tab_OK <- FALSE } - if (!keep_temporary_files) { + if (keep_temporary_files) { + message(paste0("Temporary files are located at ", tempdir())) + } else { unlink(paste0(tempdir(), "/", "blast_result.txt")) unlink(list.files(tempdir(), pattern = "dbase")) unlink(paste0(tempdir(), "/", "db.fasta")) - } else { - message(paste0("Temporary files are located at ", tempdir())) } if (!blast_tab_OK) { @@ -542,12 +542,12 @@ blast_to_derep <- function(derep, blast_tab_OK <- FALSE } - if (!keep_temporary_files) { + if (keep_temporary_files) { + message(paste0("Temporary files are located at ", tempdir())) + } else { unlink(paste0(tempdir(), "/", "blast_result.txt")) unlink(list.files(tempdir(), pattern = "dbase")) unlink(paste0(tempdir(), "/", "db.fasta")) - } else { - message(paste0("Temporary files are located at ", tempdir())) } if (!blast_tab_OK) { diff --git a/R/dada_phyloseq.R b/R/dada_phyloseq.R index 8caa3a0e..e1d81974 100644 --- a/R/dada_phyloseq.R +++ b/R/dada_phyloseq.R @@ -113,10 +113,7 @@ clean_pq <- function(physeq, verify_pq(physeq) if (reorder_taxa) { - physeq <- reorder_taxa_pq( - physeq, - taxa_names(physeq)[order(taxa_sums(physeq), decreasing = TRUE)] - ) + physeq <- reorder_taxa_pq(physeq, taxa_names(physeq)[order(taxa_sums(physeq), decreasing = TRUE)]) } if (rename_taxa) { @@ -137,23 +134,17 @@ clean_pq <- function(physeq, if (force_taxa_as_columns && taxa_are_rows(physeq)) { otu_table(physeq) <- - otu_table( - t(as.matrix(unclass( - physeq@otu_table - ))), - taxa_are_rows = FALSE - ) + otu_table(t(as.matrix(unclass( + physeq@otu_table + ))), taxa_are_rows = FALSE) message("Taxa are now in columns.") } if (force_taxa_as_rows && !taxa_are_rows(physeq)) { otu_table(physeq) <- - otu_table( - t(as.matrix(unclass( - physeq@otu_table - ))), - taxa_are_rows = TRUE - ) + otu_table(t(as.matrix(unclass( + physeq@otu_table + ))), taxa_are_rows = TRUE) message("Taxa are now in rows.") } @@ -276,15 +267,11 @@ track_wkflow <- function(list_of_objects, file.exists(object[1])) { if (summary(file(object[[1]]))$class == "gzfile") { pbapply::pbsapply(object, function(x) { - as.numeric(system(paste("zcat ", x, " | grep -c '^+$'", sep = ""), - intern = TRUE - )) + as.numeric(system(paste("zcat ", x, " | grep -c '^+$'", sep = ""), intern = TRUE)) }) } else if (grepl("\\.fastq$", object[1])) { pbapply::pbsapply(object, function(x) { - as.numeric(system(paste("cat ", x, " | grep -c '^+$'", sep = ""), - intern = TRUE - )) + as.numeric(system(paste("cat ", x, " | grep -c '^+$'", sep = ""), intern = TRUE)) }) } else { stop("Files must be either gzfile or .fastq") @@ -416,11 +403,7 @@ track_wkflow <- function(list_of_objects, matrix(ncol = length(list_of_objects), unlist(track_nb_sam_per_obj)) ) - rownames(track) <- c( - "nb_sequences", - "nb_clusters", - "nb_samples" - ) + rownames(track) <- c("nb_sequences", "nb_clusters", "nb_samples") } @@ -608,7 +591,8 @@ postcluster_pq <- function(physeq = NULL, if (method == "clusterize") { ## Find clusters of ASVs to form the new OTUs - clusters <- DECIPHER::Clusterize(dna, + clusters <- DECIPHER::Clusterize( + dna, cutoff = 1 - id, # e.g. `cutoff = 0.03` for a 97% OTU processors = nproc, @@ -618,10 +602,7 @@ postcluster_pq <- function(physeq = NULL, if (inherits(physeq, "phyloseq")) { new_obj <- - merge_taxa_vec(physeq, - clusters$cluster, - tax_adjust = tax_adjust - ) + merge_taxa_vec(physeq, clusters$cluster, tax_adjust = tax_adjust) } else if (inherits(dna_seq, "character")) { new_obj <- clusters } else { @@ -752,12 +733,9 @@ write_pq <- function(physeq, !is.null(physeq@otu_table) && !is.null(physeq@tax_table)) { if (!taxa_are_rows(physeq)) { otu_table(physeq) <- - otu_table( - t(as.matrix(unclass( - physeq@otu_table - ))), - taxa_are_rows = TRUE - ) + otu_table(t(as.matrix(unclass( + physeq@otu_table + ))), taxa_are_rows = TRUE) } df_physeq_interm <- cbind( physeq@otu_table, @@ -801,17 +779,11 @@ write_pq <- function(physeq, !is.null(physeq@tax_table)) { if (!taxa_are_rows(physeq)) { otu_table(physeq) <- - otu_table( - t(as.matrix(unclass( - physeq@otu_table - ))), - taxa_are_rows = TRUE - ) + otu_table(t(as.matrix(unclass( + physeq@otu_table + ))), taxa_are_rows = TRUE) } - df_physeq_interm <- cbind( - physeq@otu_table, - physeq@tax_table, - ) + df_physeq_interm <- cbind(physeq@otu_table, physeq@tax_table, ) colnames(df_physeq_interm) <- c( sample_names(physeq), @@ -1355,10 +1327,7 @@ mumu_pq <- function(physeq, } } - return(list( - "new_physeq" = new_physeq, - "mumu_results" = result_mumu - )) + return(list("new_physeq" = new_physeq, "mumu_results" = result_mumu)) } ################################################################################ @@ -1385,29 +1354,32 @@ mumu_pq <- function(physeq, #' Warnings if verbose = TRUE #' @export #' -verify_pq <- function( - physeq, - verbose = FALSE, - min_nb_seq_sample = 500, - min_nb_seq_taxa = 1) { +verify_pq <- function(physeq, + verbose = FALSE, + min_nb_seq_sample = 500, + min_nb_seq_taxa = 1) { if (!methods::validObject(physeq) || !inherits(physeq, "phyloseq")) { stop("The physeq argument is not a valid phyloseq object.") } if (verbose) { if (min(sample_sums(physeq)) < min_nb_seq_sample) { - warning(paste0( - "At least one of your sample contains less than ", - min_nb_seq_sample, - " sequences." - )) + warning( + paste0( + "At least one of your sample contains less than ", + min_nb_seq_sample, + " sequences." + ) + ) } if (min(sample_sums(physeq)) < min_nb_seq_sample) { - warning(paste0( - "At least one of your taxa is represent by less than ", - min_nb_seq_taxa, - " sequences." - )) + warning( + paste0( + "At least one of your taxa is represent by less than ", + min_nb_seq_taxa, + " sequences." + ) + ) } if (sum(is.na(physeq@sam_data)) > 0) { warning("At least one of your samples metadata columns contains NA.") @@ -1539,9 +1511,7 @@ subset_taxa_pq <- function(physeq, if (!sum(names(condition) %in% taxa_names(physeq)) == length(condition)) { stop(paste( "Some names in condition do not fit taxa_names of physeq : ", - paste(names(condition)[!names(condition) %in% taxa_names(physeq)], - collapse = "/" - ) + paste(names(condition)[!names(condition) %in% taxa_names(physeq)], collapse = "/") )) } @@ -1560,12 +1530,12 @@ subset_taxa_pq <- function(physeq, as(otu_table(new_physeq, taxa_are_rows = TRUE), "matrix") new_MA <- old_MA[cond, ] - if (!is.matrix(new_MA)) { - new_MA <- as.matrix(new_MA) + if (is.matrix(new_MA)) { new_otu_table <- otu_table(new_MA, taxa_are_rows = TRUE) - sample_names(new_otu_table) <- sample_names(new_physeq) } else { + new_MA <- as.matrix(new_MA) new_otu_table <- otu_table(new_MA, taxa_are_rows = TRUE) + sample_names(new_otu_table) <- sample_names(new_physeq) } otu_table(new_physeq) <- new_otu_table @@ -1806,16 +1776,10 @@ add_funguild_info <- function(physeq, FUNGuild_assign <- funguild_assign(data.frame( "Taxonomy" = - apply(tax_tab[, taxLevels], 1, - paste, - collapse = ";" - ) + apply(tax_tab[, taxLevels], 1, paste, collapse = ";") )) tax_tab <- - as.matrix(cbind( - tax_tab, - FUNGuild_assign - )) + as.matrix(cbind(tax_tab, FUNGuild_assign)) physeq@tax_table <- tax_table(tax_tab) return(physeq) } @@ -1884,12 +1848,9 @@ plot_guild_pq <- } guilds <- data.frame(sort(table(strsplit( - paste( - physeq@tax_table[, "guild"] - [physeq@tax_table[, "confidenceRanking"] %in% - c("Highly Probable", "Probable")], - collapse = "-" - ), + paste(physeq@tax_table[, "guild"] + [physeq@tax_table[, "confidenceRanking"] %in% + c("Highly Probable", "Probable")], collapse = "-"), split = "-" )))) @@ -1899,13 +1860,10 @@ plot_guild_pq <- guilds <- guilds[guilds$Var1 != "", ] # Number of sequences per guild - nb_seq_by_guild <- c() + nb_seq_by_guild <- vector("integer", length(guilds$Var1)) for (i in seq(1, length(guilds$Var1))) { nb_seq_by_guild[i] <- - sum(taxa_sums(physeq@otu_table)[grepl( - guilds$Var1[i], - physeq@tax_table[, "guild"] - )]) + sum(taxa_sums(physeq@otu_table)[grepl(guilds$Var1[i], physeq@tax_table[, "guild"])]) } names(nb_seq_by_guild) <- guilds$Var1 guilds$seq <- nb_seq_by_guild @@ -1914,9 +1872,7 @@ plot_guild_pq <- guilds$nb_seq <- as.numeric(guilds$nb_seq) guilds$nb_taxa <- as.numeric(guilds$nb_taxa) - guilds$Guild <- factor(as.vector(guilds$Guild), - levels = guilds$Guild[order(guilds$nb_seq)] - ) + guilds$Guild <- factor(as.vector(guilds$Guild), levels = guilds$Guild[order(guilds$nb_seq)]) COLORS <- rep("Others", nrow(guilds)) @@ -1934,7 +1890,8 @@ plot_guild_pq <- "Guild" = "All ASV", "nb_taxa" = ntaxa(physeq), "nb_seq" = sum(physeq@otu_table), - "colors" = "ALL" + "colors" = "ALL", + stringsAsFactors = FALSE ) ) guilds <- guilds[order(guilds$nb_seq), ] @@ -1942,26 +1899,18 @@ plot_guild_pq <- guilds$Guild <- factor(guilds$Guild, levels = levels_order) } - ggplot( - guilds, - aes( - y = Guild, - x = log10(nb_seq), - fill = colors - ) - ) + + ggplot(guilds, aes( + y = Guild, + x = log10(nb_seq), + fill = colors + )) + geom_bar(stat = "identity") + annotation_logticks(sides = "b", alpha = 0.5) + ylab("GUILD by FUNGuild") + scale_fill_manual("Guild", - values = c( - "gray", "Olivedrab", "cyan4", "tomato3", - "lightpink4" - ) - ) + - geom_text(aes(label = nb_taxa, x = log10(nb_seq) + 0.2), - family = "serif" + values = c("gray", "Olivedrab", "cyan4", "tomato3", "lightpink4") ) + + geom_text(aes(label = nb_taxa, x = log10(nb_seq) + 0.2), family = "serif") + geom_text(aes(label = nb_seq, x = log10(nb_seq) / 2), family = "mono", col = "white" @@ -2078,12 +2027,9 @@ build_phytree_pq <- function(physeq, ) if (nb_bootstrap > 0) { treeUPGMA_bs <- - phangorn::bootstrap.phyDat(phang.align, - function(x) { - phangorn::upgma(phangorn::dist.ml(x)) - }, - bs = nb_bootstrap - ) + phangorn::bootstrap.phyDat(phang.align, function(x) { + phangorn::upgma(phangorn::dist.ml(x)) + }, bs = nb_bootstrap) if (rearrangement == "NNI") { tree_ML_bs <- phangorn::bootstrap.pml( tree_ML, @@ -2112,12 +2058,9 @@ build_phytree_pq <- function(physeq, stop("rearrangement parameter one of the three value 'stochastic', 'NNI' or 'ratchet'") } - treeNJ_bs <- phangorn::bootstrap.phyDat(phang.align, - function(x) { - phangorn::NJ(phangorn::dist.ml(x)) - }, - bs = nb_bootstrap - ) + treeNJ_bs <- phangorn::bootstrap.phyDat(phang.align, function(x) { + phangorn::NJ(phangorn::dist.ml(x)) + }, bs = nb_bootstrap) return( list( "UPGMA" = treeUPGMA, @@ -2220,9 +2163,11 @@ reorder_taxa_pq <- function(physeq, names_ordered, remove_phy_tree = FALSE) { message("Removing phylogenetic tree!") new_physeq@phy_tree <- NULL } else { - stop("The taxa order in a physeq object with a tree is locked by + stop( + "The taxa order in a physeq object with a tree is locked by the order of leaf in the phylogenetic tree. You could use args - remove_phy_tree = TRUE.") + remove_phy_tree = TRUE." + ) } } @@ -2305,10 +2250,7 @@ add_info_to_sam_data <- function(physeq, } df_info <- df_info[match(sample_names(physeq), rownames(df_info)), ] - physeq@sam_data <- sample_data(cbind( - as.data.frame(physeq@sam_data), - df_info - )) + physeq@sam_data <- sample_data(cbind(as.data.frame(physeq@sam_data), df_info)) } return(physeq) } @@ -2357,8 +2299,7 @@ add_info_to_sam_data <- function(physeq, #' #' @seealso [Biostrings::DNAStringSet()] #' @author Adrien Taudière -physeq_or_string_to_dna <- function(physeq = NULL, - dna_seq = NULL) { +physeq_or_string_to_dna <- function(physeq = NULL, dna_seq = NULL) { if (inherits(physeq, "phyloseq")) { verify_pq(physeq) if (is.null(physeq@refseq)) { @@ -2488,7 +2429,8 @@ cutadapt_remove_primers <- function(path_to_fastq, ) } } else { - lff <- list_fastq_files(path_to_fastq, + lff <- list_fastq_files( + path_to_fastq, paired_end = TRUE, pattern = pattern, pattern_R1 = pattern_R1, @@ -2530,7 +2472,10 @@ cutadapt_remove_primers <- function(path_to_fastq, if (cmd_is_run) { writeLines(unlist(cmd), paste0(tempdir(), "/script_cutadapt.sh")) system2("bash", paste0(tempdir(), "/script_cutadapt.sh")) - message(paste0("Output files are available in the folder ", normalizePath(folder_output))) + message(paste0( + "Output files are available in the folder ", + normalizePath(folder_output) + )) unlink(paste0(tempdir(), "/script_cutadapt.sh")) } return(cmd) @@ -2634,7 +2579,10 @@ taxa_only_in_one_level <- function(physeq, #' geom_point(aes(x = raw, y = norm)) #' #' data_f_norm <- normalize_prop_pq(data_fungi_mini, base_log = NULL) -normalize_prop_pq <- function(physeq, base_log = 2, constante = 10000, digits = 4) { +normalize_prop_pq <- function(physeq, + base_log = 2, + constante = 10000, + digits = 4) { verify_pq(physeq) if (taxa_are_rows(physeq)) { new_otutab <- round((apply(physeq@otu_table, 2, function(x) { @@ -2728,12 +2676,20 @@ psmelt_samples_pq <- nrow() if (nsamples(physeq) != nb_distinct_samp) { - stop("The number of samples in physeq is different from the resulting - number in psm tibble.") + stop( + "The number of samples in physeq is different from the resulting + number in psm tibble." + ) } } else { psm <- psm |> - select(Sample, OTU, Abundance, colnames(physeq@sam_data), !!taxa_ranks) + select( + Sample, + OTU, + Abundance, + colnames(physeq@sam_data), + !!taxa_ranks + ) } if (is.null(taxa_ranks)) { @@ -2769,10 +2725,7 @@ psmelt_samples_pq <- if (!is.null(hill_scales)) { physeq <- taxa_as_rows(physeq) df_hill <- - vegan::renyi(t(physeq)@otu_table, - scales = hill_scales, - hill = TRUE - ) + vegan::renyi(t(physeq)@otu_table, scales = hill_scales, hill = TRUE) colnames(df_hill) <- paste0("Hill_", hill_scales) df_hill$Sample <- rownames(df_hill) @@ -2871,13 +2824,22 @@ taxa_as_rows <- function(physeq) { #' ggvenn_pq(data_fungi_mini, "Height") + ggvenn_pq(data_fungi_mini2, "Height") #' } rarefy_sample_count_by_modality <- - function(physeq, fact, rngseed = FALSE, verbose = TRUE) { + function(physeq, + fact, + rngseed = FALSE, + verbose = TRUE) { if (as(rngseed, "logical")) { set.seed(rngseed) if (verbose) { - message("`set.seed(", rngseed, ")` was used to initialize repeatable random subsampling.") + message( + "`set.seed(", + rngseed, + ")` was used to initialize repeatable random subsampling." + ) message("Please record this for your records so others can reproduce.") - message("Try `set.seed(", rngseed, "); .Random.seed` for the full vector", + message("Try `set.seed(", + rngseed, + "); .Random.seed` for the full vector", sep = "" ) message("...") @@ -2892,8 +2854,7 @@ rarefy_sample_count_by_modality <- } mod <- as.factor(physeq@sam_data[[fact]]) n_mod <- table(mod) - samples_names <- sample_names(physeq) - samp_to_keep <- c() + samp_to_keep <- NULL for (modality in levels(mod)) { vec_samp_mod <- c(as.numeric(grep(modality, mod))) @@ -2907,11 +2868,7 @@ rarefy_sample_count_by_modality <- samp_to_keep <- c( samp_to_keep, - sample( - vec_samp_mod, - size = min(n_mod), - replace = FALSE - ) + sample(vec_samp_mod, size = min(n_mod), replace = FALSE) ) } new_physeq <- @@ -2923,7 +2880,8 @@ rarefy_sample_count_by_modality <- "The number of final levels (sam_data of the output phyloseq object) is not equal to the inital (sam_data of the input phyloseq object) number of levels in the factor: '", - fact, "'" + fact, + "'" ) ) } diff --git a/R/deprecated.R b/R/deprecated.R index 88ef2aa6..ed8ab834 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -7,8 +7,8 @@ #' @name MiscMetabar-deprecated #' @param ... Parameters to be passed on to the modern version of the function #' @return Depend on the functions. -#' @export physeq_graph_test adonis_phyloseq clean_physeq lulu_phyloseq otu_circle biplot_physeq read_phyloseq write_phyloseq sankey_phyloseq summary_plot_phyloseq plot_deseq2_phyloseq plot_edgeR_phyloseq venn_phyloseq ggVenn_phyloseq hill_tuckey_phyloseq hill_phyloseq -#' @aliases physeq_graph_test adonis_phyloseq clean_physeq lulu_phyloseq otu_circle biplot_physeq read_phyloseq write_phyloseq sankey_phyloseq summary_plot_phyloseq plot_deseq2_phyloseq plot_edgeR_phyloseq venn_phyloseq ggVenn_phyloseq hill_tuckey_phyloseq hill_phyloseq +#' @export physeq_graph_test adonis_phyloseq clean_physeq lulu_phyloseq otu_circle biplot_physeq read_phyloseq write_phyloseq sankey_phyloseq summary_plot_phyloseq plot_deseq2_phyloseq plot_edgeR_phyloseq venn_phyloseq ggVenn_phyloseq hill_tuckey_phyloseq hill_phyloseq +#' @aliases physeq_graph_test adonis_phyloseq clean_physeq lulu_phyloseq otu_circle biplot_physeq read_phyloseq write_phyloseq sankey_phyloseq summary_plot_phyloseq plot_deseq2_phyloseq plot_edgeR_phyloseq venn_phyloseq ggVenn_phyloseq hill_tuckey_phyloseq hill_phyloseq #' @section Details: #' \tabular{rl}{ #' [graph_test_pq] \tab now a synonym for `physeq_graph_test`\cr diff --git a/R/funguild.R b/R/funguild.R index 574af2e5..34ba7f26 100644 --- a/R/funguild.R +++ b/R/funguild.R @@ -24,7 +24,6 @@ #' modified by Adrien Taudière #' get_funguild_db <- function(db_url = "http://www.stbates.org/funguild_db_2.php") { - taxon <- NULL httr::GET(url = db_url) %>% httr::content(as = "text") %>% stringr::str_split("\n") %>% diff --git a/R/lulu.R b/R/lulu.R index 77adb315..9c901a39 100644 --- a/R/lulu.R +++ b/R/lulu.R @@ -206,7 +206,7 @@ lulu <- function(otu_table, matchlist, minimum_ratio_type = "min", minimum_ratio } close(log_con) - total_abundances <- rowSums(otutable) + curation_table <- cbind(nOTUid = statistics_table$parent_id, otutable) statistics_table$curated <- "merged" curate_index <- row.names(statistics_table) == statistics_table$parent_id diff --git a/R/miscellanous.R b/R/miscellanous.R index fe200195..8c048a52 100644 --- a/R/miscellanous.R +++ b/R/miscellanous.R @@ -231,12 +231,12 @@ count_seq <- function(file_path = NULL, folder_path = NULL, pattern = NULL) { } else if (!is.null(file_path) && !is.null(folder_path)) { stop("You need to specify either file_path or folder_path param not both!") } else if (!is.null(file_path) && is.null(folder_path)) { - if (sum(get_file_extension(file_path) %in% "fasta") > 0) { + if (sum(get_file_extension(file_path) == "fasta") > 0) { seq_nb <- system(paste0("cat ", file_path, " | grep -ce '^>'"), intern = TRUE ) - } else if (sum(get_file_extension(file_path) %in% "fastq") > 0) { - if (sum(get_file_extension(file_path) %in% "gz") > 0) { + } else if (sum(get_file_extension(file_path) == "fastq") > 0) { + if (sum(get_file_extension(file_path) == "gz") > 0) { seq_nb <- system(paste0("zcat ", file_path, " | grep -ce '^+$'"), intern = TRUE ) @@ -414,7 +414,7 @@ is_cutadapt_installed <- function(args_before_cutadapt = "source ~/miniconda3/et cutadapt_error_or_not <- try(system(paste0("bash ", tempdir(), "/script_cutadapt.sh 2>&1"), intern = TRUE), silent = T) unlink(paste0(tempdir(), "/script_cutadapt.sh")) - return(class(cutadapt_error_or_not) != "try-error") + return(!inherits(cutadapt_error_or_not, "try-error")) } #' Test if falco is installed. @@ -435,9 +435,9 @@ is_cutadapt_installed <- function(args_before_cutadapt = "source ~/miniconda3/et #' @author Adrien Taudière is_falco_installed <- function(path = "falco") { - return(class(try(system(paste0(path, " 2>&1"), intern = TRUE), + return(!inherits(try(system(paste0(path, " 2>&1"), intern = TRUE), silent = TRUE - )) != "try-error") + ), "try-error")) } #' Test if swarm is installed. @@ -458,9 +458,9 @@ is_falco_installed <- function(path = "falco") { #' @author Adrien Taudière is_swarm_installed <- function(path = "swarm") { - return(class(try(system(paste0(path, " -h 2>&1"), intern = TRUE), + return(!inherits(try(system(paste0(path, " -h 2>&1"), intern = TRUE), silent = TRUE - )) != "try-error") + ), "try-error")) } #' Test if vsearch is installed. @@ -481,9 +481,9 @@ is_swarm_installed <- function(path = "swarm") { #' @author Adrien Taudière is_vsearch_installed <- function(path = "vsearch") { - return(class(try(system(paste0(path, " 2>&1"), intern = TRUE), + return(!inherits(try(system(paste0(path, " 2>&1"), intern = TRUE), silent = TRUE - )) != "try-error") + ), "try-error")) } #' Test if mumu is installed. @@ -504,9 +504,9 @@ is_vsearch_installed <- function(path = "vsearch") { #' @author Adrien Taudière is_mumu_installed <- function(path = "mumu") { - return(class(try(system(paste0(path, " 2>&1"), intern = TRUE), + return(!inherits(try(system(paste0(path, " 2>&1"), intern = TRUE), silent = TRUE - )) != "try-error") + ), "try-error")) } @@ -528,8 +528,8 @@ is_mumu_installed <- function(path = "mumu") { #' @author Adrien Taudière is_krona_installed <- function(path = "ktImportKrona") { - return(class(try(system(paste0(path, " 2>&1"), intern = TRUE), + return(inherits(try(system(paste0(path, " 2>&1"), intern = TRUE), silent = TRUE - )) != "try-error") + ), "try-error")) } ################################################################################ diff --git a/R/plot_functions.R b/R/plot_functions.R index 9af74305..6b5a2356 100644 --- a/R/plot_functions.R +++ b/R/plot_functions.R @@ -193,12 +193,12 @@ accu_plot <- fact_interm <- as.factor(unlist(unclass(physeq@sam_data[, fact])[fact])) - if (!by.fact) { - x <- t(physeq@otu_table) - } else { + if (by.fact) { x <- apply(physeq@otu_table, 1, function(x) { tapply(x, fact_interm, sum) }) + } else { + x <- t(physeq@otu_table) } tot <- rowSums(x) @@ -232,13 +232,13 @@ accu_plot <- df$x <- n_max[cond] - if (!by.fact) { + if (by.fact) { + df$fact <- df$.id + } else { df$fact <- as.factor(unlist(unclass(physeq@sam_data [match(df$.id, sample_names(physeq)), fact]) [fact])) - } else { - df$fact <- df$.id } df$ymin <- df$X1 - df$X2 * ci @@ -418,7 +418,7 @@ accu_plot_balanced_modality <- function(physeq, ... ) ))$data[, c(2:4, 6, 7)]) - plist[1:nrow(res_interm), , i] <- res_interm + plist[seq_along(nrow(res_interm)), , i] <- res_interm } if (progress_bar) { setTxtProgressBar(pb, i) @@ -591,10 +591,10 @@ circle_pq <- stop("physeq must be an object of class 'phyloseq'") } - if (!physeq@otu_table@taxa_are_rows) { - otu_tab <- t(physeq@otu_table) - } else { + if (physeq@otu_table@taxa_are_rows) { otu_tab <- physeq@otu_table + } else { + otu_tab <- t(physeq@otu_table) } if (!add_nb_seq) { @@ -795,10 +795,10 @@ sankey_pq <- stop("physeq must be an object of class 'phyloseq'") } - if (!physeq@otu_table@taxa_are_rows) { - otu_tab <- t(physeq@otu_table) - } else { + if (physeq@otu_table@taxa_are_rows) { otu_tab <- physeq@otu_table + } else { + otu_tab <- t(physeq@otu_table) } if (!add_nb_seq) { @@ -1045,7 +1045,8 @@ venn_pq <- table_value <- data.frame( combinations = as.character(combinations), - weights = as.double(weights) + weights = as.double(weights), + stringsAsFactors = FALSE ) venn <- venneuler::venneuler(data_venn > min_nb_seq) @@ -1132,7 +1133,7 @@ venn_pq <- width = 0.75, height = 1, x = 0.375, - y = .5 + y = 0.5 ) vpleg <- grid::viewport( @@ -1686,7 +1687,7 @@ hill_pq <- function(physeq, summarize(pos_letters = max(.data[[paste0("Hill_", hill_scales[[i]])]]) + 1) %>% inner_join(dt, by = join_by(!!fact)) - if (!kruskal_test | kt_res[[i]]$p.value < 0.05) { + if (!kruskal_test || kt_res[[i]]$p.value < 0.05) { p_list[[i]] <- p_list[[i]] + geom_label( data = data_letters, @@ -1898,7 +1899,8 @@ summary_plot_pq <- function(physeq, ) ) ) - ) + ), + stringsAsFactors = FALSE ) p <- ggplot() + @@ -2363,7 +2365,7 @@ biplot_pq <- function(physeq, ), ... ) + - geom_bar(stat = "identity", width = .6) + + geom_bar(stat = "identity", width = 0.6) + annotate( "rect", xmin = "Samples", @@ -2426,7 +2428,7 @@ biplot_pq <- function(physeq, p <- p + coord_flip() + theme_minimal() + - theme(plot.title = element_text(hjust = .5), axis.ticks = element_blank()) + + theme(plot.title = element_text(hjust = 0.5), axis.ticks = element_blank()) + scale_fill_manual(values = c(left_fill, right_fill)) + scale_color_manual(values = c(left_col, right_col), guide = "none") + ylim( @@ -3904,10 +3906,10 @@ plot_var_part_pq <- alpha = 63, id.size = 1.2, min_prop_pval_signif_dbrda = 0.95) { - if (show_dbrda_signif_pval > 1 | show_dbrda_signif_pval < 0) { + if (show_dbrda_signif_pval > 1 || show_dbrda_signif_pval < 0) { stop("show_dbrda_signif_pval value must be within the range [0-1]") } - if (min_prop_pval_signif_dbrda > 1 | + if (min_prop_pval_signif_dbrda > 1 || min_prop_pval_signif_dbrda < 0) { stop("show_dbrda_signif_pval value must be within the range [0-1]") } diff --git a/R/table_functions.R b/R/table_functions.R index 6d51eb8b..9f7bca7f 100644 --- a/R/table_functions.R +++ b/R/table_functions.R @@ -198,7 +198,8 @@ compare_pairs_pq <- function(physeq = NULL, nmodality <- bifactor } - res <- c() + res <- vector("list", length(nmodality)) + names(res) <- nmodality for (i in nmodality) { newphyseq <- physeq diff --git a/R/targets_misc.R b/R/targets_misc.R index cc55d68a..558e1d54 100644 --- a/R/targets_misc.R +++ b/R/targets_misc.R @@ -156,8 +156,8 @@ rename_samples_otu_table <- function(physeq, names_of_samples) { filter_trim <- function(fw = NULL, rev = NULL, - output_fw = paste(getwd(), "/output/filterAndTrim_fwd", sep = ""), - output_rev = paste(getwd(), "/output/filterAndTrim_rev", sep = ""), + output_fw = file.path(paste(getwd(), "/output/filterAndTrim_fwd", sep = "")), + output_rev = file.path(paste(getwd(), "/output/filterAndTrim_rev", sep = "")), return_a_vector = FALSE, ...) { if (length(fw) == 1) { diff --git a/R/vsearch.R b/R/vsearch.R index 8c38a19c..2f1ed378 100644 --- a/R/vsearch.R +++ b/R/vsearch.R @@ -107,7 +107,9 @@ vs_search_global <- function(physeq, "target" ) - if (!keep_temporary_files) { + if (keep_temporary_files) { + message(paste0("Temporary files are located at ", tempdir())) + } else { if (file.exists(paste0(tempdir(), "temp.fasta"))) { unlink(paste0(tempdir(), "temp.fasta")) } @@ -117,8 +119,6 @@ vs_search_global <- function(physeq, if (file.exists(paste0(tempdir(), "seq2search.fasta"))) { unlink(paste0(tempdir(), "seq2search.fasta")) } - } else { - message(paste0("Temporary files are located at ", tempdir())) } return(invisible(pack_clusts)) @@ -726,7 +726,9 @@ chimera_detection_vs <- function(seq2search, borderline_AAStringSet <- Biostrings::readAAStringSet(paste0(tempdir(), "/", "borderline.fasta")) - if (!keep_temporary_files) { + if (keep_temporary_files) { + message(paste0("Temporary files are located at ", tempdir())) + } else { if (file.exists(paste0(tempdir(), "temp.fasta"))) { unlink(paste0(tempdir(), "temp.fasta")) } @@ -736,8 +738,6 @@ chimera_detection_vs <- function(seq2search, if (file.exists(paste0(tempdir(), "chimeras.fasta"))) { unlink(paste0(tempdir(), "chimeras.fasta")) } - } else { - message(paste0("Temporary files are located at ", tempdir())) } return( diff --git a/_pkgdown.yml b/_pkgdown.yml index dcacd443..0099ff01 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -35,7 +35,7 @@ navbar: href: articles/fastq_quality_check.html - text: ------- - text: Examples with published dataset - - text: Tengeler + - text: Tengeler dataset href: articles/tengeler.html - text: ------- - text: "R ecosystem for metabarcoding" @@ -56,7 +56,7 @@ reference: - title: Transform phyloseq object - subtitle: Taxonomy - contents: + contents: - add_new_taxonomy_pq - add_blast_info - blast_to_phyloseq @@ -64,35 +64,35 @@ reference: - blast_to_derep - filter_asv_blast -- subtitle: Function to add information from taxonomy - contents: +- subtitle: Function to add information from taxonomy + contents: - add_funguild_info - funguild_assign - get_funguild_db - subtitle: Transform OTU table only - contents: + contents: - as_binary_otu_table - normalize_prop_pq - taxa_as_rows - taxa_as_columns - subtitle: Subset/merge taxa - contents: + contents: - subset_taxa_pq - subset_taxa_tax_control - select_taxa - merge_taxa_vec - subtitle: Subset/merge samples - contents: + contents: - merge_samples2 - rarefy_sample_count_by_modality - - select_one_sample - - subset_samples_pq + - select_one_sample + - subset_samples_pq - subtitle: (Re)clustering OTU table - contents: + contents: - asv2otu - swarm_clustering - vsearch_clustering @@ -110,11 +110,11 @@ reference: - add_info_to_sam_data - subtitle: Phylogenetic tree - contents: + contents: - build_phytree_pq - subtitle: Others - contents: + contents: - add_dna_to_phyloseq - clean_pq @@ -221,12 +221,12 @@ reference: - reorder_taxa_pq - title: Datasets - contents: + contents: - data_fungi - data_fungi_sp_known - data_fungi_mini - Tengeler2020_pq - + - title: Other utilities - subtitle: Color management contents: @@ -234,10 +234,10 @@ reference: - fac2col - transp - subtitle: File management - contents: + contents: - get_file_extension - subtitle: External software management - contents: + contents: - is_cutadapt_installed - is_falco_installed - is_krona_installed @@ -245,10 +245,10 @@ reference: - is_swarm_installed - is_vsearch_installed - subtitle: Plot management - contents: + contents: - multiplot - subtitle: Variable management - contents: + contents: - diff_fct_diff_class - perc - unique_or_na @@ -259,14 +259,14 @@ reference: - dist_pos_control - are_modality_even_depth - MiscMetabar-package - + - title: Deprecated contents: - - adonis_phyloseq + - adonis_phyloseq - biplot_physeq - - clean_physeq + - clean_physeq - ggVenn_phyloseq - - hill_phyloseq + - hill_phyloseq - hill_tuckey_phyloseq - lulu_phyloseq - otu_circle @@ -279,7 +279,7 @@ reference: - summary_plot_phyloseq - venn_phyloseq - write_phyloseq - + development: mode: auto diff --git a/docs/404.html b/docs/404.html index db605e84..1345ae21 100644 --- a/docs/404.html +++ b/docs/404.html @@ -27,7 +27,7 @@ MiscMetabar - 0.9.4 + 0.10.2