diff --git a/CHANGELOG.md b/CHANGELOG.md index 332a6b45..d21ec79b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#798](https://github.com/nf-core/ampliseq/pull/798) - Added SILVA version 138.2 of DADA2 taxonomy database: `silva=138.2` or `silva` as parameter to `--dada2_ref_taxonomy` - [#804](https://github.com/nf-core/ampliseq/pull/804) - Added version 10 of Unite as parameter for `--dada_ref_taxonomy` (issue [#768](https://github.com/nf-core/ampliseq/issues/768)) +- [#807](https://github.com/nf-core/ampliseq/pull/807) - Export of TreeSummarizedExperiment R object by default, can be omitted with `--skip_tse`, also added ability to skip phyloseq R object generation with `--skip_phyloseq` ### `Changed` diff --git a/CITATIONS.md b/CITATIONS.md index fa98ef19..f122b49a 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -149,6 +149,10 @@ > McMurdie PJ, Holmes S (2013). “phyloseq: An R package for reproducible interactive analysis and graphics of microbiome census data.” PLoS ONE, 8(4), e61217. +- [TreeSummarizedExperiment](https://doi.org/10.12688/f1000research.26669.2) + + > Huang R, Soneson C, Ernst FGM et al. TreeSummarizedExperiment: a S4 class for data with hierarchical structure [version 2; peer review: 3 approved]. F1000Research 2021, 9:1246. 
+ ### Non-default tools - [ITSx](https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.12073) diff --git a/assets/report_template.Rmd b/assets/report_template.Rmd index 76119739..d4aa09f9 100644 --- a/assets/report_template.Rmd +++ b/assets/report_template.Rmd @@ -107,6 +107,7 @@ params: picrust_pathways: FALSE sbdi: FALSE phyloseq: FALSE + tse: FALSE --- @@ -1615,19 +1616,37 @@ but if you run nf-core/ampliseq with a sample metadata table (`--metadata`) any ")) ``` - + -```{r, eval = !isFALSE(params$phyloseq), results='asis'} +```{r, results='asis'} +any_robject <- !isFALSE(params$phyloseq) || !isFALSE(params$tse) +``` + +```{r, eval = !isFALSE(any_robject), results='asis'} cat(paste0(" -# Phyloseq +# R objects -[Phyloseq](https://doi.org/10.1371/journal.pone.0061217) -is a popular R package to analyse and visualize microbiom data. -The produced RDS files contain phyloseq objects and can be loaded directely into R and phyloseq. +Microbiome data can be analysed and visualized with certain R packages. For convenience, R objects in RDS format are provided. +")) + +if ( !isFALSE(params$phyloseq) ) { + cat(paste0(" +[Phyloseq](https://doi.org/10.1371/journal.pone.0061217) objects can be loaded directly into R with package 'phyloseq'. The objects contain an ASV abundance table and a taxonomy table. If available, metadata and phylogenetic tree will also be included in the phyloseq object. The files can be found in folder [phyloseq](../phyloseq/). -")) + ")) +} + +if ( !isFALSE(params$tse) ) { + cat(paste0(" +[TreeSummarizedExperiment](https://doi.org/10.12688/f1000research.26669.2) (TreeSE, TSE) +objects can be loaded into R with package 'TreeSummarizedExperiment' and contain an ASV abundance table, +a taxonomy table, and sequences. +If available, metadata and phylogenetic tree will also be included in the object. +The files can be found in folder [treesummarizedexperiment](../treesummarizedexperiment/). 
+ ")) +} ``` diff --git a/conf/modules.config b/conf/modules.config index 5f940e57..cbbf66f0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1057,6 +1057,15 @@ process { pattern: "*.rds" ] } + + withName: TREESUMMARIZEDEXPERIMENT { + publishDir = [ + path: { "${params.outdir}/treesummarizedexperiment" }, + mode: params.publish_dir_mode, + pattern: "*.rds" + ] + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/conf/test_multi.config b/conf/test_multi.config index ea3e2e85..543a0271 100644 --- a/conf/test_multi.config +++ b/conf/test_multi.config @@ -30,4 +30,7 @@ params { dada_ref_taxonomy = "rdp=18" skip_dada_addspecies = true input = params.pipelines_testdata_base_path + "ampliseq/samplesheets/Samplesheet_multi.tsv" + + skip_phyloseq = true + skip_tse = true } diff --git a/conf/test_pacbio_its.config b/conf/test_pacbio_its.config index 65350d17..22177901 100644 --- a/conf/test_pacbio_its.config +++ b/conf/test_pacbio_its.config @@ -37,4 +37,6 @@ params { // Prevent default taxonomic classification skip_dada_taxonomy = true + + skip_phyloseq = true } diff --git a/conf/test_reftaxcustom.config b/conf/test_reftaxcustom.config index c87f1ab9..7ad52aa9 100644 --- a/conf/test_reftaxcustom.config +++ b/conf/test_reftaxcustom.config @@ -37,4 +37,6 @@ params { // Skip downstream analysis with QIIME2 skip_qiime_downstream = true + + skip_tse = true } diff --git a/docs/output.md b/docs/output.md index 33278748..387d5dc3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -46,7 +46,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [Differential abundance analysis](#differential-abundance-analysis) - Calling differentially abundant features with ANCOM or ANCOM-BC - [PICRUSt2](#picrust2) - Predict the functional potential of a bacterial community - [SBDI export](#sbdi-export) - Swedish Biodiversity Infrastructure (SBDI) submission file -- 
[Phyloseq](#phyloseq) - Phyloseq R objects +- [R objects](#r-objects) - Phyloseq and TreeSummarizedExperiment R objects - [Read count report](#read-count-report) - Report of read counts during various steps of the pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -629,15 +629,17 @@ Most of the fields in the template will not be populated by the export process, -### Phyloseq +### R objects -This directory will hold phyloseq objects for each taxonomy table produced by this pipeline. The objects will contain an ASV abundance table and a taxonomy table. If the pipeline is provided with metadata, that metadata will also be included in the phyloseq object. A phylogenetic tree will also be included if the pipeline produces a tree. +Pipeline results are stored in phyloseq and TreeSummarizedExperiment R objects for each taxonomy table produced by this pipeline. The R objects will contain an ASV abundance table and a taxonomy table, and optionally sequences, metadata and a phylogenetic tree.
Output files - `phyloseq/` - `_phyloseq.rds`: Phyloseq R object. +- `treesummarizedexperiment/` + - `_TreeSummarizedExperiment.rds`: TreeSummarizedExperiment R object.
diff --git a/modules/local/summary_report.nf b/modules/local/summary_report.nf index 309dcdf7..61c93733 100644 --- a/modules/local/summary_report.nf +++ b/modules/local/summary_report.nf @@ -58,6 +58,7 @@ process SUMMARY_REPORT { path(picrust_pathways) path(sbdi, stageAs: 'sbdi/*') path(phyloseq, stageAs: 'phyloseq/*') + path(tse, stageAs: 'tse/*') output: path "*.svg" , emit: svg, optional: true @@ -137,6 +138,7 @@ process SUMMARY_REPORT { ancombc_formula ? "ancombc_formula='"+ ancombc_formula.join(",") +"'" : "", sbdi ? "sbdi='"+ sbdi.join(",") +"'" : "", phyloseq ? "phyloseq='"+ phyloseq.join(",") +"'" : "", + tse ? "tse='"+ tse.join(",") +"'" : "", ] // groovy list to R named list string; findAll removes empty entries params_list_named_string = params_list_named.findAll().join(',').trim() diff --git a/modules/local/treesummarizedexperiment.nf b/modules/local/treesummarizedexperiment.nf new file mode 100644 index 00000000..79ee5c9f --- /dev/null +++ b/modules/local/treesummarizedexperiment.nf @@ -0,0 +1,82 @@ +process TREESUMMARIZEDEXPERIMENT { + tag "$prefix" + label 'process_low' + + conda "bioconda::bioconductor-treesummarizedexperiment=2.10.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bioconductor-treesummarizedexperiment:2.10.0--r43hdfd78af_0' : + 'biocontainers/bioconductor-treesummarizedexperiment:2.10.0--r43hdfd78af_0' }" + + input: + tuple val(prefix), path(tax_tsv), path(otu_tsv) + path sam_tsv + path tree + + output: + tuple val(prefix), path("*TreeSummarizedExperiment.rds"), emit: rds + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def sam_tsv = "\"${sam_tsv}\"" + def otu_tsv = "\"${otu_tsv}\"" + def tax_tsv = "\"${tax_tsv}\"" + def tree = "\"${tree}\"" + def prefix = "\"${prefix}\"" + """ + #!/usr/bin/env Rscript + + suppressPackageStartupMessages(library(TreeSummarizedExperiment)) + + # Read otu table. It must be in a SimpleList as a matrix where rows + # represent taxa and columns samples. + otu_mat <- read.table($otu_tsv, sep="\\t", header=TRUE, row.names=1) + otu_mat <- as.matrix(otu_mat) + assays <- SimpleList(counts = otu_mat) + # Read taxonomy table. Correct format for it is DataFrame. + taxonomy_table <- read.table($tax_tsv, sep="\\t", header=TRUE, row.names=1) + taxonomy_table <- DataFrame(taxonomy_table) + + # Match rownames between taxonomy table and abundance matrix. + taxonomy_table <- taxonomy_table[match(rownames(otu_mat), rownames(taxonomy_table)), , drop = FALSE] + + # Create TreeSE object. + tse <- TreeSummarizedExperiment( + assays = assays, + rowData = taxonomy_table + ) + + # If taxonomy table contains sequences, move them to referenceSeq slot + if (!is.null(rowData(tse)[["sequence"]])) { + referenceSeq(tse) <- DNAStringSet( rowData(tse)[["sequence"]] ) + rowData(tse)[["sequence"]] <- NULL + } + + # If provided, we add sample metadata as DataFrame object. rownames of + # sample metadata must match with colnames of abundance matrix. 
+ if (file.exists($sam_tsv)) { + sample_meta <- read.table($sam_tsv, sep="\\t", header=TRUE, row.names=1) + sample_meta <- sample_meta[match(colnames(tse), rownames(sample_meta)), , drop = FALSE] # drop = FALSE keeps a data.frame (with row and column names) when metadata has a single column + sample_meta <- DataFrame(sample_meta) + colData(tse) <- sample_meta + } + + # If provided, we add phylogeny. The rownames in abundance matrix must match + # with node labels in phylogeny. + if (file.exists($tree)) { + phylogeny <- ape::read.tree($tree) + rowTree(tse) <- phylogeny + } + + saveRDS(tse, file = paste0($prefix, "_TreeSummarizedExperiment.rds")) + + # Version information + writeLines(c("\\"${task.process}\\":", + paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")), + paste0(" TreeSummarizedExperiment: ", packageVersion("TreeSummarizedExperiment"))), + "versions.yml" + ) + """ +} diff --git a/nextflow.config b/nextflow.config index 64af5e64..04bf9007 100644 --- a/nextflow.config +++ b/nextflow.config @@ -105,6 +105,8 @@ params { skip_dada_addspecies = false skip_alpha_rarefaction = false skip_diversity_indices = false + skip_phyloseq = false + skip_tse = false skip_multiqc = false skip_report = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 10788090..5a23b626 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -790,6 +790,14 @@ "type": "boolean", "description": "Skip alpha and beta diversity analysis" }, + "skip_phyloseq": { + "type": "boolean", + "description": "Skip exporting phyloseq rds object(s)" + }, + "skip_tse": { + "type": "boolean", + "description": "Skip exporting TreeSummarizedExperiment rds object(s)" + }, "skip_multiqc": { "type": "boolean", "description": "Skip MultiQC reporting" diff --git a/subworkflows/local/phyloseq_workflow.nf b/subworkflows/local/phyloseq_workflow.nf deleted file mode 100644 index b96fa814..00000000 --- a/subworkflows/local/phyloseq_workflow.nf +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Create 
phyloseq objects - */ - -include { PHYLOSEQ } from '../../modules/local/phyloseq' -include { 
PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv' - -workflow PHYLOSEQ_WORKFLOW { - take: - ch_tax - ch_tsv - ch_meta - ch_tree - run_qiime2 - - main: - if ( params.metadata ) { - ch_phyloseq_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel - } else { - ch_phyloseq_inmeta = [] - } - - if ( params.pplace_tree ) { - ch_phyloseq_intree = ch_tree.map { it = it[1] }.first() - } else { - ch_phyloseq_intree = [] - } - - if ( run_qiime2 ) { - if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { - ch_phyloseq_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv - } else { - ch_phyloseq_inasv = ch_tsv - } - } else { - ch_phyloseq_inasv = ch_tsv - } - - PHYLOSEQ ( ch_tax.combine(ch_phyloseq_inasv), ch_phyloseq_inmeta, ch_phyloseq_intree ) - - emit: - rds = PHYLOSEQ.out.rds - versions = PHYLOSEQ.out.versions -} diff --git a/subworkflows/local/qiime2_diversity.nf b/subworkflows/local/qiime2_diversity.nf index 8d47a3ca..296ff587 100644 --- a/subworkflows/local/qiime2_diversity.nf +++ b/subworkflows/local/qiime2_diversity.nf @@ -27,7 +27,8 @@ workflow QIIME2_DIVERSITY { ch_versions_qiime2_diversity = Channel.empty() //Phylogenetic tree for beta & alpha diversities - if (!ch_tree) { + produce_tree = !ch_tree ? true : false + if (produce_tree) { QIIME2_TREE ( ch_seq ) ch_versions_qiime2_diversity = ch_versions_qiime2_diversity.mix(QIIME2_TREE.out.versions) ch_tree = QIIME2_TREE.out.qza @@ -82,6 +83,8 @@ workflow QIIME2_DIVERSITY { } emit: + tree_qza = ch_tree + tree_nwk = produce_tree ? QIIME2_TREE.out.nwk : [] depth = !skip_diversity_indices ? QIIME2_DIVERSITY_CORE.out.depth : [] alpha = !skip_diversity_indices ? QIIME2_DIVERSITY_ALPHA.out.alpha : [] beta = !skip_diversity_indices ? 
QIIME2_DIVERSITY_BETA.out.beta : [] diff --git a/subworkflows/local/robject_workflow.nf b/subworkflows/local/robject_workflow.nf new file mode 100644 index 00000000..9998af44 --- /dev/null +++ b/subworkflows/local/robject_workflow.nf @@ -0,0 +1,50 @@ +/* + * Create R objects (phyloseq and TreeSummarizedExperiment); each export can be skipped via params.skip_phyloseq / params.skip_tse + */ + +include { PHYLOSEQ } from '../../modules/local/phyloseq' +include { PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv' +include { TREESUMMARIZEDEXPERIMENT } from '../../modules/local/treesummarizedexperiment' + +workflow ROBJECT_WORKFLOW { + take: + ch_tax // combined with the abundance table to form the first input of PHYLOSEQ / TREESUMMARIZEDEXPERIMENT + ch_tsv // abundance table; passed through PHYLOSEQ_INASV when run_qiime2 is set and filtering params are non-default + ch_meta // only used when params.metadata is set + ch_robject_intree // forwarded unchanged as the tree input of both processes + run_qiime2 + + main: + ch_versions_robject_workflow = Channel.empty() + + if ( params.metadata ) { + ch_robject_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel + } else { + ch_robject_inmeta = [] + } + + if ( run_qiime2 ) { + if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) { + ch_robject_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv + } else { + ch_robject_inasv = ch_tsv + } + } else { + ch_robject_inasv = ch_tsv + } + + if ( !params.skip_phyloseq ) { + PHYLOSEQ ( ch_tax.combine(ch_robject_inasv), ch_robject_inmeta, ch_robject_intree ) + ch_versions_robject_workflow = ch_versions_robject_workflow.mix(PHYLOSEQ.out.versions) + } + + if ( !params.skip_tse ) { + TREESUMMARIZEDEXPERIMENT ( ch_tax.combine(ch_robject_inasv), ch_robject_inmeta, ch_robject_intree ) + ch_versions_robject_workflow = ch_versions_robject_workflow.mix(TREESUMMARIZEDEXPERIMENT.out.versions) + } + + emit: + phyloseq = !params.skip_phyloseq ? 
PHYLOSEQ.out.rds : [] // empty list when the phyloseq export is skipped + tse = !params.skip_tse ? 
TREESUMMARIZEDEXPERIMENT.out.rds : [] + versions = ch_versions_robject_workflow +} diff --git a/tests/pipeline/doubleprimers.nf.test b/tests/pipeline/doubleprimers.nf.test index 2b81571c..0e305f28 100644 --- a/tests/pipeline/doubleprimers.nf.test +++ b/tests/pipeline/doubleprimers.nf.test @@ -32,6 +32,7 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/input/Samplesheet_double_primer.tsv")).match("input") }, { assert new File("$outputDir/qiime2/abundance_tables/feature-table.tsv").exists() }, { assert new File("$outputDir/phyloseq/kraken2_phyloseq.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/kraken2_TreeSummarizedExperiment.rds").exists() }, { assert snapshot(path("$outputDir/kraken2/ASV_tax.greengenes.kraken2.classifiedreads.txt"), path("$outputDir/kraken2/ASV_tax.greengenes.kraken2.complete.tsv"), path("$outputDir/kraken2/ASV_tax.greengenes.kraken2.tsv")).match("kraken2") }, diff --git a/tests/pipeline/failed.nf.test b/tests/pipeline/failed.nf.test index dd39b9b1..032635b4 100644 --- a/tests/pipeline/failed.nf.test +++ b/tests/pipeline/failed.nf.test @@ -29,7 +29,8 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/diversity/alpha_diversity/shannon_vector/kruskal-wallis-pairwise-treatment1.csv").exists() }, { assert new File("$outputDir/qiime2/diversity/beta_diversity/bray_curtis_pcoa_results-PCoA/index.html").exists() }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/dada2_TreeSummarizedExperiment.rds").exists() } ) } } diff --git a/tests/pipeline/iontorrent.nf.test b/tests/pipeline/iontorrent.nf.test index 98d1618f..d14a831e 100644 --- a/tests/pipeline/iontorrent.nf.test +++ b/tests/pipeline/iontorrent.nf.test @@ -38,7 +38,8 @@ nextflow_pipeline { 
path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/sintax_TreeSummarizedExperiment.rds").exists() } ) } } diff --git a/tests/pipeline/multi.nf.test b/tests/pipeline/multi.nf.test index 59dd21b5..10eef81c 100644 --- a/tests/pipeline/multi.nf.test +++ b/tests/pipeline/multi.nf.test @@ -64,8 +64,7 @@ nextflow_pipeline { { assert new File("$outputDir/qiime2/representative_sequences/rep-seq.fasta").exists() }, { assert snapshot(path("$outputDir/qiime2/representative_sequences/descriptive_stats.tsv"), path("$outputDir/qiime2/representative_sequences/seven_number_summary.tsv")).match("qiime2") }, - { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/summary_report/summary_report.html").exists() } ) } } diff --git a/tests/pipeline/pacbio_its.nf.test b/tests/pipeline/pacbio_its.nf.test index e4092920..f1c811b7 100644 --- a/tests/pipeline/pacbio_its.nf.test +++ b/tests/pipeline/pacbio_its.nf.test @@ -50,7 +50,7 @@ nextflow_pipeline { { assert new File("$outputDir/SBDI/annotation.tsv").exists() }, { assert new File("$outputDir/SBDI/asv-table.tsv").exists() }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + { assert new File("$outputDir/treesummarizedexperiment/sintax_TreeSummarizedExperiment.rds").exists() } ) } } diff --git a/tests/pipeline/pplace.nf.test b/tests/pipeline/pplace.nf.test index 0c6f4f67..c1851020 100644 --- a/tests/pipeline/pplace.nf.test +++ 
b/tests/pipeline/pplace.nf.test @@ -58,7 +58,9 @@ nextflow_pipeline { path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, - { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/pplace_phyloseq.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/qiime2_TreeSummarizedExperiment.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/pplace_TreeSummarizedExperiment.rds").exists() } ) } } diff --git a/tests/pipeline/single.nf.test b/tests/pipeline/single.nf.test index add750b9..68abd513 100644 --- a/tests/pipeline/single.nf.test +++ b/tests/pipeline/single.nf.test @@ -46,7 +46,8 @@ nextflow_pipeline { path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/dada2_TreeSummarizedExperiment.rds").exists() } ) } } diff --git a/tests/pipeline/sintax.nf.test b/tests/pipeline/sintax.nf.test index 03490f18..78194221 100644 --- a/tests/pipeline/sintax.nf.test +++ b/tests/pipeline/sintax.nf.test @@ -67,7 +67,8 @@ nextflow_pipeline { { assert snapshot(path("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt"), path("$outputDir/multiqc/multiqc_data/multiqc_cutadapt.txt")).match("multiqc") }, { assert new File("$outputDir/summary_report/summary_report.html").exists() }, - { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() } + { assert new File("$outputDir/phyloseq/sintax_phyloseq.rds").exists() }, + { assert 
new File("$outputDir/treesummarizedexperiment/sintax_TreeSummarizedExperiment.rds").exists() } ) } } diff --git a/tests/pipeline/test.nf.test b/tests/pipeline/test.nf.test index 2c215ad3..06b42216 100644 --- a/tests/pipeline/test.nf.test +++ b/tests/pipeline/test.nf.test @@ -102,6 +102,8 @@ nextflow_pipeline { { assert new File("$outputDir/summary_report/summary_report.html").exists() }, { assert new File("$outputDir/phyloseq/dada2_phyloseq.rds").exists() }, { assert new File("$outputDir/phyloseq/qiime2_phyloseq.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/dada2_TreeSummarizedExperiment.rds").exists() }, + { assert new File("$outputDir/treesummarizedexperiment/qiime2_TreeSummarizedExperiment.rds").exists() }, { assert snapshot(path("$outputDir/vsearch_cluster/ASV_post_clustering_filtered.fna"), path("$outputDir/vsearch_cluster/ASV_post_clustering_filtered.stats.tsv"), path("$outputDir/vsearch_cluster/ASV_post_clustering_filtered.table.tsv")).match("vsearch_cluster") } diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index 7b9168b4..4f4c2483 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -156,8 +156,8 @@ if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) if ( workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) { log.warn "Conda or mamba is enabled, any steps involving QIIME2 are not available. Use a container engine instead of conda to enable all software." 
} } -// This tracks tax tables produced during pipeline and each table will be used during phyloseq -ch_tax_for_phyloseq = Channel.empty() +// This tracks tax tables produced during pipeline and each table will be used during phyloseq and treesummarizedexperiment +ch_tax_for_robject = Channel.empty() /* @@ -237,7 +237,7 @@ include { QIIME2_EXPORT } from '../subworkflows/local/qiime2_exp include { QIIME2_BARPLOTAVG } from '../subworkflows/local/qiime2_barplotavg' include { QIIME2_DIVERSITY } from '../subworkflows/local/qiime2_diversity' include { QIIME2_ANCOM } from '../subworkflows/local/qiime2_ancom' -include { PHYLOSEQ_WORKFLOW } from '../subworkflows/local/phyloseq_workflow' +include { ROBJECT_WORKFLOW } from '../subworkflows/local/robject_workflow' // // FUNCTIONS @@ -606,7 +606,7 @@ workflow AMPLISEQ { taxlevels ).tax.set { ch_dada2_tax } ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions) - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } ) + ch_tax_for_robject = ch_tax_for_robject.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } ) } else { ch_dada2_tax = Channel.empty() } @@ -620,7 +620,7 @@ workflow AMPLISEQ { kraken2_taxlevels ).qiime2_tsv.set { ch_kraken2_tax } ch_versions = ch_versions.mix(KRAKEN2_TAXONOMY_WF.out.versions) - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } ) + ch_tax_for_robject = ch_tax_for_robject.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } ) } else { ch_kraken2_tax = Channel.empty() } @@ -635,7 +635,7 @@ workflow AMPLISEQ { sintax_taxlevels ).tax.set { ch_sintax_tax } ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions) - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } ) + ch_tax_for_robject = ch_tax_for_robject.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } ) } else { ch_sintax_tax = Channel.empty() } @@ -657,7 +657,7 @@ workflow AMPLISEQ { 
FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data ) ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions ) ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) + ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } ) } else { ch_pplace_tax = Channel.empty() } @@ -679,7 +679,7 @@ workflow AMPLISEQ { ) ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions ) ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv - ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) + ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } ) } else { ch_qiime2_tax = Channel.empty() } @@ -866,23 +866,27 @@ workflow AMPLISEQ { } // - // SUBWORKFLOW: Create phyloseq objects + // SUBWORKFLOW: Create R objects // - if ( !params.skip_taxonomy ) { + if ( !params.skip_taxonomy && ( !params.skip_phyloseq || !params.skip_tse ) ) { if ( params.pplace_tree ) { - ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny + ch_tree_for_robject = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny.map { it = it[1] }.first() + } else if (params.multiregion) { + ch_tree_for_robject = SIDLE_WF.out.tree_nwk + } else if ( run_qiime2 && params.metadata && (!params.skip_alpha_rarefaction || !params.skip_diversity_indices) ) { + ch_tree_for_robject = QIIME2_DIVERSITY.out.tree_nwk } else { - ch_tree_for_phyloseq = [] + ch_tree_for_robject = [] } - PHYLOSEQ_WORKFLOW ( - ch_tax_for_phyloseq, + ROBJECT_WORKFLOW ( + ch_tax_for_robject, ch_tsv, ch_metadata.ifEmpty([]), - ch_tree_for_phyloseq, + ch_tree_for_robject, run_qiime2 ) - ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first()) + ch_versions = 
ch_versions.mix(ROBJECT_WORKFLOW.out.versions) } // @@ -1012,7 +1016,8 @@ workflow AMPLISEQ { run_qiime2 && params.ancombc_formula && params.metadata ? QIIME2_ANCOM.out.ancombc_formula.collect().ifEmpty( [] ) : [], params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [], params.sbdiexport ? SBDIEXPORT.out.sbditables.mix(SBDIEXPORTREANNOTATE.out.sbdiannottables).collect().ifEmpty( [] ) : [], - !params.skip_taxonomy ? PHYLOSEQ_WORKFLOW.out.rds.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : [] + !params.skip_taxonomy && !params.skip_phyloseq ? ROBJECT_WORKFLOW.out.phyloseq.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : [], + !params.skip_taxonomy && !params.skip_tse ? ROBJECT_WORKFLOW.out.tse.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : [] ) ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions) }