Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export TreeSummarizedExperiment R object #807

Merged
merged 15 commits into from
Jan 14, 2025
9 changes: 9 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,15 @@ process {
pattern: "*.rds"
]
}

withName: TREESUMMARIZEDEXPERIMENT {
publishDir = [
path: { "${params.outdir}/treesummarizedexperiment" },
mode: params.publish_dir_mode,
pattern: "*.rds"
]
}

withName: 'MULTIQC' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
Expand Down
76 changes: 76 additions & 0 deletions modules/local/treesummarizedexperiment.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
process TREESUMMARIZEDEXPERIMENT {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This all looks pretty straight forward, any reason why this is not an official nf-core module?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason why this is not an official nf-core module?

First we needed to find a usable implementation, this is now done, now it seems just laziness on my part, but we can call it time efficiency or priorities are on other stuff or just 24h/day instead of 240h/day.

tag "$prefix"
label 'process_low'

conda "bioconda::bioconductor-treesummarizedexperiment=2.10.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
d4straub marked this conversation as resolved.
Show resolved Hide resolved
'https://depot.galaxyproject.org/singularity/bioconductor-treesummarizedexperiment%3A2.10.0--r43hdfd78af_0' :
'bioconductor-treesummarizedexperiment%3A2.10.0--r43hdfd78af_0' }"

input:
tuple val(prefix), path(tax_tsv), path(otu_tsv)
path sam_tsv
path tree

output:
tuple val(prefix), path("*TreeSummarizedExperiment.rds"), emit: rds
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def sam_tsv = "\"${sam_tsv}\""
def otu_tsv = "\"${otu_tsv}\""
def tax_tsv = "\"${tax_tsv}\""
def tree = "\"${tree}\""
def prefix = "\"${prefix}\""
"""
#!/usr/bin/env Rscript

suppressPackageStartupMessages(library(TreeSummarizedExperiment))

# Read otu table. It must be in a SimpleList as a matrix where rows
# represent taxa and columns samples.
otu_mat <- read.table($otu_tsv, sep="\\t", header=TRUE, row.names=1)
otu_mat <- as.matrix(otu_mat)
assays <- SimpleList(counts = otu_mat)
# Read taxonomy table. Correct format for it is DataFrame.
taxonomy_table <- read.table($tax_tsv, sep="\\t", header=TRUE, row.names=1)
taxonomy_table <- DataFrame(taxonomy_table)

# Match rownames between taxonomy table and abundance matrix.
taxonomy_table <- taxonomy_table[match(rownames(otu_mat), rownames(taxonomy_table)), ]

# Create TreeSE object.
tse <- TreeSummarizedExperiment(
assays = assays,
rowData = taxonomy_table
)

# If provided, we add sample metadata as DataFrame object. rownames of
# sample metadata must match with colnames of abundance matrix.
if (file.exists($sam_tsv)) {
sample_meta <- read.table($sam_tsv, sep="\\t", header=TRUE, row.names=1)
sample_meta <- sample_meta[match(colnames(tse), rownames(sample_meta)), ]
sample_meta <- DataFrame(sample_meta)
colData(tse) <- sample_meta
d4straub marked this conversation as resolved.
Show resolved Hide resolved
}

# If provided, we add phylogeny. The rownames in abundance matrix must match
# with node labels in phylogeny.
if (file.exists($tree)) {
phylogeny <- read_tree($tree)
rowTree(tse) <- phylogeny
}

saveRDS(tse, file = paste0($prefix, "_TreeSummarizedExperiment.rds"))

# Version information
writeLines(c("\\"${task.process}\\":",
paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),
paste0(" TreeSummarizedExperiment: ", packageVersion("TreeSummarizedExperiment"))),
"versions.yml"
)
"""
}
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ params {
skip_dada_addspecies = false
skip_alpha_rarefaction = false
skip_diversity_indices = false
skip_phyloseq = false
skip_tse = false
skip_multiqc = false
skip_report = false

Expand Down
8 changes: 8 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,14 @@
"type": "boolean",
"description": "Skip alpha and beta diversity analysis"
},
"skip_phyloseq": {
"type": "boolean",
"description": "Skip exporting phyloseq rds object(s)"
},
"skip_tse": {
"type": "boolean",
"description": "Skip exporting TreeSummarizedExperiment rds object(s)"
},
"skip_multiqc": {
"type": "boolean",
"description": "Skip MultiQC reporting"
Expand Down
44 changes: 0 additions & 44 deletions subworkflows/local/phyloseq_workflow.nf

This file was deleted.

56 changes: 56 additions & 0 deletions subworkflows/local/robject_workflow.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Create phyloseq objects
*/

include { PHYLOSEQ } from '../../modules/local/phyloseq'
include { PHYLOSEQ_INASV } from '../../modules/local/phyloseq_inasv'
include { TREESUMMARIZEDEXPERIMENT } from '../../modules/local/treesummarizedexperiment'

workflow ROBJECT_WORKFLOW {
take:
ch_tax
ch_tsv
ch_meta
ch_tree
run_qiime2

main:
ch_versions_robject_workflow = Channel.empty()

if ( params.metadata ) {
ch_robject_inmeta = ch_meta.first() // The .first() is to make sure it's a value channel
} else {
ch_robject_inmeta = []
}

if ( params.pplace_tree ) {
ch_robject_intree = ch_tree.map { it = it[1] }.first()
} else {
ch_robject_intree = []
}

if ( run_qiime2 ) {
if ( params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1 ) {
ch_robject_inasv = PHYLOSEQ_INASV ( ch_tsv ).tsv
} else {
ch_robject_inasv = ch_tsv
}
} else {
ch_robject_inasv = ch_tsv
}

if ( !params.skip_phyloseq ) {
PHYLOSEQ ( ch_tax.combine(ch_robject_inasv), ch_robject_inmeta, ch_robject_intree )
ch_versions_robject_workflow = ch_versions_robject_workflow.mix(PHYLOSEQ.out.versions)
}

if ( !params.skip_tse ) {
TREESUMMARIZEDEXPERIMENT ( ch_tax.combine(ch_robject_inasv), ch_robject_inmeta, ch_robject_intree )
ch_versions_robject_workflow = ch_versions_robject_workflow.mix(TREESUMMARIZEDEXPERIMENT.out.versions)
}

emit:
phyloseq = !params.skip_phyloseq ? PHYLOSEQ.out.rds : []
tse = !params.skip_tse ? TREESUMMARIZEDEXPERIMENT.out.rds : []
versions = ch_versions_robject_workflow
}
35 changes: 18 additions & 17 deletions workflows/ampliseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1)
if ( workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) { log.warn "Conda or mamba is enabled, any steps involving QIIME2 are not available. Use a container engine instead of conda to enable all software." }
}

// This tracks tax tables produced during pipeline and each table will be used during phyloseq
ch_tax_for_phyloseq = Channel.empty()
// This tracks tax tables produced during pipeline and each table will be used during phyloseq and treesummarizedexperiment
ch_tax_for_robject = Channel.empty()


/*
Expand Down Expand Up @@ -237,7 +237,7 @@ include { QIIME2_EXPORT } from '../subworkflows/local/qiime2_exp
include { QIIME2_BARPLOTAVG } from '../subworkflows/local/qiime2_barplotavg'
include { QIIME2_DIVERSITY } from '../subworkflows/local/qiime2_diversity'
include { QIIME2_ANCOM } from '../subworkflows/local/qiime2_ancom'
include { PHYLOSEQ_WORKFLOW } from '../subworkflows/local/phyloseq_workflow'
include { ROBJECT_WORKFLOW } from '../subworkflows/local/robject_workflow'

//
// FUNCTIONS
Expand Down Expand Up @@ -606,7 +606,7 @@ workflow AMPLISEQ {
taxlevels
).tax.set { ch_dada2_tax }
ch_versions = ch_versions.mix(DADA2_TAXONOMY_WF.out.versions)
ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } )
ch_tax_for_robject = ch_tax_for_robject.mix ( ch_dada2_tax.map { it = [ "dada2", file(it) ] } )
} else {
ch_dada2_tax = Channel.empty()
}
Expand All @@ -620,7 +620,7 @@ workflow AMPLISEQ {
kraken2_taxlevels
).qiime2_tsv.set { ch_kraken2_tax }
ch_versions = ch_versions.mix(KRAKEN2_TAXONOMY_WF.out.versions)
ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } )
ch_tax_for_robject = ch_tax_for_robject.mix ( ch_kraken2_tax.map { it = [ "kraken2", file(it) ] } )
} else {
ch_kraken2_tax = Channel.empty()
}
Expand All @@ -635,7 +635,7 @@ workflow AMPLISEQ {
sintax_taxlevels
).tax.set { ch_sintax_tax }
ch_versions = ch_versions.mix(SINTAX_TAXONOMY_WF.out.versions)
ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } )
ch_tax_for_robject = ch_tax_for_robject.mix ( ch_sintax_tax.map { it = [ "sintax", file(it) ] } )
} else {
ch_sintax_tax = Channel.empty()
}
Expand All @@ -657,7 +657,7 @@ workflow AMPLISEQ {
FASTA_NEWICK_EPANG_GAPPA ( ch_pp_data )
ch_versions = ch_versions.mix( FASTA_NEWICK_EPANG_GAPPA.out.versions )
ch_pplace_tax = FORMAT_PPLACETAX ( FASTA_NEWICK_EPANG_GAPPA.out.taxonomy_per_query ).tsv
ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } )
ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_PPLACE ( ch_pplace_tax ).tsv.map { it = [ "pplace", file(it) ] } )
} else {
ch_pplace_tax = Channel.empty()
}
Expand All @@ -679,7 +679,7 @@ workflow AMPLISEQ {
)
ch_versions = ch_versions.mix( QIIME2_TAXONOMY.out.versions )
ch_qiime2_tax = QIIME2_TAXONOMY.out.tsv
ch_tax_for_phyloseq = ch_tax_for_phyloseq.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } )
ch_tax_for_robject = ch_tax_for_robject.mix ( PHYLOSEQ_INTAX_QIIME2 ( ch_qiime2_tax ).tsv.map { it = [ "qiime2", file(it) ] } )
} else {
ch_qiime2_tax = Channel.empty()
}
Expand Down Expand Up @@ -866,23 +866,23 @@ workflow AMPLISEQ {
}

//
// SUBWORKFLOW: Create phyloseq objects
// SUBWORKFLOW: Create R objects
//
if ( !params.skip_taxonomy ) {
if ( !params.skip_taxonomy && ( !params.skip_phyloseq || !params.skip_tse ) ) {
if ( params.pplace_tree ) {
ch_tree_for_phyloseq = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny
ch_tree_for_robject = FASTA_NEWICK_EPANG_GAPPA.out.grafted_phylogeny
} else {
ch_tree_for_phyloseq = []
ch_tree_for_robject = []
}

PHYLOSEQ_WORKFLOW (
ch_tax_for_phyloseq,
ROBJECT_WORKFLOW (
ch_tax_for_robject,
ch_tsv,
ch_metadata.ifEmpty([]),
ch_tree_for_phyloseq,
ch_tree_for_robject,
run_qiime2
)
ch_versions = ch_versions.mix(PHYLOSEQ_WORKFLOW.out.versions.first())
ch_versions = ch_versions.mix(ROBJECT_WORKFLOW.out.versions)
}

//
Expand Down Expand Up @@ -1012,7 +1012,8 @@ workflow AMPLISEQ {
run_qiime2 && params.ancombc_formula && params.metadata ? QIIME2_ANCOM.out.ancombc_formula.collect().ifEmpty( [] ) : [],
params.picrust ? PICRUST.out.pathways.ifEmpty( [] ) : [],
params.sbdiexport ? SBDIEXPORT.out.sbditables.mix(SBDIEXPORTREANNOTATE.out.sbdiannottables).collect().ifEmpty( [] ) : [],
!params.skip_taxonomy ? PHYLOSEQ_WORKFLOW.out.rds.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : []
!params.skip_taxonomy && !params.skip_phyloseq ? ROBJECT_WORKFLOW.out.phyloseq.map{info,rds -> [rds]}.collect().ifEmpty( [] ) : []
//TODO: add treesummarizedexperiment
)
ch_versions = ch_versions.mix(SUMMARY_REPORT.out.versions)
}
Expand Down
Loading