-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathSnakefile_downstream_only
106 lines (98 loc) · 3.97 KB
/
Snakefile_downstream_only
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Snakemake pipeline for kraken2 classification of metagenomic samples
# Downstream processing steps
# Developed by Ben Siranosian 2018-2023
# Originally developed in the Bhatt Lab, Stanford Genetics
# MIT Licensed. https://github.com/bhattlab/kraken2_classification/
from os.path import join, exists
import sys
import snakemake
import time
# These rules are declared local, so Snakemake runs them on the host that
# launched the workflow instead of submitting them as cluster/cloud jobs.
localrules:
    downstream_processing_kraken,
    downstream_processing_bracken,
    copy_files_processing,
    create_taxonomy_array

# Include code from other files.
# scripts/setup_downstream_only.smk interprets the config file and sets
# pipeline options.
# NOTE(review): names used below but not defined in this file (outdir,
# run_extra_all_outputs, downstream_processing_input_kraken,
# downstream_processing_input_bracken) presumably come from these includes.
include: "scripts/functions.smk"
include: "scripts/setup_downstream_only.smk"
# Aggregate target rule: requests the Kraken species barplot, everything listed
# in run_extra_all_outputs, and the completion marker written by
# remove_files_processing.
rule all:
    input:
        join(outdir, 'processed_results_kraken/plots/classified_taxonomy_barplot_species.pdf'),
        run_extra_all_outputs,
        join(outdir, "kraken2_processing_completed.txt")
# Run scripts/improve_taxonomy.py on the Kraken2 database directory.
# The rule declares taxonomy_array.tsv inside that directory as its output, so
# the script is expected to write the file there (the output path itself is not
# passed on the command line).
rule create_taxonomy_array:
    input:
        join(config['database'], 'taxo.k2d')
    output:
        join(config['database'], 'taxonomy_array.tsv')
    params:
        db = config['database'],
        improve_taxonomy_script = join(workflow.basedir, 'scripts', 'improve_taxonomy.py')
    conda: "envs/anytree/anytree.yaml"
    container: "docker://bsiranosian/anytree:2.8"
    # ":q" shell-quotes each path so that database or checkout locations
    # containing spaces or shell metacharacters are passed as single arguments.
    shell: """
        python {params.improve_taxonomy_script:q} {params.db:q}
        """
# Running the pipeline with singularity had a strange bug, where these files had
# to be present in the output directory. This rule accomplishes that by copying
# the workflow's scripts directory and the taxonomy array into outdir.
rule copy_files_processing:
    input:
        tax_array = join(config['database'], 'taxonomy_array.tsv')
    output:
        join(outdir, 'taxonomy_array.tsv'),
        join(outdir, 'scripts/post_classification_workflow.R')
    params:
        scriptdir = join(workflow.basedir, 'scripts'),
        # Passed explicitly (rather than relying on the global name inside the
        # shell template), matching remove_files_processing.
        workflow_outdir = outdir
    # ":q" shell-quotes each path so locations containing spaces or shell
    # metacharacters are handled as single arguments.
    shell: """
        cp -r {params.scriptdir:q} {params.workflow_outdir:q}
        cp {input.tax_array:q} {params.workflow_outdir:q}
        """
# Downstream processing with R
## Run for Kraken, and also Bracken if the tool was run
# This rule runs scripts/post_classification_workflow.R with results written
# under <outdir>/processed_results_kraken and use_bracken_report fixed to False.
rule downstream_processing_kraken:
    input:
        downstream_processing_input_kraken,
        # Copies placed in outdir by copy_files_processing; listed as inputs so
        # that rule runs before this one.
        tax_array = join(outdir, 'taxonomy_array.tsv'),
        script_test = join(outdir, 'scripts/post_classification_workflow.R')
    output:
        join(outdir, 'processed_results_kraken/plots/classified_taxonomy_barplot_species.pdf')
    params:
        sample_reads_file = config["sample_reads_file"],
        sample_reports_file = config["sample_reports_file"],
        sample_groups_file = config["sample_groups_file"],
        workflow_outdir = outdir,
        result_dir = join(outdir, 'processed_results_kraken'),
        use_bracken_report = False,
        remove_chordata = config['remove_chordata']
    # "container:" replaces the deprecated "singularity:" directive and matches
    # the directive used by create_taxonomy_array; the image URI is unchanged.
    container: "shub://bhattlab/kraken2_classification:kraken2_processing"
    script:
        'scripts/post_classification_workflow.R'
# Downstream processing with R for Bracken results.
# Runs scripts/post_classification_workflow.R with results written under
# <outdir>/processed_results_bracken; use_bracken_report is taken from
# config['run_bracken'].
rule downstream_processing_bracken:
    input:
        downstream_processing_input_bracken,
        # Copies placed in outdir by copy_files_processing; listed as inputs so
        # that rule runs before this one.
        tax_array = join(outdir, 'taxonomy_array.tsv'),
        script_test = join(outdir, 'scripts/post_classification_workflow.R')
    output:
        join(outdir, 'processed_results_bracken/plots/classified_taxonomy_barplot_species.pdf')
    params:
        sample_reads_file = config["sample_reads_file"],
        sample_reports_file = config["sample_reports_file"],
        sample_groups_file = config["sample_groups_file"],
        workflow_outdir = outdir,
        result_dir = join(outdir, 'processed_results_bracken'),
        use_bracken_report = config['run_bracken'],
        remove_chordata = config['remove_chordata']
    # "container:" replaces the deprecated "singularity:" directive and matches
    # the directive used by create_taxonomy_array; the image URI is unchanged.
    container: "shub://bhattlab/kraken2_classification:kraken2_processing"
    script:
        'scripts/post_classification_workflow.R'
# Remove the files that were copied into the output directory during setup
# (the scripts directory and taxonomy_array.tsv), then write the completion
# marker file. Runs only after the Kraken downstream output and everything in
# run_extra_all_outputs exist.
rule remove_files_processing:
    input:
        rules.downstream_processing_kraken.output,
        run_extra_all_outputs
    output:
        join(outdir, "kraken2_processing_completed.txt")
    params:
        workflow_outdir = outdir
    # ":q" shell-quotes the paths so that an output directory containing spaces
    # or shell metacharacters cannot split into several rm/touch arguments.
    shell: """
        rm -rf {params.workflow_outdir:q}/scripts
        rm -f {params.workflow_outdir:q}/taxonomy_array.tsv
        touch {output:q}
        """