forked from sunbeam-labs/sunbeam
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
151 lines (117 loc) · 4.53 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#
# Sunbeam: an iridescent HTS pipeline
#
# Author: Erik Clarke <[email protected]>
# Created: 2016-04-28
#
import os
import re
import sys
import yaml
import configparser
from pprint import pprint
from pathlib import Path, PurePath
from snakemake.utils import update_config, listfiles
from snakemake.exceptions import WorkflowError
from sunbeamlib import load_sample_list, read_seq_ids
from sunbeamlib.config import *
from sunbeamlib.reports import *
# Restrict the `sample` wildcard to one or more non-slash characters so that
# Snakemake's wildcard evaluation never matches a sample name across a
# directory boundary. Slashes are always interpreted as directory separators.
wildcard_constraints:
    sample="[^/]+"
# Abort immediately when Snakemake was started without any configuration.
if not config:
    raise SystemExit(
        "No config file specified. Run `sunbeam init` to generate a "
        "config file, and specify with --configfile")

# The Sunbeam install location is taken from the SUNBEAM_DIR environment
# variable; exit with a hint when it is not set.
sunbeam_dir = os.environ.get("SUNBEAM_DIR")
if sunbeam_dir is None:
    raise SystemExit(
        "$SUNBEAM_DIR environment variable not defined. Are you sure you're "
        "running this from the Sunbeam conda env?")
# Check for major version compatibility. check_compatibility() returns the
# package's and the config's major versions; any mismatch in either direction
# stops the run with instructions for regenerating the config file.
pkg_major, cfg_major = check_compatibility(config)
if pkg_major > cfg_major:
    # NOTE: the adjacent string literals are concatenated, so each piece must
    # carry its own separating space.
    raise SystemExit(
        "\nThis config file was created with an older version of Sunbeam"
        " and may not be compatible. Create a new config file using "
        "`sunbeam init` and update it using `sunbeam_mod_config`\n")
elif pkg_major < cfg_major:
    raise SystemExit(
        "\nThis config file was created with a newer version of Sunbeam"
        " and may not be compatible. Create a new config file using "
        "`sunbeam init` and update it using `sunbeam_mod_config`\n")
# Discover extension rule files matching
# $SUNBEAM_DIR/extensions/<sbx_folder>/<sbx>.rules and announce each one on
# stderr. Every entry of `sbxs` is indexable, with the matched wildcard values
# at position 1.
sbxs = list(listfiles(sunbeam_dir + "/extensions/{sbx_folder}/{sbx}.rules"))
sys.stderr.writelines(
    "Found extension {sbx} in folder {sbx_folder}\n".format(**found[1])
    for found in sbxs)
# Validate the raw config, then derive the BLAST database table, the sample
# list and the read-pair labels from it.
Cfg = check_config(config)
Blastdbs = process_databases(Cfg['blastdbs'])
Samples = load_sample_list(
    Cfg['all']['samplelist_fp'],
    Cfg['all']['paired_end'],
    Cfg['all']['download_reads'],
    Cfg["all"]['root'] / Cfg['all']['output_fp'],
)
# Paired-end runs have two read files per sample; otherwise only one.
if Cfg['all']['paired_end']:
    Pairs = ['1', '2']
else:
    Pairs = ['1']
# Collect host (contaminant) genomes: every *.fasta file in the configured
# host folder is mapped from its file stem to the result of read_seq_ids().
sys.stderr.write("Collecting host/contaminant genomes... ")
if Cfg['qc']['host_fp'] == Cfg['all']['root']:
    # A host folder equal to the project root is treated as "no host genomes"
    # (presumably what an unset host_fp resolves to -- TODO confirm).
    HostGenomeFiles = []
else:
    HostGenomeFiles = list(Cfg['qc']['host_fp'].glob('*.fasta'))
    if not HostGenomeFiles:
        sys.stderr.write(
            "\n\nWARNING: No files detected in host genomes folder ({}). "
            "If this is not intentional, make sure all files end in "
            ".fasta and the folder is specified correctly.\n\n".format(
                Cfg['qc']['host_fp']
            ))
# glob() already yields paths that include host_fp, so each path is used
# as-is. Joining it onto host_fp again is a no-op for absolute paths and
# duplicates the directory prefix for relative ones.
HostGenomes = {g.stem: read_seq_ids(g) for g in HostGenomeFiles}
sys.stderr.write("done.\n")
# Collect target genomes: every *.fasta file in the configured genomes folder
# is mapped from its file stem to the result of read_seq_ids().
sys.stderr.write("Collecting target genomes... ")
if Cfg['mapping']['genomes_fp'] == Cfg['all']['root']:
    # A genomes folder equal to the project root is treated as "no target
    # genomes" (presumably what an unset genomes_fp resolves to -- TODO confirm).
    GenomeFiles = []
    GenomeSegments = {}
else:
    GenomeFiles = list(Cfg['mapping']['genomes_fp'].glob('*.fasta'))
    # glob() already yields paths that include genomes_fp, so each path is
    # used as-is. Joining it onto genomes_fp again is a no-op for absolute
    # paths and duplicates the directory prefix for relative ones.
    GenomeSegments = {g.stem: read_seq_ids(g) for g in GenomeFiles}
sys.stderr.write("done.\n")
# ---- Run the workflow from the configured output directory
workdir: str(Cfg['all']['output_fp'])

# ---- One output path per pipeline step, each obtained from output_subdir()
# in the order listed.
(DOWNLOAD_FP, QC_FP, ASSEMBLY_FP,
 ANNOTATION_FP, CLASSIFY_FP, MAPPING_FP) = (
    output_subdir(Cfg, step)
    for step in ('download', 'qc', 'assembly',
                 'annotation', 'classify', 'mapping'))
# ---- Download rules (included only when the download_reads setting is truthy)
if Cfg['all']['download_reads']:
    include: "rules/download/download.rules"

# ---- Targets rules
include: "rules/targets/targets.rules"

# ---- Quality control rules
include: "rules/qc/qc.rules"
include: "rules/qc/decontaminate.rules"

# ---- Assembly rules
include: "rules/assembly/assembly.rules"
include: "rules/assembly/coverage.rules"

# ---- Contig annotation rules
include: "rules/annotation/annotation.rules"
include: "rules/annotation/blast.rules"
include: "rules/annotation/orf.rules"

# ---- Classifier rules
include: "rules/classify/kraken.rules"

# ---- Mapping rules
include: "rules/mapping/mapping.rules"

# ---- Reports rules
include: "rules/reports/reports.rules"

# ---- Extension rules: include every .rules file found in `sbxs`, whose
# entries unpack into (path, wildcards). Only the path is used here.
for sbx_path, wildcards in sbxs:
    include: sbx_path
# ---- Rule all: request every target in TARGET_ALL
# (TARGET_ALL is not defined in this file; presumably it comes from one of the
# included rule files -- verify.)
rule all:
    input:
        TARGET_ALL
# ---- Rule samples: print the name of every loaded sample
rule samples:
    message: "Samples to be processed:"
    run:
        # One sample name per line, in sorted order. A plain loop is used
        # because the printing is a side effect, not a value to collect.
        for sample in sorted(Samples):
            print(sample)