-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsnp-dist.nf
131 lines (107 loc) · 3.09 KB
/
snp-dist.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env nextflow
// Opt in to DSL2 syntax (process definitions that are invoked from a workflow block).
nextflow.enable.dsl=2
// Run every process through the SLURM scheduler.
// NOTE(review): executor selection is normally a nextflow.config setting;
// confirm that assigning process.executor from inside the script is honoured.
process.executor = "slurm"
// Authors: Garrick Stott
// Purpose: Nextflow pipeline to generate SNP distances from raw FASTA files
// Print log info to user screen.
// The banner interpolates run metadata from the implicit `workflow` object:
// project directory, repository / revision / commit id, the launching
// command line and the version declared in the pipeline manifest.
log.info """
SNP distance
=============================
Project : $workflow.projectDir
Git info: $workflow.repository - $workflow.revision [$workflow.commitId]
Cmd line: $workflow.commandLine
Manifest's pipeline version: $workflow.manifest.version
=============================
"""
// ---------------------------------------------------------------------------
// Pipeline options
//
//   --input         prefix of the FASTA files to process; the workflow globs
//                   "<input>*.fasta", so pass a directory ending in '/'
//   --temp_out_dir  scratch location (default "./")
//   --output_dir    where final results are published (default "../out/")
//   --run_mode      optional run mode (default: unset)
//   --threads       CPUs requested per task (default 1)
// ---------------------------------------------------------------------------

// Declare every option with a null default. Reading an undeclared entry of
// `params` makes Nextflow print an "Access to undefined parameter" warning;
// values supplied on the command line or in a config file still take
// precedence over these script-level defaults.
params.input        = null
params.temp_out_dir = null
params.output_dir   = null
params.run_mode     = null
params.threads      = null

// --input is the only option without a usable default. Fail here with a clear
// message instead of letting the workflow die later on an undefined variable.
if (params.input == null) {
    error "Missing required option --input (prefix of the *.fasta files, normally a directory ending in '/')"
}

// Memory requested for every process; not user-configurable.
mem = "64GB"

// Resolve the effective settings. The `!= null` tests (rather than `?:`) keep
// the original rule that any explicitly supplied value wins over the default.
input_dir    = params.input
temp_out_dir = params.temp_out_dir != null ? params.temp_out_dir : "./"
output_dir   = params.output_dir   != null ? params.output_dir   : "../out/"
run_mode     = params.run_mode
// The script used to assign 8 and then unconditionally overwrite it with 1,
// so 1 has always been the effective default and is kept here.
threads      = params.threads      != null ? params.threads      : 1
// Reference sequence that new fragments are aligned against. The default is
// the location that used to be hard-coded in the process script; override it
// with --reference when running outside that filesystem.
params.reference = "/scratch/gs69042/PMeND/data/EPI_ISL_402124.fasta"

// Align fasta sequences to a reference strain (Original Wuhan sequence) with MAFFT
//
// Input : one FASTA file.
// Output: "<simpleName>.aligned.fasta", where simpleName is the input file
//         name with its extensions removed.
process mafft {
    // Initialize environment in conda (currently disabled; mafft must already
    // be available on the compute node).
    // conda "mafft"

    // Set slurm options.
    // NOTE(review): `cpus` already sizes the per-task CPU request; the extra
    // "--ntasks" asks SLURM for that many tasks as well - confirm that this
    // multiplication of the allocation is intended.
    cpus threads
    memory mem
    time "6h"
    queue "batch"
    clusterOptions "--ntasks $threads"

    // The alignment is an intermediate product and is not published.

    input:
    path fasta

    output:
    path "${fasta.simpleName}.aligned.fasta"

    // Add new fragments to the existing alignment set by the original wuhan sequence.
    // task.cpus is the value granted by the `cpus` directive, so the thread
    // count handed to mafft always matches the allocation.
    // NOTE(review): the reference is read through its absolute path and is not
    // staged into the task directory - it must be visible from compute nodes.
    script:
    """
    mafft --6merpair --thread ${task.cpus} --addfragments ${fasta} ${params.reference} > ${fasta.simpleName}.aligned.fasta
    """
}
// Location of the codon-splitting helper. The default is the path that used
// to be hard-coded in the process script; override it with --codonsplit_script
// when the repository is checked out elsewhere.
params.codonsplit_script = "/scratch/gs69042/supreme-potato/scripts/codonSplit.py"

// Split codons
//
// Input : one aligned FASTA file.
// Output: every file matching "<simpleName>.split*.fasta" - i.e. the
//         ".split12.fasta" and ".split3.fasta" files named on the command
//         line below, emitted together as a single channel item.
// NOTE(review): presumably split12 holds codon positions 1+2 and split3 holds
// position 3 - confirm against codonSplit.py.
process codonsplit {
    conda "$workflow.projectDir/envs/codonSplit.yaml"

    // Set slurm options.
    // NOTE(review): `cpus` already sizes the per-task CPU request; confirm the
    // additional "--ntasks" is intended.
    cpus threads
    memory mem
    time "6h"
    queue "batch"
    clusterOptions "--ntasks $threads"

    input:
    path fasta

    output:
    path "${fasta.simpleName}.split*.fasta"

    // Arguments: input alignment, then the two output file names.
    script:
    """
    python3 ${params.codonsplit_script} $fasta ${fasta.simpleName}.split12.fasta ${fasta.simpleName}.split3.fasta
    """
}
// Calculate SNP Distance
//
// Input : one codon-split FASTA file (e.g. "<sample>.split12.fasta").
// Output: "<baseName>.snpdist.csv", published to `output_dir`.
process snpDist {
    // Initialize environment in conda
    conda "$workflow.projectDir/envs/snp-dist.yaml"

    // Set slurm options.
    // NOTE(review): `cpus` already sizes the per-task CPU request; confirm the
    // additional "--ntasks" is intended.
    cpus threads
    memory mem
    time "6h"
    queue "batch"
    clusterOptions "--ntasks $threads"

    // Establish output directory. Written as a directive call (no `=`), which
    // is the documented form for process directives.
    publishDir output_dir

    input:
    path splitFasta

    // baseName drops only the final ".fasta", so the ".split12" / ".split3"
    // tag survives in the result name. simpleName would drop every extension
    // and give both split files of a sample the same "<sample>.snpdist.csv",
    // letting one result overwrite the other in the publish directory.
    output:
    path "${splitFasta.baseName}.snpdist.csv"

    // -m: molten (long) output, -c: comma-separated instead of tab-separated.
    script:
    """
    snp-dists -m -c $splitFasta > "${splitFasta.baseName}.snpdist.csv"
    """
}
workflow {
    // Input fasta files for the distance calculation. The pattern is built by
    // plain concatenation, so `input_dir` must end with '/' to name a directory.
    // Abort with an explicit message when nothing matches instead of finishing
    // silently without running any task.
    input_files = Channel
        .fromPath( "$input_dir*.fasta" )
        .ifEmpty { error "No files matched the pattern '${input_dir}*.fasta'" }

    // Report each file as it is emitted. Interpolating the channel itself into
    // a log message only prints the channel object, not the files it carries.
    input_files.view { fasta -> "Input file: $fasta" }

    // Standard process for generating hamming distances at positions 1+2 and 3
    mafft(input_files)
    codonsplit(mafft.out)

    // codonsplit emits both split files of a sample as one list item; flatten
    // it so that snpDist, which takes a single file, runs once per split file.
    snpDist(codonsplit.out.flatten())
}