-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmap_genes.nf
99 lines (82 loc) · 3.19 KB
/
map_genes.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env nextflow
// Using DSL-2
nextflow.enable.dsl=2
// Import helpers
// Parse helpers.gvy from the pipeline's project directory with a GroovyShell.
// The resulting `helpers` object supplies help_message() and require_param(),
// both of which are called from the workflow block in this file.
GroovyShell shell = new GroovyShell()
def helpers = shell.parse(new File("${workflow.projectDir}/helpers.gvy"))
// Import the process to run
include {
shard;
map_genes_blast;
map_genes_diamond;
filter;
join
} from './modules/processes/map_genes'
// Standalone entrypoint
workflow {

    // Hand the usage text and the --help flag to the helper.
    // NOTE(review): presumably the helper prints the text and exits when the
    // flag is set -- confirm against helpers.gvy.
    // The text lines and the closing delimiter are kept flush-left on purpose:
    // any indentation added here would become part of the string itself.
    helpers.help_message(
        """
Compare two collections of genes, finding the best match for each of the
genes in the query set from the collection of genes in the reference set.
In the results which are provided, each query gene will have no more than
one alignment reported. However, each reference gene may have alignments
reported to more than one query gene.
The output file will be written in CSV format as gene_mapping.csv.gz in
the output directory
Parameters:
--queries Query gene collection (FASTA)
--references Reference gene collection (FASTA)
--output Output directory
--min_coverage Minimum proportion of a gene which must align in order to retain the alignment
(default: ${params.min_coverage}, ranges 0-100)
--min_identity Minimum percent identity of the amino acid alignment required to retain the alignment
(default: ${params.min_identity}, ranges 0-100)
--max_evalue Maximum E-value threshold used to filter all alignments
(default: ${params.max_evalue})
--map_batchsize Number of genes to align in a batch
(default: ${params.map_batchsize})
--aligner Algorithm used for alignment (default: ${params.aligner}, options: diamond, blast)
--aln_fmt Column headings used for alignment outputs (see DIAMOND documentation for details)
(default: ${params.aln_fmt})
""",
        params.help
    )

    // These three parameters have no usable default, so check each one
    helpers.require_param(params.output, "output")
    helpers.require_param(params.queries, "queries")
    helpers.require_param(params.references, "references")

    // Split the query collection into shards; the path is taken literally
    // (no glob expansion) and must already exist
    def query_fasta = file(params.queries, checkIfExists: true, glob: false)
    shard(query_fasta)

    // Resolve the reference collection under the same rules as the queries
    def reference_fasta = file(
        params.references,
        checkIfExists: true,
        glob: false
    )

    // Pair every individual shard with the single reference file
    def shard_ref_pairs = shard.out
        .flatten()
        .combine(Channel.of(reference_fasta))

    // Run whichever aligner was requested; anything other than the two
    // supported names aborts the run
    def aligner_name = "${params.aligner}"
    def raw_alignments = null
    if (aligner_name == "blast") {
        map_genes_blast(shard_ref_pairs)
        raw_alignments = map_genes_blast.out
    } else if (aligner_name == "diamond") {
        map_genes_diamond(shard_ref_pairs)
        raw_alignments = map_genes_diamond.out
    } else {
        error "Parameter 'aligner' must be diamond or blast, not ${params.aligner}"
    }

    // Keep only the top alignment for each query gene
    filter(raw_alignments)

    // Gather the filtered per-shard results into one sorted list and merge them
    join(filter.out.toSortedList())
}