Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add param --quality_type #801

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Added`

- [#801](https://github.com/nf-core/ampliseq/pull/801) - Parameter `--quality_type` allows specifying the type of quality scores in raw read data; the default is `Auto`, so the default behavior is unchanged

### `Changed`

### `Fixed`
Expand Down
7 changes: 5 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ process {
max_len = params.max_len ?: "Inf"
withName: DADA2_FILTNTRIM {
ext.args = [
'maxN = 0, truncQ = 2, trimRight = 0, minQ = 0, rm.lowcomplex = 0, orient.fwd = NULL, matchIDs = FALSE, id.sep = "\\\\s", id.field = NULL, n = 1e+05, OMP = TRUE, qualityType = "Auto"',
'maxN = 0, truncQ = 2, trimRight = 0, minQ = 0, rm.lowcomplex = 0, orient.fwd = NULL, matchIDs = FALSE, id.sep = "\\\\s", id.field = NULL, n = 1e+05, OMP = TRUE',
"qualityType = \"${params.quality_type}\"",
params.pacbio || params.iontorrent || params.single_end ? "maxEE = ${params.max_ee}" : "maxEE = c(${params.max_ee}, ${params.max_ee})",
params.pacbio ? "trimLeft = 0, minLen = ${params.min_len}, maxLen = $max_len, rm.phix = FALSE" :
params.iontorrent ? "trimLeft = 15, minLen = ${params.min_len}, maxLen = $max_len, rm.phix = TRUE" :
Expand Down Expand Up @@ -179,7 +180,8 @@ process {
ext.seed = "${params.seed}"
ext.prefix = { meta.region ? "region-${meta.region}_run-${meta.run}" : "${meta.run}" }
ext.args = [
'nbases = 1e8, nreads = NULL, randomize = TRUE, MAX_CONSIST = 10, OMEGA_C = 0, qualityType = "Auto"',
'nbases = 1e8, nreads = NULL, randomize = TRUE, MAX_CONSIST = 10, OMEGA_C = 0',
"qualityType = \"${params.quality_type}\"",
params.pacbio ? "errorEstimationFunction = PacBioErrfun" : "errorEstimationFunction = loessErrfun"
].join(',').replaceAll('(,)*$', "")
publishDir = [
Expand Down Expand Up @@ -223,6 +225,7 @@ process {

withName: DADA2_DENOISING {
ext.prefix = { meta.region ? "region-${meta.region}_run-${meta.run}" : "${meta.run}" }
ext.quality_type = "${params.quality_type}"
// standard setting can be inspected with getDadaOpt(option = NULL)
ext.args = [
'selfConsist = FALSE, priors = character(0), DETECT_SINGLETONS = FALSE, GAPLESS = TRUE, GAP_PENALTY = -8, GREEDY = TRUE, KDIST_CUTOFF = 0.42, MATCH = 5, MAX_CLUST = 0, MAX_CONSIST = 10, MIN_ABUNDANCE = 1, MIN_FOLD = 1, MIN_HAMMING = 1, MISMATCH = -4, OMEGA_A = 1e-40, OMEGA_C = 1e-40, OMEGA_P = 1e-4, PSEUDO_ABUNDANCE = Inf, PSEUDO_PREVALENCE = 2, SSE = 2, USE_KMERS = TRUE, USE_QUALS = TRUE, VECTORIZED_ALIGNMENT = TRUE',
Expand Down
21 changes: 18 additions & 3 deletions modules/local/dada2_denoising.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ process DADA2_DENOISING {

script:
def prefix = task.ext.prefix ?: "prefix"
def quality_type = task.ext.quality_type ?: "Auto"
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
if (!meta.single_end) {
Expand All @@ -39,9 +40,17 @@ process DADA2_DENOISING {

#denoising
sink(file = "${prefix}.dada.log")
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
if ("${quality_type}" == "Auto") {
# Avoid using memory-inefficient derepFastq() if not necessary
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
dadaRs <- dada(filtRs, err = errR, $args, multithread = $task.cpus)
} else {
derepFs <- derepFastq(filtFs, qualityType="${quality_type}")
dadaFs <- dada(derepFs, err = errF, $args, multithread = $task.cpus)
derepRs <- derepFastq(filtRs, qualityType="${quality_type}")
dadaRs <- dada(derepRs, err = errR, $args, multithread = $task.cpus)
}
saveRDS(dadaFs, "${prefix}_1.dada.rds")
dadaRs <- dada(filtRs, err = errR, $args, multithread = $task.cpus)
saveRDS(dadaRs, "${prefix}_2.dada.rds")
sink(file = NULL)

Expand All @@ -66,7 +75,13 @@ process DADA2_DENOISING {

#denoising
sink(file = "${prefix}.dada.log")
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
if ("${quality_type}" == "Auto") {
# Avoid using memory-inefficient derepFastq() if not necessary
dadaFs <- dada(filtFs, err = errF, $args, multithread = $task.cpus)
} else {
derepFs <- derepFastq(filtFs, qualityType="${quality_type}")
dadaFs <- dada(derepFs, err = errF, $args, multithread = $task.cpus)
}
saveRDS(dadaFs, "${prefix}.dada.rds")
sink(file = NULL)

Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ params {
extension = "/*_R{1,2}_001.fastq.gz"
pacbio = false
iontorrent = false
quality_type = "Auto"
FW_primer = null
RV_primer = null
classifier = null
Expand Down
8 changes: 8 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,14 @@
"help_text": "This will cause the pipeline to\n- not truncate input reads if not `--trunclenf` and `--trunclenr` are overwriting defaults\n- remove reverse complement primers from the end of reads in case the read length exceeds the amplicon length",
"fa_icon": "fas fa-align-justify"
},
"quality_type": {
"type": "string",
"default": "Auto",
"description": "Type of quality scores in raw read data",
"help_text": "From R package 'ShortRead' function 'readFastq': Representation to be used for quality scores, must be one of `Auto` (infer automatically), `FastqQuality` (Phred-like base 33 encoding), `SFastqQuality` (Illumina base 64 encoding).",
"enum": ["Auto", "FastqQuality", "SFastqQuality"],
"fa_icon": "fab fa-amilia"
},
"multiple_sequencing_runs": {
"type": "boolean",
"description": "If using `--input_folder`: samples were sequenced in multiple sequencing runs",
Expand Down
Loading