Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Non-human WGS Pipeline with Basic QC and Variant Calls (SNVs/Indels) #714

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 173 additions & 0 deletions definitions/pipelines/germline_wgs_non_human.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#!/usr/bin/env cwl-runner

# Top-level pipeline: runs the non-human alignment + QC workflow on the input
# BAMs, feeds the resulting BAM into the germline variant-detection
# subworkflow, and converts the same BAM to an indexed CRAM.
cwlVersion: v1.0
class: Workflow
label: "wgs alignment and germline variant detection"
requirements:
  # labelled_file is a project-defined record type used by the *_intervals
  # inputs below.
  - class: SchemaDefRequirement
    types:
      - $import: ../types/labelled_file.yml
  # Needed because the alignment_and_qc and detect_variants steps run
  # Workflow documents rather than command-line tools.
  - class: SubworkflowFeatureRequirement
inputs:
  reference:
    type: string
  bams:
    type: File[]
  readgroups:
    type: string[]
  picard_metric_accumulation_level:
    type: string
  emit_reference_confidence:
    type: string
  gvcf_gq_bands:
    type: string[]
  # Array of string arrays; forwarded unchanged to the detect_variants step.
  intervals:
    type:
      type: array
      items:
        type: array
        items: string
  # Forwarded to the alignment_and_qc step as its `intervals` input.
  qc_intervals:
    type: File
  # Forwarded to the detect_variants step as `limit_variant_intervals`.
  variant_reporting_intervals:
    type: File
  vep_cache_dir:
    type: string
  vep_ensembl_assembly:
    type: string
    doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38"
  vep_ensembl_version:
    type: string
    doc: "ensembl version - Must be present in the cache directory. Example: 95"
  vep_ensembl_species:
    type: string
    doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus"
  synonyms_file:
    type: File?
  annotate_coding_only:
    type: boolean?
  minimum_mapping_quality:
    type: int?
  minimum_base_quality:
    type: int?
  per_base_intervals:
    type: ../types/labelled_file.yml#labelled_file[]
  per_target_intervals:
    type: ../types/labelled_file.yml#labelled_file[]
  summary_intervals:
    type: ../types/labelled_file.yml#labelled_file[]
  # Required here; forwarded to the detect_variants step.
  ploidy:
    type: int
outputs:
  cram:
    type: File
    outputSource: index_cram/indexed_cram
  # Alignment and QC results, all re-exported from the alignment_and_qc step.
  mark_duplicates_metrics:
    type: File
    outputSource: alignment_and_qc/mark_duplicates_metrics
  insert_size_metrics:
    type: File
    outputSource: alignment_and_qc/insert_size_metrics
  insert_size_histogram:
    type: File
    outputSource: alignment_and_qc/insert_size_histogram
  alignment_summary_metrics:
    type: File
    outputSource: alignment_and_qc/alignment_summary_metrics
  gc_bias_metrics:
    type: File
    outputSource: alignment_and_qc/gc_bias_metrics
  gc_bias_metrics_chart:
    type: File
    outputSource: alignment_and_qc/gc_bias_metrics_chart
  gc_bias_metrics_summary:
    type: File
    outputSource: alignment_and_qc/gc_bias_metrics_summary
  wgs_metrics:
    type: File
    outputSource: alignment_and_qc/wgs_metrics
  flagstats:
    type: File
    outputSource: alignment_and_qc/flagstats
  # Variant-calling results, all re-exported from the detect_variants step.
  gvcf:
    type: File[]
    outputSource: detect_variants/gvcf
  final_vcf:
    type: File
    outputSource: detect_variants/final_vcf
    secondaryFiles: [.tbi]
  coding_vcf:
    type: File
    outputSource: detect_variants/coding_vcf
    secondaryFiles: [.tbi]
  limited_vcf:
    type: File
    outputSource: detect_variants/limited_vcf
    secondaryFiles: [.tbi]
  vep_summary:
    type: File
    outputSource: detect_variants/vep_summary
  # Interval-based coverage metrics, re-exported from the alignment_and_qc step.
  per_base_coverage_metrics:
    type: File[]
    outputSource: alignment_and_qc/per_base_coverage_metrics
  per_base_hs_metrics:
    type: File[]
    outputSource: alignment_and_qc/per_base_hs_metrics
  per_target_coverage_metrics:
    type: File[]
    outputSource: alignment_and_qc/per_target_coverage_metrics
  per_target_hs_metrics:
    type: File[]
    outputSource: alignment_and_qc/per_target_hs_metrics
  summary_hs_metrics:
    type: File[]
    outputSource: alignment_and_qc/summary_hs_metrics
  bamcoverage_bigwig:
    type: File
    outputSource: alignment_and_qc/bamcoverage_bigwig
steps:
  # Runs the sibling non-human alignment + QC workflow; its `bam` output is
  # consumed by both detect_variants and bam_to_cram.
  alignment_and_qc:
    run: wgs_alignment_and_qc_non_human.cwl
    in:
      reference: reference
      bams: bams
      readgroups: readgroups
      intervals: qc_intervals
      picard_metric_accumulation_level: picard_metric_accumulation_level
      minimum_mapping_quality: minimum_mapping_quality
      minimum_base_quality: minimum_base_quality
      per_base_intervals: per_base_intervals
      per_target_intervals: per_target_intervals
      summary_intervals: summary_intervals
    out:
      - bam
      - mark_duplicates_metrics
      - insert_size_metrics
      - insert_size_histogram
      - alignment_summary_metrics
      - gc_bias_metrics
      - gc_bias_metrics_chart
      - gc_bias_metrics_summary
      - wgs_metrics
      - flagstats
      - per_base_coverage_metrics
      - per_base_hs_metrics
      - per_target_coverage_metrics
      - per_target_hs_metrics
      - summary_hs_metrics
      - bamcoverage_bigwig
  # Runs the germline variant-detection subworkflow on the aligned BAM.
  detect_variants:
    run: ../subworkflows/germline_detect_variants.cwl
    in:
      reference: reference
      bam: alignment_and_qc/bam
      emit_reference_confidence: emit_reference_confidence
      gvcf_gq_bands: gvcf_gq_bands
      intervals: intervals
      vep_cache_dir: vep_cache_dir
      synonyms_file: synonyms_file
      annotate_coding_only: annotate_coding_only
      limit_variant_intervals: variant_reporting_intervals
      vep_ensembl_assembly: vep_ensembl_assembly
      vep_ensembl_version: vep_ensembl_version
      vep_ensembl_species: vep_ensembl_species
      ploidy: ploidy
    out:
      - gvcf
      - final_vcf
      - coding_vcf
      - limited_vcf
      - vep_summary
  # Runs ../tools/bam_to_cram.cwl on the aligned BAM with the same reference.
  bam_to_cram:
    run: ../tools/bam_to_cram.cwl
    in:
      bam: alignment_and_qc/bam
      reference: reference
    out:
      - cram
  # Runs ../tools/index_cram.cwl on that CRAM; its `indexed_cram` output is the
  # workflow's `cram` output.
  index_cram:
    run: ../tools/index_cram.cwl
    in:
      cram: bam_to_cram/cram
    out:
      - indexed_cram
100 changes: 100 additions & 0 deletions definitions/pipelines/wgs_alignment_and_qc_non_human.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/usr/bin/env cwl-runner

# Two-step workflow: runs the align_sort_markdup subworkflow on the input BAMs,
# then runs the qc_wgs_no_verify_bam subworkflow on the resulting BAM.
cwlVersion: v1.0
class: Workflow
label: "wgs alignment with qc"
requirements:
  # labelled_file is a project-defined record type used by the *_intervals
  # inputs below.
  - class: SchemaDefRequirement
    types:
      - $import: ../types/labelled_file.yml
  # Needed because both steps run Workflow documents.
  - class: SubworkflowFeatureRequirement
inputs:
  reference:
    type: string
  bams:
    type: File[]
  readgroups:
    type: string[]
  # Forwarded to the qc step only.
  intervals:
    type: File
  picard_metric_accumulation_level:
    type: string
  minimum_mapping_quality:
    type: int?
  minimum_base_quality:
    type: int?
  per_base_intervals:
    type: ../types/labelled_file.yml#labelled_file[]
  per_target_intervals:
    type: ../types/labelled_file.yml#labelled_file[]
  summary_intervals:
    type: ../types/labelled_file.yml#labelled_file[]
outputs:
  # Produced by the alignment step.
  bam:
    type: File
    outputSource: alignment/final_bam
  mark_duplicates_metrics:
    type: File
    outputSource: alignment/mark_duplicates_metrics_file
  # Everything below is produced by the qc step.
  insert_size_metrics:
    type: File
    outputSource: qc/insert_size_metrics
  insert_size_histogram:
    type: File
    outputSource: qc/insert_size_histogram
  alignment_summary_metrics:
    type: File
    outputSource: qc/alignment_summary_metrics
  gc_bias_metrics:
    type: File
    outputSource: qc/gc_bias_metrics
  gc_bias_metrics_chart:
    type: File
    outputSource: qc/gc_bias_metrics_chart
  gc_bias_metrics_summary:
    type: File
    outputSource: qc/gc_bias_metrics_summary
  wgs_metrics:
    type: File
    outputSource: qc/wgs_metrics
  flagstats:
    type: File
    outputSource: qc/flagstats
  per_base_coverage_metrics:
    type: File[]
    outputSource: qc/per_base_coverage_metrics
  per_base_hs_metrics:
    type: File[]
    outputSource: qc/per_base_hs_metrics
  per_target_coverage_metrics:
    type: File[]
    outputSource: qc/per_target_coverage_metrics
  per_target_hs_metrics:
    type: File[]
    outputSource: qc/per_target_hs_metrics
  summary_hs_metrics:
    type: File[]
    outputSource: qc/summary_hs_metrics
  bamcoverage_bigwig:
    type: File
    outputSource: qc/bamcoverage_bigwig
steps:
  # Note the rename: the step's `mark_duplicates_metrics_file` output is
  # exposed by this workflow as `mark_duplicates_metrics`.
  alignment:
    run: ../subworkflows/align_sort_markdup.cwl
    in:
      reference: reference
      bams: bams
      readgroups: readgroups
    out:
      - final_bam
      - mark_duplicates_metrics_file
  # Runs the QC subworkflow on the BAM produced by the alignment step.
  qc:
    run: ../subworkflows/qc_wgs_no_verify_bam.cwl
    in:
      bam: alignment/final_bam
      reference: reference
      intervals: intervals
      picard_metric_accumulation_level: picard_metric_accumulation_level
      minimum_mapping_quality: minimum_mapping_quality
      minimum_base_quality: minimum_base_quality
      per_base_intervals: per_base_intervals
      per_target_intervals: per_target_intervals
      summary_intervals: summary_intervals
    out:
      - insert_size_metrics
      - insert_size_histogram
      - alignment_summary_metrics
      - gc_bias_metrics
      - gc_bias_metrics_chart
      - gc_bias_metrics_summary
      - wgs_metrics
      - flagstats
      - per_base_coverage_metrics
      - per_base_hs_metrics
      - per_target_coverage_metrics
      - per_target_hs_metrics
      - summary_hs_metrics
      - bamcoverage_bigwig
4 changes: 4 additions & 0 deletions definitions/subworkflows/germline_detect_variants.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ inputs:
# Optional File input declared with a `.tbi` secondary file.
custom_clinvar_vcf:
type: File?
secondaryFiles: [.tbi]
# Optional integer input added by this change; the steps further down wire it
# in via `ploidy: ploidy`.
ploidy:
type: int?
outputs:
gvcf:
type: File[]
Expand Down Expand Up @@ -77,13 +79,15 @@ steps:
gvcf_gq_bands: gvcf_gq_bands
intervals: intervals
contamination_fraction: contamination_fraction
ploidy: ploidy
out:
[gvcf]
# Runs ../tools/gatk_genotypegvcfs.cwl on the gvcf output of the
# haplotype_caller step and emits `genotype_vcf`.
genotype_gvcfs:
run: ../tools/gatk_genotypegvcfs.cwl
in:
reference: reference
gvcfs: haplotype_caller/gvcf
# Optional workflow-level ploidy (declared as int?) passed through to the tool.
ploidy: ploidy
out:
[genotype_vcf]
annotate_variants:
Expand Down
Loading