From ee4c7ea5d394b57b31a76d422e28bf152f17b90a Mon Sep 17 00:00:00 2001
From: Jason Walker
Date: Tue, 18 Jun 2019 20:13:38 +0000
Subject: [PATCH 1/2] Add ploidy to germline_detect_variants and pass along to GATK GenotypeGVCFs and HaplotypeCaller

---
Reviewer notes (ignored by git am; not part of the commit message):
 * `ploidy` is optional (`int?`) in germline_detect_variants.cwl and is
   forwarded to both the haplotype_caller and genotype_gvcfs steps.
 * Both GATK tool definitions declare `default: 2` and bind the value to
   `--sample_ploidy`.
 * In gatk_haplotype_caller.cwl the new binding takes position 8, so
   `output_file_name` (`-o`) moves from position 8 to position 9.

 definitions/subworkflows/germline_detect_variants.cwl | 4 ++++
 definitions/tools/gatk_genotypegvcfs.cwl              | 6 ++++++
 definitions/tools/gatk_haplotype_caller.cwl           | 8 +++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/definitions/subworkflows/germline_detect_variants.cwl b/definitions/subworkflows/germline_detect_variants.cwl
index bea6ee153..260de3c37 100644
--- a/definitions/subworkflows/germline_detect_variants.cwl
+++ b/definitions/subworkflows/germline_detect_variants.cwl
@@ -48,6 +48,8 @@ inputs:
     custom_clinvar_vcf:
         type: File?
         secondaryFiles: [.tbi]
+    ploidy:
+        type: int?
 outputs:
     gvcf:
         type: File[]
@@ -77,6 +79,7 @@ steps:
             gvcf_gq_bands: gvcf_gq_bands
             intervals: intervals
             contamination_fraction: contamination_fraction
+            ploidy: ploidy
         out:
             [gvcf]
     genotype_gvcfs:
@@ -84,6 +87,7 @@ steps:
         in:
             reference: reference
             gvcfs: haplotype_caller/gvcf
+            ploidy: ploidy
         out:
             [genotype_vcf]
     annotate_variants:
diff --git a/definitions/tools/gatk_genotypegvcfs.cwl b/definitions/tools/gatk_genotypegvcfs.cwl
index d4cb09122..21c5a4332 100644
--- a/definitions/tools/gatk_genotypegvcfs.cwl
+++ b/definitions/tools/gatk_genotypegvcfs.cwl
@@ -26,6 +26,12 @@ inputs:
                 prefix: "--variant"
         inputBinding:
             position: 2
+    ploidy:
+        type: int?
+        default: 2
+        inputBinding:
+            prefix: "--sample_ploidy"
+            position: 3
 outputs:
     genotype_vcf:
         type: File
diff --git a/definitions/tools/gatk_haplotype_caller.cwl b/definitions/tools/gatk_haplotype_caller.cwl
index a173d1b26..d05dc3f25 100644
--- a/definitions/tools/gatk_haplotype_caller.cwl
+++ b/definitions/tools/gatk_haplotype_caller.cwl
@@ -53,12 +53,18 @@ inputs:
         inputBinding:
             prefix: "-contamination"
             position: 7
+    ploidy:
+        type: int?
+        default: 2
+        inputBinding:
+            prefix: "--sample_ploidy"
+            position: 8
     output_file_name:
         type: string
         default: "output.g.vcf.gz"
         inputBinding:
             prefix: "-o"
-            position: 8
+            position: 9
 outputs:
     gvcf:
         type: File

From 6314580f8d119258c27654423c2efdd30a3b6ac5 Mon Sep 17 00:00:00 2001
From: Jason Walker
Date: Tue, 18 Jun 2019 20:41:04 +0000
Subject: [PATCH 2/2] Add non-human WGS pipeline including basic variant calling.

---
Reviewer notes (ignored by git am; not part of the commit message):
 * germline_wgs_non_human.cwl requires `ploidy` (`int`) and forwards it to
   ../subworkflows/germline_detect_variants.cwl (see patch 1/2).
 * The new pipeline wires no `contamination_fraction` into detect_variants.
 * wgs_alignment_and_qc_non_human.cwl chains align_sort_markdup.cwl into the
   new qc_wgs_no_verify_bam.cwl subworkflow.

 .../pipelines/germline_wgs_non_human.cwl      | 173 ++++++++++++++++++
 .../wgs_alignment_and_qc_non_human.cwl        | 100 ++++++++++
 .../subworkflows/qc_wgs_no_verify_bam.cwl     | 131 +++++++++++++
 3 files changed, 404 insertions(+)
 create mode 100644 definitions/pipelines/germline_wgs_non_human.cwl
 create mode 100644 definitions/pipelines/wgs_alignment_and_qc_non_human.cwl
 create mode 100644 definitions/subworkflows/qc_wgs_no_verify_bam.cwl

diff --git a/definitions/pipelines/germline_wgs_non_human.cwl b/definitions/pipelines/germline_wgs_non_human.cwl
new file mode 100644
index 000000000..55521148f
--- /dev/null
+++ b/definitions/pipelines/germline_wgs_non_human.cwl
@@ -0,0 +1,173 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "wgs alignment and germline variant detection"
+requirements:
+    - class: SchemaDefRequirement
+      types:
+          - $import: ../types/labelled_file.yml
+    - class: SubworkflowFeatureRequirement
+inputs:
+    reference: string
+    bams:
+        type: File[]
+    readgroups:
+        type: string[]
+    picard_metric_accumulation_level:
+        type: string
+    emit_reference_confidence:
+        type: string
+    gvcf_gq_bands:
+        type: string[]
+    intervals:
+        type:
+            type: array
+            items:
+                type: array
+                items: string
+    qc_intervals:
+        type: File
+    variant_reporting_intervals:
+        type: File
+    vep_cache_dir:
+        type: string
+    vep_ensembl_assembly:
+        type: string
+        doc: "genome assembly to use in vep. Examples: GRCh38 or GRCm38"
+    vep_ensembl_version:
+        type: string
+        doc: "ensembl version - Must be present in the cache directory. Example: 95"
+    vep_ensembl_species:
+        type: string
+        doc: "ensembl species - Must be present in the cache directory. Examples: homo_sapiens or mus_musculus"
+    synonyms_file:
+        type: File?
+    annotate_coding_only:
+        type: boolean?
+    minimum_mapping_quality:
+        type: int?
+    minimum_base_quality:
+        type: int?
+    per_base_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+    per_target_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+    summary_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+    ploidy:
+        type: int
+outputs:
+    cram:
+        type: File
+        outputSource: index_cram/indexed_cram
+    mark_duplicates_metrics:
+        type: File
+        outputSource: alignment_and_qc/mark_duplicates_metrics
+    insert_size_metrics:
+        type: File
+        outputSource: alignment_and_qc/insert_size_metrics
+    insert_size_histogram:
+        type: File
+        outputSource: alignment_and_qc/insert_size_histogram
+    alignment_summary_metrics:
+        type: File
+        outputSource: alignment_and_qc/alignment_summary_metrics
+    gc_bias_metrics:
+        type: File
+        outputSource: alignment_and_qc/gc_bias_metrics
+    gc_bias_metrics_chart:
+        type: File
+        outputSource: alignment_and_qc/gc_bias_metrics_chart
+    gc_bias_metrics_summary:
+        type: File
+        outputSource: alignment_and_qc/gc_bias_metrics_summary
+    wgs_metrics:
+        type: File
+        outputSource: alignment_and_qc/wgs_metrics
+    flagstats:
+        type: File
+        outputSource: alignment_and_qc/flagstats
+    gvcf:
+        type: File[]
+        outputSource: detect_variants/gvcf
+    final_vcf:
+        type: File
+        outputSource: detect_variants/final_vcf
+        secondaryFiles: [.tbi]
+    coding_vcf:
+        type: File
+        outputSource: detect_variants/coding_vcf
+        secondaryFiles: [.tbi]
+    limited_vcf:
+        type: File
+        outputSource: detect_variants/limited_vcf
+        secondaryFiles: [.tbi]
+    vep_summary:
+        type: File
+        outputSource: detect_variants/vep_summary
+    per_base_coverage_metrics:
+        type: File[]
+        outputSource: alignment_and_qc/per_base_coverage_metrics
+    per_base_hs_metrics:
+        type: File[]
+        outputSource: alignment_and_qc/per_base_hs_metrics
+    per_target_coverage_metrics:
+        type: File[]
+        outputSource: alignment_and_qc/per_target_coverage_metrics
+    per_target_hs_metrics:
+        type: File[]
+        outputSource: alignment_and_qc/per_target_hs_metrics
+    summary_hs_metrics:
+        type: File[]
+        outputSource: alignment_and_qc/summary_hs_metrics
+    bamcoverage_bigwig:
+        type: File
+        outputSource: alignment_and_qc/bamcoverage_bigwig
+steps:
+    alignment_and_qc:
+        run: wgs_alignment_and_qc_non_human.cwl
+        in:
+            reference: reference
+            bams: bams
+            readgroups: readgroups
+            intervals: qc_intervals
+            picard_metric_accumulation_level: picard_metric_accumulation_level
+            minimum_mapping_quality: minimum_mapping_quality
+            minimum_base_quality: minimum_base_quality
+            per_base_intervals: per_base_intervals
+            per_target_intervals: per_target_intervals
+            summary_intervals: summary_intervals
+        out:
+            [bam, mark_duplicates_metrics, insert_size_metrics, insert_size_histogram, alignment_summary_metrics, gc_bias_metrics, gc_bias_metrics_chart, gc_bias_metrics_summary, wgs_metrics, flagstats, per_base_coverage_metrics, per_base_hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, summary_hs_metrics, bamcoverage_bigwig]
+    detect_variants:
+        run: ../subworkflows/germline_detect_variants.cwl
+        in:
+            reference: reference
+            bam: alignment_and_qc/bam
+            emit_reference_confidence: emit_reference_confidence
+            gvcf_gq_bands: gvcf_gq_bands
+            intervals: intervals
+            vep_cache_dir: vep_cache_dir
+            synonyms_file: synonyms_file
+            annotate_coding_only: annotate_coding_only
+            limit_variant_intervals: variant_reporting_intervals
+            vep_ensembl_assembly: vep_ensembl_assembly
+            vep_ensembl_version: vep_ensembl_version
+            vep_ensembl_species: vep_ensembl_species
+            ploidy: ploidy
+        out:
+            [gvcf, final_vcf, coding_vcf, limited_vcf, vep_summary]
+    bam_to_cram:
+        run: ../tools/bam_to_cram.cwl
+        in:
+            bam: alignment_and_qc/bam
+            reference: reference
+        out:
+            [cram]
+    index_cram:
+        run: ../tools/index_cram.cwl
+        in:
+            cram: bam_to_cram/cram
+        out:
+            [indexed_cram]
diff --git a/definitions/pipelines/wgs_alignment_and_qc_non_human.cwl b/definitions/pipelines/wgs_alignment_and_qc_non_human.cwl
new file mode 100644
index 000000000..47702e931
--- /dev/null
+++ b/definitions/pipelines/wgs_alignment_and_qc_non_human.cwl
@@ -0,0 +1,100 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "wgs alignment with qc"
+requirements:
+    - class: SchemaDefRequirement
+      types:
+          - $import: ../types/labelled_file.yml
+    - class: SubworkflowFeatureRequirement
+inputs:
+    reference: string
+    bams:
+        type: File[]
+    readgroups:
+        type: string[]
+    intervals:
+        type: File
+    picard_metric_accumulation_level:
+        type: string
+    minimum_mapping_quality:
+        type: int?
+    minimum_base_quality:
+        type: int?
+    per_base_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+    per_target_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+    summary_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+outputs:
+    bam:
+        type: File
+        outputSource: alignment/final_bam
+    mark_duplicates_metrics:
+        type: File
+        outputSource: alignment/mark_duplicates_metrics_file
+    insert_size_metrics:
+        type: File
+        outputSource: qc/insert_size_metrics
+    insert_size_histogram:
+        type: File
+        outputSource: qc/insert_size_histogram
+    alignment_summary_metrics:
+        type: File
+        outputSource: qc/alignment_summary_metrics
+    gc_bias_metrics:
+        type: File
+        outputSource: qc/gc_bias_metrics
+    gc_bias_metrics_chart:
+        type: File
+        outputSource: qc/gc_bias_metrics_chart
+    gc_bias_metrics_summary:
+        type: File
+        outputSource: qc/gc_bias_metrics_summary
+    wgs_metrics:
+        type: File
+        outputSource: qc/wgs_metrics
+    flagstats:
+        type: File
+        outputSource: qc/flagstats
+    per_base_coverage_metrics:
+        type: File[]
+        outputSource: qc/per_base_coverage_metrics
+    per_base_hs_metrics:
+        type: File[]
+        outputSource: qc/per_base_hs_metrics
+    per_target_coverage_metrics:
+        type: File[]
+        outputSource: qc/per_target_coverage_metrics
+    per_target_hs_metrics:
+        type: File[]
+        outputSource: qc/per_target_hs_metrics
+    summary_hs_metrics:
+        type: File[]
+        outputSource: qc/summary_hs_metrics
+    bamcoverage_bigwig:
+        type: File
+        outputSource: qc/bamcoverage_bigwig
+steps:
+    alignment:
+        run: ../subworkflows/align_sort_markdup.cwl
+        in:
+            reference: reference
+            bams: bams
+            readgroups: readgroups
+        out: [final_bam,mark_duplicates_metrics_file]
+    qc:
+        run: ../subworkflows/qc_wgs_no_verify_bam.cwl
+        in:
+            bam: alignment/final_bam
+            reference: reference
+            intervals: intervals
+            picard_metric_accumulation_level: picard_metric_accumulation_level
+            minimum_mapping_quality: minimum_mapping_quality
+            minimum_base_quality: minimum_base_quality
+            per_base_intervals: per_base_intervals
+            per_target_intervals: per_target_intervals
+            summary_intervals: summary_intervals
+        out: [insert_size_metrics, insert_size_histogram, alignment_summary_metrics, gc_bias_metrics, gc_bias_metrics_chart, gc_bias_metrics_summary, wgs_metrics, flagstats, per_base_coverage_metrics, per_base_hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, summary_hs_metrics, bamcoverage_bigwig]
diff --git a/definitions/subworkflows/qc_wgs_no_verify_bam.cwl b/definitions/subworkflows/qc_wgs_no_verify_bam.cwl
new file mode 100644
index 000000000..20008cd14
--- /dev/null
+++ b/definitions/subworkflows/qc_wgs_no_verify_bam.cwl
@@ -0,0 +1,131 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: Workflow
+label: "WGS QC workflow"
+requirements:
+    - class: SchemaDefRequirement
+      types:
+          - $import: ../types/labelled_file.yml
+    - class: SubworkflowFeatureRequirement
+inputs:
+    bam:
+        type: File
+        secondaryFiles: [^.bai]
+    reference:
+        type: string
+    intervals:
+        type: File
+    picard_metric_accumulation_level:
+        type: string?
+        default: ALL_READS
+    minimum_mapping_quality:
+        type: int?
+    minimum_base_quality:
+        type: int?
+    per_base_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+    per_target_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+    summary_intervals:
+        type: ../types/labelled_file.yml#labelled_file[]
+outputs:
+    insert_size_metrics:
+        type: File
+        outputSource: collect_insert_size_metrics/insert_size_metrics
+    insert_size_histogram:
+        type: File
+        outputSource: collect_insert_size_metrics/insert_size_histogram
+    alignment_summary_metrics:
+        type: File
+        outputSource: collect_alignment_summary_metrics/alignment_summary_metrics
+    gc_bias_metrics:
+        type: File
+        outputSource: collect_gc_bias_metrics/gc_bias_metrics
+    gc_bias_metrics_chart:
+        type: File
+        outputSource: collect_gc_bias_metrics/gc_bias_metrics_chart
+    gc_bias_metrics_summary:
+        type: File
+        outputSource: collect_gc_bias_metrics/gc_bias_metrics_summary
+    wgs_metrics:
+        type: File
+        outputSource: collect_wgs_metrics/wgs_metrics
+    flagstats:
+        type: File
+        outputSource: samtools_flagstat/flagstats
+    per_base_coverage_metrics:
+        type: File[]
+        outputSource: collect_hs_metrics/per_base_coverage_metrics
+    per_base_hs_metrics:
+        type: File[]
+        outputSource: collect_hs_metrics/per_base_hs_metrics
+    per_target_coverage_metrics:
+        type: File[]
+        outputSource: collect_hs_metrics/per_target_coverage_metrics
+    per_target_hs_metrics:
+        type: File[]
+        outputSource: collect_hs_metrics/per_target_hs_metrics
+    summary_hs_metrics:
+        type: File[]
+        outputSource: collect_hs_metrics/summary_hs_metrics
+    bamcoverage_bigwig:
+        type: File
+        outputSource: deeptools_bamcoverage/outfile
+steps:
+    collect_insert_size_metrics:
+        run: ../tools/collect_insert_size_metrics.cwl
+        in:
+            bam: bam
+            reference: reference
+            metric_accumulation_level: picard_metric_accumulation_level
+        out:
+            [insert_size_metrics, insert_size_histogram]
+    collect_alignment_summary_metrics:
+        run: ../tools/collect_alignment_summary_metrics.cwl
+        in:
+            bam: bam
+            reference: reference
+            metric_accumulation_level: picard_metric_accumulation_level
+        out:
+            [alignment_summary_metrics]
+    collect_gc_bias_metrics:
+        run: ../tools/collect_gc_bias_metrics.cwl
+        in:
+            bam: bam
+            reference: reference
+            metric_accumulation_level: picard_metric_accumulation_level
+        out:
+            [gc_bias_metrics, gc_bias_metrics_chart, gc_bias_metrics_summary]
+    collect_wgs_metrics:
+        run: ../tools/collect_wgs_metrics.cwl
+        in:
+            bam: bam
+            reference: reference
+            intervals: intervals
+        out:
+            [wgs_metrics]
+    samtools_flagstat:
+        run: ../tools/samtools_flagstat.cwl
+        in:
+            bam: bam
+        out: [flagstats]
+    collect_hs_metrics:
+        run: hs_metrics.cwl
+        in:
+            bam: bam
+            minimum_mapping_quality: minimum_mapping_quality
+            minimum_base_quality: minimum_base_quality
+            per_base_intervals: per_base_intervals
+            per_target_intervals: per_target_intervals
+            reference: reference
+            summary_intervals: summary_intervals
+        out:
+            [per_base_coverage_metrics, per_base_hs_metrics, per_target_coverage_metrics, per_target_hs_metrics, summary_hs_metrics]
+    deeptools_bamcoverage:
+        run: ../tools/deeptools_bamcoverage.cwl
+        in:
+            bam: bam
+            min_mapping_quality: minimum_mapping_quality
+        out:
+            [outfile]