From 21cca892af4142a91e9f7d1c693a370b848e9c7b Mon Sep 17 00:00:00 2001 From: Feiyu Du Date: Wed, 7 Sep 2022 12:40:05 -0500 Subject: [PATCH] Directly use dragen align input files on storage1 --- MyeloseqHD.wdl | 25 ++++++++++++------------- scripts/launcher.pl | 20 +++----------------- 2 files changed, 15 insertions(+), 30 deletions(-) diff --git a/MyeloseqHD.wdl b/MyeloseqHD.wdl index 8fe9afc..78b58d7 100644 --- a/MyeloseqHD.wdl +++ b/MyeloseqHD.wdl @@ -27,25 +27,24 @@ workflow MyeloseqHD { Array[String] Adapters = ["GATCGGAAGAGCACACGTCTGAACTCCAGTCAC","AGATCGGAAGAGCGTCGTGTAGGGAAA"] String DragenReference = "/staging/runs/Chromoseq/refdata/dragen_hg38" - String Reference = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.fa" - String ReferenceDict = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.dict" - - String VEP = "/storage1/fs1/gtac-mgi/Active/CLE/reference/VEP_cache" - String QcMetrics = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHDQCMetrics.json" + String Reference = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.fa" + String ReferenceDict = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.dict" + String VEP = "/storage1/fs1/gtac-mgi/Active/CLE/reference/VEP_cache" String HaplotectBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq.haplotect_snppairs_hg38.bed" String AmpliconBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHD.16462-1615924889.Amplicons.hg38.bed" String CoverageBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed" - String DragenCoverageBed = "/staging/runs/MyeloSeqHD/dragen_align_inputs/MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed" - String DragenHotspot = "/staging/runs/MyeloSeqHD/dragen_align_inputs/myeloseq_hotspots.vcf.gz" + String GenotypeVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseqhd.forcegenotype.vcf.gz" + String QcMetrics = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHDQCMetrics.json" + String Hotspot = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_hotspots.vcf.gz" String CustomAnnotationVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_custom_annotations.annotated.hg38.vcf.gz" String CustomAnnotationIndex = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_custom_annotations.annotated.hg38.vcf.gz.tbi" String CustomAnnotationParameters = "MYELOSEQ,vcf,exact,0,TCGA_AC,MDS_AC,MYELOSEQBLACKLIST" - String GenotypeVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseqhd.forcegenotype.vcf.gz" - String QC_pl = "/usr/local/bin/QC_metrics.pl" + String QC_pl = "/usr/local/bin/QC_metrics.pl" String xfer_pl = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/scripts/data_transfer.pl" + String DemuxFastqDir = "/storage1/fs1/gtac-mgi/Active/CLE/assay/myeloseqhd/demux_fastq" @@ -85,7 +84,7 @@ workflow MyeloseqHD { call dragen_align { input: DragenRef=DragenReference, - DragenHotspot=DragenHotspot, + Hotspot=Hotspot, fastq1=select_first([trim_reads.read1,samples[13]]), fastq2=select_first([trim_reads.read2,samples[14]]), Name=samples[1], @@ -94,7 +93,7 @@ workflow MyeloseqHD { LB=samples[5] + '.' + samples[0], readfamilysize=readfamilysize, AmpliconBed=AmpliconBed, - CoverageBed=DragenCoverageBed, + CoverageBed=CoverageBed, OutputDir=OutputDir, SubDir=samples[1] + '_' + samples[0], queue=DragenQueue, @@ -290,7 +289,7 @@ task trim_reads { task dragen_align { String Name String DragenRef - String DragenHotspot + String Hotspot String fastq1 String fastq2 String RG @@ -322,7 +321,7 @@ task dragen_align { /bin/mkdir ${LocalSampleDir} && \ /bin/mkdir ${outdir} && \ - /opt/edico/bin/dragen -r ${DragenRef} --tumor-fastq1 ${fastq1} --tumor-fastq2 ${fastq2} --RGSM-tumor ${SM} --RGID-tumor ${RG} --RGLB-tumor ${LB} --enable-map-align true --enable-sort true --enable-map-align-output true --vc-enable-umi-liquid true --vc-combine-phased-variants-distance 3 --gc-metrics-enable=true --qc-coverage-region-1 ${CoverageBed} --qc-coverage-reports-1 full_res --umi-enable true --umi-min-supporting-reads ${readfamilysize} --umi-correction-scheme=random --umi-enable-probability-model-merging=false --umi-fuzzy-window-size=0 --enable-variant-caller=true --vc-target-bed ${CoverageBed} --enable-sv true --sv-call-regions-bed ${CoverageBed} --sv-exome true --sv-output-contigs true --vc-somatic-hotspots ${DragenHotspot} --umi-metrics-interval-file ${CoverageBed} --read-trimmers=fixed-len --trim-r1-5prime=${default=1 TrimLen} --trim-r1-3prime=${default=1 TrimLen} --trim-r2-5prime=${default=1 TrimLen} --trim-r2-3prime=${default=1 TrimLen} --output-dir ${LocalSampleDir} --output-file-prefix ${Name} --output-format BAM &> ${log} && \ + /opt/edico/bin/dragen -r ${DragenRef} --tumor-fastq1 ${fastq1} --tumor-fastq2 ${fastq2} --RGSM-tumor ${SM} --RGID-tumor ${RG} --RGLB-tumor ${LB} --enable-map-align true --enable-sort true --enable-map-align-output true --vc-enable-umi-liquid true --vc-combine-phased-variants-distance 3 --gc-metrics-enable=true --qc-coverage-region-1 ${CoverageBed} --qc-coverage-reports-1 full_res --umi-enable true --umi-min-supporting-reads ${readfamilysize} --umi-correction-scheme=random --umi-enable-probability-model-merging=false --umi-fuzzy-window-size=0 --enable-variant-caller=true --vc-target-bed ${CoverageBed} --enable-sv true --sv-call-regions-bed ${CoverageBed} --sv-exome true --sv-output-contigs true --vc-somatic-hotspots ${Hotspot} --umi-metrics-interval-file ${CoverageBed} --read-trimmers=fixed-len --trim-r1-5prime=${default=1 TrimLen} --trim-r1-3prime=${default=1 TrimLen} --trim-r2-5prime=${default=1 TrimLen} --trim-r2-3prime=${default=1 TrimLen} --output-dir ${LocalSampleDir} --output-file-prefix ${Name} --output-format BAM &> ${log} && \ /bin/mv ${log} ./ && \ /bin/mv ${LocalSampleDir} ${dragen_outdir} } diff --git a/scripts/launcher.pl b/scripts/launcher.pl index 4334ed7..ff5285d 100755 --- a/scripts/launcher.pl +++ b/scripts/launcher.pl @@ -16,31 +16,17 @@ use lib "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/perl5/lib/perl5"; use Spreadsheet::Read; -use File::Copy::Recursive qw(dircopy); use JSON qw(from_json to_json); use IO::File; use File::Spec; -use File::Compare; -##THIS LAUNCHER SCRIPT NEEDS TO BE RUN ON DRAGEN NODE compute1-dragen-2 TO BE ABLE TO CHECK DIFF ON SOME DRAGEN INPUT FILES die "Provide rundir, excel sample spreadsheet, and batch name in order" unless @ARGV == 3; my ($rundir, $sample_sheet, $batch_name) = @ARGV; die "$rundir is not valid" unless -d $rundir; die "$sample_sheet is not valid" unless -s $sample_sheet; -my $staging_dir = '/staging/runs/MyeloSeqHD'; my $dir = '/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD'; - -#check diff on two key files -for my $name (qw(MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed myeloseq_hotspots.vcf.gz)) { - my $staging = File::Spec->join($staging_dir, 'dragen_align_inputs', $name); - my $process = File::Spec->join($dir, 'process', 'git', 'cle-myeloseqhd', 'accessory_files', $name); - unless (compare($staging, $process)==0) { - die "$staging and $process are not SAME !"; - } -} - my $git_dir = File::Spec->join($dir, 'process', 'git', 'cle-myeloseqhd'); my $conf = File::Spec->join($git_dir, 'application.conf'); @@ -118,9 +104,9 @@ unless (exists $all_hash{$all_id}) { die "For RESEQ $lib its MRN and accession can not be found in CoPath daily all_accession log"; } - $sex = $all_hash{$id}->{sex}; - $DOB = $all_hash{$id}->{DOB}; - $all_MRNs = $all_hash{$id}->{all_MRNs}; + $sex = $all_hash{$all_id}->{sex}; + $DOB = $all_hash{$all_id}->{DOB}; + $all_MRNs = $all_hash{$all_id}->{all_MRNs}; } else { #NOTRANSFER RESEARCH They will skip query_DB and upload_DB tasks in WF ($mrn, $accession, $sex, $DOB, $all_MRNs) = ('NONE') x 5;