Skip to content

Commit

Permalink
Merge pull request #71 from dufeiyu/dragen_input
Browse files: browse the repository at this point in the history
Directly use dragen align input files on storage1
  • Loading branch information
dufeiyu authored Sep 7, 2022
2 parents 8c05555 + 21cca89 commit f8e6bb2
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 30 deletions.
25 changes: 12 additions & 13 deletions MyeloseqHD.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,24 @@ workflow MyeloseqHD {
Array[String] Adapters = ["GATCGGAAGAGCACACGTCTGAACTCCAGTCAC","AGATCGGAAGAGCGTCGTGTAGGGAAA"]

String DragenReference = "/staging/runs/Chromoseq/refdata/dragen_hg38"
String Reference = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.fa"
String ReferenceDict = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.dict"

String VEP = "/storage1/fs1/gtac-mgi/Active/CLE/reference/VEP_cache"
String QcMetrics = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHDQCMetrics.json"
String Reference = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.fa"
String ReferenceDict = "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/refdata/hg38/all_sequences.dict"
String VEP = "/storage1/fs1/gtac-mgi/Active/CLE/reference/VEP_cache"

String HaplotectBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq.haplotect_snppairs_hg38.bed"
String AmpliconBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHD.16462-1615924889.Amplicons.hg38.bed"
String CoverageBed = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed"
String DragenCoverageBed = "/staging/runs/MyeloSeqHD/dragen_align_inputs/MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed"
String DragenHotspot = "/staging/runs/MyeloSeqHD/dragen_align_inputs/myeloseq_hotspots.vcf.gz"
String GenotypeVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseqhd.forcegenotype.vcf.gz"
String QcMetrics = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/MyeloseqHDQCMetrics.json"
String Hotspot = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_hotspots.vcf.gz"

String CustomAnnotationVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_custom_annotations.annotated.hg38.vcf.gz"
String CustomAnnotationIndex = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseq_custom_annotations.annotated.hg38.vcf.gz.tbi"
String CustomAnnotationParameters = "MYELOSEQ,vcf,exact,0,TCGA_AC,MDS_AC,MYELOSEQBLACKLIST"
String GenotypeVcf = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/accessory_files/myeloseqhd.forcegenotype.vcf.gz"

String QC_pl = "/usr/local/bin/QC_metrics.pl"
String QC_pl = "/usr/local/bin/QC_metrics.pl"
String xfer_pl = "/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD/process/git/cle-myeloseqhd/scripts/data_transfer.pl"

String DemuxFastqDir = "/storage1/fs1/gtac-mgi/Active/CLE/assay/myeloseqhd/demux_fastq"


Expand Down Expand Up @@ -85,7 +84,7 @@ workflow MyeloseqHD {

call dragen_align {
input: DragenRef=DragenReference,
DragenHotspot=DragenHotspot,
Hotspot=Hotspot,
fastq1=select_first([trim_reads.read1,samples[13]]),
fastq2=select_first([trim_reads.read2,samples[14]]),
Name=samples[1],
Expand All @@ -94,7 +93,7 @@ workflow MyeloseqHD {
LB=samples[5] + '.' + samples[0],
readfamilysize=readfamilysize,
AmpliconBed=AmpliconBed,
CoverageBed=DragenCoverageBed,
CoverageBed=CoverageBed,
OutputDir=OutputDir,
SubDir=samples[1] + '_' + samples[0],
queue=DragenQueue,
Expand Down Expand Up @@ -290,7 +289,7 @@ task trim_reads {
task dragen_align {
String Name
String DragenRef
String DragenHotspot
String Hotspot
String fastq1
String fastq2
String RG
Expand Down Expand Up @@ -322,7 +321,7 @@ task dragen_align {

/bin/mkdir ${LocalSampleDir} && \
/bin/mkdir ${outdir} && \
/opt/edico/bin/dragen -r ${DragenRef} --tumor-fastq1 ${fastq1} --tumor-fastq2 ${fastq2} --RGSM-tumor ${SM} --RGID-tumor ${RG} --RGLB-tumor ${LB} --enable-map-align true --enable-sort true --enable-map-align-output true --vc-enable-umi-liquid true --vc-combine-phased-variants-distance 3 --gc-metrics-enable=true --qc-coverage-region-1 ${CoverageBed} --qc-coverage-reports-1 full_res --umi-enable true --umi-min-supporting-reads ${readfamilysize} --umi-correction-scheme=random --umi-enable-probability-model-merging=false --umi-fuzzy-window-size=0 --enable-variant-caller=true --vc-target-bed ${CoverageBed} --enable-sv true --sv-call-regions-bed ${CoverageBed} --sv-exome true --sv-output-contigs true --vc-somatic-hotspots ${DragenHotspot} --umi-metrics-interval-file ${CoverageBed} --read-trimmers=fixed-len --trim-r1-5prime=${default=1 TrimLen} --trim-r1-3prime=${default=1 TrimLen} --trim-r2-5prime=${default=1 TrimLen} --trim-r2-3prime=${default=1 TrimLen} --output-dir ${LocalSampleDir} --output-file-prefix ${Name} --output-format BAM &> ${log} && \
/opt/edico/bin/dragen -r ${DragenRef} --tumor-fastq1 ${fastq1} --tumor-fastq2 ${fastq2} --RGSM-tumor ${SM} --RGID-tumor ${RG} --RGLB-tumor ${LB} --enable-map-align true --enable-sort true --enable-map-align-output true --vc-enable-umi-liquid true --vc-combine-phased-variants-distance 3 --gc-metrics-enable=true --qc-coverage-region-1 ${CoverageBed} --qc-coverage-reports-1 full_res --umi-enable true --umi-min-supporting-reads ${readfamilysize} --umi-correction-scheme=random --umi-enable-probability-model-merging=false --umi-fuzzy-window-size=0 --enable-variant-caller=true --vc-target-bed ${CoverageBed} --enable-sv true --sv-call-regions-bed ${CoverageBed} --sv-exome true --sv-output-contigs true --vc-somatic-hotspots ${Hotspot} --umi-metrics-interval-file ${CoverageBed} --read-trimmers=fixed-len --trim-r1-5prime=${default=1 TrimLen} --trim-r1-3prime=${default=1 TrimLen} --trim-r2-5prime=${default=1 TrimLen} --trim-r2-3prime=${default=1 TrimLen} --output-dir ${LocalSampleDir} --output-file-prefix ${Name} --output-format BAM &> ${log} && \
/bin/mv ${log} ./ && \
/bin/mv ${LocalSampleDir} ${dragen_outdir}
}
Expand Down
20 changes: 3 additions & 17 deletions scripts/launcher.pl
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,17 @@

use lib "/storage1/fs1/duncavagee/Active/SEQ/Chromoseq/process/perl5/lib/perl5";
use Spreadsheet::Read;
use File::Copy::Recursive qw(dircopy);
use JSON qw(from_json to_json);
use IO::File;
use File::Spec;
use File::Compare;

##THIS LAUNCHER SCRIPT NEEDS TO BE RUN ON DRAGEN NODE compute1-dragen-2 TO BE ABLE TO CHECK DIFF ON SOME DRAGEN INPUT FILES
die "Provide rundir, excel sample spreadsheet, and batch name in order" unless @ARGV == 3;

my ($rundir, $sample_sheet, $batch_name) = @ARGV;
die "$rundir is not valid" unless -d $rundir;
die "$sample_sheet is not valid" unless -s $sample_sheet;

my $staging_dir = '/staging/runs/MyeloSeqHD';
my $dir = '/storage1/fs1/duncavagee/Active/SEQ/MyeloSeqHD';

#check diff on two key files
for my $name (qw(MyeloseqHD.16462-1615924889.CoverageQC.hg38.bed myeloseq_hotspots.vcf.gz)) {
my $staging = File::Spec->join($staging_dir, 'dragen_align_inputs', $name);
my $process = File::Spec->join($dir, 'process', 'git', 'cle-myeloseqhd', 'accessory_files', $name);
unless (compare($staging, $process)==0) {
die "$staging and $process are not SAME !";
}
}

my $git_dir = File::Spec->join($dir, 'process', 'git', 'cle-myeloseqhd');

my $conf = File::Spec->join($git_dir, 'application.conf');
Expand Down Expand Up @@ -118,9 +104,9 @@
unless (exists $all_hash{$all_id}) {
die "For RESEQ $lib its MRN and accession can not be found in CoPath daily all_accession log";
}
$sex = $all_hash{$id}->{sex};
$DOB = $all_hash{$id}->{DOB};
$all_MRNs = $all_hash{$id}->{all_MRNs};
$sex = $all_hash{$all_id}->{sex};
$DOB = $all_hash{$all_id}->{DOB};
$all_MRNs = $all_hash{$all_id}->{all_MRNs};
}
else { #NOTRANSFER RESEARCH They will skip query_DB and upload_DB tasks in WF
($mrn, $accession, $sex, $DOB, $all_MRNs) = ('NONE') x 5;
Expand Down

0 comments on commit f8e6bb2

Please sign in to comment.