From c90e65af250ce365b4141fb392cf0f4dc8027eac Mon Sep 17 00:00:00 2001 From: Charles Shale Date: Mon, 9 Dec 2024 11:16:20 +1100 Subject: [PATCH] Esvee: optionally specify 'esvee_prep_dir' for inputs for assembly and caller --- .../esvee/assembly/AssemblyApplication.java | 9 ++++++--- .../esvee/assembly/AssemblyConfig.java | 18 ++++++++---------- .../esvee/caller/CallerApplication.java | 10 ++-------- .../hmftools/esvee/caller/CallerConfig.java | 10 +++++++--- .../hmftools/esvee/common/FileCommon.java | 14 +++++++++++--- 5 files changed, 34 insertions(+), 27 deletions(-) diff --git a/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyApplication.java b/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyApplication.java index e5e277b942..9b8b592914 100644 --- a/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyApplication.java +++ b/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyApplication.java @@ -20,6 +20,7 @@ import static com.hartwig.hmftools.esvee.assembly.output.WriteType.JUNC_ASSEMBLY; import static com.hartwig.hmftools.esvee.assembly.output.WriteType.ASSEMBLY_BAM; import static com.hartwig.hmftools.esvee.assembly.output.WriteType.ASSEMBLY_READ; +import static com.hartwig.hmftools.esvee.common.FileCommon.formFragmentLengthDistFilename; import static com.hartwig.hmftools.esvee.prep.PrepConstants.DISCORDANT_GROUP_MIN_FRAGMENTS; import static com.hartwig.hmftools.esvee.prep.PrepConstants.DISCORDANT_GROUP_MIN_FRAGMENTS_SHORT; import static com.hartwig.hmftools.esvee.prep.PrepConstants.MIN_HOTSPOT_JUNCTION_SUPPORT; @@ -226,13 +227,15 @@ private boolean loadJunctionFiles() private void loadFragmentLengthBounds() { - if(!Files.exists(Paths.get(mConfig.FragmentLengthFile))) + String fragmentLengthFile = formFragmentLengthDistFilename(mConfig.PrepDir, mConfig.sampleId()); + + if(!Files.exists(Paths.get(fragmentLengthFile))) { - SV_LOGGER.error("missing fragment length file: {}", mConfig.FragmentLengthFile); + SV_LOGGER.error("missing fragment length file: {}", fragmentLengthFile); System.exit(1); } - FragmentLengthBounds fragmentLengthBounds = FragmentSizeDistribution.loadFragmentLengthBounds(mConfig.FragmentLengthFile); + FragmentLengthBounds fragmentLengthBounds = FragmentSizeDistribution.loadFragmentLengthBounds(fragmentLengthFile); if(fragmentLengthBounds.isValid()) { diff --git a/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyConfig.java b/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyConfig.java index 1a8577eeaa..127d9f8b2e 100644 --- a/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyConfig.java +++ b/esvee/src/main/java/com/hartwig/hmftools/esvee/assembly/AssemblyConfig.java @@ -33,14 +33,13 @@ import static com.hartwig.hmftools.esvee.assembly.AssemblyConstants.DEFAULT_DISC_RATE_INCREMENT; import static com.hartwig.hmftools.esvee.assembly.alignment.BwaAligner.loadAlignerLibrary; import static com.hartwig.hmftools.esvee.assembly.output.WriteType.fromConfig; -import static com.hartwig.hmftools.esvee.common.FileCommon.FRAG_LENGTHS_FILE; -import static com.hartwig.hmftools.esvee.common.FileCommon.FRAG_LENGTHS_FILE_DESC; import static com.hartwig.hmftools.esvee.common.FileCommon.JUNCTION_FILE; import static com.hartwig.hmftools.esvee.common.FileCommon.JUNCTION_FILE_DESC; +import static com.hartwig.hmftools.esvee.common.FileCommon.PREP_DIR; +import static com.hartwig.hmftools.esvee.common.FileCommon.PREP_DIR_DESC; import static com.hartwig.hmftools.esvee.common.FileCommon.REF_GENOME_IMAGE_EXTENSION; import static com.hartwig.hmftools.esvee.assembly.output.WriteType.ASSEMBLY_READ; import static com.hartwig.hmftools.esvee.common.FileCommon.formEsveeInputFilename; -import static com.hartwig.hmftools.esvee.common.FileCommon.formFragmentLengthDistFilename; import static com.hartwig.hmftools.esvee.common.FileCommon.formPrepInputFilename; import static com.hartwig.hmftools.esvee.prep.PrepConstants.PREP_JUNCTION_FILE_ID; @@ -79,7 +78,7 @@ public class AssemblyConfig public final List ReferenceBams; public final List JunctionFiles; - public final String FragmentLengthFile; + public final String PrepDir; public final RefGenomeVersion RefGenVersion; public final RefGenomeCoordinates RefGenomeCoords; @@ -172,6 +171,8 @@ public AssemblyConfig(final ConfigBuilder configBuilder) OutputDir = parseOutputDir(configBuilder); OutputId = configBuilder.getValue(OUTPUT_ID); + PrepDir = configBuilder.hasValue(PREP_DIR) ? configBuilder.getValue(PREP_DIR) : OutputDir; + JunctionFiles = Lists.newArrayList(); if(configBuilder.hasValue(JUNCTION_FILE)) @@ -181,15 +182,12 @@ public AssemblyConfig(final ConfigBuilder configBuilder) else { // since Prep now reads multiple BAMs, only the tumor-labelled junctions file needs to be loaded - String junctionFile = formPrepInputFilename(OutputDir, TumorIds.get(0), PREP_JUNCTION_FILE_ID, OutputId); + String junctionFile = formPrepInputFilename(PrepDir, TumorIds.get(0), PREP_JUNCTION_FILE_ID, OutputId); if(Files.exists(Paths.get(junctionFile))) JunctionFiles.add(junctionFile); } - FragmentLengthFile = configBuilder.getValue( - FRAG_LENGTHS_FILE, formFragmentLengthDistFilename(OutputDir, TumorIds.get(0))); - BamToolPath = configBuilder.getValue(BAMTOOL_PATH); RefGenVersion = RefGenomeVersion.from(configBuilder); @@ -311,7 +309,7 @@ public static void registerConfig(final ConfigBuilder configBuilder) configBuilder.addConfigItem(REFERENCE_BAM, false, REFERENCE_BAMS_DESC); configBuilder.addPaths(JUNCTION_FILE, false, JUNCTION_FILE_DESC); - configBuilder.addPaths(FRAG_LENGTHS_FILE, false, FRAG_LENGTHS_FILE_DESC); + configBuilder.addPaths(PREP_DIR, false, PREP_DIR_DESC); addRefGenomeConfig(configBuilder, true); configBuilder.addPath(REF_GENOME_IMAGE, false, REFERENCE_BAM_DESC); @@ -373,7 +371,7 @@ public AssemblyConfig() ReferenceBams = Collections.emptyList(); JunctionFiles = Collections.emptyList(); - FragmentLengthFile = ""; + PrepDir = null; RefGenVersion = V38; RefGenomeCoords = null; diff --git a/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerApplication.java b/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerApplication.java index ded6fbdfc0..6b6b23ac85 100644 --- a/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerApplication.java +++ b/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerApplication.java @@ -16,13 +16,11 @@ import static com.hartwig.hmftools.esvee.caller.LineChecker.adjustLineSites; import static com.hartwig.hmftools.esvee.caller.VariantFilters.logFilterTypeCounts; import static com.hartwig.hmftools.esvee.common.FileCommon.APP_NAME; -import static com.hartwig.hmftools.esvee.common.FileCommon.FRAG_LENGTHS_FILE; import static com.hartwig.hmftools.esvee.common.FileCommon.formFragmentLengthDistFilename; import static com.hartwig.hmftools.esvee.prep.types.DiscordantStats.formDiscordantStatsFilename; import static com.hartwig.hmftools.esvee.prep.types.DiscordantStats.loadDiscordantStats; import com.hartwig.hmftools.common.utils.config.ConfigBuilder; -import com.hartwig.hmftools.common.utils.file.FileWriterUtils; import com.hartwig.hmftools.common.utils.version.VersionInfo; import com.hartwig.hmftools.common.variant.GenotypeIds; import com.hartwig.hmftools.common.variant.VcfFileReader; @@ -66,14 +64,10 @@ public CallerApplication(final ConfigBuilder configBuilder) mPonCache = new PonCache(configBuilder); mHotspotCache = new HotspotCache(configBuilder); - String inputDir = FileWriterUtils.pathFromFile(mConfig.VcfFile); - - String fragLengthFilename = configBuilder.getValue( - FRAG_LENGTHS_FILE, formFragmentLengthDistFilename(inputDir, mConfig.fileSampleId())); - + String fragLengthFilename = formFragmentLengthDistFilename(mConfig.PrepDir, mConfig.fileSampleId()); FragmentLengthBounds fragmentLengthBounds = FragmentSizeDistribution.loadFragmentLengthBounds(fragLengthFilename); - String discStatsFilename = formDiscordantStatsFilename(inputDir, mConfig.fileSampleId()); + String discStatsFilename = formDiscordantStatsFilename(mConfig.PrepDir, mConfig.fileSampleId()); DiscordantStats discordantStats = loadDiscordantStats(discStatsFilename); SV_LOGGER.info("fragment length dist: {}", fragmentLengthBounds); diff --git a/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerConfig.java b/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerConfig.java index c884224e2c..256928e156 100644 --- a/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerConfig.java +++ b/esvee/src/main/java/com/hartwig/hmftools/esvee/caller/CallerConfig.java @@ -15,10 +15,10 @@ import static com.hartwig.hmftools.common.utils.file.FileWriterUtils.parseOutputDir; import static com.hartwig.hmftools.esvee.assembly.AssemblyConfig.SV_LOGGER; import static com.hartwig.hmftools.esvee.common.FileCommon.DEPTH_VCF_SUFFIX; -import static com.hartwig.hmftools.esvee.common.FileCommon.FRAG_LENGTHS_FILE; -import static com.hartwig.hmftools.esvee.common.FileCommon.FRAG_LENGTHS_FILE_DESC; import static com.hartwig.hmftools.esvee.common.FileCommon.INPUT_VCF; import static com.hartwig.hmftools.esvee.common.FileCommon.INPUT_VCF_DESC; +import static com.hartwig.hmftools.esvee.common.FileCommon.PREP_DIR; +import static com.hartwig.hmftools.esvee.common.FileCommon.PREP_DIR_DESC; import static com.hartwig.hmftools.esvee.common.FileCommon.formEsveeInputFilename; import java.nio.file.Files; @@ -36,7 +36,9 @@ public class CallerConfig public final String SampleId; public final String ReferenceId; public final RefGenomeVersion RefGenVersion; + public final String VcfFile; + public final String PrepDir; public final String OutputDir; public final String OutputId; @@ -63,6 +65,8 @@ public CallerConfig(final ConfigBuilder configBuilder) VcfFile = formEsveeInputFilename(OutputDir, fileSampleId, DEPTH_VCF_SUFFIX, OutputId); } + PrepDir = configBuilder.hasValue(PREP_DIR) ? configBuilder.getValue(PREP_DIR) : OutputDir; + RefGenVersion = RefGenomeVersion.from(configBuilder); SpecificChromosomes = loadSpecificChromsomes(configBuilder); @@ -110,7 +114,7 @@ public static void registerConfig(final ConfigBuilder configBuilder) configBuilder.addConfigItem(SAMPLE, SAMPLE_DESC); configBuilder.addConfigItem(REFERENCE, REFERENCE_DESC); configBuilder.addPath(INPUT_VCF, false, INPUT_VCF_DESC); - configBuilder.addPaths(FRAG_LENGTHS_FILE, false, FRAG_LENGTHS_FILE_DESC); + configBuilder.addPaths(PREP_DIR, false, PREP_DIR_DESC); configBuilder.addInteger(MANUAL_REF_DEPTH, "Manually set ref depth for testing", 0); addOutputOptions(configBuilder); diff --git a/esvee/src/main/java/com/hartwig/hmftools/esvee/common/FileCommon.java b/esvee/src/main/java/com/hartwig/hmftools/esvee/common/FileCommon.java index bad9a427e8..7ee54eeaeb 100644 --- a/esvee/src/main/java/com/hartwig/hmftools/esvee/common/FileCommon.java +++ b/esvee/src/main/java/com/hartwig/hmftools/esvee/common/FileCommon.java @@ -26,6 +26,17 @@ public final class FileCommon public static final String ESVEE_FILE_ID = "esvee"; public static final String PREP_FILE_ID = "esvee.prep"; + public static final String PREP_DIR = "esvee_prep_dir"; + public static final String PREP_DIR_DESC = "Esvee prep input directory"; + + /* + public static final String ASSEMBLY_DIR = "esvee_assembly_dir"; + public static final String ASSEMBLY_DIR_DESC = "Esvee assembly input directory"; + + public static final String REF_DEPTH_DIR = "esvee_ref_depth_dir"; + public static final String REF_DEPTH_DIR_DESC = "Esvee ref depth input directory"; + */ + public static final String RAW_VCF_SUFFIX = "raw" + VCF_ZIP_EXTENSION; public static final String DEPTH_VCF_SUFFIX = "ref_depth" + VCF_ZIP_EXTENSION; @@ -33,9 +44,6 @@ public final class FileCommon public static final String JUNCTION_FILE = "junction_file"; public static final String JUNCTION_FILE_DESC = "Esvee Prep junction file, default is to match by sample name"; - public static final String FRAG_LENGTHS_FILE = "frag_length_file"; - public static final String FRAG_LENGTHS_FILE_DESC = "Esvee Prep fragment length distribution file, default is to match by sample name"; - public static final String FILE_NAME_DELIM = "."; public static final String REF_GENOME_IMAGE_EXTENSION = ".img";