Skip to content

Commit

Permalink
[bugfix] Fix an issue with cbcl parsing when barcodes files are in a …
Browse files Browse the repository at this point in the history
…different directory from basecalls. (#1672)

* [bugfix] Fix an issue with cbcl parsing when barcodes files are in a different directory from basecalls.
* Add readability assertion and throw an exception if no barcode files are found.
* Check nonNull barcodesFile size.
  • Loading branch information
Jay Carey authored Apr 28, 2021
1 parent 380891c commit 0fd4218
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,7 @@
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.*;

import static htsjdk.samtools.util.CollectionUtil.makeList;
import static htsjdk.samtools.util.CollectionUtil.makeSet;
Expand Down Expand Up @@ -93,6 +83,15 @@ public class IlluminaDataProviderFactory {
* basecallDirectory holds QSeqs or bcls *
*/
private final File basecallDirectory;

/**
* barcodesDirectory holds the barcode files generated by ExtractIlluminaBarcodes; falls back to basecallDirectory when null
*/
private final File barcodesDirectory;

/**
* The lane to read
*/
private final int lane;

/**
Expand Down Expand Up @@ -152,6 +151,11 @@ public IlluminaDataProviderFactory(final File basecallDirectory, File barcodesDi
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final Set<IlluminaDataType> dataTypes) {
this.basecallDirectory = basecallDirectory;
this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;
if (barcodesDirectory != null) {
this.barcodesDirectory = barcodesDirectory;
} else {
this.barcodesDirectory = basecallDirectory;
}

this.lane = lane;
/* The types of data that will be returned by any IlluminaDataProviders created by this factory.
Expand Down Expand Up @@ -238,7 +242,7 @@ public BaseIlluminaDataProvider makeDataProvider(List<Integer> requestedTiles) {
}
}
if (IlluminaFileUtil.hasCbcls(basecallDirectory, lane)) {
return new NewIlluminaDataProvider(outputMapping, basecallDirectory, lane, requestedTiles);
return new NewIlluminaDataProvider(outputMapping, basecallDirectory, barcodesDirectory, lane, requestedTiles);
} else {
final Map<IlluminaParser, Set<IlluminaDataType>> parsersToDataType = new HashMap<>();
for (final Map.Entry<SupportedIlluminaFormat, Set<IlluminaDataType>> fmToDt : formatToDataTypes.entrySet()) {
Expand Down
21 changes: 18 additions & 3 deletions src/main/java/picard/illumina/parser/NewIlluminaDataProvider.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
import picard.illumina.parser.readers.LocsFileReader;

import java.io.File;
import java.util.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

Expand All @@ -31,11 +37,14 @@ class NewIlluminaDataProvider extends BaseIlluminaDataProvider {
*
* @param outputMapping Mapping of reads types to be output.
* @param basecallDirectory The baseCalls directory of a complete Illumina directory.
* @param barcodesDirectory The directory containing the barcode files created by ExtractIlluminaBarcodes.
* @param lane The lane to provide data for.
* @param requestedTiles The list of tiles that data is requested for.
*/
NewIlluminaDataProvider(final OutputMapping outputMapping,
final File basecallDirectory, final int lane, List<Integer> requestedTiles) {
final File basecallDirectory,
final File barcodesDirectory,
final int lane, List<Integer> requestedTiles) {
super(lane, outputMapping);
requestedTiles.stream().sorted(TILE_NUMBER_COMPARATOR).forEach(tileOrder::add);
currentTile = tileOrder.first();
Expand Down Expand Up @@ -66,7 +75,13 @@ class NewIlluminaDataProvider extends BaseIlluminaDataProvider {
//barcodes
final Pattern barcodeRegex = Pattern.compile(ParameterizedFileUtil.escapePeriods(
ParameterizedFileUtil.makeBarcodeRegex(lane)));
final File[] barcodeFiles = getTiledFiles(basecallDirectory, barcodeRegex);

final File[] barcodeFiles = getTiledFiles(barcodesDirectory, barcodeRegex);
if (Arrays.stream(barcodeFiles).noneMatch(Objects::nonNull)) {
throw new PicardException("No barcode files found in the barcodesDirectory " + barcodesDirectory.getAbsolutePath());
}

IOUtil.assertFilesAreReadable(Arrays.asList(barcodeFiles));
this.barcodeFileMap = new HashMap<>();
for (File barcodeFile : barcodeFiles) {
barcodeFileMap.put(fileToTile(barcodeFile.getName()), new BarcodeFileReader(barcodeFile));
Expand Down
49 changes: 30 additions & 19 deletions src/test/java/picard/illumina/IlluminaBasecallsToSamTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
private static final File TEST_DATA_DIR_WITH_4M_INDEX = new File(ILLUMINA_TEST_DIR, "25T8B25T/sams_with_4M");
private static final File TEST_DATA_DIR_WITH_4M4M_INDEX = new File(ILLUMINA_TEST_DIR, "25T8B25T/sams_with_4M4M");
private static final File TEST_DATA_DIR_WITH_CBCLS = new File(ILLUMINA_TEST_DIR, "151T8B8B151T_cbcl/Data/Intensities/BaseCalls");
private static final File TEST_DATA_BARCODES_DIR_WITH_CBCLS = new File(TEST_DATA_DIR_WITH_CBCLS, "barcodes");
private static final File DUAL_CBCL_TEST_DATA_DIR = new File(ILLUMINA_TEST_DIR, "151T8B8B151T_cbcl/sams");
private static final File TEST_DATA_HISEQX_SINGLE_LOCS = new File(ILLUMINA_TEST_DIR, "25T8B8B25T_hiseqx/Data/Intensities/BaseCalls");
private static final File HISEQX_TEST_DATA_DIR = new File(ILLUMINA_TEST_DIR, "25T8B8B25T_hiseqx/sams");
Expand Down Expand Up @@ -176,30 +177,32 @@ public Object[][] multiplexedData() {
@Test(dataProvider = "multiplexedData")
public void testMultiplexed(final boolean includeBcInHeader, final ClusterDataToSamConverter.PopulateBarcode populateBarcode,
final boolean includeBarcodeQuality, final File testDataDir) throws Exception {
runStandardTest(new int[]{1}, "multiplexedBarcode.", "library.params", 1, "25T8B25T", BASECALLS_DIR, testDataDir, null, includeBcInHeader, populateBarcode, includeBarcodeQuality);
runStandardTest(new int[]{1}, "multiplexedBarcode.", "library.params", 1, "25T8B25T", BASECALLS_DIR, BASECALLS_DIR, testDataDir, null, includeBcInHeader, populateBarcode, includeBarcodeQuality);
}

@DataProvider
public Object[][] variousConfigurationsData() {
return new Object[][]{
{"multiplexedBarcode.", "library.params", 1, "25T8B25T", BASECALLS_DIR, new File(TEST_DATA_DIR.getParentFile(),"sams_with_DS"), null, new int[]{1}},
{"multiplexedBarcode.", "library.params", 1, "25T8B25T", BASECALLS_DIR, TEST_DATA_DIR, null, new int[]{1}},
{"multiplexedBarcode.", "library.params", 1, "25T8B4M21T", BASECALLS_DIR, TEST_DATA_DIR_WITH_4M_INDEX, null, new int[]{1}},
{"multiplexedBarcode2.", "library.params", 1, "25T8B4M4M17T", BASECALLS_DIR, TEST_DATA_DIR_WITH_4M4M_INDEX, null, new int[]{1}},
{"singleBarcodeAltName.", "multiplexed_positive_rgtags.params", 1, "25T8B25T", BASECALLS_DIR, TEST_DATA_DIR, null, new int[]{1}},
{"dualBarcode.", "library_double.params", 2, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_TEST_DATA_DIR, null, new int[]{1}},
{"cbclConvert.", "library_double.params", 2, "151T8B8B151T", TEST_DATA_DIR_WITH_CBCLS, DUAL_CBCL_TEST_DATA_DIR, null, new int[]{1}},
{"hiseqxSingleLocs.", "library_double.params", 2, "25T8B8B25T", TEST_DATA_HISEQX_SINGLE_LOCS, HISEQX_TEST_DATA_DIR, null, new int[]{1}},
{"hiseqxSingleLocs.", "library_double.params", 2, "25T8B8B25T", TEST_DATA_HISEQX_SINGLE_LOCS, HISEQX_TEST_DATA_DIR, null, new int[]{1}},
{"dualBarcode.", "library_double.params", 2, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_TEST_DATA_DIR, 1101, new int[]{1}},
{"multilane.", "library_double.params", 2, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_TEST_DATA_DIR, null, new int[]{1,2}},
{"cbclConvert.", "library_double.params", 2, "151T8B8B151T", TEST_DATA_DIR_WITH_CBCLS, DUAL_CBCL_TEST_DATA_DIR, 1102, new int[]{1}}
{"multiplexedBarcode.", "library.params", 1, "25T8B25T", BASECALLS_DIR, BASECALLS_DIR, new File(TEST_DATA_DIR.getParentFile(),"sams_with_DS"), null, new int[]{1}},
{"multiplexedBarcode.", "library.params", 1, "25T8B25T", BASECALLS_DIR, BASECALLS_DIR, TEST_DATA_DIR, null, new int[]{1}},
{"multiplexedBarcode.", "library.params", 1, "25T8B4M21T", BASECALLS_DIR, BASECALLS_DIR, TEST_DATA_DIR_WITH_4M_INDEX, null, new int[]{1}},
{"multiplexedBarcode2.", "library.params", 1, "25T8B4M4M17T", BASECALLS_DIR, BASECALLS_DIR, TEST_DATA_DIR_WITH_4M4M_INDEX, null, new int[]{1}},
{"singleBarcodeAltName.", "multiplexed_positive_rgtags.params", 1, "25T8B25T", BASECALLS_DIR, BASECALLS_DIR, TEST_DATA_DIR, null, new int[]{1}},
{"dualBarcode.", "library_double.params", 2, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_BASECALLS_DIR, DUAL_TEST_DATA_DIR, null, new int[]{1}},
{"cbclConvert.", "library_double.params", 2, "151T8B8B151T", TEST_DATA_DIR_WITH_CBCLS, TEST_DATA_DIR_WITH_CBCLS, DUAL_CBCL_TEST_DATA_DIR, null, new int[]{1}},
{"hiseqxSingleLocs.", "library_double.params", 2, "25T8B8B25T", TEST_DATA_HISEQX_SINGLE_LOCS, TEST_DATA_HISEQX_SINGLE_LOCS, HISEQX_TEST_DATA_DIR, null, new int[]{1}},
{"hiseqxSingleLocs.", "library_double.params", 2, "25T8B8B25T", TEST_DATA_HISEQX_SINGLE_LOCS, TEST_DATA_HISEQX_SINGLE_LOCS, HISEQX_TEST_DATA_DIR, null, new int[]{1}},
{"dualBarcode.", "library_double.params", 2, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_BASECALLS_DIR,DUAL_TEST_DATA_DIR, 1101, new int[]{1}},
{"multilane.", "library_double.params", 2, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_BASECALLS_DIR, DUAL_TEST_DATA_DIR, null, new int[]{1,2}},
{"cbclConvert.", "library_double.params", 2, "151T8B8B151T", TEST_DATA_DIR_WITH_CBCLS, TEST_DATA_DIR_WITH_CBCLS, DUAL_CBCL_TEST_DATA_DIR, 1102, new int[]{1}},
// Test barcodes in a separate directory
{"cbclConvert.", "library_double.params", 2, "151T8B8B151T", TEST_DATA_DIR_WITH_CBCLS, TEST_DATA_BARCODES_DIR_WITH_CBCLS, DUAL_CBCL_TEST_DATA_DIR, null, new int[]{1}},
};
}

@Test(dataProvider = "variousConfigurationsData")
public void testVariousConfigurations(final String jobName, final String libraryParamsFile, final int nColumnFields, final String cigar, final File baseCallingDir, final File samDir, final Integer tile, final int[] lanes) throws Exception {
runStandardTest(lanes, jobName, libraryParamsFile, nColumnFields, cigar, baseCallingDir, samDir, tile, false, ClusterDataToSamConverter.PopulateBarcode.ORPHANS_ONLY, false);
public void testVariousConfigurations(final String jobName, final String libraryParamsFile, final int nColumnFields, final String cigar, final File baseCallingDir, final File barcodesDir, final File samDir, final Integer tile, final int[] lanes) throws Exception {
runStandardTest(lanes, jobName, libraryParamsFile, nColumnFields, cigar, baseCallingDir, barcodesDir, samDir, tile, false, ClusterDataToSamConverter.PopulateBarcode.ORPHANS_ONLY, false);
}

/**
Expand All @@ -209,7 +212,7 @@ public void testVariousConfigurations(final String jobName, final String library
public void testCorruptDataReturnCode() throws Exception {
boolean exceptionThrown = false;
try {
runStandardTest(new int[]{9}, "dualBarcode.", "negative_test.params", 2, "30T8B8B", BASECALLS_DIR, TEST_DATA_DIR, null, false, ClusterDataToSamConverter.PopulateBarcode.ORPHANS_ONLY, false);
runStandardTest(new int[]{9}, "dualBarcode.", "negative_test.params", 2, "30T8B8B", BASECALLS_DIR, BASECALLS_DIR, TEST_DATA_DIR, null, false, ClusterDataToSamConverter.PopulateBarcode.ORPHANS_ONLY, false);
} catch (Throwable e) {
exceptionThrown = true;
} finally {
Expand All @@ -229,9 +232,16 @@ public void testCorruptDataReturnCode() throws Exception {
* @param populateBarcode
* @param includeBarcodeQuality
* @throws Exception
*/
private void runStandardTest(final int[] lanes, final String jobName, final String libraryParamsFile,
final int concatNColumnFields, final String readStructure,
final File baseCallsDir, final File testDataDir, final Integer tile, final boolean includeBcInHeader, final ClusterDataToSamConverter.PopulateBarcode populateBarcode,
private void runStandardTest(final int[] lanes, final String jobName,
final String libraryParamsFile,
final int concatNColumnFields,
final String readStructure,
final File baseCallsDir,
final File barcodesDir,
final File testDataDir,
final Integer tile,
final boolean includeBcInHeader,
final ClusterDataToSamConverter.PopulateBarcode populateBarcode,
final boolean includeBarcodeQuality) throws Exception {
for (final boolean sort : new boolean[]{true, false}) {
final Path outputDir = Files.createTempDirectory(jobName + sort);
Expand Down Expand Up @@ -267,6 +277,7 @@ private void runStandardTest(final int[] lanes, final String jobName, final Stri

List<String> args = new ArrayList<>();
args.add("BASECALLS_DIR=" + baseCallsDir);
args.add("BARCODES_DIR=" + barcodesDir);
args.add("RUN_BARCODE=HiMom");
args.add("READ_STRUCTURE=" + readStructure);
args.add("SEQUENCING_CENTER=BI");
Expand Down
Binary file not shown.
Binary file not shown.

0 comments on commit 0fd4218

Please sign in to comment.