diff --git a/src/main/java/htsjdk/variant/vcf/VCFCodec.java b/src/main/java/htsjdk/variant/vcf/VCFCodec.java index 3ebf47c02a..844a07688a 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/VCFCodec.java @@ -104,7 +104,7 @@ protected void reportDuplicateInfoKeyValue(final String duplicateKey, final Stri * @return a mapping of keys to objects */ protected Map parseInfo(String infoField) { - if (infoField.indexOf(' ') != -1) { + if ((infoField.indexOf(' ') != -1) && !version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) { generateException( String.format("Whitespace is not allowed in the INFO field in VCF version %s: %s", version == null ? diff --git a/src/test/java/htsjdk/variant/vcf/VCFCodec42FeaturesTest.java b/src/test/java/htsjdk/variant/vcf/VCFCodec42FeaturesTest.java index 9f39228d5a..bbc66d58bc 100644 --- a/src/test/java/htsjdk/variant/vcf/VCFCodec42FeaturesTest.java +++ b/src/test/java/htsjdk/variant/vcf/VCFCodec42FeaturesTest.java @@ -1,11 +1,15 @@ package htsjdk.variant.vcf; import htsjdk.HtsjdkTest; +import htsjdk.samtools.util.Tuple; +import htsjdk.tribble.TribbleException; +import htsjdk.variant.variantcontext.VariantContext; import org.testng.Assert; import org.testng.annotations.Test; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.List; public class VCFCodec42FeaturesTest extends HtsjdkTest { private static final Path TEST_PATH = Paths.get("src/test/resources/htsjdk/variant/"); @@ -21,4 +25,19 @@ public void testV42PedigreeParsing() { Assert.assertEquals(vcf42PedigreeLine.getClass(), VCFHeaderLine.class); Assert.assertEquals(vcf42PedigreeLine.getValue(), ""); } + + @Test(expectedExceptions = TribbleException.class) + public void testVCF42RejectsInfoFieldWithSpaces() { + // 1st variant has an info field with a value containing an embedded space + final Path infoSpace42File = TEST_PATH.resolve("infoSpace42.vcf"); + + try ( final VCFFileReader vcfReader = new VCFFileReader(infoSpace42File, false) ){ + for (final VariantContext vc : vcfReader) { + + } + } catch (final TribbleException e) { + Assert.assertTrue(e.getMessage().contains("Whitespace is not allowed")); + throw e; + } + } } diff --git a/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java index 8dbf6dd30d..fd9a4ffadb 100644 --- a/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java +++ b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java @@ -1,5 +1,7 @@ package htsjdk.variant.vcf; +import htsjdk.beta.io.IOPathUtils; +import htsjdk.io.IOPath; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.Interval; @@ -19,8 +21,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; +import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; @@ -146,7 +147,7 @@ public void testVCF43PercentEncoding(final Path testFile, int ignored) { Assert.assertEquals(vc.getContig(), "1"); Assert.assertEquals(vc.getStart(), 327); // set=fil%3AteredInBoth - Assert.assertEquals(vc.getCommonInfo().getAttribute("set"), "fil:teredInBoth"); + Assert.assertEquals(vc.getCommonInfo().getAttribute("set"), "fil:teredIn Both"); } @Test(dataProvider="all43Files") @@ -203,6 +204,42 @@ public void testVCF43IndexRoundTripQuery(final Path testFile) throws IOException } } + @Test + public void testVCF43AcceptsInfoFieldWithSpaces() { + // 1st variant has an info field with a value containing an embedded space + final Path infoSpaceFile = TEST_PATH.resolve("infoSpace43.vcf"); + final Tuple> infoSpace43 = readEntireVCFIntoMemory(infoSpaceFile); + Assert.assertTrue(infoSpace43.b.get(0).getAttribute("set").toString().contains(" ")); + +// // also make sure it fails if we read in a VCF4.2 that contains a space in an info field +// final IOPath tmpVCF = IOPathUtils.createTempPath("testInfoSpace", FileExtensions.VCF); +// final Set headerLinesWithoutVersionLine = infoSpace43.a +// .getMetaDataInInputOrder() +// .stream() +// .filter(l -> !VCFHeaderVersion.isFormatString(l.getKey())).collect(Collectors.toSet()); +// final Set vcf42HeaderLines = new LinkedHashSet<>(headerLinesWithoutVersionLine); +// vcf42HeaderLines.add(VCFHeader.makeHeaderVersionLine(VCFHeaderVersion.VCF4_2)); +// final VCFHeader vcf42Header = new VCFHeader(vcf42HeaderLines, infoSpace43.a.getSampleNamesInOrder()); +// // if its not 4.3, the rest of the test isn't valid +// Assert.assertEquals(vcf42Header.getVCFHeaderVersion(), VCFHeaderVersion.VCF4_2); +// +// boolean testPasses = false; +// try (final VariantContextWriter writer = new VariantContextWriterBuilder() +// .setOutputFile(tmpVCF.toString()) +// .unsetOption(Options.INDEX_ON_THE_FLY) +// .build()) { +// writer.writeHeader(vcf42Header); +// writer.add(infoSpace43.b.get(0)); +// } +// try { +// readEntireVCFIntoMemory(tmpVCF.toPath()); +// } catch (final TribbleException e) { +// Assert.assertTrue(e.getMessage().contains("Whitespace is not allowed")); +// testPasses = true; +// } +// Assert.assertTrue(testPasses); + } + // // UTF8-specific tests // diff --git a/src/test/resources/htsjdk/variant/infoSpace42.vcf b/src/test/resources/htsjdk/variant/infoSpace42.vcf new file mode 100644 index 0000000000..089063193f --- /dev/null +++ b/src/test/resources/htsjdk/variant/infoSpace42.vcf @@ -0,0 +1,62 @@ +##fileformat=VCFv4.2 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19238 NA19239 NA19240 +1 327 . T <*> 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredIn Both GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 diff --git a/src/test/resources/htsjdk/variant/vcf43/infoSpace43.vcf b/src/test/resources/htsjdk/variant/vcf43/infoSpace43.vcf new file mode 100644 index 0000000000..9040c6af1f --- /dev/null +++ b/src/test/resources/htsjdk/variant/vcf43/infoSpace43.vcf @@ -0,0 +1,62 @@ +##fileformat=VCFv4.3 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19238 NA19239 NA19240 +1 327 . T <*> 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredIn Both GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99