Skip to content

Commit

Permalink
include recovered SVS in copy number segments
Browse files Browse the repository at this point in the history
  • Loading branch information
jonbaber committed Mar 28, 2019
1 parent 4bfc93d commit 0da0a76
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 58 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@

<amber.version>2.2</amber.version>
<cobalt.version>1.5</cobalt.version>
<purple.version>2.23</purple.version>
<purple.version>2.24</purple.version>

<immutables.version>2.4.4</immutables.version>
<htsjdk.version>2.12.0</htsjdk.version>
Expand Down
2 changes: 2 additions & 0 deletions purity-ploidy-estimator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,8 @@ Threads | Elapsed Time| CPU Time | Peak Mem


## Version History
- 2.24
- Recovered SVs were not being used for re-segmentation.
- 2.23
- Fixed bug where SV VCF samples were being sorted into alphabetical order. Now they will be in same order as input VCF.
- 2.22
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@
import java.io.File;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.TreeSet;
import java.util.function.Predicate;

import com.google.common.annotations.VisibleForTesting;
Expand All @@ -34,17 +32,16 @@
import com.hartwig.hmftools.common.variant.structural.StructuralVariant;
import com.hartwig.hmftools.common.variant.structural.StructuralVariantFactory;
import com.hartwig.hmftools.common.variant.structural.StructuralVariantType;
import com.hartwig.hmftools.purple.sv.VariantContextCollection;

import org.apache.logging.log4j.util.Strings;
import org.jetbrains.annotations.NotNull;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.index.tabix.TabixIndexCreator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.variantcontext.filter.PassingVariantFilter;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
Expand Down Expand Up @@ -85,36 +82,32 @@ class PurpleStructuralVariantSupplier {

private final String outputVCF;
private final Optional<VCFHeader> header;
private final TreeSet<VariantContext> variantContexts;
private final List<StructuralVariant> variants = Lists.newArrayList();
private boolean modified = true;
private final VariantContextCollection variants;

private int counter = 0;

PurpleStructuralVariantSupplier() {
header = Optional.empty();
variantContexts = new TreeSet<>();
outputVCF = Strings.EMPTY;
variants = new VariantContextCollection(Collections.emptyList());
}

PurpleStructuralVariantSupplier(@NotNull final String version, @NotNull final String templateVCF, @NotNull final String outputVCF) {
final VCFFileReader vcfReader = new VCFFileReader(new File(templateVCF), false);
this.outputVCF = outputVCF;
header = Optional.of(generateOutputHeader(version, vcfReader.getFileHeader()));
variantContexts = new TreeSet<>(new VCComparator(header.get().getSequenceDictionary()));
variants = new VariantContextCollection(header.get());

for (VariantContext context : vcfReader) {
variantContexts.add(context);
variants.add(context);
}

vcfReader.close();
}

public void addVariant(@NotNull final VariantContext variantContext) {
if (enabled()) {
if (variantContext.contains(variantContext)) {
variantContexts.remove(variantContext);
variantContexts.add(variantContext);
}
variants.add(variantContext);
}
}

Expand All @@ -124,14 +117,13 @@ public void inferMissingVariant(@NotNull final List<PurpleCopyNumber> copyNumber
PurpleCopyNumber copyNumber = copyNumbers.get(i);
if (copyNumber.segmentStartSupport() == SegmentSupport.NONE) {
final PurpleCopyNumber prev = copyNumbers.get(i - 1);
variantContexts.add(infer(copyNumber, prev));
variants.add(infer(copyNumber, prev));
}
}
}
}

public int removeLowVAFSingles(@NotNull final PurityAdjuster purityAdjuster, @NotNull final List<PurpleCopyNumber> copyNumbers) {
int removed = 0;
final ListMultimap<Chromosome, PurpleCopyNumber> copyNumberMap = Multimaps.fromRegions(copyNumbers);

final StructuralVariantLegCopyNumberChangeFactory copyNumberChangeFactory =
Expand All @@ -140,27 +132,23 @@ public int removeLowVAFSingles(@NotNull final PurityAdjuster purityAdjuster, @No
final StructuralVariantLegPloidyFactory<PurpleCopyNumber> ploidyFactory =
new StructuralVariantLegPloidyFactory<>(purityAdjuster, PurpleCopyNumber::averageTumorCopyNumber);

final Iterator<VariantContext> iterator = variantContexts.iterator();
while (iterator.hasNext()) {
final VariantContext variantContext = iterator.next();
final Predicate<VariantContext> removePredicate = variantContext -> {
if (!isRecovered(variantContext)) {
final StructuralVariantFactory factory = new StructuralVariantFactory(new PassingVariantFilter());
factory.addVariantContext(variantContext);
final List<StructuralVariant> variants = factory.results();
if (!variants.isEmpty()) {
final StructuralVariant variant = variants.get(0);
if (variant.type() == StructuralVariantType.SGL && !isLinked(variant)) {
if (filter(copyNumberChangeFactory, ploidyFactory.create(variant, copyNumberMap))) {
iterator.remove();
removed++;
modified = true;
}
return filter(copyNumberChangeFactory, ploidyFactory.create(variant, copyNumberMap));
}
}
}
}

return removed;
return false;
};

return variants.remove(removePredicate);
}

@NotNull
Expand Down Expand Up @@ -207,31 +195,32 @@ public void write(@NotNull final PurityAdjuster purityAdjuster, @NotNull final L
}
}

private TreeSet<VariantContext> enriched(@NotNull final PurityAdjuster purityAdjuster,
@NotNull
private Iterable<VariantContext> enriched(@NotNull final PurityAdjuster purityAdjuster,
@NotNull final List<PurpleCopyNumber> copyNumbers) {
assert (header.isPresent());

final StructuralVariantFactory svFactory = new StructuralVariantFactory(x -> true);
variantContexts.forEach(svFactory::addVariantContext);
variants.forEach(svFactory::addVariantContext);

final CopyNumberEnrichedStructuralVariantFactory svEnricher =
new CopyNumberEnrichedStructuralVariantFactory(purityAdjuster, Multimaps.fromRegions(copyNumbers));

final TreeSet<VariantContext> enrichedContexts = new TreeSet<>(new VCComparator(header.get().getSequenceDictionary()));
final VariantContextCollection enrichedCollection = new VariantContextCollection(header.get());
for (EnrichedStructuralVariant enrichedSV : svEnricher.enrich(svFactory.results())) {
final VariantContext startContext = enrichedSV.startContext();
if (startContext != null) {
enrichedContexts.add(enrich(enrichedSV, startContext, false));
enrichedCollection.add(enrich(enrichedSV, startContext, false));
}

final VariantContext endContext = enrichedSV.endContext();
if (endContext != null) {
enrichedContexts.add(enrich(enrichedSV, endContext, true));
enrichedCollection.add(enrich(enrichedSV, endContext, true));
}
}

enrichedContexts.addAll(svFactory.unmatched());
return enrichedContexts;
svFactory.unmatched().forEach(enrichedCollection::add);
return enrichedCollection;
}

@NotNull
Expand Down Expand Up @@ -279,16 +268,7 @@ private VariantContext enrich(@NotNull final EnrichedStructuralVariant variant,

@NotNull
public List<StructuralVariant> variants() {
if (modified) {
modified = false;
final StructuralVariantFactory factory = new StructuralVariantFactory(new PassingVariantFilter());
variantContexts.forEach(factory::addVariantContext);

variants.clear();
variants.addAll(factory.results());
}

return variants;
return variants.passingVariants();
}

@NotNull
Expand Down Expand Up @@ -342,20 +322,6 @@ private static boolean isRecovered(@NotNull VariantContext variantContext) {
return variantContext.hasAttribute(StructuralVariantFactory.RECOVERED);
}

private class VCComparator extends VariantContextComparator {

VCComparator(final SAMSequenceDictionary dictionary) {
super(dictionary);
}

@Override
public int compare(final VariantContext firstVariantContext, final VariantContext secondVariantContext) {
int positionResult = super.compare(firstVariantContext, secondVariantContext);

return positionResult == 0 ? firstVariantContext.getID().compareTo(secondVariantContext.getID()) : positionResult;
}
}

private boolean enabled() {
return header.isPresent();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package com.hartwig.hmftools.purple.sv;

import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import java.util.function.Predicate;

import com.google.common.collect.Lists;
import com.hartwig.hmftools.common.variant.structural.StructuralVariant;
import com.hartwig.hmftools.common.variant.structural.StructuralVariantFactory;

import org.jetbrains.annotations.NotNull;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.variantcontext.filter.PassingVariantFilter;
import htsjdk.variant.vcf.VCFHeader;

public class VariantContextCollection implements Iterable<VariantContext> {

private final TreeSet<VariantContext> variantContexts;
private final List<StructuralVariant> variants = Lists.newArrayList();

private boolean modified;

public VariantContextCollection(@NotNull final VCFHeader header) {
variantContexts = new TreeSet<>(new VCComparator(header.getSequenceDictionary()));
}

public VariantContextCollection(@NotNull final List<String> contigs) {
variantContexts = new TreeSet<>(new VCComparator(contigs));
}

public void add(@NotNull final VariantContext variantContext) {
modified = true;
if (variantContext.contains(variantContext)) {
variantContexts.remove(variantContext);
variantContexts.add(variantContext);
}
}

public int remove(@NotNull final Predicate<VariantContext> removePredicate) {
int removed = 0;
final Iterator<VariantContext> iterator = variantContexts.iterator();
while (iterator.hasNext()) {
final VariantContext variantContext = iterator.next();
if (removePredicate.test(variantContext)) {
iterator.remove();
removed++;
modified = true;
}
}
return removed;
}

@NotNull
public List<StructuralVariant> passingVariants() {
if (modified) {
modified = false;
final StructuralVariantFactory factory = new StructuralVariantFactory(new PassingVariantFilter());
variantContexts.forEach(factory::addVariantContext);

variants.clear();
variants.addAll(factory.results());
}

return variants;
}

@NotNull
@Override
public Iterator<VariantContext> iterator() {
return variantContexts.iterator();
}

private class VCComparator extends VariantContextComparator {

VCComparator(final List<String> contigs) {
super(contigs);
}

VCComparator(final SAMSequenceDictionary dictionary) {
super(dictionary);
}

@Override
public int compare(final VariantContext firstVariantContext, final VariantContext secondVariantContext) {
int positionResult = super.compare(firstVariantContext, secondVariantContext);

return positionResult == 0 ? firstVariantContext.getID().compareTo(secondVariantContext.getID()) : positionResult;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package com.hartwig.hmftools.purple.sv;

import static org.junit.Assert.assertEquals;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

import org.jetbrains.annotations.NotNull;
import org.junit.Before;
import org.junit.Test;

import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderVersion;

public class VariantContextCollectionTest {
private static final String SAMPLE = "sample";

private VCFCodec codec;

@Before
public void setup() {
codec = createTestCodec();
}


@Test
public void testAddSetsModifiedFlag() {
final VariantContextCollection victim = new VariantContextCollection(Lists.newArrayList("1"));
assertEquals(0, victim.passingVariants().size());

victim.add(codec.decode("1\t192614842\tgridss14_291648b\tT\tTCTCTACACAAG.\t2076.59\tPASS\tSVTYPE=BND\tGT\t./."));
assertEquals(1, victim.passingVariants().size());
}

@NotNull
private static VCFCodec createTestCodec() {
VCFCodec codec = new VCFCodec();
VCFHeader header = new VCFHeader(Sets.newHashSet(), Sets.newHashSet(SAMPLE));
codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
return codec;
}
}

0 comments on commit 0da0a76

Please sign in to comment.