Skip to content

Commit

Permalink
Picard modifications to support flow based sequencing (broadinstitute…
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyasoifer authored Jun 29, 2022
1 parent 09183c8 commit c97fce5
Show file tree
Hide file tree
Showing 35 changed files with 1,738 additions and 167 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ report
jacoco.data
.gradle
build
*.swp
14 changes: 13 additions & 1 deletion src/main/java/picard/analysis/AlignmentSummaryMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,12 @@ public enum Category {UNPAIRED, FIRST_OF_PAIR, SECOND_OF_PAIR, PAIR}
/** The standard deviation of the read lengths. Computed using all read lengths including clipped bases. */
public double SD_READ_LENGTH;

/** The median read length. Computed using all read lengths including clipped bases. */
/**
* The median read length of the set of reads examined. When looking at the data for a single lane with
* equal length reads this number is just the read length. When looking at data for merged lanes with
* differing read lengths this is the median read length of all reads. Computed using all bases in reads,
* including clipped bases.
*/
public double MEDIAN_READ_LENGTH;

/**
Expand All @@ -155,6 +160,13 @@ public enum Category {UNPAIRED, FIRST_OF_PAIR, SECOND_OF_PAIR, PAIR}
/** The maximum read length. Computed using all read lengths including clipped bases. */
public double MAX_READ_LENGTH;

/**
* The mean aligned read length of the set of reads examined. When looking at the data for a single lane with
* equal length reads this number is just the read length. When looking at data for merged lanes with
* differing read lengths this is the mean read length of all reads. Clipped bases are not counted.
*/
public double MEAN_ALIGNED_READ_LENGTH;

/**
* The number of aligned reads whose mate pair was also aligned to the reference.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ public void finish() {
metrics.PCT_PF_READS_ALIGNED = MathUtil.divide(metrics.PF_READS_ALIGNED, (double) metrics.PF_READS);
metrics.PCT_READS_ALIGNED_IN_PAIRS = MathUtil.divide(metrics.READS_ALIGNED_IN_PAIRS, (double) metrics.PF_READS_ALIGNED);
metrics.PCT_PF_READS_IMPROPER_PAIRS = MathUtil.divide(metrics.PF_READS_IMPROPER_PAIRS, (double) metrics.PF_READS_ALIGNED);
metrics.MEAN_ALIGNED_READ_LENGTH = alignedReadLengthHistogram.getMean();
metrics.STRAND_BALANCE = MathUtil.divide(numPositiveStrand, (double) metrics.PF_READS_ALIGNED);
metrics.PCT_CHIMERAS = MathUtil.divide(chimeras, (double) chimerasDenominator);
metrics.PF_INDEL_RATE = MathUtil.divide(indels, (double) metrics.PF_ALIGNED_BASES);
Expand Down
108 changes: 100 additions & 8 deletions src/main/java/picard/analysis/CollectQualityYieldMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import picard.PicardException;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.DiagnosticsAndQCProgramGroup;
import picard.util.help.HelpConstants;
Expand Down Expand Up @@ -92,6 +92,9 @@ public class CollectQualityYieldMetrics extends SinglePassSamProgram {
"of bases if there are supplemental alignments in the input file.")
public boolean INCLUDE_SUPPLEMENTAL_ALIGNMENTS = false;

@Argument(doc = "If true, calculates flow-specific READ_LENGTH_AVG_Q metrics.")
public boolean FLOW_MODE = false;

/**
* Ensure that we get all reads regardless of alignment status.
*/
Expand All @@ -103,7 +106,7 @@ protected boolean usesNoRefReads() {
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
IOUtil.assertFileIsWritable(OUTPUT);
this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS);
this.collector = new QualityYieldMetricsCollector(USE_ORIGINAL_QUALITIES, INCLUDE_SECONDARY_ALIGNMENTS, INCLUDE_SUPPLEMENTAL_ALIGNMENTS, FLOW_MODE);
}

@Override
Expand Down Expand Up @@ -132,15 +135,30 @@ public static class QualityYieldMetricsCollector {
// of bases if there are supplemental alignments in the input file.
public final boolean includeSupplementalAlignments;

// If true collects RLQ25/RLQ30
private final boolean flowMode;
// The metrics to be accumulated
private final QualityYieldMetrics metrics = new QualityYieldMetrics();
private final QualityYieldMetrics metrics;

public QualityYieldMetricsCollector(final boolean useOriginalQualities,
final boolean includeSecondaryAlignments,
final boolean includeSupplementalAlignments){
this(useOriginalQualities, includeSecondaryAlignments, includeSupplementalAlignments, false);
}

public QualityYieldMetricsCollector(final boolean useOriginalQualities,
final boolean includeSecondaryAlignments,
final boolean includeSupplementalAlignments) {
final boolean includeSupplementalAlignments,
final boolean flowMode) {
this.useOriginalQualities = useOriginalQualities;
this.includeSecondaryAlignments = includeSecondaryAlignments;
this.includeSupplementalAlignments = includeSupplementalAlignments;
this.flowMode = flowMode;
if (flowMode){
this.metrics = new QualityYieldMetricsFlow(useOriginalQualities);
} else {
this.metrics = new QualityYieldMetrics(useOriginalQualities);
}
}

public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {
Expand Down Expand Up @@ -187,12 +205,15 @@ public void acceptRecord(final SAMRecord rec, final ReferenceSequence ref) {
}
}
}

if (flowMode) {
((QualityYieldMetricsFlow)metrics).addRecordToHistogramGenerator(rec);
}
}

public void finish() {
metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20;
metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20;

metrics.calculateDerivedFields();
}

Expand All @@ -201,12 +222,69 @@ public void addMetricsToFile(final MetricsFile<QualityYieldMetrics, Integer> met
}
}

public static class QualityYieldMetricsFlow extends QualityYieldMetrics{
/** The length of the longest interval on the reads where the average quality per-base is above (Q30) */
@NoMergingIsDerived
public long READ_LENGTH_AVG_Q_ABOVE_30 = 0;

/** The length of the longest interval on the reads where the average quality per-base is above (Q25) */
@NoMergingIsDerived
public long READ_LENGTH_AVG_Q_ABOVE_25 = 0;

@MergingIsManual
protected final HistogramGenerator histogramGenerator;

public QualityYieldMetricsFlow(){
this(false);
}

public QualityYieldMetricsFlow(final boolean useOriginalBaseQualities){

super(useOriginalBaseQualities);
histogramGenerator=new HistogramGenerator(useOriginalQualities);
}

public QualityYieldMetricsFlow(final boolean useOriginalBaseQualities, final HistogramGenerator hg) {
histogramGenerator=hg;
}

@Override
public void calculateDerivedFields() {
super.calculateDerivedFields();
this.READ_LENGTH_AVG_Q_ABOVE_25 = histogramGenerator.calculateLQ(25, 1, 5);
this.READ_LENGTH_AVG_Q_ABOVE_30 = histogramGenerator.calculateLQ(30, 1, 5);
}

@Override
public MergeableMetricBase merge(final MergeableMetricBase other) {
if (!(other instanceof QualityYieldMetricsFlow)){
throw new PicardException("Only objects of the same type can be merged");
}
this.histogramGenerator.addOtherHistogramGenerator(((QualityYieldMetricsFlow)other).histogramGenerator);
super.merge(other);
return this;
}

protected void addRecordToHistogramGenerator(final SAMRecord rec) {
histogramGenerator.addRecord(rec);
}

}
/**
* A set of metrics used to describe the general quality of a BAM file
*/
@DocumentedFeature(groupName = HelpConstants.DOC_CAT_METRICS, summary = HelpConstants.DOC_CAT_METRICS_SUMMARY)
public static class QualityYieldMetrics extends MergeableMetricBase {

public QualityYieldMetrics() {
this(false);
}

public QualityYieldMetrics(final boolean useOriginalQualities) {
super();
this.useOriginalQualities = useOriginalQualities;
}

/**
* The total number of reads in the input file
*/
Expand All @@ -220,7 +298,7 @@ public static class QualityYieldMetrics extends MergeableMetricBase {
public long PF_READS = 0;

/**
* The average read length of all the reads (will be fixed for a lane)
* The average read length of all the reads
*/
@NoMergingIsDerived
public int READ_LENGTH = 0;
Expand Down Expand Up @@ -273,12 +351,26 @@ public static class QualityYieldMetrics extends MergeableMetricBase {
@MergeByAdding
public long PF_Q20_EQUIVALENT_YIELD = 0;

@MergeByAssertEquals
protected final boolean useOriginalQualities;

@Override
public void calculateDerivedFields() {
super.calculateDerivedFields();

this.READ_LENGTH = this.TOTAL_READS == 0 ? 0 : (int) (this.TOTAL_BASES / this.TOTAL_READS);
}
}

@Override
public MergeableMetricBase merge(final MergeableMetricBase other) {
if (!(other instanceof QualityYieldMetrics)){
throw new PicardException("Only objects of the same type can be merged");
}

final QualityYieldMetrics otherMetric = (QualityYieldMetrics) other;

super.merge(otherMetric);
calculateDerivedFields();
return this;
}
}
}
Loading

0 comments on commit c97fce5

Please sign in to comment.