diff --git a/CHANGELOG.md b/CHANGELOG.md index b4be00c..84732b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] + ## [0.4.0] ### Added - Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56)). - Added doi badge. - Added test to ensure EOF_BLOCK gets written. +- Added `isreversestrand`. +- Added `isfirstsegment`. +- Added `islastsegment`. ### Changed @@ -23,9 +27,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated to use [Automa](https://github.com/BioJulia/Automa.jl) v1 ([#65](https://github.com/BioJulia/XAM.jl/pull/65)). - Pointed the Unit Tests badge at the develop branch. - Pluralised flag. +- Renamed `ismateunmapped` to `isnextunmapped`. +- Renamed `isreverse` to `isreversecomplemented`. +- Renamed `isforward` to `isforwardstrand`. +- `ispositivestrand` aliases `isforwardstrand`. +- `isnegativestrand` aliases `isreversestrand`. +- Renamed `ismatereverse` to `isnextreversecomplemented`. +- `isread1` aliases `isfirstsegment`. +- `isread2` aliases `islastsegment`. ### Fixed - Updated hts-files.md ([#62](https://github.com/BioJulia/XAM.jl/pull/62)). +- Corrected the behaviour of `isprimaryalignment` with `isprimary`. + +### Removed +- Moved the functionality of `isprimary` into `isprimaryalignment`. + ## [0.3.1] @@ -33,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Upgraded to BioAlignments v3 ([#55](https://github.com/BioJulia/XAM.jl/pull/55)). + ## [0.3.0] - 2022-10-10 ## Added diff --git a/docs/src/man/hts-files.md b/docs/src/man/hts-files.md index 43e7ed7..a8a5f7e 100644 --- a/docs/src/man/hts-files.md +++ b/docs/src/man/hts-files.md @@ -112,7 +112,7 @@ The `XAM` package supports the following accessors for `SAM.Record` types. ```@docs XAM.SAM.flags XAM.SAM.ismapped -XAM.SAM.isprimary +XAM.SAM.isprimaryalignment XAM.SAM.refname XAM.SAM.position XAM.SAM.rightposition @@ -137,7 +137,7 @@ The `XAM` package supports the following accessors for `BAM.Record` types. ```@docs XAM.BAM.flags XAM.BAM.ismapped -XAM.BAM.isprimary +XAM.BAM.isprimaryalignment XAM.BAM.refid XAM.BAM.refname XAM.BAM.reflen diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 6bacfd2..2120d01 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -7,7 +7,7 @@ using BioGenerics using GenomicFeatures using XAM.SAM import ..XAM: flags, XAMRecord, XAMReader, XAMWriter, - ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. + ismapped, isprimaryalignment, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. import BGZFStreams import BioAlignments diff --git a/src/flags.jl b/src/flags.jl index 96a371e..8e000a6 100644 --- a/src/flags.jl +++ b/src/flags.jl @@ -7,6 +7,7 @@ Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag being OR'd together. + The possible flags are: 0x0001 template having multiple segments in sequencing @@ -25,20 +26,24 @@ The possible flags are: function flags end + + + # Bitwise flag (or FLAG). for (name, bits, doc) in [ - (:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"), - (:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ), - (:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with FLAG_PROPER_PAIR" ), - (:MUNMAP, UInt16(0x008), "the mate is unmapped" ), - (:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ), - (:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ), - (:READ1, UInt16(0x040), "this is read1" ), - (:READ2, UInt16(0x080), "this is read2" ), - (:SECONDARY, UInt16(0x100), "not primary alignment" ), - (:QCFAIL, UInt16(0x200), "QC failure" ), - (:DUP, UInt16(0x400), "optical or PCR duplicate" ), - (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] + (:PAIRED, UInt16(0x001), "the segment is paired with other segments"), + (:PROPER_PAIR, UInt16(0x002), "the segment is in a template where all segments are properly aligned according to the aligner"), + (:UNMAPPED, UInt16(0x004), "the segment itself is unmapped; conflictive with FLAG_PROPER_PAIR"), + (:NEXT_UNMAPPED, UInt16(0x008), "the next segment in the template is unmapped"), + (:REVERSE, UInt16(0x010), "the *SEQ*uence is reverse complemented"), + (:NEXT_REVERSE, UInt16(0x020), "the *SEQ*uence of the next segment in the template is reverse complemented" ), + (:FIRST_SEGMENT, UInt16(0x040), "the segment is the first in the template"), + (:LAST_SEGMENT, UInt16(0x080), "the segment is last in the template"), + (:SECONDARY, UInt16(0x100), "not primary alignment"), + (:QCFAIL, UInt16(0x200), "QC failure"), + (:DUPLICATE, UInt16(0x400), "optical or PCR duplicate"), + (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment"), + ] @assert bits isa UInt16 "The bits must be of type UInt16." sym = Symbol("FLAG_", name) docstring = """ $sym @@ -54,7 +59,7 @@ end """ ispaired(record::XAMRecord)::Bool -Query whether the `record`'s template has multiple segments in sequencing. +Query whether the segment is in a template having multiple segments in sequencing. """ function ispaired(record::XAMRecord)::Bool return flags(record) & FLAG_PAIRED == FLAG_PAIRED @@ -63,7 +68,7 @@ end """ isproperpair(record::XAMRecord)::Bool -Query whether each segment of the `record`'s template properly aligned according to the aligner. +Query whether the segment is in a template where all segments are properly aligned according to the aligner. """ function isproperpair(record::XAMRecord)::Bool return flags(record) & PROPER_PAIR == PROPER_PAIR @@ -72,125 +77,145 @@ end """ isunmapped(record::XAMRecord)::Bool -Query whether the `record` is unmapped. +Query whether the segment is unmapped. """ function isunmapped(record::XAMRecord)::Bool - return flags(record) & FLAG_UNMAP == FLAG_UNMAP + return flags(record) & FLAG_UNMAPPED == FLAG_UNMAPPED end """ ismapped(record::XAMRecord)::Bool -Query whether the `record` is mapped. +Query whether the segment is mapped. """ function ismapped(record::XAMRecord)::Bool - # return flags(record) & FLAG_UNMAP == 0 - return isfilled(record) && (flags(record) & FLAG_UNMAP == 0) + # return flags(record) & FLAG_UNMAPPED == 0 + return isfilled(record) && (flags(record) & FLAG_UNMAPPED == 0) end """ - ismateunmapped(record::XAMRecord)::Bool + isnextunmapped(record::XAMRecord)::Bool -Query whether the `record`'s mate is unmapped. +Query whether the next segment in the template is unmapped. """ -function ismateunmapped(record::XAMRecord)::Bool - return flags(record) & FLAG_MUNMAP == FLAG_MUNMAP +function isnextunmapped(record::XAMRecord)::Bool + return flags(record) & FLAG_NEXT_UNMAPPED == FLAG_NEXT_UNMAPPED end """ isnextmapped(record::XAMRecord)::Bool -Test if the mate/next read of `record` is mapped. +Query whether the next segment in the template is mapped. """ function isnextmapped(record::XAMRecord)::Bool - return flags(record) & FLAG_MUNMAP == 0 + return flags(record) & FLAG_NEXT_UNMAPPED == 0 end """ isreverse(record::XAMRecord)::Bool -Query whether the `record` is mapped to the reverse strand. +Query whether the `record.SEQ`uence is reverse complemented. """ -function isreverse(record::XAMRecord)::Bool +function isreversecomplemented(record::XAMRecord)::Bool return flags(record) & FLAG_REVERSE == FLAG_REVERSE end """ isforward(record::XAMRecord)::Bool -Query whether the `record` is mapped to the forward strand. +Query whether the `record.SEQ`uence is mapped to the forward strand. """ -function isforward(record::XAMRecord)::Bool - return flags(record) & FLAG_REVERSE == 0 +function isforwardstrand(record::XAMRecord)::Bool + # return flags(record) & FLAG_REVERSE == 0 + return !isreversecomplemented(record) # Note: this is an interpretation of FLAG_REVERSE. end """ ispositivestrand(record::XAMRecord)::Bool -Query whether `record` is aligned to the positive strand. +Query whether *SEQ*uence is aligned to the positive strand. """ function ispositivestrand(record::XAMRecord)::Bool - return isforward(record) + return isforwardstrand(record) +end + +""" + isreversestrand(record::XAMRecord)::Bool + +Query whether *SEQ*uence is aligned to the reverse strand. +""" +function isreversestrand(record::XAMRecord)::Bool + return isreversecomplemented(record) # Note: this is an interpretation of FLAG_REVERSE. end """ ispositivestrand(record::XAMRecord)::Bool -Query whether `record` is aligned to the negative strand. +Query whether *SEQ*uence is aligned to the negative strand. """ function isnegativestrand(record::XAMRecord)::Bool - return isreverse(record) + return isreversestrand(record) +end + +""" + isnextreversecomplemented(record::XAMRecord)::Bool + +Query whether the next segment in the template is reverse complemented. +""" +function isnextreversecomplemented(record::XAMRecord)::Bool + return flags(record) & FLAG_NEXT_REVERSE == FLAG_NEXT_REVERSE end """ - ismatereverse(record::XAMRecord)::Bool + isfirstsegment(record::XAMRecord)::Bool -Query whether the `record`'s mate is mapped to the reverse strand. +Query whether the segemnt is first in the template. """ -function ismatereverse(record::XAMRecord)::Bool - return flags(record) & FLAG_MREVERSE == FLAG_MREVERSE +function isfirstsegment(record::XAMRecord)::Bool + return flags(record) & FLAG_FIRST_SEGMENT == FLAG_FIRST_SEGMENT end """ isread1(record::XAMRecord)::Bool -Query whether the `record` is read1. +From a paired-end sequencing point of view, query whether the read is read1. """ function isread1(record::XAMRecord)::Bool - return flags(record) & FLAG_READ1 == FLAG_READ1 + return isfirstsegment(record) +end + +""" + islastsegment(record::XAMRecord)::Bool + +Query whether the segemnt is last in the template. +""" +function islastsegment(record::XAMRecord)::Bool + return flags(record) & FLAG_LAST_SEGMENT == FLAG_LAST_SEGMENT end """ isread2(record::XAMRecord)::Bool -Query whether the `record` is read2. +From a paired-end sequencing point of view, query whether the read is read2. """ function isread2(record::XAMRecord)::Bool - return flags(record) & FLAG_READ2 == FLAG_READ2 + return islastsegment(record) end """ issecondaryalignment(record::XAMRecord)::Bool -Query whether the `record` is a secondary alignment. +Query whether the read is considered to be the secondary alignment. """ function issecondaryalignment(record::XAMRecord)::Bool return flags(record) & FLAG_SECONDARY == FLAG_SECONDARY end -""" - isprimaryalignment(record::XAMRecord)::Bool - -Query whether the `record` is the primary alignment. -""" -function isprimaryalignment(record::XAMRecord)::Bool - return flags(record) & FLAG_SECONDARY == 0 -end """ isqcfail(record::XAMRecord)::Bool -Query whether the `record` did not pass filters, such as platform/vendor quality controls. +Query whether the read failed filters, such as platform/vendor quality controls. """ function isqcfail(record::XAMRecord)::Bool return flags(record) & FLAG_QCFAIL == FLAG_QCFAIL @@ -199,28 +224,28 @@ end """ isduplicate(record::XAMRecord)::Bool -Query whether the `record` is a PCR or optical duplicate. +Query whether the read is a PCR or optical duplicate. """ function isduplicate(record::XAMRecord)::Bool - return flags(record) & FLAG_DUP == FLAG_DUP + return flags(record) & FLAG_DUPLICATE == FLAG_DUPLICATE end """ issupplementaryalignment(record::XAMRecord)::Bool -Query whether the `record` is a supplementary alignment. +Query whether the read alignment is considered to be a supplementary alignment. """ function issupplementaryalignment(record::XAMRecord)::Bool return flags(record) & FLAG_SUPPLEMENTARY == FLAG_SUPPLEMENTARY end """ - isprimary(record::XAMRecord)::Bool - -Query whether `record` is a primary line of the read. + isprimaryalignment(record::XAMRecord)::Bool -This is equivalent to `flags(record) & 0x900 == 0`. +Query whether the read alignment is considered to be the primary alignment. +This is primary line of the read and is equivalent to `flags(record) & 0x900 == 0`. """ -function isprimary(record::XAMRecord)::Bool +function isprimaryalignment(record::XAMRecord)::Bool + # return !issecondaryalignment(record) && !issupplementaryalignment(record) return flags(record) & 0x900 == 0 end diff --git a/src/sam/sam.jl b/src/sam/sam.jl index ad6e282..2c1718d 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -12,7 +12,7 @@ import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream import ..XAM: flags, XAMRecord, XAMReader, XAMWriter, - ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. + ismapped, isprimaryalignment, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. using Printf: @sprintf diff --git a/test/test_bam.jl b/test/test_bam.jl index 3bfa29d..ca515aa 100644 --- a/test/test_bam.jl +++ b/test/test_bam.jl @@ -54,8 +54,8 @@ record = BAM.Record() read!(reader, record) @test BAM.ismapped(record) - @test BAM.isprimary(record) - @test ! BAM.ispositivestrand(record) + @test BAM.isprimaryalignment(record) + @test !BAM.ispositivestrand(record) @test BAM.refname(record) == "CHROMOSOME_I" @test BAM.refid(record) === 1 @test BAM.hasnextrefid(record) diff --git a/test/test_sam.jl b/test/test_sam.jl index 837129d..4d4f4b3 100644 --- a/test/test_sam.jl +++ b/test/test_sam.jl @@ -54,7 +54,7 @@ @test isfilled(record) @test occursin(r"^XAM.SAM.Record:\n", repr(record)) @test SAM.ismapped(record) - @test SAM.isprimary(record) + @test SAM.isprimaryalignment(record) @test SAM.hastempname(record) @test SAM.tempname(record) == "r001" @test SAM.hasflags(record)