From e0ae571115adbe8affb7ca379afc277a6f646873 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Wed, 15 Jan 2025 12:23:44 -0500
Subject: [PATCH 01/20] Reorganize new models

---
 src/fusor/models.py  | 64 +++++++++++++++++++++++++++++++++++++++++++-
 tests/test_models.py | 35 ++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/src/fusor/models.py b/src/fusor/models.py
index 3a21f4d..ff60c21 100644
--- a/src/fusor/models.py
+++ b/src/fusor/models.py
@@ -39,8 +39,10 @@ class FUSORTypes(str, Enum):
     MULTIPLE_POSSIBLE_GENES_ELEMENT = "MultiplePossibleGenesElement"
     BREAKPOINT_COVERAGE = "BreakpointCoverage"
     CONTIG_SEQUENCE = "ContigSequence"
+    ANCHORED_READS = "AnchoredReads"
     SPLIT_READS = "SplitReads"
     SPANNING_READS = "SpanningReads"
+    READ_DATA = "ReadData"
     REGULATORY_ELEMENT = "RegulatoryElement"
     CATEGORICAL_FUSION = "CategoricalFusion"
     ASSAYED_FUSION = "AssayedFusion"
@@ -154,6 +156,18 @@ class ContigSequence(BaseStructuralElement):
     )
 
 
+class AnchoredReads(BaseStructuralElement):
+    """Define AnchoredReads class
+
+    This class can be used to report the number of reads that span the
+    fusion junction. This is used at the TranscriptSegment level, as it
+    indicates the transcript where the longer segment of the read is found
+    """
+
+    type: Literal[FUSORTypes.ANCHORED_READS] = FUSORTypes.ANCHORED_READS
+    reads: int = Field(ge=0)
+
+
 class SplitReads(BaseStructuralElement):
     """Define SplitReads class.
 
@@ -184,6 +198,28 @@ class SpanningReads(BaseStructuralElement):
     )
 
 
+class ReadData(BaseStructuralElement):
+    """Define ReadData class.
+
+    This class is used at the AssayedFusion level when a fusion caller reports
+    metadata describing sequencing reads for the fusion event
+    """
+
+    type: Literal[FUSORTypes.READ_DATA] = FUSORTypes.READ_DATA
+    split: SplitReads | None = None
+    spanning: SpanningReads | None = None
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "type": "ReadData",
+                "split": {"type": "SplitReads", "splitReads": 100},
+                "spanning": {"type": "SpanningReads", "spanningReads": 80},
+            }
+        }
+    )
+
+
 class TranscriptSegmentElement(BaseStructuralElement):
     """Define TranscriptSegment class"""
 
@@ -199,6 +235,7 @@ class TranscriptSegmentElement(BaseStructuralElement):
     elementGenomicStart: SequenceLocation | None = None
     elementGenomicEnd: SequenceLocation | None = None
     coverage: BreakpointCoverage | None = None
+    anchoredReads: AnchoredReads | None = None
 
     @model_validator(mode="before")
     def check_exons(cls, values):
@@ -264,6 +301,14 @@ def check_exons(cls, values):
                     },
                     "start": 154170399,
                 },
+                "coverage": {
+                    "type": "BreakpointCoverage",
+                    "fragmentCoverage": 185,
+                },
+                "anchoredReads": {
+                    "type": "AnchoredReads",
+                    "reads": 100,
+                },
             }
         },
     )
@@ -645,7 +690,8 @@ class Assay(BaseModelForbidExtra):
     | TemplatedSequenceElement
     | LinkerElement
     | UnknownGeneElement
-    | ContigSequence,
+    | ContigSequence
+    | ReadData,
     Field(discriminator="type"),
 ]
 
@@ -695,6 +741,7 @@ class AssayedFusion(AbstractFusion):
     causativeEvent: CausativeEvent | None = None
     assay: Assay | None = None
     contig: ContigSequence | None = None
+    readData: ReadData | None = None
 
     model_config = ConfigDict(
         json_schema_extra={
@@ -712,6 +759,21 @@ class AssayedFusion(AbstractFusion):
                     "assayName": "fluorescence in-situ hybridization assay",
                     "fusionDetection": "inferred",
                 },
+                "contig": {
+                    "type": "ContigSequence",
+                    "contig": "GTACTACTGATCTAGCATCTAGTA",
+                },
+                "readData": {
+                    "type": "ReadData",
+                    "split": {
+                        "type": "SplitReads",
+                        "splitReads": 100,
+                    },
+                    "spanning": {
+                        "type": "SpanningReads",
+                        "spanningReads": 80,
+                    },
+                },
                 "structure": [
                     {
                         "type": "GeneElement",
diff --git a/tests/test_models.py b/tests/test_models.py
index 72879af..4159383 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -6,6 +6,7 @@
 
 from fusor.models import (
     AbstractFusion,
+    AnchoredReads,
     Assay,
     AssayedFusion,
     BreakpointCoverage,
@@ -17,6 +18,7 @@
     GeneElement,
     LinkerElement,
     MultiplePossibleGenesElement,
+    ReadData,
     RegulatoryElement,
     SpanningReads,
     SplitReads,
@@ -178,6 +180,8 @@ def transcript_segments(sequence_locations, gene_examples):
             "gene": gene_examples[0],
             "elementGenomicStart": sequence_locations[2],
             "elementGenomicEnd": sequence_locations[3],
+            "coverage": BreakpointCoverage(fragmentCoverage=100),
+            "anchoredReads": AnchoredReads(reads=85),
         },
         {
             "type": "TranscriptSegmentElement",
@@ -379,6 +383,8 @@ def test_transcript_segment_element(transcript_segments):
     assert test_region_start.type == "SequenceLocation"
     test_region_end = test_element.elementGenomicEnd
     assert test_region_end.type == "SequenceLocation"
+    assert test_element.coverage.fragmentCoverage == 100
+    assert test_element.anchoredReads.reads == 85
 
     test_element = TranscriptSegmentElement(**transcript_segments[3])
     assert test_element.transcript == "refseq:NM_938439.4"
@@ -386,6 +392,8 @@ def test_transcript_segment_element(transcript_segments):
     assert test_element.exonStartOffset == 0
     assert test_element.exonEnd is None
     assert test_element.exonEndOffset is None
+    assert test_element.coverage is None
+    assert test_element.anchoredReads is None
 
     # check CURIE requirement
     with pytest.raises(ValidationError) as exc_info:
@@ -640,6 +648,18 @@ def test_contig():
     check_validation_error(exc_info, msg)
 
 
+def test_anchored_reads():
+    """Test that AnchoredReads class initializes correctly"""
+    test_anchored_reads = AnchoredReads(reads=100)
+    assert test_anchored_reads.reads == 100
+
+    # test enum validation
+    with pytest.raises(ValidationError) as exc_info:
+        assert AnchoredReads(type="anchoredreads")
+    msg = "Input should be <FUSORTypes.ANCHORED_READS: 'AnchoredReads'>"
+    check_validation_error(exc_info, msg)
+
+
 def test_split_reads():
     """Test that SplitReads class initializes correctly"""
     test_split_reads = SplitReads(splitReads=97)
@@ -664,6 +684,21 @@ def test_spanning_reads():
     check_validation_error(exc_info, msg)
 
 
+def test_read_data():
+    """Test that ReadData class initializes correctly"""
+    test_read_data = ReadData(
+        split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=90)
+    )
+    assert test_read_data.split.splitReads == 100
+    assert test_read_data.spanning.spanningReads == 90
+
+    # test enum validation
+    with pytest.raises(ValidationError) as exc_info:
+        assert ReadData(type="readata")
+    msg = "Input should be <FUSORTypes.READ_DATA: 'ReadData'>"
+    check_validation_error(exc_info, msg)
+
+
 def test_event():
     """Test Event object initializes correctly"""
     rearrangement = EventType.REARRANGEMENT

From 23a164447f681a231861c1033bc88e0f36a5f4fc Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Wed, 15 Jan 2025 14:45:51 -0500
Subject: [PATCH 02/20] Incorporate metadata classes into translators

---
 src/fusor/fusor.py        |   8 +
 src/fusor/models.py       |   4 +-
 src/fusor/translator.py   | 120 +++++++++++-
 tests/test_translators.py | 379 +++++++++++++++++++++++++++++---------
 4 files changed, 417 insertions(+), 94 deletions(-)

diff --git a/src/fusor/fusor.py b/src/fusor/fusor.py
index cbf9319..0d631b5 100644
--- a/src/fusor/fusor.py
+++ b/src/fusor/fusor.py
@@ -23,10 +23,12 @@
 
 from fusor.exceptions import FUSORParametersException, IDTranslationException
 from fusor.models import (
+    AnchoredReads,
     Assay,
     AssayedFusion,
     AssayedFusionElement,
     BaseStructuralElement,
+    BreakpointCoverage,
     CategoricalFusion,
     CategoricalFusionElement,
     CausativeEvent,
@@ -219,6 +221,8 @@ async def transcript_segment_element(
         tx_to_genomic_coords: bool = True,
         use_minimal_gene: bool = True,
         seq_id_target_namespace: str | None = None,
+        coverage: BreakpointCoverage | None = None,
+        reads: AnchoredReads | None = None,
         **kwargs,
     ) -> tuple[TranscriptSegmentElement | None, list[str] | None]:
         """Create transcript segment element.
@@ -230,6 +234,8 @@ async def transcript_segment_element(
             gene-normalizer's entire gene object will be used
         :param seq_id_target_namespace: If want to use digest for ``sequence_id``, set
             this to the namespace you want the digest for. Otherwise, leave as ``None``.
+        :param coverage: The read coverage located near the specified breakpoint
+        :param reads: The read data for the specified breakpoint
         :param kwargs:
             If ``tx_to_genomic_coords``, possible key word arguments:
 
@@ -307,6 +313,8 @@ async def transcript_segment_element(
                 gene=normalized_gene_response[0],
                 elementGenomicStart=genomic_start_location,
                 elementGenomicEnd=genomic_end_location,
+                coverage=coverage if coverage else None,
+                anchoredReads=reads if reads else None,
             ),
             None,
         )
diff --git a/src/fusor/models.py b/src/fusor/models.py
index ff60c21..9eed493 100644
--- a/src/fusor/models.py
+++ b/src/fusor/models.py
@@ -146,7 +146,9 @@ class ContigSequence(BaseStructuralElement):
     type: Literal[FUSORTypes.CONTIG_SEQUENCE] = FUSORTypes.CONTIG_SEQUENCE
     contig: Annotated[
         str,
-        StringConstraints(strip_whitespace=True, to_upper=True, pattern=r"^[ACGT]+$"),
+        StringConstraints(
+            strip_whitespace=True, to_upper=True, pattern=r"^(?:[^A-Za-z]|[ACTGactg])*$"
+        ),
     ]
 
     model_config = ConfigDict(
diff --git a/src/fusor/translator.py b/src/fusor/translator.py
index 1698206..60284d3 100644
--- a/src/fusor/translator.py
+++ b/src/fusor/translator.py
@@ -10,11 +10,17 @@
 
 from fusor.fusor import FUSOR
 from fusor.models import (
+    AnchoredReads,
     Assay,
     AssayedFusion,
+    BreakpointCoverage,
     CausativeEvent,
+    ContigSequence,
     EventType,
     GeneElement,
+    ReadData,
+    SpanningReads,
+    SplitReads,
     TranscriptSegmentElement,
 )
 
@@ -56,6 +62,8 @@ def _format_fusion(
         ce: CausativeEvent | None = None,
         rf: bool | None = None,
         assay: Assay | None = None,
+        contig: ContigSequence | None = None,
+        reads: ReadData | None = None,
     ) -> AssayedFusion:
         """Format classes to create AssayedFusion objects
 
@@ -66,6 +74,8 @@ def _format_fusion(
         :param ce: CausativeEvent
         :param rf: A boolean indicating if the reading frame is preserved
         :param assay: Assay
+        :param contig: The contig sequence
+        :param reads: The read data
         :return AssayedFusion object
         """
         params = {}
@@ -84,6 +94,10 @@ def _format_fusion(
             params["readingFramePreserved"] = rf
         if assay:
             params["assay"] = assay
+        if contig:
+            params["contig"] = contig
+        if reads:
+            params["readData"] = reads
         return AssayedFusion(**params)
 
     def _get_causative_event(
@@ -186,6 +200,8 @@ async def from_jaffa(
         rearrangement: bool,
         classification: str,
         inframe: bool,
+        spanning_reads: int,
+        spanning_pairs: int,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion | None:
@@ -199,6 +215,12 @@ async def from_jaffa(
         :param rearrangement: A boolean indicating if a rearrangement occured
         :param classification: The classification associated with the called fusion
         :param inframe: A boolean indicating if the fusion occurred in-frame
+        :param spanning_reads: The number of detected reads that span the junction
+            between the two transcripts. Although described as spanning reads, this
+            aligns with our definition of split reads (i.e. reads that have sequence
+            belonging to the fusion partners)
+        :param spanning_pairs: The number of detected reads that align entirely on
+            either side of the breakpoint
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
@@ -238,8 +260,19 @@ async def from_jaffa(
         else:
             ce = None
 
+        read_data = ReadData(
+            split=SplitReads(splitReads=spanning_reads),
+            spanning=SpanningReads(spanningReads=spanning_pairs),
+        )
+
         return self._format_fusion(
-            gene_5prime_element, gene_3prime_element, tr_5prime, tr_3prime, ce, inframe
+            gene_5prime_element,
+            gene_3prime_element,
+            tr_5prime,
+            tr_3prime,
+            ce,
+            inframe,
+            reads=read_data,
         )
 
     async def from_star_fusion(
@@ -249,6 +282,8 @@ async def from_star_fusion(
         left_breakpoint: str,
         right_breakpoint: str,
         annots: str,
+        junction_read_count: int,
+        spanning_frag_count: int,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
@@ -259,6 +294,11 @@ async def from_star_fusion(
         :param left_breakpoint: The gene indicated in the LeftBreakpoint column
         :param right_breakpoint: The gene indicated in the RightBreakpoint column
         :param annots: The annotations associated with the fusion
+        :param junction_read_count: The number of RNA-seq fragments that split the
+            junction between the two transcript segments (from STAR-Fusion documentation)
+        :param spanning_frag_count: The number of RNA-seq fragments that encompass the
+            fusion junction such that one read of the pair aligns to a different gene
+            than the other paired-end read of that fragment (from STAR-Fusion documentation)
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
@@ -295,8 +335,18 @@ async def from_star_fusion(
         )
 
         ce = self._get_causative_event(five_prime[0], three_prime[0], ",".join(annots))
+        read_data = ReadData(
+            split=SplitReads(splitReads=junction_read_count),
+            spanning=SpanningReads(spanningReads=spanning_frag_count),
+        )
+
         return self._format_fusion(
-            gene_5prime_element, gene_3prime_element, tr_5prime, tr_3prime, ce
+            gene_5prime_element,
+            gene_3prime_element,
+            tr_5prime,
+            tr_3prime,
+            ce,
+            reads=read_data,
         )
 
     async def from_fusion_catcher(
@@ -306,6 +356,9 @@ async def from_fusion_catcher(
         five_prime_fusion_point: str,
         three_prime_fusion_point: str,
         predicted_effect: str,
+        spanning_unique_reads: int,
+        spanning_reads: int,
+        fusion_sequence: str,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
@@ -319,6 +372,10 @@ async def from_fusion_catcher(
         fusion junction. This coordinate is 1-based
         :param predicted_effect: The predicted effect of the fusion event, created
         using annotation from the Ensembl database
+        :param spanning_unique_reads: The number of unique reads that map on the fusion
+            junction
+        :param spanning_reads: The number of paired reads that support the fusion
+        :param fusion_sequence: The inferred sequence around the fusion junction
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
@@ -356,8 +413,20 @@ async def from_fusion_catcher(
         )
 
         ce = self._get_causative_event(five_prime[0], three_prime[0], predicted_effect)
+        read_data = ReadData(
+            split=SplitReads(splitReads=spanning_unique_reads),
+            spanning=SpanningReads(spanningReads=spanning_reads),
+        )
+        contig = ContigSequence(contig=fusion_sequence)
+
         return self._format_fusion(
-            gene_5prime_element, gene_3prime_element, tr_5prime, tr_3prime, ce
+            gene_5prime_element,
+            gene_3prime_element,
+            tr_5prime,
+            tr_3prime,
+            ce,
+            contig=contig,
+            reads=read_data,
         )
 
     async def from_fusion_map(
@@ -431,6 +500,12 @@ async def from_arriba(
         direction1: str,
         direction2: str,
         rf: str,
+        split_reads1: int,
+        split_reads2: int,
+        discordant_mates: int,
+        coverage1: int,
+        coverage2: int,
+        fusion_transcript: str,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
@@ -449,6 +524,12 @@ async def from_arriba(
         :param direction2: A description that indicates if the transcript segment
             starts or ends at breakpoint2
         :param rf: A description if the reading frame is preserved for the fusion
+        :param split_reads1: Number of supporting split fragments with anchor in gene1
+        :param split_reads2: Number of supporting split fragments with anchor in gene2
+        :param discordant_mates: Number of discordant mates supporting the fusion
+        :param coverage1: Number of fragments retained near breakpoint1
+        :param coverage2: Number of fragments retained near breakpoint2
+        :param fusion_transcript: The assembled fusion transcript
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
@@ -484,6 +565,8 @@ async def from_arriba(
             seg_start_genomic=int(breakpoint1[1]) if gene1_seg_start else None,
             seg_end_genomic=int(breakpoint1[1]) if not gene1_seg_start else None,
             gene=gene_5prime,
+            coverage=BreakpointCoverage(fragmentCoverage=coverage1),
+            reads=AnchoredReads(reads=split_reads1),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
@@ -494,6 +577,8 @@ async def from_arriba(
             seg_start_genomic=int(breakpoint2[1]) if gene2_seg_start else None,
             seg_end_genomic=int(breakpoint2[1]) if not gene2_seg_start else None,
             gene=gene_3prime,
+            coverage=BreakpointCoverage(fragmentCoverage=coverage2),
+            reads=AnchoredReads(reads=split_reads2),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
@@ -510,8 +595,18 @@ async def from_arriba(
             )
         )
         rf = bool(rf == "in-frame") if rf != "." else None
+        read_data = ReadData(spanning=SpanningReads(spanningReads=discordant_mates))
+        contig = ContigSequence(contig=fusion_transcript)
+
         return self._format_fusion(
-            gene_5prime_element, gene_3prime_element, tr_5prime, tr_3prime, ce, rf
+            gene_5prime_element,
+            gene_3prime_element,
+            tr_5prime,
+            tr_3prime,
+            ce,
+            rf,
+            contig=contig,
+            reads=read_data,
         )
 
     async def from_cicero(
@@ -524,6 +619,11 @@ async def from_cicero(
         pos_3prime: int,
         sv_ort: str,
         event_type: str,
+        reads_a: int,
+        reads_b: int,
+        coverage_a: int,
+        coverage_b: int,
+        contig: str,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion | str:
@@ -538,6 +638,11 @@ async def from_cicero(
         :param sv_ort: Whether the mapping orientation of assembled contig (driven by
             structural variation) has confident biological meaning
         :param event_type: The structural variation event that created the called fusion
+        :param reads_a: The number of reads that support the breakpoint for the 5' partner
+        :param reads_b: The number of reads that support the breakpoint for the 3' partner
+        :param coverage_a: The fragment coverage at the 5' breakpoint
+        :param coverage_b: The fragment coverage at the 3' breakpoint
+        :param contig: The assembled contig sequence for the fusion
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
@@ -571,6 +676,8 @@ async def from_cicero(
             genomic_ac=self._get_genomic_ac(chr_5prime, rb),
             seg_end_genomic=pos_5prime,
             gene=gene_5prime,
+            coverage=BreakpointCoverage(fragmentCoverage=coverage_a),
+            reads=AnchoredReads(reads=reads_a),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
@@ -580,6 +687,8 @@ async def from_cicero(
             genomic_ac=self._get_genomic_ac(chr_3prime, rb),
             seg_start_genomic=pos_3prime,
             gene=gene_3prime,
+            coverage=BreakpointCoverage(fragmentCoverage=coverage_b),
+            reads=AnchoredReads(reads=reads_b),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
@@ -594,12 +703,15 @@ async def from_cicero(
                 eventType=EventType("rearrangement"),
                 eventDescription=event_type,
             )
+        contig = ContigSequence(contig=contig)
+
         return self._format_fusion(
             gene_5prime_element,
             gene_3prime_element,
             tr_5prime,
             tr_3prime,
             ce,
+            contig=contig,
         )
 
     async def from_mapsplice(
diff --git a/tests/test_translators.py b/tests/test_translators.py
index 0be503e..0d4beb3 100644
--- a/tests/test_translators.py
+++ b/tests/test_translators.py
@@ -4,108 +4,126 @@
 import pytest
 from cool_seq_tool.schemas import Assembly, CoordinateType
 
-from fusor.models import AssayedFusion
+from fusor.models import (
+    AnchoredReads,
+    AssayedFusion,
+    BreakpointCoverage,
+    ContigSequence,
+    ReadData,
+    SpanningReads,
+    SplitReads,
+)
 from fusor.translator import Caller
 
 
 @pytest.fixture(scope="module")
 def fusion_data_example():
     """Create example assayed fusion for TPM3::PDGFRB with exonic breakpoints"""
-    params = {
-        "type": "AssayedFusion",
-        "structure": [
-            {
-                "type": "TranscriptSegmentElement",
-                "transcript": "refseq:NM_152263.4",
-                "exonEnd": 8,
-                "exonEndOffset": -66,
-                "gene": {"id": "hgnc:12012", "type": "Gene", "label": "TPM3"},
-                "elementGenomicEnd": {
-                    "id": "ga4gh:SL.6lXn5i3zqcZUfmtBSieTiVL4Nt2gPGKY",
-                    "type": "SequenceLocation",
-                    "digest": "6lXn5i3zqcZUfmtBSieTiVL4Nt2gPGKY",
-                    "sequenceReference": {
-                        "id": "refseq:NC_000001.11",
-                        "type": "SequenceReference",
-                        "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
+
+    def _create_base_fixture(**kwargs):
+        params = {
+            "type": "AssayedFusion",
+            "structure": [
+                {
+                    "type": "TranscriptSegmentElement",
+                    "transcript": "refseq:NM_152263.4",
+                    "exonEnd": 8,
+                    "exonEndOffset": -66,
+                    "gene": {"id": "hgnc:12012", "type": "Gene", "label": "TPM3"},
+                    "elementGenomicEnd": {
+                        "id": "ga4gh:SL.6lXn5i3zqcZUfmtBSieTiVL4Nt2gPGKY",
+                        "type": "SequenceLocation",
+                        "digest": "6lXn5i3zqcZUfmtBSieTiVL4Nt2gPGKY",
+                        "sequenceReference": {
+                            "id": "refseq:NC_000001.11",
+                            "type": "SequenceReference",
+                            "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
+                        },
+                        "start": 154170465,
                     },
-                    "start": 154170465,
                 },
-            },
-            {
-                "type": "TranscriptSegmentElement",
-                "transcript": "refseq:NM_002609.4",
-                "exonStart": 11,
-                "exonStartOffset": 2,
-                "gene": {"id": "hgnc:8804", "type": "Gene", "label": "PDGFRB"},
-                "elementGenomicStart": {
-                    "id": "ga4gh:SL.Sp1lwuHbRCkWIoe4zzwVKPsS8zK8i0ck",
-                    "type": "SequenceLocation",
-                    "digest": "Sp1lwuHbRCkWIoe4zzwVKPsS8zK8i0ck",
-                    "sequenceReference": {
-                        "id": "refseq:NC_000005.10",
-                        "type": "SequenceReference",
-                        "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
+                {
+                    "type": "TranscriptSegmentElement",
+                    "transcript": "refseq:NM_002609.4",
+                    "exonStart": 11,
+                    "exonStartOffset": 2,
+                    "gene": {"id": "hgnc:8804", "type": "Gene", "label": "PDGFRB"},
+                    "elementGenomicStart": {
+                        "id": "ga4gh:SL.Sp1lwuHbRCkWIoe4zzwVKPsS8zK8i0ck",
+                        "type": "SequenceLocation",
+                        "digest": "Sp1lwuHbRCkWIoe4zzwVKPsS8zK8i0ck",
+                        "sequenceReference": {
+                            "id": "refseq:NC_000005.10",
+                            "type": "SequenceReference",
+                            "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
+                        },
+                        "end": 150126612,
                     },
-                    "end": 150126612,
                 },
-            },
-        ],
-        "causativeEvent": {"type": "CausativeEvent", "eventType": "rearrangement"},
-        "r_frame_preserved": True,
-        "assay": None,
-    }
-    return AssayedFusion(**params)
+            ],
+            "causativeEvent": {"type": "CausativeEvent", "eventType": "rearrangement"},
+            "r_frame_preserved": True,
+            "assay": None,
+        }
+        assayed_fusion = AssayedFusion(**params)
+        return assayed_fusion.model_copy(update=kwargs)
+
+    return _create_base_fixture
 
 
 @pytest.fixture(scope="module")
 def fusion_data_example_nonexonic():
     """Create example assayed fusion for TPM3::PDGFRB with non-exonic breakpoints"""
-    params = {
-        "type": "AssayedFusion",
-        "structure": [
-            {
-                "type": "TranscriptSegmentElement",
-                "transcript": "refseq:NM_152263.4",
-                "exonEnd": 4,
-                "exonEndOffset": 5,
-                "gene": {"id": "hgnc:12012", "type": "Gene", "label": "TPM3"},
-                "elementGenomicEnd": {
-                    "id": "ga4gh:SL.O1rVKQA2FTdy_FFWg3qJVSTG_TF_Mkex",
-                    "type": "SequenceLocation",
-                    "digest": "O1rVKQA2FTdy_FFWg3qJVSTG_TF_Mkex",
-                    "sequenceReference": {
-                        "id": "refseq:NC_000001.11",
-                        "type": "SequenceReference",
-                        "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
+
+    def _create_base_fixture(**kwargs):
+        params = {
+            "type": "AssayedFusion",
+            "structure": [
+                {
+                    "type": "TranscriptSegmentElement",
+                    "transcript": "refseq:NM_152263.4",
+                    "exonEnd": 4,
+                    "exonEndOffset": 5,
+                    "gene": {"id": "hgnc:12012", "type": "Gene", "label": "TPM3"},
+                    "elementGenomicEnd": {
+                        "id": "ga4gh:SL.O1rVKQA2FTdy_FFWg3qJVSTG_TF_Mkex",
+                        "type": "SequenceLocation",
+                        "digest": "O1rVKQA2FTdy_FFWg3qJVSTG_TF_Mkex",
+                        "sequenceReference": {
+                            "id": "refseq:NC_000001.11",
+                            "type": "SequenceReference",
+                            "refgetAccession": "SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO",
+                        },
+                        "start": 154173078,
                     },
-                    "start": 154173078,
                 },
-            },
-            {
-                "type": "TranscriptSegmentElement",
-                "transcript": "refseq:NM_002609.4",
-                "exonStart": 11,
-                "exonStartOffset": -559,
-                "gene": {"id": "hgnc:8804", "type": "Gene", "label": "PDGFRB"},
-                "elementGenomicStart": {
-                    "id": "ga4gh:SL.GtoWMuox4tOyX2I5L9Baobnpgc1pDIVJ",
-                    "type": "SequenceLocation",
-                    "digest": "GtoWMuox4tOyX2I5L9Baobnpgc1pDIVJ",
-                    "sequenceReference": {
-                        "id": "refseq:NC_000005.10",
-                        "type": "SequenceReference",
-                        "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
+                {
+                    "type": "TranscriptSegmentElement",
+                    "transcript": "refseq:NM_002609.4",
+                    "exonStart": 11,
+                    "exonStartOffset": -559,
+                    "gene": {"id": "hgnc:8804", "type": "Gene", "label": "PDGFRB"},
+                    "elementGenomicStart": {
+                        "id": "ga4gh:SL.GtoWMuox4tOyX2I5L9Baobnpgc1pDIVJ",
+                        "type": "SequenceLocation",
+                        "digest": "GtoWMuox4tOyX2I5L9Baobnpgc1pDIVJ",
+                        "sequenceReference": {
+                            "id": "refseq:NC_000005.10",
+                            "type": "SequenceReference",
+                            "refgetAccession": "SQ.aUiQCzCPZ2d0csHbMSbh2NzInhonSXwI",
+                        },
+                        "end": 150127173,
                     },
-                    "end": 150127173,
                 },
-            },
-        ],
-        "causativeEvent": {"type": "CausativeEvent", "eventType": "rearrangement"},
-        "r_frame_preserved": True,
-        "assay": None,
-    }
-    return AssayedFusion(**params)
+            ],
+            "causativeEvent": {"type": "CausativeEvent", "eventType": "rearrangement"},
+            "r_frame_preserved": True,
+            "assay": None,
+        }
+        assayed_fusion = AssayedFusion(**params)
+        return assayed_fusion.model_copy(update=kwargs)
+
+    return _create_base_fixture
 
 
 def test_gene_element_arriba(translator_instance):
@@ -138,6 +156,8 @@ async def test_jaffa(
     rearrangement = True
     classification = "HighConfidence"
     inframe = True
+    spanning_reads = 100
+    spanning_pairs = 80
 
     jaffa_fusor = await translator_instance.from_jaffa(
         fusion_genes,
@@ -148,10 +168,18 @@ async def test_jaffa(
         rearrangement,
         classification,
         inframe,
+        spanning_reads,
+        spanning_pairs,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example = fusion_data_example(
+        readData=ReadData(
+            split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=80)
+        )
+    )
     assert jaffa_fusor.structure == fusion_data_example.structure
+    assert jaffa_fusor.readData == fusion_data_example.readData
 
     # Test non-exonic breakpoint
     fusion_genes = "TPM3:PDGFRB"
@@ -162,6 +190,8 @@ async def test_jaffa(
     rearrangement = True
     classification = "HighConfidence"
     inframe = True
+    spanning_reads = 100
+    spanning_pairs = 80
 
     jaffa_fusor_nonexonic = await translator_instance.from_jaffa(
         fusion_genes,
@@ -172,10 +202,18 @@ async def test_jaffa(
         rearrangement,
         classification,
         inframe,
+        spanning_reads,
+        spanning_pairs,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example_nonexonic = fusion_data_example_nonexonic(
+        readData=ReadData(
+            split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=80)
+        )
+    )
     assert jaffa_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure
+    assert jaffa_fusor_nonexonic.readData == fusion_data_example_nonexonic.readData
 
 
 @pytest.mark.asyncio()
@@ -189,6 +227,8 @@ async def test_star_fusion(
     left_breakpoint = "chr1:154170465:-"
     right_breakpoint = "chr5:150126612:-"
     annots = '["INTERCHROMOSOMAL]'
+    junction_read_count = 100
+    spanning_frag_count = 80
 
     star_fusion_fusor = await translator_instance.from_star_fusion(
         left_gene,
@@ -196,10 +236,18 @@ async def test_star_fusion(
         left_breakpoint,
         right_breakpoint,
         annots,
+        junction_read_count,
+        spanning_frag_count,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example = fusion_data_example(
+        readData=ReadData(
+            split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=80)
+        )
+    )
     assert star_fusion_fusor.structure == fusion_data_example.structure
+    assert star_fusion_fusor.readData == fusion_data_example.readData
 
     # Test non-exonic breakpoints
     left_gene = "TPM3^ENSG00000143549.19"
@@ -207,6 +255,8 @@ async def test_star_fusion(
     left_breakpoint = "chr1:154173079:-"
     right_breakpoint = "chr5:150127173:-"
     annots = '["INTERCHROMOSOMAL]'
+    junction_read_count = 100
+    spanning_frag_count = 80
 
     star_fusion_fusor_nonexonic = await translator_instance.from_star_fusion(
         left_gene,
@@ -214,12 +264,22 @@ async def test_star_fusion(
         left_breakpoint,
         right_breakpoint,
         annots,
+        junction_read_count,
+        spanning_frag_count,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example_nonexonic = fusion_data_example_nonexonic(
+        readData=ReadData(
+            split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=80)
+        )
+    )
     assert (
         star_fusion_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure
     )
+    assert (
+        star_fusion_fusor_nonexonic.readData == fusion_data_example_nonexonic.readData
+    )
 
 
 @pytest.mark.asyncio()
@@ -233,6 +293,9 @@ async def test_fusion_catcher(
     five_prime_fusion_point = "1:154170465:-"
     three_prime_fusion_point = "5:150126612:-"
     predicted_effect = "exonic(no-known-CDS)/exonic(no-known-CDS)"
+    spanning_unique_reads = 100
+    spanning_reads = 80
+    fusion_sequence = "CTAGATGAC*TACTACTA"
 
     fusion_catcher_fusor = await translator_instance.from_fusion_catcher(
         five_prime_partner,
@@ -240,10 +303,21 @@ async def test_fusion_catcher(
         five_prime_fusion_point,
         three_prime_fusion_point,
         predicted_effect,
+        spanning_unique_reads,
+        spanning_reads,
+        fusion_sequence,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example = fusion_data_example(
+        readData=ReadData(
+            split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=80)
+        ),
+        contig=ContigSequence(contig="CTAGATGAC*TACTACTA"),
+    )
     assert fusion_catcher_fusor.structure == fusion_data_example.structure
+    assert fusion_catcher_fusor.readData == fusion_data_example.readData
+    assert fusion_catcher_fusor.contig == fusion_data_example.contig
 
     # Test non-exonic breakpoint
     five_prime_partner = "TPM3"
@@ -251,6 +325,9 @@ async def test_fusion_catcher(
     five_prime_fusion_point = "1:154173079:-"
     three_prime_fusion_point = "5:150127173:-"
     predicted_effect = "exonic(no-known-CDS)/exonic(no-known-CDS)"
+    spanning_unique_reads = 100
+    spanning_reads = 80
+    fusion_sequence = "CTAGATGAC*TACTACTA"
 
     fusion_catcher_fusor_nonexonic = await translator_instance.from_fusion_catcher(
         five_prime_partner,
@@ -258,13 +335,27 @@ async def test_fusion_catcher(
         five_prime_fusion_point,
         three_prime_fusion_point,
         predicted_effect,
+        spanning_unique_reads,
+        spanning_reads,
+        fusion_sequence,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example_nonexonic = fusion_data_example_nonexonic(
+        readData=ReadData(
+            split=SplitReads(splitReads=100), spanning=SpanningReads(spanningReads=80)
+        ),
+        contig=ContigSequence(contig="CTAGATGAC*TACTACTA"),
+    )
     assert (
         fusion_catcher_fusor_nonexonic.structure
         == fusion_data_example_nonexonic.structure
     )
+    assert (
+        fusion_catcher_fusor_nonexonic.readData
+        == fusion_data_example_nonexonic.readData
+    )
+    assert fusion_catcher_fusor_nonexonic.contig == fusion_data_example_nonexonic.contig
 
 
 @pytest.mark.asyncio()
@@ -289,7 +380,7 @@ async def test_fusion_map(
     fusion_map_fusor = await translator_instance.from_fusion_map(
         fusion_map_data, CoordinateType.INTER_RESIDUE.value, Assembly.GRCH38.value
     )
-    assert fusion_map_fusor.structure == fusion_data_example.structure
+    assert fusion_map_fusor.structure == fusion_data_example().structure
 
     # Test non-exonic breakpoint
     fusion_map_data_nonexonic = pl.DataFrame(
@@ -309,7 +400,8 @@ async def test_fusion_map(
         fusion_map_data_nonexonic, CoordinateType.RESIDUE.value, Assembly.GRCH38.value
     )
     assert (
-        fusion_map_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure
+        fusion_map_fusor_nonexonic.structure
+        == fusion_data_example_nonexonic().structure
     )
 
 
@@ -330,6 +422,12 @@ async def test_arriba(
     direction1 = "upstream"
     direction2 = "downstream"
     rf = "in-frame"
+    split_reads1 = 100
+    split_reads2 = 95
+    discordant_mates = 30
+    coverage1 = 200
+    coverage2 = 190
+    fusion_transcript = "CTAGATGAC_TACTACTA|GTACTACT"
 
     arriba_fusor = await translator_instance.from_arriba(
         gene1,
@@ -343,10 +441,26 @@ async def test_arriba(
         direction1,
         direction2,
         rf,
+        split_reads1,
+        split_reads2,
+        discordant_mates,
+        coverage1,
+        coverage2,
+        fusion_transcript,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example = fusion_data_example(
+        readData=ReadData(spanning=SpanningReads(spanningReads=30)),
+        contig=ContigSequence(contig=fusion_transcript),
+    )
+    fusion_data_example.structure[0].coverage = BreakpointCoverage(fragmentCoverage=200)
+    fusion_data_example.structure[0].anchoredReads = AnchoredReads(reads=100)
+    fusion_data_example.structure[1].coverage = BreakpointCoverage(fragmentCoverage=190)
+    fusion_data_example.structure[1].anchoredReads = AnchoredReads(reads=95)
     assert arriba_fusor.structure == fusion_data_example.structure
+    assert arriba_fusor.readData == fusion_data_example.readData
+    assert arriba_fusor.contig == fusion_data_example.contig
 
     # Test non-exonic breakpoint
     gene1 = "TPM3"
@@ -360,6 +474,11 @@ async def test_arriba(
     direction1 = "upstream"
     direction2 = "downstream"
     rf = "in-frame"
+    split_reads1 = 100
+    split_reads2 = 95
+    discordant_mates = 30
+    coverage1 = 200
+    coverage2 = 190
 
     arriba_fusor_nonexonic = await translator_instance.from_arriba(
         gene1,
@@ -373,10 +492,30 @@ async def test_arriba(
         direction1,
         direction2,
         rf,
+        split_reads1,
+        split_reads2,
+        discordant_mates,
+        coverage1,
+        coverage2,
+        fusion_transcript,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example_nonexonic = fusion_data_example_nonexonic(
+        readData=ReadData(spanning=SpanningReads(spanningReads=30)),
+        contig=ContigSequence(contig=fusion_transcript),
+    )
+    fusion_data_example_nonexonic.structure[0].coverage = BreakpointCoverage(
+        fragmentCoverage=200
+    )
+    fusion_data_example_nonexonic.structure[0].anchoredReads = AnchoredReads(reads=100)
+    fusion_data_example_nonexonic.structure[1].coverage = BreakpointCoverage(
+        fragmentCoverage=190
+    )
+    fusion_data_example_nonexonic.structure[1].anchoredReads = AnchoredReads(reads=95)
     assert arriba_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure
+    assert arriba_fusor_nonexonic.readData == fusion_data_example_nonexonic.readData
+    assert arriba_fusor_nonexonic.contig == fusion_data_example_nonexonic.contig
 
 
 @pytest.mark.asyncio()
@@ -393,6 +532,11 @@ async def test_cicero(
     pos_3prime = 150126612
     sv_ort = ">"
     event_type = "CTX"
+    reads_a = 100
+    reads_b = 90
+    coverage_a = 200
+    coverage_b = 190
+    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
 
     cicero_fusor = await translator_instance.from_cicero(
         gene_5prime,
@@ -403,10 +547,22 @@ async def test_cicero(
         pos_3prime,
         sv_ort,
         event_type,
+        reads_a,
+        reads_b,
+        coverage_a,
+        coverage_b,
+        contig,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example = fusion_data_example(contig=ContigSequence(contig=contig))
+    fusion_data_example.structure[0].coverage = BreakpointCoverage(fragmentCoverage=200)
+    fusion_data_example.structure[0].anchoredReads = AnchoredReads(reads=100)
+    fusion_data_example.structure[1].coverage = BreakpointCoverage(fragmentCoverage=190)
+    fusion_data_example.structure[1].anchoredReads = AnchoredReads(reads=90)
     assert cicero_fusor.structure == fusion_data_example.structure
+    assert cicero_fusor.readData == fusion_data_example.readData
+    assert cicero_fusor.contig == fusion_data_example.contig
 
     # Test non-exonic breakpoint
     gene_5prime = "TPM3"
@@ -417,6 +573,11 @@ async def test_cicero(
     pos_3prime = 150127173
     sv_ort = ">"
     event_type = "CTX"
+    reads_a = 100
+    reads_b = 90
+    coverage_a = 200
+    coverage_b = 190
+    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
 
     cicero_fusor_nonexonic = await translator_instance.from_cicero(
         gene_5prime,
@@ -427,10 +588,28 @@ async def test_cicero(
         pos_3prime,
         sv_ort,
         event_type,
+        reads_a,
+        reads_b,
+        coverage_a,
+        coverage_b,
+        contig,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
+    fusion_data_example_nonexonic = fusion_data_example_nonexonic(
+        contig=ContigSequence(contig=contig)
+    )
+    fusion_data_example_nonexonic.structure[0].coverage = BreakpointCoverage(
+        fragmentCoverage=200
+    )
+    fusion_data_example_nonexonic.structure[0].anchoredReads = AnchoredReads(reads=100)
+    fusion_data_example_nonexonic.structure[1].coverage = BreakpointCoverage(
+        fragmentCoverage=190
+    )
+    fusion_data_example_nonexonic.structure[1].anchoredReads = AnchoredReads(reads=90)
     assert cicero_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure
+    assert cicero_fusor_nonexonic.readData == fusion_data_example_nonexonic.readData
+    assert cicero_fusor_nonexonic.contig == fusion_data_example_nonexonic.contig
 
     # Test case where the called fusion does not have confident biological meaning
     gene_5prime = "TPM3"
@@ -441,6 +620,11 @@ async def test_cicero(
     pos_3prime = 150127173
     sv_ort = "?"
     event_type = "CTX"
+    reads_a = 100
+    reads_b = 90
+    coverage_a = 200
+    coverage_b = 190
+    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
 
     non_confident_bio = await translator_instance.from_cicero(
         gene_5prime,
@@ -451,6 +635,11 @@ async def test_cicero(
         pos_3prime,
         sv_ort,
         event_type,
+        reads_a,
+        reads_b,
+        coverage_a,
+        coverage_b,
+        contig,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -468,6 +657,11 @@ async def test_cicero(
     pos_3prime = 150127173
     sv_ort = "?"
     event_type = "CTX"
+    reads_a = 100
+    reads_b = 90
+    coverage_a = 200
+    coverage_b = 190
+    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
 
     multiple_genes_fusion_partner = await translator_instance.from_cicero(
         gene_5prime,
@@ -478,6 +672,11 @@ async def test_cicero(
         pos_3prime,
         sv_ort,
         event_type,
+        reads_a,
+        reads_b,
+        coverage_a,
+        coverage_b,
+        contig,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -510,7 +709,7 @@ async def test_enfusion(
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
-    assert enfusion_fusor.structure == fusion_data_example.structure
+    assert enfusion_fusor.structure == fusion_data_example().structure
 
     # Test non-exonic breakpoint
     gene_5prime = "TPM3"
@@ -530,7 +729,9 @@ async def test_enfusion(
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
-    assert enfusion_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure
+    assert (
+        enfusion_fusor_nonexonic.structure == fusion_data_example_nonexonic().structure
+    )
 
 
 @pytest.mark.asyncio()
@@ -560,7 +761,7 @@ async def test_genie(
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
-    assert genie_fusor.structure == fusion_data_example.structure
+    assert genie_fusor.structure == fusion_data_example().structure
 
     # Test non-exonic breakpoint
     site1_hugo = "TPM3"
@@ -584,4 +785,4 @@ async def test_genie(
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
-    assert genie_fusor_nonexonic.structure == fusion_data_example_nonexonic.structure
+    assert genie_fusor_nonexonic.structure == fusion_data_example_nonexonic().structure

From 3d9218073ccd6850ef55c2ff3d7b41523e0f3327 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 16 Jan 2025 10:39:16 -0500
Subject: [PATCH 03/20] Add class for JAFFA

---
 src/fusor/fusion_caller_models.py | 54 +++++++++++++++++++++++++
 src/fusor/translator.py           | 47 ++++++----------------
 tests/test_translators.py         | 67 +++++++++++++------------------
 3 files changed, 94 insertions(+), 74 deletions(-)
 create mode 100644 src/fusor/fusion_caller_models.py

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
new file mode 100644
index 0000000..ab34e75
--- /dev/null
+++ b/src/fusor/fusion_caller_models.py
@@ -0,0 +1,54 @@
+"""Schemas for fusion callers used in translator.py"""
+
+from enum import Enum
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+class BaseModelForbidExtra(BaseModel, extra="forbid"):
+    """Base Pydantic model class with extra values forbidden."""
+
+
+class FusionCallerTypes(str, Enum):
+    """Define FusionCaller type values"""
+
+    JAFFA = "JAFFA"
+
+
+class JAFFA(BaseModel):
+    """Define parameters for JAFFA model"""
+
+    type: Literal[FusionCallerTypes.JAFFA] = FusionCallerTypes.JAFFA
+    fusion_genes: str = Field(
+        ..., description="A string containing the two fusion partners"
+    )
+    chrom1: str = Field(
+        ..., description="The chromosome indicated in the chrom1 column"
+    )
+    base1: int = Field(
+        ..., description="The genomic position indicated in the base1 column"
+    )
+    chrom2: str = Field(
+        ..., description="The chromosome indicated in the chrom2 column"
+    )
+    base2: int = Field(
+        ..., description="The genomic position indicated in the base2 column"
+    )
+    rearrangement: bool = Field(
+        ..., description="A boolean indicating if a rearrangement occurred"
+    )
+    classification: str = Field(
+        ..., description="The classification associated with the called fusion"
+    )
+    inframe: bool = Field(
+        ..., description="A boolean indicating if the fusion occurred in-frame"
+    )
+    spanning_reads: int = Field(
+        ...,
+        description="The number of detected reads that span the junction between the two transcripts. Although described as spanning reads, this aligns with our definition of split reads i.e. reads that have sequence belonging to the fusion partners",
+    )
+    spanning_pairs: int = Field(
+        ...,
+        description="The number of detected reads that align entirely on either side of the breakpoint",
+    )
diff --git a/src/fusor/translator.py b/src/fusor/translator.py
index 60284d3..1e1fe8a 100644
--- a/src/fusor/translator.py
+++ b/src/fusor/translator.py
@@ -8,6 +8,7 @@
 import polars as pl
 from cool_seq_tool.schemas import Assembly, CoordinateType
 
+from fusor.fusion_caller_models import JAFFA
 from fusor.fusor import FUSOR
 from fusor.models import (
     AnchoredReads,
@@ -192,40 +193,18 @@ def _get_genomic_ac(self, chrom: str, build: Assembly) -> str:
 
     async def from_jaffa(
         self,
-        fusion_genes: str,
-        chrom1: str,
-        base1: int,
-        chrom2: str,
-        base2: int,
-        rearrangement: bool,
-        classification: str,
-        inframe: bool,
-        spanning_reads: int,
-        spanning_pairs: int,
+        jaffa: JAFFA,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion | None:
         """Parse JAFFA fusion output to create AssayedFusion object
 
-        :param fusion_genes: A string containing the two fusion partners
-        :param chrom1: The chromosome indicated in the chrom1 column
-        :param base1: The genomic position indicated in the base1 column
-        :param chrom2: The chromosome indicated in the chrom2 column
-        :param base2: The genomic position indicated in the base2 column
-        :param rearrangement: A boolean indicating if a rearrangement occured
-        :param classification: The classification associated with the called fusion
-        :param inframe: A boolean indicating if the fusion occurred in-frame
-        :param spanning_reads: The number of deteced reads that span the junction
-            bewtween the two transcript. Although described as spanning reads, this
-            aligns with our defintion of split reads (i.e. reads that have sequence
-            belonging to the fusion partners)
-        :param spanning_pairs: The number of detected reads that align entirely on
-            either side of the breakpoint
+        :param jaffa: A JAFFA caller object
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
         """
-        genes = fusion_genes.split(":")
+        genes = jaffa.fusion_genes.split(":")
         gene_5prime_element = self._get_gene_element(genes[0], Caller.JAFFA)
         gene_3prime_element = self._get_gene_element(genes[1], Caller.JAFFA)
         gene_5prime = gene_5prime_element.gene.label
@@ -236,8 +215,8 @@ async def from_jaffa(
 
         tr_5prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(chrom1, rb),
-            seg_end_genomic=base1,
+            genomic_ac=self._get_genomic_ac(jaffa.chrom1, rb),
+            seg_end_genomic=jaffa.base1,
             gene=gene_5prime,
             coordinate_type=coordinate_type,
             starting_assembly=rb,
@@ -245,24 +224,24 @@ async def from_jaffa(
 
         tr_3prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(chrom2, rb),
-            seg_start_genomic=base2,
+            genomic_ac=self._get_genomic_ac(jaffa.chrom2, rb),
+            seg_start_genomic=jaffa.base2,
             gene=gene_3prime,
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
 
-        if rearrangement:
+        if jaffa.rearrangement:
             ce = CausativeEvent(
                 eventType=EventType("rearrangement"),
-                eventDescription=classification,
+                eventDescription=jaffa.classification,
             )
         else:
             ce = None
 
         read_data = ReadData(
-            split=SplitReads(splitReads=spanning_reads),
-            spanning=SpanningReads(spanningReads=spanning_pairs),
+            split=SplitReads(splitReads=jaffa.spanning_reads),
+            spanning=SpanningReads(spanningReads=jaffa.spanning_pairs),
         )
 
         return self._format_fusion(
@@ -271,7 +250,7 @@ async def from_jaffa(
             tr_5prime,
             tr_3prime,
             ce,
-            inframe,
+            jaffa.inframe,
             reads=read_data,
         )
 
diff --git a/tests/test_translators.py b/tests/test_translators.py
index 0d4beb3..b06194a 100644
--- a/tests/test_translators.py
+++ b/tests/test_translators.py
@@ -4,6 +4,7 @@
 import pytest
 from cool_seq_tool.schemas import Assembly, CoordinateType
 
+from fusor.fusion_caller_models import JAFFA
 from fusor.models import (
     AnchoredReads,
     AssayedFusion,
@@ -148,28 +149,21 @@ async def test_jaffa(
 ):
     """Test JAFFA translator"""
     # Test exonic breakpoint
-    fusion_genes = "TPM3:PDGFRB"
-    chrom1 = "chr1"
-    base1 = 154170465
-    chrom2 = "chr5"
-    base2 = 150126612
-    rearrangement = True
-    classification = "HighConfidence"
-    inframe = True
-    spanning_reads = 100
-    spanning_pairs = 80
+    jaffa = JAFFA(
+        fusion_genes="TPM3:PDGFRB",
+        chrom1="chr1",
+        base1=154170465,
+        chrom2="chr5",
+        base2=150126612,
+        rearrangement=True,
+        classification="HighConfidence",
+        inframe=True,
+        spanning_reads=100,
+        spanning_pairs=80,
+    )
 
     jaffa_fusor = await translator_instance.from_jaffa(
-        fusion_genes,
-        chrom1,
-        base1,
-        chrom2,
-        base2,
-        rearrangement,
-        classification,
-        inframe,
-        spanning_reads,
-        spanning_pairs,
+        jaffa,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -182,28 +176,21 @@ async def test_jaffa(
     assert jaffa_fusor.readData == fusion_data_example.readData
 
     # Test non-exonic breakpoint
-    fusion_genes = "TPM3:PDGFRB"
-    chrom1 = "chr1"
-    base1 = 154173079
-    chrom2 = "chr5"
-    base2 = 150127173
-    rearrangement = True
-    classification = "HighConfidence"
-    inframe = True
-    spanning_reads = 100
-    spanning_pairs = 80
+    jaffa = JAFFA(
+        fusion_genes="TPM3:PDGFRB",
+        chrom1="chr1",
+        base1=154173079,
+        chrom2="chr5",
+        base2=150127173,
+        rearrangement=True,
+        classification="HighConfidence",
+        inframe=True,
+        spanning_reads=100,
+        spanning_pairs=80,
+    )
 
     jaffa_fusor_nonexonic = await translator_instance.from_jaffa(
-        fusion_genes,
-        chrom1,
-        base1,
-        chrom2,
-        base2,
-        rearrangement,
-        classification,
-        inframe,
-        spanning_reads,
-        spanning_pairs,
+        jaffa,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )

From 2a7fbf034225f4fe5c64d25b1db22d96722d38f9 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 16 Jan 2025 10:51:52 -0500
Subject: [PATCH 04/20] Add STAR-Fusion

---
 src/fusor/fusion_caller_models.py | 39 +++++++++++++++++++++--
 src/fusor/translator.py           | 52 +++++++------------------------
 tests/test_translators.py         | 51 +++++++++++++-----------------
 3 files changed, 69 insertions(+), 73 deletions(-)

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index ab34e75..7430d5b 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -10,16 +10,24 @@ class BaseModelForbidExtra(BaseModel, extra="forbid"):
     """Base Pydantic model class with extra values forbidden."""
 
 
-class FusionCallerTypes(str, Enum):
-    """Define FusionCaller type values"""
+class Caller(str, Enum):
+    """Define different supported callers"""
 
     JAFFA = "JAFFA"
+    STAR_FUSION = "STAR-Fusion"
+    FUSION_CATCHER = "FusionCatcher"
+    FUSION_MAP = "FusionMap"
+    ARRIBA = "Arriba"
+    CICERO = "CICERO"
+    MAPSPLICE = "MapSplice"
+    ENFUSION = "EnFusion"
+    GENIE = "GENIE"
 
 
 class JAFFA(BaseModel):
     """Define parameters for JAFFA model"""
 
-    type: Literal[FusionCallerTypes.JAFFA] = FusionCallerTypes.JAFFA
+    type: Literal[Caller.JAFFA] = Caller.JAFFA
     fusion_genes: str = Field(
         ..., description="A string containing the two fusion partners"
     )
@@ -52,3 +60,28 @@ class JAFFA(BaseModel):
         ...,
         description="The number of detected reads that align entirely on either side of the breakpoint",
     )
+
+
+class STARFusion(BaseModel):
+    """Define parameters for STAR-Fusion model"""
+
+    type: Literal[Caller.STAR_FUSION] = Caller.STAR_FUSION
+    left_gene: str = Field(..., description="The gene indicated in the LeftGene column")
+    right_gene: str = Field(
+        ..., description="The gene indicated in the RightGene column"
+    )
+    left_breakpoint: str = Field(
+        ..., description="The breakpoint indicated in the LeftBreakpoint column"
+    )
+    right_breakpoint: str = Field(
+        ..., description="The breakpoint indicated in the RightBreakpoint column"
+    )
+    annots: str = Field(..., description="The annotations associated with the fusion")
+    junction_read_count: int = Field(
+        ...,
+        description="The number of RNA-seq fragments that split the junction between the two transcript segments (from STAR-Fusion documentation)",
+    )
+    spanning_frag_count: int = Field(
+        ...,
+        description="The number of RNA-seq fragments that encompass the fusion junction such that one read of the pair aligns to a different gene than the other paired-end read of that fragment (from STAR-Fusion documentation)",
+    )
diff --git a/src/fusor/translator.py b/src/fusor/translator.py
index 1e1fe8a..c6770e3 100644
--- a/src/fusor/translator.py
+++ b/src/fusor/translator.py
@@ -3,12 +3,11 @@
 """
 
 import logging
-from enum import Enum
 
 import polars as pl
 from cool_seq_tool.schemas import Assembly, CoordinateType
 
-from fusor.fusion_caller_models import JAFFA
+from fusor.fusion_caller_models import JAFFA, Caller, STARFusion
 from fusor.fusor import FUSOR
 from fusor.models import (
     AnchoredReads,
@@ -28,20 +27,6 @@
 _logger = logging.getLogger(__name__)
 
 
-class Caller(str, Enum):
-    """Define different supported callers"""
-
-    JAFFA = "JAFFA"
-    STAR_FUSION = "STAR-Fusion"
-    FUSION_CATCHER = "FusionCatcher"
-    FUSION_MAP = "FusionMap"
-    ARRIBA = "Arriba"
-    CICERO = "CICERO"
-    MAPSPLICE = "MapSplice"
-    ENFUSION = "EnFusion"
-    GENIE = "GENIE"
-
-
 class Translator:
     """Class for translating outputs from different fusion detection algorithms
     to FUSOR AssayedFusion objects
@@ -256,34 +241,19 @@ async def from_jaffa(
 
     async def from_star_fusion(
         self,
-        left_gene: str,
-        right_gene: str,
-        left_breakpoint: str,
-        right_breakpoint: str,
-        annots: str,
-        junction_read_count: int,
-        spanning_frag_count: int,
+        star_fusion: STARFusion,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
         """Parse STAR-Fusion output to create AssayedFusion object
 
-        :param left_gene: The gene indicated in the LeftGene column
-        :param right_gene: The gene indicated in the RightGene column
-        :param left_breakpoint: The gene indicated in the LeftBreakpoint column
-        :param right_breakpoint: The gene indicated in the RightBreakpoint column
-        :param annots: The annotations associated with the fusion
-        :param junction_read_count: The number of RNA-seq fragments that split the
-            junction between the two transcript segments (from STAR-Fusion documentation)
-        :param spanning_frag_count: The number of RNA-seq fragments that encompass the
-            fusion junction such that one read of the pair aligns to a different gene
-            than the other paired-end read of that fragment (from STAR-Fusion documentation)
+        :param star_fusion: A STARFusion caller object
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
         """
-        gene1 = left_gene.split("^")[0]
-        gene2 = right_gene.split("^")[0]
+        gene1 = star_fusion.left_gene.split("^")[0]
+        gene2 = star_fusion.right_gene.split("^")[0]
         gene_5prime_element = self._get_gene_element(gene1, Caller.STAR_FUSION)
         gene_3prime_element = self._get_gene_element(gene2, Caller.STAR_FUSION)
         gene_5prime = gene_5prime_element.gene.label
@@ -292,8 +262,8 @@ async def from_star_fusion(
         if not self._are_fusion_partners_different(gene_5prime, gene_3prime):
             return None
 
-        five_prime = left_breakpoint.split(":")
-        three_prime = right_breakpoint.split(":")
+        five_prime = star_fusion.left_breakpoint.split(":")
+        three_prime = star_fusion.right_breakpoint.split(":")
 
         tr_5prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
@@ -313,10 +283,12 @@ async def from_star_fusion(
             starting_assembly=rb,
         )
 
-        ce = self._get_causative_event(five_prime[0], three_prime[0], ",".join(annots))
+        ce = self._get_causative_event(
+            five_prime[0], three_prime[0], ",".join(star_fusion.annots)
+        )
         read_data = ReadData(
-            split=SplitReads(splitReads=junction_read_count),
-            spanning=SpanningReads(spanningReads=spanning_frag_count),
+            split=SplitReads(splitReads=star_fusion.junction_read_count),
+            spanning=SpanningReads(spanningReads=star_fusion.spanning_frag_count),
         )
 
         return self._format_fusion(
diff --git a/tests/test_translators.py b/tests/test_translators.py
index b06194a..1eddcd2 100644
--- a/tests/test_translators.py
+++ b/tests/test_translators.py
@@ -4,7 +4,7 @@
 import pytest
 from cool_seq_tool.schemas import Assembly, CoordinateType
 
-from fusor.fusion_caller_models import JAFFA
+from fusor.fusion_caller_models import JAFFA, Caller, STARFusion
 from fusor.models import (
     AnchoredReads,
     AssayedFusion,
@@ -14,7 +14,6 @@
     SpanningReads,
     SplitReads,
 )
-from fusor.translator import Caller
 
 
 @pytest.fixture(scope="module")
@@ -209,22 +208,18 @@ async def test_star_fusion(
 ):
     """Test STAR-Fusion translator"""
     # Test exonic breakpoints
-    left_gene = "TPM3^ENSG00000143549.19"
-    right_gene = "PDGFRB^ENSG00000113721"
-    left_breakpoint = "chr1:154170465:-"
-    right_breakpoint = "chr5:150126612:-"
-    annots = '["INTERCHROMOSOMAL]'
-    junction_read_count = 100
-    spanning_frag_count = 80
+    star_fusion = STARFusion(
+        left_gene="TPM3^ENSG00000143549.19",
+        right_gene="PDGFRB^ENSG00000113721",
+        left_breakpoint="chr1:154170465:-",
+        right_breakpoint="chr5:150126612:-",
+        annots='["INTERCHROMOSOMAL]',
+        junction_read_count=100,
+        spanning_frag_count=80,
+    )
 
     star_fusion_fusor = await translator_instance.from_star_fusion(
-        left_gene,
-        right_gene,
-        left_breakpoint,
-        right_breakpoint,
-        annots,
-        junction_read_count,
-        spanning_frag_count,
+        star_fusion,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -237,22 +232,18 @@ async def test_star_fusion(
     assert star_fusion_fusor.readData == fusion_data_example.readData
 
     # Test non-exonic breakpoints
-    left_gene = "TPM3^ENSG00000143549.19"
-    right_gene = "PDGFRB^ENSG00000113721"
-    left_breakpoint = "chr1:154173079:-"
-    right_breakpoint = "chr5:150127173:-"
-    annots = '["INTERCHROMOSOMAL]'
-    junction_read_count = 100
-    spanning_frag_count = 80
+    star_fusion = STARFusion(
+        left_gene="TPM3^ENSG00000143549.19",
+        right_gene="PDGFRB^ENSG00000113721",
+        left_breakpoint="chr1:154173079:-",
+        right_breakpoint="chr5:150127173:-",
+        annots='["INTERCHROMOSOMAL]',
+        junction_read_count=100,
+        spanning_frag_count=80,
+    )
 
     star_fusion_fusor_nonexonic = await translator_instance.from_star_fusion(
-        left_gene,
-        right_gene,
-        left_breakpoint,
-        right_breakpoint,
-        annots,
-        junction_read_count,
-        spanning_frag_count,
+        star_fusion,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )

From f52b36f805850efac89fc36e2c4b322408f007aa Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 16 Jan 2025 12:19:42 -0500
Subject: [PATCH 05/20] Store work

---
 src/fusor/fusion_caller_models.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 7430d5b..b0bc7b2 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -85,3 +85,24 @@ class STARFusion(BaseModel):
         ...,
         description="The number of RNA-seq fragments that encompass the fusion junction such that one read of the pair aligns to a different gene than the other paired-end read of that fragment (from STAR-Fusion documentation)",
     )
+
+
+class FusionCatcher(BaseModel):
+    """Define parameters for FusionCatcher model"""
+
+    type: Literal[Caller.FUSION_CATCHER] = Caller.FUSION_CATCHER
+    five_prime_partner: str = Field(
+        ..., description="Gene symbol for the 5' fusion partner"
+    )
+    three_prime_partner: str = Field(
+        ..., description="Gene symbol for the 3' fusion partner"
+    )
+    five_prime_fusion_point: str = Field(
+        ...,
+        description="Chromosomal position for the 5' end of the fusion junction. This coordinate is 1-based",
+    )
+    three_prime_fusion_point: str
+    predicted_effect: str
+    spanning_unique_reads: int
+    spanning_reads: int
+    fusion_sequence: str

From cf1dafa053881355c66444eab482d90e8da3b919 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 16 Jan 2025 14:03:38 -0500
Subject: [PATCH 06/20] Add pydantic classes for remaining translators

---
 src/fusor/fusion_caller_models.py | 140 ++++++++++-
 src/fusor/translator.py           | 262 +++++++-------------
 tests/test_translators.py         | 392 ++++++++----------------------
 3 files changed, 328 insertions(+), 466 deletions(-)

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index b0bc7b2..309b5c4 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -101,8 +101,138 @@ class FusionCatcher(BaseModel):
         ...,
         description="Chromosomal position for the 5' end of the fusion junction. This coordinate is 1-based",
     )
-    three_prime_fusion_point: str
-    predicted_effect: str
-    spanning_unique_reads: int
-    spanning_reads: int
-    fusion_sequence: str
+    three_prime_fusion_point: str = Field(
+        ...,
+        description="Chromosomal position for the 3' end of the fusion junction. This coordinate is 1-based",
+    )
+    predicted_effect: str = Field(
+        ...,
+        description="The predicted effect of the fusion event, created using annotation from the Ensembl database",
+    )
+    spanning_unique_reads: int = Field(
+        ..., description="The number of unique reads that map on the fusion junction"
+    )
+    spanning_reads: int = Field(
+        ..., description="The number of paired reads that support the fusion"
+    )
+    fusion_sequence: str = Field(
+        ..., description="The inferred sequence around the fusion junction"
+    )
+
+
+class Arriba(BaseModel):
+    """Define parameters for Arriba model"""
+
+    type: Literal[Caller.ARRIBA] = Caller.ARRIBA
+    gene1: str = Field(..., description="The 5' gene fusion partner")
+    gene2: str = Field(..., description="The 3' gene fusion partner")
+    strand1: str = Field(
+        ..., description="The strand information for the 5' gene fusion partner"
+    )
+    strand2: str = Field(
+        ..., description="The strand information for the 3' gene fusion partner"
+    )
+    breakpoint1: str = Field(..., description="The chromosome and breakpoint for gene1")
+    breakpoint2: str = Field(..., description="The chromosome and breakpoint for gene2")
+    event: str = Field(..., description=" An inference about the type of fusion event")
+    confidence: str = Field(
+        ..., description="A metric describing the confidence of the fusion prediction"
+    )
+    direction1: str = Field(
+        ...,
+        description="A description that indicates if the transcript segment starts or ends at breakpoint1",
+    )
+    direction2: str = Field(
+        ...,
+        description="A description that indicates if the transcript segment starts or ends at breakpoint2",
+    )
+    rf: str = Field(
+        ...,
+        description="A description if the reading frame is preserved for the fusion",
+    )
+    split_reads1: int = Field(
+        ..., description="Number of supporting split fragments with anchor in gene1"
+    )
+    split_reads2: int = Field(
+        ..., description="Number of supporting split fragments with anchor in gene2"
+    )
+    discordant_mates: int = Field(
+        ..., description="Number of discordant mates supporting the fusion"
+    )
+    coverage1: int = Field(
+        ..., description="Number of fragments retained near breakpoint1"
+    )
+    coverage2: int = Field(
+        ..., description="Number of fragments retained near breakpoint2"
+    )
+    fusion_transcript: str = Field(..., description="The assembled fusion transcript")
+
+
+class Cicero(BaseModel):
+    """Define parameters for CICERO model"""
+
+    type: Literal[Caller.CICERO] = Caller.CICERO
+    gene_5prime: str = Field(..., description="The gene symbol for the 5' partner")
+    gene_3prime: str = Field(..., description="The gene symbol for the 3' partner")
+    chr_5prime: str = Field(..., description="The chromosome for the 5' partner")
+    chr_3prime: str = Field(..., description="The chromosome for the 3' partner")
+    pos_5prime: int = Field(
+        ..., description="The genomic breakpoint for the 5' partner"
+    )
+    pos_3prime: int = Field(
+        ..., description="The genomic breakpoint for the 3' partner"
+    )
+    sv_ort: str = Field(
+        ...,
+        description="Whether the mapping orientation of assembled contig (driven by structural variation) has confident biological meaning",
+    )
+    event_type: str = Field(
+        ..., description="The structural variation event that created the called fusion"
+    )
+    reads_5prime: int = Field(
+        ...,
+        description="The number of reads that support the breakpoint for the 5' partner",
+    )
+    reads_3prime: int = Field(
+        ...,
+        description="The number of reads that support the breakpoint for the 3' partner",
+    )
+    coverage_5prime: int = Field(
+        ..., description="The fragment coverage at the 5' breakpoint"
+    )
+    coverage_3prime: int = Field(
+        ..., description="The fragment coverage at the 3' breakpoint"
+    )
+    contig: str = Field(..., description="The assembled contig sequence for the fusion")
+
+
+class EnFusion(BaseModel):
+    """Define parameters for EnFusion model"""
+
+    type: Literal[Caller.ENFUSION] = Caller.ENFUSION
+    gene_5prime: str = Field(..., description="The 5' gene fusion partner")
+    gene_3prime: str = Field(..., description="The 3' gene fusion partner")
+    chr_5prime: int = Field(..., description="The 5' gene fusion partner chromosome")
+    chr_3prime: int = Field(..., description="The 3' gene fusion partner chromosome")
+    break_5prime: int = Field(
+        ..., description="The 5' gene fusion partner genomic breakpoint"
+    )
+    break_3prime: int = Field(
+        ..., description="The 3' gene fusion partner genomic breakpoint"
+    )
+
+
+class Genie(BaseModel):
+    """Define parameters for Genie model"""
+
+    type: Literal[Caller.GENIE] = Caller.GENIE
+    site1_hugo: str = Field(..., description="The HUGO symbol reported at site 1")
+    site2_hugo: str = Field(..., description="The HUGO symbol reported at site 2")
+    site1_chrom: int = Field(..., description="The chromosome reported at site 1")
+    site2_chrom: int = Field(..., description="The chromosome reported at site 2")
+    site1_pos: int = Field(..., description="The breakpoint reported at site 1")
+    site2_pos: int = Field(..., description="The breakpoint reported at site 2")
+    annot: str = Field(..., description="The annotation for the fusion event")
+    reading_frame: str = Field(
+        ..., description="The reading frame status of the fusion"
+    )
diff --git a/src/fusor/translator.py b/src/fusor/translator.py
index c6770e3..86998cb 100644
--- a/src/fusor/translator.py
+++ b/src/fusor/translator.py
@@ -7,7 +7,16 @@
 import polars as pl
 from cool_seq_tool.schemas import Assembly, CoordinateType
 
-from fusor.fusion_caller_models import JAFFA, Caller, STARFusion
+from fusor.fusion_caller_models import (
+    JAFFA,
+    Arriba,
+    Caller,
+    Cicero,
+    EnFusion,
+    FusionCatcher,
+    Genie,
+    STARFusion,
+)
 from fusor.fusor import FUSOR
 from fusor.models import (
     AnchoredReads,
@@ -302,48 +311,30 @@ async def from_star_fusion(
 
     async def from_fusion_catcher(
         self,
-        five_prime_partner: str,
-        three_prime_partner: str,
-        five_prime_fusion_point: str,
-        three_prime_fusion_point: str,
-        predicted_effect: str,
-        spanning_unique_reads: int,
-        spanning_reads: int,
-        fusion_sequence: str,
+        fusion_catcher: FusionCatcher,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
         """Parse FusionCatcher output to create AssayedFusion object
 
-        :param five_prime_partner: Gene symbol for the 5' fusion partner
-        :param three_prime_partner: Gene symbol for the 3' fusion partner
-        :param five_prime_fusion_point: Chromosomal position for the 5' end of the
-        fusion junction. This coordinate is 1-based
-        :param three_prime_fusion_point:  Chromosomal position for the 3' end of the
-        fusion junction. This coordinate is 1-based
-        :param predicted_effect: The predicted effect of the fusion event, created
-        using annotation from the Ensembl database
-        :param spanning_unique_reads: The number of unique reads that map on the fusion
-            junction
-        :param spanning_reads: The number of paired reads that support the fusion
-        :param fusion_sequence: The inferred sequence around the fusion junction
+        :param fusion_catcher: A FusionCatcher object
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
         """
         gene_5prime_element = self._get_gene_element(
-            five_prime_partner, Caller.FUSION_CATCHER
+            fusion_catcher.five_prime_partner, Caller.FUSION_CATCHER
         )
         gene_3prime_element = self._get_gene_element(
-            three_prime_partner, Caller.FUSION_CATCHER
+            fusion_catcher.three_prime_partner, Caller.FUSION_CATCHER
         )
         if not self._are_fusion_partners_different(
             gene_5prime_element.gene.label, gene_3prime_element.gene.label
         ):
             return None
 
-        five_prime = five_prime_fusion_point.split(":")
-        three_prime = three_prime_fusion_point.split(":")
+        five_prime = fusion_catcher.five_prime_fusion_point.split(":")
+        three_prime = fusion_catcher.three_prime_fusion_point.split(":")
 
         tr_5prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
@@ -363,12 +354,14 @@ async def from_fusion_catcher(
             starting_assembly=rb,
         )
 
-        ce = self._get_causative_event(five_prime[0], three_prime[0], predicted_effect)
+        ce = self._get_causative_event(
+            five_prime[0], three_prime[0], fusion_catcher.predicted_effect
+        )
         read_data = ReadData(
-            split=SplitReads(splitReads=spanning_unique_reads),
-            spanning=SpanningReads(spanningReads=spanning_reads),
+            split=SplitReads(splitReads=fusion_catcher.spanning_unique_reads),
+            spanning=SpanningReads(spanningReads=fusion_catcher.spanning_reads),
         )
-        contig = ContigSequence(contig=fusion_sequence)
+        contig = ContigSequence(contig=fusion_catcher.fusion_sequence)
 
         return self._format_fusion(
             gene_5prime_element,
@@ -440,47 +433,13 @@ async def from_fusion_map(
 
     async def from_arriba(
         self,
-        gene1: str,
-        gene2: str,
-        strand1: str,
-        strand2: str,
-        breakpoint1: str,
-        breakpoint2: str,
-        event: str,
-        confidence: str,
-        direction1: str,
-        direction2: str,
-        rf: str,
-        split_reads1: int,
-        split_reads2: int,
-        discordant_mates: int,
-        coverage1: int,
-        coverage2: int,
-        fusion_transcript: str,
+        arriba: Arriba,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
         """Parse Arriba output to create AssayedFusion object
 
-        :param gene1: The 5' gene fusion partner
-        :param gene2: The 3' gene fusion partner
-        :param strand1: The strand information for the 5' gene fusion partner
-        :param strand2: The strand information for the 3' gene fusion partner
-        :param breakpoint1: The chromosome and breakpoint for gene1
-        :param breakpoint2: The chromosome and breakpoint for gene2
-        :param event: An inference about the type of fusion event
-        :param confidence: A metric describing the confidence of the fusion prediction
-        :param direction1: A description that indicates if the transcript segment
-            starts or ends at breakpoint1
-        :param direction2: A description that indicates if the transcript segment
-            starts or ends at breakpoint2
-        :param rf: A description if the reading frame is preserved for the fusion
-        :param split_reads1: Number of supporting split fragments with anchor in gene1
-        :param split_reads2: Number of supporting split fragments with anchor in gene2
-        :param discordant_mates: Number of discordant mates supporting the fusion
-        :param coverage1: Number of fragments retained near breakpoint1
-        :param coverage2: Number of fragments retained near breakpoint2
-        :param fusion_transcript: The assembled fusion transcript
+        :param arriba: An Arriba class instance
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
@@ -488,27 +447,27 @@ async def from_arriba(
         # Arriba reports two gene symbols if a breakpoint occurs in an intergenic
         # space. We select the gene symbol with the smallest distance from the
         # breakpoint.
-        gene_5prime_element = self._get_gene_element(gene1, "arriba")
-        gene_3prime_element = self._get_gene_element(gene2, "arriba")
+        gene_5prime_element = self._get_gene_element(arriba.gene1, "arriba")
+        gene_3prime_element = self._get_gene_element(arriba.gene2, "arriba")
         gene_5prime = gene_5prime_element.gene.label
         gene_3prime = gene_3prime_element.gene.label
 
         if not self._are_fusion_partners_different(gene_5prime, gene_3prime):
             return None
 
-        strand1 = strand1.split("/")[1]  # Determine strand that is transcribed
-        strand2 = strand2.split("/")[1]  # Determine strand that is transcribed
+        strand1 = arriba.strand1.split("/")[1]  # Determine strand that is transcribed
+        strand2 = arriba.strand2.split("/")[1]  # Determine strand that is transcribed
         if strand1 == "+":
-            gene1_seg_start = direction1 == "upstream"
+            gene1_seg_start = arriba.direction1 == "upstream"
         else:
-            gene1_seg_start = direction1 == "downstream"
+            gene1_seg_start = arriba.direction1 == "downstream"
         if strand2 == "+":
-            gene2_seg_start = direction2 == "upstream"
+            gene2_seg_start = arriba.direction2 == "upstream"
         else:
-            gene2_seg_start = direction2 == "downstream"
+            gene2_seg_start = arriba.direction2 == "downstream"
 
-        breakpoint1 = breakpoint1.split(":")
-        breakpoint2 = breakpoint2.split(":")
+        breakpoint1 = arriba.breakpoint1.split(":")
+        breakpoint2 = arriba.breakpoint2.split(":")
 
         tr_5prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
@@ -516,8 +475,8 @@ async def from_arriba(
             seg_start_genomic=int(breakpoint1[1]) if gene1_seg_start else None,
             seg_end_genomic=int(breakpoint1[1]) if not gene1_seg_start else None,
             gene=gene_5prime,
-            coverage=BreakpointCoverage(fragmentCoverage=coverage1),
-            reads=AnchoredReads(reads=split_reads1),
+            coverage=BreakpointCoverage(fragmentCoverage=arriba.coverage1),
+            reads=AnchoredReads(reads=arriba.split_reads1),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
@@ -528,8 +487,8 @@ async def from_arriba(
             seg_start_genomic=int(breakpoint2[1]) if gene2_seg_start else None,
             seg_end_genomic=int(breakpoint2[1]) if not gene2_seg_start else None,
             gene=gene_3prime,
-            coverage=BreakpointCoverage(fragmentCoverage=coverage2),
-            reads=AnchoredReads(reads=split_reads2),
+            coverage=BreakpointCoverage(fragmentCoverage=arriba.coverage2),
+            reads=AnchoredReads(reads=arriba.split_reads2),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
@@ -537,17 +496,19 @@ async def from_arriba(
         ce = (
             CausativeEvent(
                 eventType=EventType("read-through"),
-                eventDescription=confidence,
+                eventDescription=arriba.confidence,
             )
-            if "read_through" in event
+            if "read_through" in arriba.event
             else CausativeEvent(
                 eventType=EventType("rearrangement"),
-                eventDescription=confidence,
+                eventDescription=arriba.confidence,
             )
         )
-        rf = bool(rf == "in-frame") if rf != "." else None
-        read_data = ReadData(spanning=SpanningReads(spanningReads=discordant_mates))
-        contig = ContigSequence(contig=fusion_transcript)
+        rf = bool(arriba.rf == "in-frame") if arriba.rf != "." else None
+        read_data = ReadData(
+            spanning=SpanningReads(spanningReads=arriba.discordant_mates)
+        )
+        contig = ContigSequence(contig=arriba.fusion_transcript)
 
         return self._format_fusion(
             gene_5prime_element,
@@ -562,38 +523,13 @@ async def from_arriba(
 
     async def from_cicero(
         self,
-        gene_5prime: str,
-        gene_3prime: str,
-        chr_5prime: str,
-        chr_3prime: str,
-        pos_5prime: int,
-        pos_3prime: int,
-        sv_ort: str,
-        event_type: str,
-        reads_a: int,
-        reads_b: int,
-        coverage_a: int,
-        coverage_b: int,
-        contig: str,
+        cicero: Cicero,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion | str:
         """Parse CICERO output to create AssayedFusion object
 
-        :param gene_5prime: The gene symbol for the 5' partner
-        :param gene_3prime: The gene symbol for the 3' partner
-        :param chr_5prime: The chromosome for the 5' partner
-        :param chr_3prime: The chromosome for the 3' partner
-        :param pos_5prime: The genomic breakpoint for the 5' partner
-        :param pos_3prime: The genomic breakpoint for the 3' partner
-        :param sv_ort: Whether the mapping orientation of assembled contig (driven by
-            structural variation) has confident biological meaning
-        :param event_type: The structural variation event that created the called fusion
-        :param readsA: The number of reads that support the breakpoint for the 5' partner
-        :param readsB: The number of reads that support the breakpoint for the 3' partner
-        :param coverageA: The fragment coverage at the 5' breakpoint
-        :param coverageB: The fragment coverage at the 3' breakpoint
-        :param contig: The assembled contig sequence for the fusion
+        :param cicero: A Cicero class instance
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
@@ -602,20 +538,20 @@ async def from_cicero(
         # gene symbols for `gene_5prime` or `gene_3prime`, which are separated by a comma. As
         # there is not a precise way to resolve this ambiguity, we do not process
         # these events
-        if "," in gene_5prime or "," in gene_3prime:
+        if "," in cicero.gene_5prime or "," in cicero.gene_3prime:
             msg = "Ambiguous gene symbols are reported by CICERO for at least one of the fusion partners"
             _logger.warning(msg)
             return msg
 
         # Check CICERO annotation regarding the confidence that the called fusion
         # has biological meaning
-        if sv_ort != ">":
+        if cicero.sv_ort != ">":
             msg = "CICERO annotation indicates that this event does not have confident biological meaning"
             _logger.warning(msg)
             return msg
 
-        gene_5prime_element = self._get_gene_element(gene_5prime, "cicero")
-        gene_3prime_element = self._get_gene_element(gene_3prime, "cicero")
+        gene_5prime_element = self._get_gene_element(cicero.gene_5prime, "cicero")
+        gene_3prime_element = self._get_gene_element(cicero.gene_3prime, "cicero")
         gene_5prime = gene_5prime_element.gene.label
         gene_3prime = gene_3prime_element.gene.label
 
@@ -624,37 +560,37 @@ async def from_cicero(
 
         tr_5prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(chr_5prime, rb),
-            seg_end_genomic=pos_5prime,
+            genomic_ac=self._get_genomic_ac(cicero.chr_5prime, rb),
+            seg_end_genomic=cicero.pos_5prime,
             gene=gene_5prime,
-            coverage=BreakpointCoverage(fragmentCoverage=coverage_a),
-            reads=AnchoredReads(reads=reads_a),
+            coverage=BreakpointCoverage(fragmentCoverage=cicero.coverage_5prime),
+            reads=AnchoredReads(reads=cicero.reads_5prime),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
 
         tr_3prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(chr_3prime, rb),
-            seg_start_genomic=pos_3prime,
+            genomic_ac=self._get_genomic_ac(cicero.chr_3prime, rb),
+            seg_start_genomic=cicero.pos_3prime,
             gene=gene_3prime,
-            coverage=BreakpointCoverage(fragmentCoverage=coverage_b),
-            reads=AnchoredReads(reads=reads_b),
+            coverage=BreakpointCoverage(fragmentCoverage=cicero.coverage_3prime),
+            reads=AnchoredReads(reads=cicero.reads_3prime),
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
 
-        if event_type == "read_through":
+        if cicero.event_type == "read_through":
             ce = CausativeEvent(
                 eventType=EventType("read-through"),
-                eventDescription=event_type,
+                eventDescription=cicero.event_type,
             )
         else:
             ce = CausativeEvent(
                 eventType=EventType("rearrangement"),
-                eventDescription=event_type,
+                eventDescription=cicero.event_type,
             )
-        contig = ContigSequence(contig=contig)
+        contig = ContigSequence(contig=cicero.contig)
 
         return self._format_fusion(
             gene_5prime_element,
@@ -710,29 +646,19 @@ async def from_mapsplice(
 
     async def from_enfusion(
         self,
-        gene_5prime: str,
-        gene_3prime: str,
-        chr_5prime: int,
-        chr_3prime: int,
-        break_5prime: int,
-        break_3prime: int,
+        enfusion: EnFusion,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
         """Parse EnFusion output to create AssayedFusion object
 
-        :param gene_5prime: The 5' gene fusion partner
-        :param gene_3prime: The 3' gene fusion partner
-        :param chr_5prime: The 5' gene fusion partner chromosome
-        :param chr_3prime: The 3' gene fusion partner chromosome
-        :param break_5prime: The 5' gene fusion partner genomic breakpoint
-        :param break_3prime: The 3' gene fusion partner genomic breakpoint
-        :param rb: The reference build used to call the fusion
+        :param enfusion: An EnFusion class instance
         :param coordinate_type: If the coordinate is inter-residue or residue
+        :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
         """
-        gene_5prime_element = self._get_gene_element(gene_5prime, "enfusion")
-        gene_3prime_element = self._get_gene_element(gene_3prime, "enfusion")
+        gene_5prime_element = self._get_gene_element(enfusion.gene_5prime, "enfusion")
+        gene_3prime_element = self._get_gene_element(enfusion.gene_3prime, "enfusion")
         gene_5prime = gene_5prime_element.gene.label
         gene_3prime = gene_3prime_element.gene.label
 
@@ -741,8 +667,8 @@ async def from_enfusion(
 
         tr_5prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(chr_5prime, rb),
-            seg_end_genomic=break_5prime,
+            genomic_ac=self._get_genomic_ac(enfusion.chr_5prime, rb),
+            seg_end_genomic=enfusion.break_5prime,
             gene=gene_5prime,
             coordinate_type=coordinate_type,
             starting_assembly=rb,
@@ -750,16 +676,16 @@ async def from_enfusion(
 
         tr_3prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(chr_3prime, rb),
-            seg_start_genomic=break_3prime,
+            genomic_ac=self._get_genomic_ac(enfusion.chr_3prime, rb),
+            seg_start_genomic=enfusion.break_3prime,
             gene=gene_3prime,
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
 
         ce = self._get_causative_event(
-            chr_5prime,
-            chr_3prime,
+            enfusion.chr_5prime,
+            enfusion.chr_3prime,
         )
         return self._format_fusion(
             gene_5prime_element, gene_3prime_element, tr_5prime, tr_3prime, ce
@@ -767,33 +693,19 @@ async def from_enfusion(
 
     async def from_genie(
         self,
-        site1_hugo: str,
-        site2_hugo: str,
-        site1_chrom: int,
-        site2_chrom: int,
-        site1_pos: int,
-        site2_pos: int,
-        annot: str,
-        reading_frame: str,
+        genie: Genie,
         coordinate_type: CoordinateType,
         rb: Assembly,
     ) -> AssayedFusion:
         """Parse GENIE output to create AssayedFusion object
 
-        :param site1_hugo: The HUGO symbol reported at site 1
-        :param site2_hugo: The HUGO symbol reported at site 2
-        :param site1_chrom: The chromosome reported at site 1
-        :param site2_chrom: The chromosome reported at site 2
-        :param site1_pos: The breakpoint reported at site 1
-        :param site2_pos: The breakpoint reported at site 2
-        :param annot: The annotation for the fusion event
-        :param reading_frame: The reading frame status of the fusion
+        :param genie: A Genie class instance
         :param coordinate_type: If the coordinate is inter-residue or residue
         :param rb: The reference build used to call the fusion
         :return: An AssayedFusion object, if construction is successful
         """
-        gene_5prime_element = self._get_gene_element(site1_hugo, "genie")
-        gene_3prime_element = self._get_gene_element(site2_hugo, "genie")
+        gene_5prime_element = self._get_gene_element(genie.site1_hugo, "genie")
+        gene_3prime_element = self._get_gene_element(genie.site2_hugo, "genie")
         gene_5prime = gene_5prime_element.gene.label
         gene_3prime = gene_3prime_element.gene.label
 
@@ -802,8 +714,8 @@ async def from_genie(
 
         tr_5prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(site1_chrom, rb),
-            seg_end_genomic=site1_pos,
+            genomic_ac=self._get_genomic_ac(genie.site1_chrom, rb),
+            seg_end_genomic=genie.site1_pos,
             gene=gene_5prime,
             coordinate_type=coordinate_type,
             starting_assembly=rb,
@@ -811,19 +723,19 @@ async def from_genie(
 
         tr_3prime = await self.fusor.transcript_segment_element(
             tx_to_genomic_coords=False,
-            genomic_ac=self._get_genomic_ac(site2_chrom, rb),
-            seg_start_genomic=site2_pos,
+            genomic_ac=self._get_genomic_ac(genie.site2_chrom, rb),
+            seg_start_genomic=genie.site2_pos,
             gene=gene_3prime,
             coordinate_type=coordinate_type,
             starting_assembly=rb,
         )
 
         ce = self._get_causative_event(
-            site1_chrom,
-            site2_chrom,
-            annot,
+            genie.site1_chrom,
+            genie.site2_chrom,
+            genie.annot,
         )
-        rf = bool(reading_frame == "in frame")
+        rf = bool(genie.reading_frame == "in frame")
         return self._format_fusion(
             gene_5prime, gene_3prime, tr_5prime, tr_3prime, ce, rf
         )
diff --git a/tests/test_translators.py b/tests/test_translators.py
index 1eddcd2..6811a5d 100644
--- a/tests/test_translators.py
+++ b/tests/test_translators.py
@@ -4,7 +4,16 @@
 import pytest
 from cool_seq_tool.schemas import Assembly, CoordinateType
 
-from fusor.fusion_caller_models import JAFFA, Caller, STARFusion
+from fusor.fusion_caller_models import (
+    JAFFA,
+    Arriba,
+    Caller,
+    Cicero,
+    EnFusion,
+    FusionCatcher,
+    Genie,
+    STARFusion,
+)
 from fusor.models import (
     AnchoredReads,
     AssayedFusion,
@@ -175,18 +184,8 @@ async def test_jaffa(
     assert jaffa_fusor.readData == fusion_data_example.readData
 
     # Test non-exonic breakpoint
-    jaffa = JAFFA(
-        fusion_genes="TPM3:PDGFRB",
-        chrom1="chr1",
-        base1=154173079,
-        chrom2="chr5",
-        base2=150127173,
-        rearrangement=True,
-        classification="HighConfidence",
-        inframe=True,
-        spanning_reads=100,
-        spanning_pairs=80,
-    )
+    jaffa.base1 = 154173079
+    jaffa.base2 = 150127173
 
     jaffa_fusor_nonexonic = await translator_instance.from_jaffa(
         jaffa,
@@ -232,15 +231,8 @@ async def test_star_fusion(
     assert star_fusion_fusor.readData == fusion_data_example.readData
 
     # Test non-exonic breakpoints
-    star_fusion = STARFusion(
-        left_gene="TPM3^ENSG00000143549.19",
-        right_gene="PDGFRB^ENSG00000113721",
-        left_breakpoint="chr1:154173079:-",
-        right_breakpoint="chr5:150127173:-",
-        annots='["INTERCHROMOSOMAL]',
-        junction_read_count=100,
-        spanning_frag_count=80,
-    )
+    star_fusion.left_breakpoint = "chr1:154173079:-"
+    star_fusion.right_breakpoint = "chr5:150127173:-"
 
     star_fusion_fusor_nonexonic = await translator_instance.from_star_fusion(
         star_fusion,
@@ -266,24 +258,19 @@ async def test_fusion_catcher(
 ):
     """Test Fusion Catcher translator"""
     # Test exonic breakpoint
-    five_prime_partner = "TPM3"
-    three_prime_partner = "PDGFRB"
-    five_prime_fusion_point = "1:154170465:-"
-    three_prime_fusion_point = "5:150126612:-"
-    predicted_effect = "exonic(no-known-CDS)/exonic(no-known-CDS)"
-    spanning_unique_reads = 100
-    spanning_reads = 80
-    fusion_sequence = "CTAGATGAC*TACTACTA"
+    fusion_catcher = FusionCatcher(
+        five_prime_partner="TPM3",
+        three_prime_partner="PDGFRB",
+        five_prime_fusion_point="1:154170465:-",
+        three_prime_fusion_point="5:150126612:-",
+        predicted_effect="exonic(no-known-CDS)/exonic(no-known-CDS)",
+        spanning_unique_reads=100,
+        spanning_reads=80,
+        fusion_sequence="CTAGATGAC*TACTACTA",
+    )
 
     fusion_catcher_fusor = await translator_instance.from_fusion_catcher(
-        five_prime_partner,
-        three_prime_partner,
-        five_prime_fusion_point,
-        three_prime_fusion_point,
-        predicted_effect,
-        spanning_unique_reads,
-        spanning_reads,
-        fusion_sequence,
+        fusion_catcher,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -297,25 +284,11 @@ async def test_fusion_catcher(
     assert fusion_catcher_fusor.readData == fusion_data_example.readData
     assert fusion_catcher_fusor.contig == fusion_catcher_fusor.contig
 
-    # Test non-exonic breakpoint
-    five_prime_partner = "TPM3"
-    three_prime_partner = "PDGFRB"
-    five_prime_fusion_point = "1:154173079:-"
-    three_prime_fusion_point = "5:150127173:-"
-    predicted_effect = "exonic(no-known-CDS)/exonic(no-known-CDS)"
-    spanning_unique_reads = 100
-    spanning_reads = 80
-    fusion_sequence = "CTAGATGAC*TACTACTA"
+    fusion_catcher.five_prime_fusion_point = "1:154173079:-"
+    fusion_catcher.three_prime_fusion_point = "5:150127173:-"
 
     fusion_catcher_fusor_nonexonic = await translator_instance.from_fusion_catcher(
-        five_prime_partner,
-        three_prime_partner,
-        five_prime_fusion_point,
-        three_prime_fusion_point,
-        predicted_effect,
-        spanning_unique_reads,
-        spanning_reads,
-        fusion_sequence,
+        fusion_catcher,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -389,48 +362,34 @@ async def test_arriba(
 ):
     """Test Arriba translator"""
     # Test exonic breakpoint
-    gene1 = "TPM3"
-    gene2 = "PDGFRB"
-    strand1 = "-/-"
-    strand2 = "-/-"
-    breakpoint1 = "1:154170465"
-    breakpoint2 = "5:150126612"
-    event = "translocation"
-    confidence = "high"
-    direction1 = "upstream"
-    direction2 = "downstream"
-    rf = "in-frame"
-    split_reads1 = 100
-    split_reads2 = 95
-    discordant_mates = 30
-    coverage1 = 200
-    coverage2 = 190
-    fusion_transcript = "CTAGATGAC_TACTACTA|GTACTACT"
+    arriba = Arriba(
+        gene1="TPM3",
+        gene2="PDGFRB",
+        strand1="-/-",
+        strand2="-/-",
+        breakpoint1="1:154170465",
+        breakpoint2="5:150126612",
+        event="translocation",
+        confidence="high",
+        direction1="upstream",
+        direction2="downstream",
+        rf="in-frame",
+        split_reads1=100,
+        split_reads2=95,
+        discordant_mates=30,
+        coverage1=200,
+        coverage2=190,
+        fusion_transcript="CTAGATGAC_TACTACTA|GTACTACT",
+    )
 
     arriba_fusor = await translator_instance.from_arriba(
-        gene1,
-        gene2,
-        strand1,
-        strand2,
-        breakpoint1,
-        breakpoint2,
-        event,
-        confidence,
-        direction1,
-        direction2,
-        rf,
-        split_reads1,
-        split_reads2,
-        discordant_mates,
-        coverage1,
-        coverage2,
-        fusion_transcript,
+        arriba,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
     fusion_data_example = fusion_data_example(
         readData=ReadData(spanning=SpanningReads(spanningReads=30)),
-        contig=ContigSequence(contig=fusion_transcript),
+        contig=ContigSequence(contig=arriba.fusion_transcript),
     )
     fusion_data_example.structure[0].coverage = BreakpointCoverage(fragmentCoverage=200)
     fusion_data_example.structure[0].anchoredReads = AnchoredReads(reads=100)
@@ -441,47 +400,17 @@ async def test_arriba(
     assert arriba_fusor.contig == fusion_data_example.contig
 
     # Test non-exonic breakpoint
-    gene1 = "TPM3"
-    gene2 = "PDGFRB"
-    strand1 = "-/-"
-    strand2 = "-/-"
-    breakpoint1 = "1:154173079"
-    breakpoint2 = "5:150127173"
-    event = "translocation"
-    confidence = "high"
-    direction1 = "upstream"
-    direction2 = "downstream"
-    rf = "in-frame"
-    split_reads1 = 100
-    split_reads2 = 95
-    discordant_mates = 30
-    coverage1 = 200
-    coverage2 = 190
+    arriba.breakpoint1 = "1:154173079"
+    arriba.breakpoint2 = "5:150127173"
 
     arriba_fusor_nonexonic = await translator_instance.from_arriba(
-        gene1,
-        gene2,
-        strand1,
-        strand2,
-        breakpoint1,
-        breakpoint2,
-        event,
-        confidence,
-        direction1,
-        direction2,
-        rf,
-        split_reads1,
-        split_reads2,
-        discordant_mates,
-        coverage1,
-        coverage2,
-        fusion_transcript,
+        arriba,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
     fusion_data_example_nonexonic = fusion_data_example_nonexonic(
         readData=ReadData(spanning=SpanningReads(spanningReads=30)),
-        contig=ContigSequence(contig=fusion_transcript),
+        contig=ContigSequence(contig=arriba.fusion_transcript),
     )
     fusion_data_example_nonexonic.structure[0].coverage = BreakpointCoverage(
         fragmentCoverage=200
@@ -502,38 +431,30 @@ async def test_cicero(
 ):
     """Test CICERO translator"""
     # Test exonic breakpoint
-    gene_5prime = "TPM3"
-    gene_3prime = "PDGFRB"
-    chr_5prime = "1"
-    chr_3prime = "5"
-    pos_5prime = 154170465
-    pos_3prime = 150126612
-    sv_ort = ">"
-    event_type = "CTX"
-    reads_a = 100
-    reads_b = 90
-    coverage_a = 200
-    coverage_b = 190
-    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
+    cicero = Cicero(
+        gene_5prime="TPM3",
+        gene_3prime="PDGFRB",
+        chr_5prime="1",
+        chr_3prime="5",
+        pos_5prime=154170465,
+        pos_3prime=150126612,
+        sv_ort=">",
+        event_type="CTX",
+        reads_5prime=100,
+        reads_3prime=90,
+        coverage_5prime=200,
+        coverage_3prime=190,
+        contig="ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT",
+    )
 
     cicero_fusor = await translator_instance.from_cicero(
-        gene_5prime,
-        gene_3prime,
-        chr_5prime,
-        chr_3prime,
-        pos_5prime,
-        pos_3prime,
-        sv_ort,
-        event_type,
-        reads_a,
-        reads_b,
-        coverage_a,
-        coverage_b,
-        contig,
+        cicero,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
-    fusion_data_example = fusion_data_example(contig=ContigSequence(contig=contig))
+    fusion_data_example = fusion_data_example(
+        contig=ContigSequence(contig=cicero.contig)
+    )
     fusion_data_example.structure[0].coverage = BreakpointCoverage(fragmentCoverage=200)
     fusion_data_example.structure[0].anchoredReads = AnchoredReads(reads=100)
     fusion_data_example.structure[1].coverage = BreakpointCoverage(fragmentCoverage=190)
@@ -543,39 +464,16 @@ async def test_cicero(
     assert cicero_fusor.contig == fusion_data_example.contig
 
     # Test non-exonic breakpoint
-    gene_5prime = "TPM3"
-    gene_3prime = "PDGFRB"
-    chr_5prime = "1"
-    chr_3prime = "5"
-    pos_5prime = 154173079
-    pos_3prime = 150127173
-    sv_ort = ">"
-    event_type = "CTX"
-    reads_a = 100
-    reads_b = 90
-    coverage_a = 200
-    coverage_b = 190
-    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
+    cicero.pos_5prime = 154173079
+    cicero.pos_3prime = 150127173
 
     cicero_fusor_nonexonic = await translator_instance.from_cicero(
-        gene_5prime,
-        gene_3prime,
-        chr_5prime,
-        chr_3prime,
-        pos_5prime,
-        pos_3prime,
-        sv_ort,
-        event_type,
-        reads_a,
-        reads_b,
-        coverage_a,
-        coverage_b,
-        contig,
+        cicero,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
     fusion_data_example_nonexonic = fusion_data_example_nonexonic(
-        contig=ContigSequence(contig=contig)
+        contig=ContigSequence(contig=cicero.contig)
     )
     fusion_data_example_nonexonic.structure[0].coverage = BreakpointCoverage(
         fragmentCoverage=200
@@ -590,34 +488,10 @@ async def test_cicero(
     assert cicero_fusor_nonexonic.contig == fusion_data_example_nonexonic.contig
 
     # Test case where the called fusion does not have confident biological meaning
-    gene_5prime = "TPM3"
-    gene_3prime = "PDGFRB"
-    chr_5prime = "1"
-    chr_3prime = "5"
-    pos_5prime = 154173079
-    pos_3prime = 150127173
-    sv_ort = "?"
-    event_type = "CTX"
-    reads_a = 100
-    reads_b = 90
-    coverage_a = 200
-    coverage_b = 190
-    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
+    cicero.sv_ort = "?"
 
     non_confident_bio = await translator_instance.from_cicero(
-        gene_5prime,
-        gene_3prime,
-        chr_5prime,
-        chr_3prime,
-        pos_5prime,
-        pos_3prime,
-        sv_ort,
-        event_type,
-        reads_a,
-        reads_b,
-        coverage_a,
-        coverage_b,
-        contig,
+        cicero,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -627,34 +501,10 @@ async def test_cicero(
     )
 
     # Test case where multiple gene symbols are reported for a fusion partner
-    gene_5prime = "TPM3"
-    gene_3prime = "PDGFRB,PDGFRB-FGFR4,FGFR4"
-    chr_5prime = "1"
-    chr_3prime = "5"
-    pos_5prime = 154173079
-    pos_3prime = 150127173
-    sv_ort = "?"
-    event_type = "CTX"
-    reads_a = 100
-    reads_b = 90
-    coverage_a = 200
-    coverage_b = 190
-    contig = "ATCATACTAGATACTACTACGATGAGAGAGTACATAGAT"
+    cicero.gene_3prime = "PDGFRB,PDGFRB-FGFR4,FGFR4"
 
     multiple_genes_fusion_partner = await translator_instance.from_cicero(
-        gene_5prime,
-        gene_3prime,
-        chr_5prime,
-        chr_3prime,
-        pos_5prime,
-        pos_3prime,
-        sv_ort,
-        event_type,
-        reads_a,
-        reads_b,
-        coverage_a,
-        coverage_b,
-        contig,
+        cicero,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -670,40 +520,28 @@ async def test_enfusion(
 ):
     """Test EnFusion translator"""
     # Test exonic breakpoint
-    gene_5prime = "TPM3"
-    gene_3prime = "PDGFRB"
-    chr_5prime = 1
-    chr_3prime = 5
-    break_5prime = 154170465
-    break_3prime = 150126612
+    enfusion = EnFusion(
+        gene_5prime="TPM3",
+        gene_3prime="PDGFRB",
+        chr_5prime=1,
+        chr_3prime=5,
+        break_5prime=154170465,
+        break_3prime=150126612,
+    )
 
     enfusion_fusor = await translator_instance.from_enfusion(
-        gene_5prime,
-        gene_3prime,
-        chr_5prime,
-        chr_3prime,
-        break_5prime,
-        break_3prime,
+        enfusion,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
     assert enfusion_fusor.structure == fusion_data_example().structure
 
     # Test non-exonic breakpoint
-    gene_5prime = "TPM3"
-    gene_3prime = "PDGFRB"
-    chr_5prime = 1
-    chr_3prime = 5
-    break_5prime = 154173079
-    break_3prime = 150127173
+    enfusion.break_5prime = 154173079
+    enfusion.break_3prime = 150127173
 
     enfusion_fusor_nonexonic = await translator_instance.from_enfusion(
-        gene_5prime,
-        gene_3prime,
-        chr_5prime,
-        chr_3prime,
-        break_5prime,
-        break_3prime,
+        enfusion,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )
@@ -718,48 +556,30 @@ async def test_genie(
 ):
     """Test GENIE Translator"""
     # Test exonic breakpoint
-    site1_hugo = "TPM3"
-    site2_hugo = "PDGFRB"
-    site1_chrom = 1
-    site2_chrom = 5
-    site1_pos = 154170465
-    site2_pos = 150126612
-    annot = "TMP3 (NM_152263.4) - PDGFRB (NM_002609.4) fusion"
-    reading_frame = "In_frame"
+    genie = Genie(
+        site1_hugo="TPM3",
+        site2_hugo="PDGFRB",
+        site1_chrom=1,
+        site2_chrom=5,
+        site1_pos=154170465,
+        site2_pos=150126612,
+        annot="TMP3 (NM_152263.4) - PDGFRB (NM_002609.4) fusion",
+        reading_frame="In_frame",
+    )
 
     genie_fusor = await translator_instance.from_genie(
-        site1_hugo,
-        site2_hugo,
-        site1_chrom,
-        site2_chrom,
-        site1_pos,
-        site2_pos,
-        annot,
-        reading_frame,
+        genie,
         CoordinateType.INTER_RESIDUE.value,
         Assembly.GRCH38.value,
     )
     assert genie_fusor.structure == fusion_data_example().structure
 
     # Test non-exonic breakpoint
-    site1_hugo = "TPM3"
-    site2_hugo = "PDGFRB"
-    site1_chrom = 1
-    site2_chrom = 5
-    site1_pos = 154173079
-    site2_pos = 150127173
-    annot = "TMP3 (NM_152263.4) - PDGFRB (NM_002609.4) fusion"
-    reading_frame = "In_frame"
+    genie.site1_pos = 154173079
+    genie.site2_pos = 150127173
 
     genie_fusor_nonexonic = await translator_instance.from_genie(
-        site1_hugo,
-        site2_hugo,
-        site1_chrom,
-        site2_chrom,
-        site1_pos,
-        site2_pos,
-        annot,
-        reading_frame,
+        genie,
         CoordinateType.RESIDUE.value,
         Assembly.GRCH38.value,
     )

From ee3b1e43fc0f5b478e5c3d0077d3b6ce465d6211 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 16 Jan 2025 14:55:34 -0500
Subject: [PATCH 07/20] Add small changes to attributes based on jaffa column
 names

---
 src/fusor/fusion_caller_models.py | 5 +++--
 src/fusor/translator.py           | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 309b5c4..3b436a9 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -49,8 +49,9 @@ class JAFFA(BaseModel):
     classification: str = Field(
         ..., description="The classification associated with the called fusion"
     )
-    inframe: bool = Field(
-        ..., description="A boolean indicating if the fusion occurred in-frame"
+    inframe: bool | str = Field(
+        ...,
+        description="A boolean or string indicating if the fusion occurred in-frame",
     )
     spanning_reads: int = Field(
         ...,
diff --git a/src/fusor/translator.py b/src/fusor/translator.py
index 86998cb..73a77cb 100644
--- a/src/fusor/translator.py
+++ b/src/fusor/translator.py
@@ -244,7 +244,7 @@ async def from_jaffa(
             tr_5prime,
             tr_3prime,
             ce,
-            jaffa.inframe,
+            jaffa.inframe if isinstance(jaffa.inframe, bool) else None,
             reads=read_data,
         )
 

From 3b718eda4740db3d080347fbc8b1f48bd124bc66 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 17 Jan 2025 11:09:31 -0500
Subject: [PATCH 08/20] Add initial work for extraction methods

---
 src/fusor/extract.py                          | 149 ++++++
 src/fusor/fusion_caller_models.py             |   4 +-
 tests/conftest.py                             |   9 +
 tests/fixtures/annotated.fusion.txt           |   2 +
 .../final-list_candidate-fusion-genes.txt     | 356 +++++++++++++
 tests/fixtures/fusions_arriba_test.tsv        |   2 +
 tests/fixtures/jaffa_results.csv              | 492 ++++++++++++++++++
 ...tar-fusion.fusion_predictions.abridged.tsv |  38 ++
 tests/test_extractions.py                     |  66 +++
 9 files changed, 1117 insertions(+), 1 deletion(-)
 create mode 100644 src/fusor/extract.py
 create mode 100644 tests/fixtures/annotated.fusion.txt
 create mode 100644 tests/fixtures/final-list_candidate-fusion-genes.txt
 create mode 100644 tests/fixtures/fusions_arriba_test.tsv
 create mode 100644 tests/fixtures/jaffa_results.csv
 create mode 100644 tests/fixtures/star-fusion.fusion_predictions.abridged.tsv
 create mode 100644 tests/test_extractions.py

diff --git a/src/fusor/extract.py b/src/fusor/extract.py
new file mode 100644
index 0000000..c049bf5
--- /dev/null
+++ b/src/fusor/extract.py
@@ -0,0 +1,149 @@
+"""Module for extracting data from fusion caller output and converting to pydantic
+objects
+"""
+
+import csv
+import logging
+from pathlib import Path
+
+from fusor.fusion_caller_models import JAFFA, Arriba, Cicero, FusionCatcher, STARFusion
+
+_logger = logging.getLogger(__name__)
+
+
+def get_jaffa_records(path: Path) -> list[JAFFA] | None:
+    """Load fusions from a JAFFA csv file, building one JAFFA object per row
+
+    :param path: The path to the file of JAFFA fusions
+    :return: A list of JAFFA objects, or None if the specified file does not exist
+    """
+    if not path.exists():
+        statement = f"{path!s} does not exist"
+        _logger.error(statement)
+        return None
+    fusions_list: list[JAFFA] = []
+    column_rename = {
+        "fusion genes": "fusion_genes",
+        "spanning reads": "spanning_reads",
+        "spanning pairs": "spanning_pairs",
+    }
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile)
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(JAFFA(**row))
+    return fusions_list
+
+
+def get_star_fusion_records(path: Path) -> list[STARFusion] | None:
+    """Load fusions from a STAR-Fusion tsv file, one STARFusion object per row
+
+    :param path: The path to the file of STAR-Fusion fusions
+    :return: A list of STARFusion objects, or None if the file does not exist
+    """
+    if not path.exists():
+        statement = f"{path!s} does not exist"
+        _logger.error(statement)
+        return None
+    fusions_list: list[STARFusion] = []
+    column_rename = {
+        "LeftGene": "left_gene",
+        "RightGene": "right_gene",
+        "LeftBreakpoint": "left_breakpoint",
+        "RightBreakpoint": "right_breakpoint",
+        "JunctionReadCount": "junction_read_count",
+        "SpanningFragCount": "spanning_frag_count",
+    }
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile, delimiter="\t")
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(STARFusion(**row))
+    return fusions_list
+
+
+def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
+    """Load fusions from a FusionCatcher txt file, one FusionCatcher object per row
+
+    :param path: The path to the file of FusionCatcher fusions
+    :return: A list of FusionCatcher objects, or None if the file does not exist
+    """
+    if not path.exists():
+        statement = f"{path!s} does not exist"
+        _logger.error(statement)
+        return None
+    fusions_list: list[FusionCatcher] = []
+    column_rename = {
+        "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
+        "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
+        "Fusion_point_for_gene_1(5end_fusion_partner)": "five_prime_fusion_point",
+        "Fusion_point_for_gene_2(3end_fusion_partner)": "three_prime_fusion_point",
+        "Predicted_effect": "predicted_effect",
+        "Spanning_unique_reads": "spanning_unique_reads",
+        "Spanning_pairs": "spanning_reads",
+        "Fusion_sequence": "fusion_sequence",
+    }
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile, delimiter="\t")
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(FusionCatcher(**row))
+    return fusions_list
+
+
+def get_arriba_records(path: Path) -> list[Arriba] | None:
+    """Load fusions from an Arriba tsv file, building one Arriba object per row
+
+    :param path: The path to the file of Arriba fusions
+    :return: A list of Arriba objects, or None if the specified file does not exist
+    """
+    if not path.exists():
+        statement = f"{path!s} does not exist"
+        _logger.error(statement)
+        return None
+    fusions_list: list[Arriba] = []
+    column_rename = {
+        "#gene1": "gene1",
+        "strand1(gene/fusion)": "strand1",
+        "strand2(gene/fusion)": "strand2",
+        "type": "event_type",
+        "reading_frame": "rf",
+    }
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile, delimiter="\t")
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(Arriba(**row))
+    return fusions_list
+
+
+def get_cicero_records(path: Path) -> list[Cicero] | None:
+    """Load fusions from a Cicero txt file, building one Cicero object per row
+
+    :param path: The path to the file of Cicero fusions
+    :return: A list of Cicero objects, or None if the specified file does not exist
+    """
+    if not path.exists():
+        statement = f"{path!s} does not exist"
+        _logger.error(statement)
+        return None
+    fusions_list: list[Cicero] = []
+    column_rename = {
+        "geneA": "gene_5prime",
+        "geneB": "gene_3prime",
+        "chrA": "chr_5prime",
+        "chrB": "chr_3prime",
+        "posA": "pos_5prime",
+        "posB": "pos_3prime",
+        "type": "event_type",
+        "readsA": "reads_5prime",
+        "readsB": "reads_3prime",
+        "coverageA": "coverage_5prime",
+        "coverageB": "coverage_3prime",
+    }
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile, delimiter="\t")
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(Cicero(**row))
+    return fusions_list
diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 3b436a9..45c56a7 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -135,7 +135,9 @@ class Arriba(BaseModel):
     )
     breakpoint1: str = Field(..., description="The chromosome and breakpoint for gene1")
     breakpoint2: str = Field(..., description="The chromosome and breakpoint for gene2")
-    event: str = Field(..., description=" An inference about the type of fusion event")
+    event_type: str = Field(
+        ..., description=" An inference about the type of fusion event"
+    )
     confidence: str = Field(
         ..., description="A metric describing the confidence of the fusion prediction"
     )
diff --git a/tests/conftest.py b/tests/conftest.py
index ec7f498..390b24f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import logging
+from pathlib import Path
 
 import pytest
 from cool_seq_tool.app import CoolSeqTool
@@ -9,6 +10,8 @@
 from fusor.fusor import FUSOR
 from fusor.translator import Translator
 
+FIXTURE_DATA_DIR = Path(__file__).parents[0].resolve() / "fixtures"
+
 
 def pytest_addoption(parser):
     """Add custom commands to pytest invocation.
@@ -39,6 +42,12 @@ def event_loop():
     loop.close()
 
 
+@pytest.fixture(scope="session")
+def fixture_data_dir():
+    """Provide the path to the ``fixtures`` directory holding test data files."""
+    return FIXTURE_DATA_DIR
+
+
 @pytest.fixture(scope="session")
 def fusor_instance():
     """Create test fixture for fusor object
diff --git a/tests/fixtures/annotated.fusion.txt b/tests/fixtures/annotated.fusion.txt
new file mode 100644
index 0000000..f76562b
--- /dev/null
+++ b/tests/fixtures/annotated.fusion.txt
@@ -0,0 +1,2 @@
+sample	geneA	chrA	posA	ortA	featureA	geneB	chrB	posB	ortB	featureB	sv_ort	readsA	readsB	matchA	matchB	repeatA	repeatB	coverageA	coverageB	ratioA	ratioB	qposA	qposB	total_readsA	total_readsB	contig	type
+test	TCF3	chr19	1619111	-	coding	PBX1	chr1	164792493	+	coding	>	62	78	98	93	0.00	0.00	1145	1756	0.38	0.77	98	97	399	227	CCTCGCAGGCAGCACCAGCCTCATGCACAACCACGCGGCCCTCCCCAGCCAGCCAGGCACCCTCCCTGACCTGTCTCGGCCTCCCGACTCCTACAGTGTTTTGAGTATCCGAGGAGCCCAGGAGGAGGAACCCACAGACCCCCAGCTGATGCGGCTGGACAACATGCTGTTAGCGGAAGGCGTGGCGGGGG	CTX
diff --git a/tests/fixtures/final-list_candidate-fusion-genes.txt b/tests/fixtures/final-list_candidate-fusion-genes.txt
new file mode 100644
index 0000000..d60e82f
--- /dev/null
+++ b/tests/fixtures/final-list_candidate-fusion-genes.txt
@@ -0,0 +1,356 @@
+Gene_1_symbol(5end_fusion_partner)	Gene_2_symbol(3end_fusion_partner)	Fusion_description	Counts_of_common_mapping_reads	Spanning_pairs	Spanning_unique_reads	Longest_anchor_found	Fusion_finding_method	Fusion_point_for_gene_1(5end_fusion_partner)	Fusion_point_for_gene_2(3end_fusion_partner)	Gene_1_id(5end_fusion_partner)	Gene_2_id(3end_fusion_partner)	Exon_1_id(5end_fusion_partner)	Exon_2_id(3end_fusion_partner)	Fusion_sequence	Predicted_effect
+ALK	EML4	known,similar_reads,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,ambiguous,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,m543,tcga-cancer	39165	545	4	36	BOWTIE+BLAT	2:29223427:-	2:42295181:+	ENSG00000171094	ENSG00000143924			CCCTGAGTACAAGCTGAGCAAGCTCCGCACCTCGACCATCATGACCGACT*AATTACATGCGGTAAATCTCATATTTTCTTCTGGACCTGGAGCGGCAATT	out-of-frame
+EML4	ALK	known,similar_reads,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,ambiguous,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,m543,tcga-cancer,exon-exon	39165	545	27	30	BOWTIE;BOWTIE+STAR	2:42295516:+	2:29223528:-	ENSG00000143924	ENSG00000171094	ENSE00003554806	ENSE00001154407	AGCAAAACTACTGTAGAGCCCACACCTGGGAAAGGACCTAAAG*TGTACCGCCGGAAGCACCAGGAGCTGCAAGCCATGCAGATGGA	in-frame
+EML4	ALK	known,similar_reads,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,ambiguous,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,m543,tcga-cancer	39165	545	2	32	BOWTIE+STAR	2:42235381:+	2:29790679:-	ENSG00000143924	ENSG00000171094			CACCTGTAATCCCAGCTACTCAGGAGGCTAAGGCGCGAGAATTACTTGAG*CCTGGGAGGCAGAGGTTGCAGTGAGCCGAGATCGTGCCATTGCATTCCAG	intronic/intronic
+EML4	ALK	known,similar_reads,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,ambiguous,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,m543,tcga-cancer	39165	545	2	19	BOWTIE+STAR	2:42177812:+	2:29286943:-	ENSG00000143924	ENSG00000171094			TGTGCATGACCTGTTAATGGTTTTTGAAATCAGTTTAGTCGTGGTCAGGA*AAAAAAAAAAAAAAATGAGATGGTGTGTCACAGACTAAGTCAAGGAATAT	intronic/intronic
+KIF5B	RET	known,oncogene,cosmic,cgp,ticdb,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,exon-exon	0	505	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	10:32017143:-	10:43114480:+	ENSG00000170759	ENSG00000165731	ENSE00001163712	ENSE00001164787	AGGTCAAAGAATATGGCCAGAAGAGGGCATTCTGCACAGATTG*ATCCACTGTGCGACGAGCTGTGCCGCACGGTGATCGCAGCCGC	in-frame
+RET	KIF5B	known,oncogene,cosmic,cgp,ticdb,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor	0	505	2	33	BOWTIE+BLAT;BOWTIE+STAR	10:43114611:+	10:32017241:-	ENSG00000165731	ENSG00000170759			CTTCTGCATCCACTGCTACCACAAGTTTGCCCACAAGCCACCCATCTCCT*CATCTCGTGATCGCAAACGCTATCAGCAAGAAGTAGATCGCATAAAGGAA	in-frame
+LMNA	NTRK1	known,oncogene,cgp,chimerdb3kb,chimerdb3pub,cancer,tumor,exon-exon	0	476	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	1:156130773:+	1:156874906:+	ENSG00000160789	ENSG00000198400	ENSE00003659692	ENSE00003538368	GCTGGAGGGCGAGCTGCATGATCTGCGGGGCCAGGTGGCCAAG*GTCTCGGTGGCTGTGGGCCTGGCCGTCTTTGCCTGCCTCTTCC	in-frame
+FGFR3	TACC3	known,adjacent,oncogene,cosmic,ticdb,tcga,cell_lines,18cancers,gliomas,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tcga-cancer,oesophagus,10K<gap<100K,exon-exon	0	392	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	4:1806934:+	4:1739702:+	ENSG00000068078	ENSG00000013810	ENSE00003518612	ENSE00001655127	GGTGGAGGACCTGGACCGTGTCCTTACCGTGACGTCCACCGAC*GTAAAGGCGACACAGGAGGAGAACCGGGAGCTGAGGAGCAGGT	in-frame
+NCOA4	RET	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,m356,tcga-cancer,exon-exon	0	380	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	10:46012883:-	10:43116584:+	ENSG00000266412	ENSG00000165731	ENSE00003670291	ENSE00001095944	CCAGGACTGGCTTACCCAAAAGCAGACCTTGGAGAACAGTCAG*GAGGATCCAAAGTGGGAATTCCCTCGGAAGAACTTGGTTCTTG	in-frame
+ETV6	NTRK3	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,tcga-cancer,exon-exon	210	316	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	12:11869969:+	15:87940753:-	ENSG00000139083	ENSG00000140538	ENSE00001788162	ENSE00001134154	TCCCCGCCTGAAGAGCACGCCATGCCCATTGGGAGAATAGCAG*ATGTGCAGCACATTAAGAGGAGAGACATCGTGCTGAAGCGAGA	in-frame
+ETV6	NTRK3	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,tcga-cancer	210	316	2	28	BOWTIE+STAR	12:11663757:+	15:87941485:-	ENSG00000139083	ENSG00000140538			ATATTGTACATTTGAGATAGTTTGCCTGTGGTGTTTATATGTTGGTAGGT*TGTGTGTGTGTGTATGTGTATGTGTGTGTGCATGCGTGTGTCTTGTCTTC	intronic/intronic
+NTRK3	ETV6	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,tcga-cancer	210	316	3	40	BOWTIE+STAR	15:87933100:-	12:11869820:+	ENSG00000140538	ENSG00000139083			TTTCCAGAGGGAGGCCGAGCTGCTCACCAACCTGCAGCATGAGCACATTG*TCAAACAGTCCAGGCTCTCCGAGGACGGGCTGCATAGGGAAGGGAAGCCC	in-frame
+NTRK3	ETV6	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,tcga-cancer	210	316	2	29	BOWTIE+STAR	15:87933123:-	12:11869914:+	ENSG00000140538	ENSG00000139083			CCCACCCTGGCTGCCCGGAAGGATTTCCAGAGGGAGGCCGAGCTGCTCAC*CATGGTCTCTGTCTCCCCGCCTGAAGAGCACGCCATGCCCATTGGGAGAA	in-frame
+TPM3	NTRK1	known,oncogene,cosmic,chimerdb2,cgp,tcga,cell_lines,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,m139,tcga-cancer,exon-exon	0	312	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	1:154170400:-	1:156874571:+	ENSG00000143549	ENSG00000198400	ENSE00003479884	ENSE00003559053	AGATCGGTAGCCAAGCTGGAAAAGACAATTGATGACCTGGAAG*ACACTAACAGCACATCTGGAGACCCGGTGGAGAAGAAGGACGA	in-frame
+EGFR	SEPT14	known,oncogene,tcga,chimerdb3pub,cancer,tumor,m39,exon-exon	61	228	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	7:55200413:+	7:55796092:-	ENSG00000146648	ENSG00000154997	ENSE00001790701	ENSE00001370673	CTCCAAAATGGCCCGAGACCCCCAGCGCTACCTTGTCATTCAG*CTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGG	in-frame
+FGFR3	BAIAP2L1	known,oncogene,cosmic,ticdb,cell_lines,chimerdb3kb,chimerdb3pub,cancer,exon-exon	0	219	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	4:1806934:+	7:98362432:-	ENSG00000068078	ENSG00000006453	ENSE00003518612	ENSE00003530132	GGTGGAGGACCTGGACCGTGTCCTTACCGTGACGTCCACCGAC*AATGTTATGGAACAGTTCAATCCTGGGCTGCGAAATTTAATAA	in-frame
+SLC34A2	ROS1	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,exon-exon	0	206	26	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	4:25664330:+	6:117324415:-	ENSG00000157765	ENSG00000047936	ENSE00001122648	ENSE00000762702	TGCTCCCTGGATATTCTTAGTAGCGCCTTCCAGCTGGTTGGAG*ATGATTTTTGGATACCAGAAACAAGTTTCATACTTACTATTAT	in-frame
+NPIPB5	SMG1	banned,known,bodymap2,hpa,m1,multi	0	201	9	41	BOWTIE+STAR	16:22492287:+	16:18857719:-	ENSG00000243716	ENSG00000157106			GTATATACCACAGTTTATTTCTCCCTTCATCCTTTGCTAGATTTTGGGGT*TTTTTTCACATTGCGCTATTCAGTATAAACCTGCTCTCAACATTCATGTG	intronic/UTR
+NPIPB5	SMG1	banned,known,bodymap2,hpa,m1,multi	0	201	5	30	BOWTIE+STAR	16:22520986:+	16:18885016:-	ENSG00000243716	ENSG00000157106			GCCTCAGCCTCCCAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCTGGC*CAATACCATTTCTTTAATAATGCAAGAAAACTAATTCAGAGAAATGTTTT	intronic/UTR
+NPIPB5	SMG1	banned,known,bodymap2,hpa,m1,multi	0	201	3	39	BOWTIE+STAR	16:22492410:+	16:18857596:-	ENSG00000243716	ENSG00000157106			TGCGTTTCTCTTGAGTGAATGCACCTTGTTGGGTCACGTGGCTTAACTTA*AAAAAAATTTTAATCACTGTGGTGCATATGTAGTGATTATTAGTGATTAT	intronic/UTR
+NPIPB5	SMG1	banned,known,bodymap2,hpa,m1,multi	0	201	2	37	BOWTIE+STAR	16:22493252:+	16:18856754:-	ENSG00000243716	ENSG00000157106			CAAGTATTGATCAAATGTGCTTTGTACCAGGTACTGAGCTCTTCGTTGGG*ATAATGGTGATCAAGGAGATTGTAGATTCTGGCAGGGAAAACTGACATCA	intronic/UTR
+NPIPB5	SMG1	banned,known,bodymap2,hpa,m1,multi	0	201	2	31	BOWTIE+STAR	16:22493134:+	16:18856875:-	ENSG00000243716	ENSG00000157106			TTCACAAGTACCCTCATCTCCTTTCCAGTCGTTTTTTGTTTTTGTTTTTG*TTTTTTTTGAGACCATCTCACTCTGTTGCCCAGGCTGGAGTGCCTCTTCA	intronic/UTR
+NPIPB5	SMG1	banned,known,bodymap2,hpa,m1,multi,exon-exon	0	201	2	28	BOWTIE	16:22506235:+	16:18879719:-	ENSG00000243716	ENSG00000157106	ENSE00002280067	ENSE00001635363	AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAG*GTTCTGTACAGAGTAATGAGATGTGTGACGGCTGCAAACCAGG	UTR/CDS(truncated)
+NPIPB5	SMG1	banned,known,bodymap2,hpa,m1,multi	0	201	2	25	BOWTIE+BLAT;BOWTIE+STAR	16:22506495:+	16:18857012:-	ENSG00000243716	ENSG00000157106			CGACTGAATGGACCCTTCCTGTTGAGCAAGGACATTCCAAAGTAAACTGA*TTCCCCACTTTTGAGATACTTGTAAGGATTATATGAGATGAAGAGATGAG	intronic/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi,exon-exon	0	201	22	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	16:18858170:-	16:22513522:+	ENSG00000157106	ENSG00000243716	ENSE00001505919	ENSE00002138288	AGGTATACTATGTACCAGAATCAGTTGTTGGAGAAAATTAAAG*GTATGATCTCGTGAAATCTTGAGAGAAACTGAATGACGAATGA	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	15	39	BOWTIE+BLAT;BOWTIE+STAR	16:18858211:-	16:22513522:+	ENSG00000157106	ENSG00000243716			TTACGTTCATGTAAACAGCATGACGTGAGGCCATGGATGCAGGCATTAAG*GTATGATCTCGTGAAATCTTGAGAGAAACTGAATGACGAATGAAACTATT	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	7	40	BOWTIE+BLAT;BOWTIE+STAR	16:18858211:-	16:22507337:+	ENSG00000157106	ENSG00000243716			TTACGTTCATGTAAACAGCATGACGTGAGGCCATGGATGCAGGCATTAAG*AGCTTGAAATGAATTTTAAAGGATGACTGATGGTCCTTGGAAGAGAAACA	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi,exon-exon	0	201	6	30	BOWTIE	16:18858170:-	16:22507337:+	ENSG00000157106	ENSG00000243716	ENSE00001505919	ENSE00002285337	AGGTATACTATGTACCAGAATCAGTTGTTGGAGAAAATTAAAG*AGCTTGAAATGAATTTTAAAGGATGACTGATGGTCCTTGGAAG	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi,exon-exon	0	201	6	29	BOWTIE	16:18858170:-	16:22513523:+	ENSG00000157106	ENSG00000243716	ENSE00001505919	ENSE00002700286	AGGTATACTATGTACCAGAATCAGTTGTTGGAGAAAATTAAAG*TATGATCTCGTGAAATCTTGAGAGAAACTGAATGACGAATGAA	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	3	41	BOWTIE+BLAT;BOWTIE+STAR	16:18858211:-	16:22513424:+	ENSG00000157106	ENSG00000243716			TTACGTTCATGTAAACAGCATGACGTGAGGCCATGGATGCAGGCATTAAG*GTTTCAGGCACAGAACTGTATATCCAATAATAGTGAAATGGATCCCACTA	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	3	35	BOWTIE+BLAT;BOWTIE+STAR	16:18858171:-	16:22523820:+	ENSG00000157106	ENSG00000243716			AGGCATTAAGGTATACTATGTACCAGAATCAGTTGTTGGAGAAAATTAAA*GTTATCAATACTCTGGCTGACCATCATCATCGTGGGACTGACTTTGGTGG	in-frame
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	3	32	BOWTIE+BLAT	16:18856886:-	16:22531600:+	ENSG00000157106	ENSG00000243716			AGCGTGTCATATAATATGGCTTCACAAGTACCCTCATCTCCTTTCCAGTC*TTTTTTGTTTTTGTTTTTGTTTTTCTGAGATGGAGTCTCGCTGTGTCACC	UTR/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	3	29	BOWTIE+BLAT	16:18858211:-	16:22506113:+	ENSG00000157106	ENSG00000243716			TTACGTTCATGTAAACAGCATGACGTGAGGCCATGGATGCAGGCATTAAG*ACAGAGTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAGTGGCACGATCTCA	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	2	36	BOWTIE+STAR	16:18858170:-	16:22510896:+	ENSG00000157106	ENSG00000243716			GGCATTAAGGTATACTATGTACCAGAATCAGTTGTTGGAGAAAATTAAAG*ATTCTACCTTTGTCACCCAGGCGGGAGTGCAGTGGCATGATCTCGGCTCA	CDS(truncated)/intronic
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	2	33	BOWTIE+BLAT;BOWTIE+STAR	16:18858211:-	16:22513764:+	ENSG00000157106	ENSG00000243716			TTACGTTCATGTAAACAGCATGACGTGAGGCCATGGATGCAGGCATTAAG*GTTGGCACTCATCATGAGCCCCTGTTCTCATTCTGCAAATGGTGAAGCTC	CDS(truncated)/UTR
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi	0	201	2	30	BOWTIE+STAR	16:18857596:-	16:22492410:+	ENSG00000157106	ENSG00000243716			TGTGTTTCTCTCGAGTGAATGCACCTTGTTGGGTCACGTAGCTTAACTTA*AAAAAAATTTTAATCACTGTGGTGCATATGTAGTGATTATTAGTGATTAT	UTR/intronic
+SMG1	NPIPB5	banned,known,bodymap2,hpa,m1,multi,exon-exon	0	201	2	22	BOWTIE	16:18858170:-	16:22523820:+	ENSG00000157106	ENSG00000243716	ENSE00001505919	ENSE00003563471	AGGTATACTATGTACCAGAATCAGTTGTTGGAGAAAATTAAAG*GTTATCAATACTCTGGCTGACCATCATCATCGTGGGACTGACT	in-frame
+PAX8	PPARG	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tcga-cancer,exon-exon	0	186	27	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	2:113235394:-	3:12379704:+	ENSG00000125618	ENSG00000132170	ENSE00003562615	ENSE00003553746	GCCTCCGTGTACGGGCAGTTCACGGGCCAGGCCCTCCTCTCAG*AAATGACCATGGTTGACACAGAGATGCCATTCTGGCCCACCAA	in-frame
+PPARG	PAX8	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tcga-cancer	0	186	2	40	BOWTIE+BLAT;BOWTIE+STAR	3:12379852:+	2:113235530:-	ENSG00000132170	ENSG00000125618			ATATCAAGCCCTTCACTACTGTTGACTTCTCCAGCATTTCTACTCCACAT*TAGCTCCACCCCTTCCTCTTTATCTAGCTCCGCCTTTTTGGATCTGCAGC	out-of-frame
+AC118758.2	AL606534.6	pseudogene,m3	1174	168	17	41	BOWTIE+STAR	7:56806065:+	1:243050504:+	ENSG00000277253	ENSG00000278455			GCGAGCGGTGGTACACCCTGGGGCCCAATGAGACGCAGAAGTACCATGAC*CTGGCCTTCCAGGTGAAGGTGGCCCACTTGCAACAAGGACCGAAAGAAGT	exonic(no-known-CDS)/exonic(no-known-CDS)
+AC118758.2	AL606534.6	pseudogene,m3	1174	168	5	41	BOWTIE+STAR	7:56806526:+	1:243050970:+	ENSG00000277253	ENSG00000278455			GCAGCCCCTGGTGAGGGAGGTGACCAGTGGGCAGCCCTGCTGCTGCCCAC*CTGAGCTGCTCATTCCCAGCACATGGCCGGTGAGGACACAGCGAGTGACG	exonic(no-known-CDS)/exonic(no-known-CDS)
+AL606534.6	AC118758.2	pseudogene,m3	1174	168	19	46	BOWTIE+STAR	1:243051299:+	7:56806857:+	ENSG00000278455	ENSG00000277253			GGTCCCCCGATCCACCTGCAGCTTTTGGCAAAGTCTATGGTCCCACCCTG*TCCTCCTCCTACACATACTCGGATGCTTCCTCCTCAACCTTGGCACCCAC	exonic(no-known-CDS)/exonic(no-known-CDS)
+AC211429.1	POM121	pseudogene,m3	1680	128	13	40	BOWTIE+STAR	7:75485916:-	7:72879833:+	ENSG00000242073	ENSG00000196313			GTGGACAGGAGGGGACCTCGCGAGCAGACGCGCGCGCCAGCGACAGCAGC*CCCGCCCCGGCCTCTCGGGAGCCGTGGGGCAGAGGCTGCAGAGCCCCAGG	exonic(no-known-CDS)/UTR
+AC211429.1	POM121	pseudogene,m3,exon-exon	1680	128	13	29	BOWTIE	7:75484198:-	7:72890627:+	ENSG00000242073	ENSG00000196313	ENSE00002492251	ENSE00002455731	CTCTGCATGTTTCCAAGAGCAGCAGAAAATGAACACATTGCAG*GGGCCAGTGTCATTCAAAGATGTGGCTGTGGATTTCACCCAGG	exonic(no-known-CDS)/UTR
+AC211429.1	POM121	pseudogene,m3,exon-exon	1680	128	5	26	BOWTIE	7:75475062:-	7:72890933:+	ENSG00000242073	ENSG00000196313	ENSE00003671850	ENSE00001407696	GGGGATGTGATGTTGGAGAACTACAGCCATCTAGTTTCCTTGG*CTTATGAGGTGGCAACATCTTGTACTTCGGAGATCTGAAGCCG	exonic(no-known-CDS)/UTR
+CD74	ROS1	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,tcga-cancer,exon-exon	0	128	25	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	5:150404680:-	6:117324415:-	ENSG00000019582	ENSG00000047936	ENSE00000841201	ENSE00000762702	AGGCACTCCTTGGAGCAAAAGCCCACTGACGCTCCACCGAAAG*ATGATTTTTGGATACCAGAAACAAGTTTCATACTTACTATTAT	in-frame
+POM121	AC211429.1	pseudogene,m3,exon-exon	1680	128	10	30	BOWTIE	7:72879885:+	7:75484256:-	ENSG00000196313	ENSG00000242073	ENSE00001404826	ENSE00002492251	CCTCTCGGGAGCCGTGGGGCAGAGGCTGCAGAGCCCCAGGAGG*GTCTATCAGCCACAGTCTCTGCATGTTTCCAAGAGCAGCAGAA	UTR/exonic(no-known-CDS)
+POM121	AC211429.1	pseudogene,m3,exon-exon	1680	128	3	30	BOWTIE	7:72891110:+	7:75475188:-	ENSG00000196313	ENSG00000242073	ENSE00001521122	ENSE00003671850	CTGTGGATAACGGGAGGTGAATTTCCATGTCAACATAGTCCAG*GGGCCAGTGTCATTCAAAGATGTGGCTGTGGATTTCACCCAGG	UTR/exonic(no-known-CDS)
+ROS1	CD74	known,oncogene,cosmic,chimerdb2,cgp,ticdb,tcga,cell_lines,18cancers,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,tcga-cancer	0	128	2	38	BOWTIE+BLAT	6:117324378:-	5:150406932:-	ENSG00000047936	ENSG00000019582			TTATATTATTAGATGATTTTTGGATACCAGAAACAAGTTTCATACTTACT*GGCCACCCCGCTGCTGATGCAGGCGCTGCCCATGGGAGCCCTGCCCCAGG	in-frame
+WASH3P	WASH5P	pseudogene,m19	1342	127	4	38	BOWTIE+STAR	15:101967910:+	19:64660:-	ENSG00000185596	ENSG00000282458			CCCGTACACTGCCTTTGGTTGCCATTCATGAACCTGCCACCAATAGTAAC*ACAAAGTGCTGGATGCACCTTTTGTGCTTATCTTTGTGCTAAATGTGCCC	intronic/exonic(no-known-CDS)
+WASH3P	WASH5P	pseudogene,m19	1342	127	4	29	BOWTIE+STAR	15:101967252:+	19:65317:-	ENSG00000185596	ENSG00000282458			GACTTCACTCAGTGTTTATCTAACATGAGTGAGTGAATGGTGTTTGCTGT*TTTTTTTGGTAAAGGTCCCAGGGGTTGTCGGGTACACAGGTCCTGTCTTT	intronic/exonic(no-known-CDS)
+WASH3P	WASH5P	pseudogene,m19,exon-exon	1342	127	3	26	BOWTIE	15:101961641:+	19:66499:-	ENSG00000185596	ENSG00000282458	ENSE00002566302	ENSE00003783427	GAAGCGGCGGCGGGAGCTTCCGGGAGGGCGGCTCGCAG*GCACCATGACTCCTGTGAGGATGCAGCACTCCCTGGCAGGTCA	exonic(no-known-CDS)/exonic(no-known-CDS)
+WASH3P	WASH5P	pseudogene,m19	1342	127	2	37	BOWTIE+STAR	15:101966992:+	19:65574:-	ENSG00000185596	ENSG00000282458			CGTCATGGGAAGCCTGGCTTGGTCTCAGGTCAGGGAAGGCAGATGTGAGG*ATGACATTTATGGTAAAGTCTGAGGGTTGAGTGGGTAGGTTGGGAAGAAC	intronic/exonic(no-known-CDS)
+WASH3P	WASH5P	pseudogene,m19	1342	127	2	37	BOWTIE+STAR	15:101966414:+	19:66151:-	ENSG00000185596	ENSG00000282458			GAAGTGAGAGCCCAACTTGGAAGCTTTTACTCCTGGGAGTCCGAGAGCTC*CCCTTCCACCCCACTTAGCCTCCTGGTTTCCTGTGGTGGCTCTGCTCTCA	intronic/exonic(no-known-CDS)
+WASH5P	WASH3P	pseudogene,m19,exon-exon	1342	127	9	29	BOWTIE	19:66346:-	15:101972901:+	ENSG00000282458	ENSG00000185596	ENSE00003783498	ENSE00003637821	CCCTGCAGTACCTGCAGAAGGTCTCTGGAGACATCTTCAGCAG*GTGTTCTCCAGTGCCAAGTACCCTGCTCCAGAGCGCCTGCAGG	exonic(no-known-CDS)/exonic(no-known-CDS)
+WASH5P	WASH3P	pseudogene,m19	1342	127	3	33	BOWTIE+STAR	19:65317:-	15:101967252:+	ENSG00000282458	ENSG00000185596			GACTTCACTCAGTGTTTATCTAACATGAGTGAGTGAATGGTGTTTGCTCT*TTTTTTTGGTAAAGGTCCCAGGGGTTGTCGGGTACACAGGTCCTGTCTTT	exonic(no-known-CDS)/intronic
+WASH5P	WASH3P	pseudogene,m19,exon-exon	1342	127	3	26	BOWTIE	19:70928:-	15:101966070:+	ENSG00000282458	ENSG00000185596	ENSE00003775509	ENSE00003488566	AGCGGCGGCGGGAGCTTCCGGGAGGGCGGCTCGCAG*GCACCATGACTCCTGTGAGGATGCAGCACTCCCTGGCAGGTCA	exonic(no-known-CDS)/exonic(no-known-CDS)
+WASH5P	WASH3P	pseudogene,m19	1342	127	2	41	BOWTIE+STAR	19:62635:-	15:101969934:+	ENSG00000282458	ENSG00000185596			CCTCCTGCACGGAAGAGACAGGGGCAGGGGAGAGACTCTCTCCCCACCAC*CCCAGCTCAGGCCCCAGCACAGCCCGGCCTCTGGCCTCACTGGCGTCTGT	exonic(no-known-CDS)/intronic
+WASH5P	WASH3P	pseudogene,m19	1342	127	2	40	BOWTIE+STAR	19:62163:-	15:101970410:+	ENSG00000282458	ENSG00000185596			TTCTGTATTTATACGCTTGATTTTTAAAACCTAAATGTTGGGCTTCACAT*TTCCTTGTAAATTTCATCTTGGTGATTGCAGTCTACCCTCTGGCCTTTAA	exonic(no-known-CDS)/intronic
+WASH5P	WASH3P	pseudogene,m19	1342	127	2	36	BOWTIE+STAR	19:62597:-	15:101969974:+	ENSG00000282458	ENSG00000185596			TCTCCCCACCACCCGGCTCAGGCCCCAGCACAGCCCGGCCTCTGGCCTCA*TGGCGTCTGTGCCCAGTGACGCAGGCAGGTGAGCTCCTGGCAAATTAGCA	exonic(no-known-CDS)/intronic
+WASH5P	WASH3P	pseudogene,m19,exon-exon	1342	127	2	22	BOWTIE	19:70928:-	15:101972901:+	ENSG00000282458	ENSG00000185596	ENSE00003775509	ENSE00003637821	AGCGGCGGCGGGAGCTTCCGGGAGGGCGGCTCGCAG*GTGTTCTCCAGTGCCAAGTACCCTGCTCCAGAGCGCCTGCAGG	exonic(no-known-CDS)/exonic(no-known-CDS)
+ADGRE2	ADGRE5	banned,known,paralogs,healthy,duplicates,bodymap2,ambiguous,hpa,gtex,cancer,m31,tcga-normal,exon-exon	1977	93	2	30	BOWTIE	19:14774256:-	19:14388702:+	ENSG00000127507	ENSG00000123146	ENSE00003682970	ENSE00003504172	GTCTGGCTGACTCTGCCGGGAGCTGAAACCCAGGACTCCAGGG*GCTGTGCCCGGTGGTGCCCTCAGAACTCCTCGTGTGTCAATGC	in-frame
+ADGRE5	ADGRE2	banned,known,paralogs,healthy,duplicates,bodymap2,ambiguous,hpa,gtex,cancer,m31,tcga-normal	1977	93	8	38	BOWTIE+STAR	19:14381261:+	19:14777023:-	ENSG00000123146	ENSG00000127507			ACTCTAGCAGAGGGTTTTTGAAAGCAGGTTGCACTTCATTCTTACTGAGT*GAGTACACGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGCAG	UTR/intronic
+ADGRE5	ADGRE2	banned,known,paralogs,healthy,duplicates,bodymap2,ambiguous,hpa,gtex,cancer,m31,tcga-normal,exon-exon	1977	93	3	26	BOWTIE	19:14381545:+	19:14774306:-	ENSG00000123146	ENSG00000127507	ENSE00002770497	ENSE00003682970	CTCCTGCCGGCAGCTCCAACCATGGGAGGCCGCGTCTTTCTCG*CATTCTGTGTCTGGCTGACTCTGCCGGGAGCTGAAACCCAGGA	in-frame
+ADGRE5	ADGRE2	banned,known,paralogs,healthy,duplicates,bodymap2,ambiguous,hpa,gtex,cancer,m31,tcga-normal,exon-exon	1977	93	2	30	BOWTIE	19:14388500:+	19:14774054:-	ENSG00000123146	ENSG00000127507	ENSE00003576276	ENSE00003634486	GTCTGGCTGACTCTGCCGGGAGCTGAAACCCAGGACTCCAGGG*GCTGTGCCCGGTGGTGCCCTCAGGACTCCTCGTGTGTCAATGC	in-frame
+ADGRE5	ADGRE2	banned,known,paralogs,healthy,duplicates,bodymap2,ambiguous,hpa,gtex,cancer,m31,tcga-normal,exon-exon	1977	93	2	28	BOWTIE	19:14388818:+	19:14772497:-	ENSG00000123146	ENSG00000127507	ENSE00003504172	ENSE00003574549	TCTTTTTCTGAGATCATCACCACCCCGACGGAGACTTGTGACG*ACATCAACGAGTGTGCAACACTGTCGAAAGTGTCATGCGGAAA	in-frame
+AC007228.2	TOB2	lincrna,cancer,m3	1043	91	2	33	BOWTIE+STAR	19:56672874:-	22:41433788:-	ENSG00000268568	ENSG00000183864			AAGGTGGATTATCTTCTGAAGAATGGAAACTGTTAGTCCAGAATGATGTC*TTTTCTCAATGCAGTGAGTTATAGATTCTCTAGTTTTCTCCCTAGGGATG	exonic(no-known-CDS)/UTR
+TOB2	AC007228.2	lincrna,cancer,m3	1043	91	4	26	BOWTIE+STAR	22:41434099:-	19:56673182:-	ENSG00000183864	ENSG00000268568			AATACAAAAACAGAAATCTGCACTAATTTACCTGGTTTCGTAGGAAAACT*TTTTTTTTATTTTTTACATTTTTTGGTGTCCGTTTGTATTGAATAATTTG	UTR/exonic(no-known-CDS)
+TOB2	AC007228.2	lincrna,cancer,m3	1043	91	2	40	BOWTIE+STAR	22:41433799:-	19:56672881:-	ENSG00000183864	ENSG00000268568			ACAAATCAAAAGGTGGATTATCTTCTGAAGAATGGAAACTGTTAGTCCAG*ATGATGTCTTTTTCTCAATGCAGTGAGTTATAGATTCTCTAGTTTTCTCC	UTR/exonic(no-known-CDS)
+DTX2P1-UPK3BP1-PMS2P11	AC004951.1	lincrna,m29	1246	70	3	35	BOWTIE+STAR	7:77024867:+	7:44007586:-	ENSG00000265479	ENSG00000228434			AGCCCACCCGATTCCTCCCCACATCCTCCACGTCCCCAGGCCCCACCCAC*CTCCTCCAACTCCTCTGGGGAAACCCAAGCCCTGCAGCTCATGGAACAGA	exonic(no-known-CDS)/exonic(no-known-CDS)
+DTX2P1-UPK3BP1-PMS2P11	AC004951.1	lincrna,m29	1246	70	2	35	BOWTIE+STAR	7:77025475:+	7:44006903:-	ENSG00000265479	ENSG00000228434			CATAGCAAGACCCTCGTCTCTATTAAAAATATAAAAAATACGCCAGACGT*GGTGGCTCATGCCTGTAATCCCAGCACTTTAGAAGGCTGAAGCAGGTGGA	exonic(no-known-CDS)/intronic
+AC093890.1	GYPE	adjacent,pseudogene,m13,1K<gap<10K,readthrough	0	63	13	38	BOWTIE+BLAT;BOWTIE+STAR	4:143911516:-	4:143880509:-	ENSG00000249741	ENSG00000197465			CCATGATCTCTTCCAAAGAATGCCACAGAATCAGGGGAGGCATGCTTCAG*GAATTGTGAGCATATCAGCATCAAGTACCACTGGTGTGGCAATGCACACT	exonic(no-known-CDS)/CDS(truncated)
+NAIP	OCLN	banned,known,bodymap2,hpa,1000genomes,m0,multi	0	62	9	38	BOWTIE+STAR	5:70986290:-	5:69524854:+	ENSG00000249437	ENSG00000197822			GCCTCCCCAGTAGCTGGGACCACAAGCATGTGCCACCACACCTGGCTAAT*TTTTTGTATTTTTAGTAGAGACAGGGTTTTGCCATGTTGGCCAGGCTGGT	UTR/intronic
+NAIP	OCLN	banned,known,bodymap2,hpa,1000genomes,m0,multi,exon-exon	0	62	8	27	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	5:70979869:-	5:69534694:+	ENSG00000249437	ENSG00000197822	ENSE00003590701	ENSE00003560198	TTATTGATCCAAATTTCAGCTGAGTATGATCCTTCCAAACTAG*GTTAAAAATGTGTCTGCAGGCACACAGGACGTGCCTTCACCCC	out-of-frame
+NAIP	OCLN	banned,known,bodymap2,hpa,1000genomes,m0,multi,exon-exon	0	62	7	30	BOWTIE	5:70983775:-	5:69534694:+	ENSG00000249437	ENSG00000197822	ENSE00002214653	ENSE00003560198	TCCCTGGAATCTCTTGAAGTCTCAGGGACAATCCAGTCACAAG*GTTAAAAATGTGTCTGCAGGCACACAGGACGTGCCTTCACCCC	out-of-frame
+NAIP	OCLN	banned,known,bodymap2,hpa,1000genomes,m0,multi	0	62	2	21	BOWTIE+STAR	5:70986290:-	5:69543683:+	ENSG00000249437	ENSG00000197822			GCCTCCCCAGTAGCTGGGACCACAAGCATGTGCCACCACACCTGGCTAAT*TTTTTGTATTTTTTGTAGAGATGGGGTTTCACCATGTTGGCCAGGCCGAT	UTR/intronic
+PRIM1	NACA	banned,known,adjacent,conjoing,healthy,bodymap2,hpa,1000genomes,cancer,m60,gap<1K,readthrough	0	61	16	36	BOWTIE+BLAT;BOWTIE+STAR	12:56731608:-	12:56731607:-	ENSG00000198056	ENSG00000196531			CTTCAAGAGCCATTTAATAAATATGGCAGAACTATATATGTGTCTTAAAC*CTCAAAGTAAATTTTCCTTGAGAAATAATTTATGTTGAAAAGATTTCCTG	UTR/UTR
+PRIM1	NACA	banned,known,adjacent,conjoing,healthy,bodymap2,hpa,1000genomes,cancer,m60,gap<1K,readthrough,exon-exon	0	61	7	30	BOWTIE	12:56734147:-	12:56714687:-	ENSG00000198056	ENSG00000196531	ENSE00003575614	ENSE00003607207	AATCTGGATAAATCCCGAAAAGGAGAACTTCTTAAGAAGAGTG*GGTCTGGAACAGAATCTGACAGTGATGAATCAGTACCAGAGCT	in-frame
+RMND5A	ANAPC1	banned,known,bodymap2,hpa,m0,multi,exon-exon	0	55	21	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	2:86741069:+	2:111822600:-	ENSG00000153561	ENSG00000153107	ENSE00002531730	ENSE00002441208	CATCCACAGCAGTGTTTCTCGGGTTGGAAAAGCCATTGATAAG*GATTCACTTTAAGAGATTTGGAAACTCTTCCCTTTGGAATTGC	out-of-frame
+TTTY15	USP9Y	banned,known,adjacent,lincrna,healthy,cacg,bodymap2,hpa,chimerdb3pub,1K<gap<10K,readthrough	0	54	17	40	BOWTIE+BLAT;BOWTIE+STAR	Y:12686606:+	Y:12702010:+	ENSG00000233864	ENSG00000114374			TGGCCTCATATTTTGTGTACACAGTCCCTGTACAGGGTTTCTGACCTGTG*AGAGCTTGGAGATAATTCTGGTGGCTGTGTGGAGTATGTGTTGGAGGTGA	exonic(no-known-CDS)/UTR
+TTTY15	USP9Y	banned,known,adjacent,lincrna,healthy,cacg,bodymap2,hpa,chimerdb3pub,1K<gap<10K,readthrough	0	54	10	40	BOWTIE+BLAT;BOWTIE+STAR	Y:12690441:+	Y:12702010:+	ENSG00000233864	ENSG00000114374			CACTTCAGTTTTGAGATCTCTGCTGTATTTGTATACAAGTATTTTGGAAG*AGAGCTTGGAGATAATTCTGGTGGCTGTGTGGAGTATGTGTTGGAGGTGA	exonic(no-known-CDS)/UTR
+AC009086.3	SMG1	m42	375	49	8	37	BOWTIE+STAR	16:29595389:-	16:18926454:-	ENSG00000279583	ENSG00000157106			CTCTCCCCAGGCCCGGCGGGGCCGACCCCGCCTCTCGCTCCCAGCATGCC*GTGCGACAGCGGCGGCGCGGCGGGCGGAGCCGGGAGGCGGGGAAGCAGTG	exonic(no-known-CDS)/UTR
+AC009086.3	SMG1	m42	375	49	3	30	BOWTIE+STAR	16:29595244:-	16:18926309:-	ENSG00000279583	ENSG00000157106			TCCTCGTCCTCCGGGGCCCCGGCGTCGTGGGCCGCGCATGGCCCTGGAAG*AGACGTCGCCTCCCCTTCATCCGCCTCTCTCTCACCGCGCCGCTCCCGCC	exonic(no-known-CDS)/UTR
+AL132656.3	NUTM2A-AS1	antisense,m4	350	49	2	33	BOWTIE+BLAT;BOWTIE+STAR	10:79663926:+	10:87331096:-	ENSG00000272489	ENSG00000223482			AACTTGGCAAACATAATTAATAATCATGGGTTATTAATTACTTCTGCAAG*AAACTTAAATCCTCAGTGTGACACAACTCAGTCCTTCTACTGCAGCAGCT	exonic(no-known-CDS)/exonic(no-known-CDS)
+SMG1	AC009086.3	m42	375	49	2	28	BOWTIE+STAR	16:18926284:-	16:29595219:-	ENSG00000157106	ENSG00000279583			GTGGGCCGCGCACGGCCCTGGAAAAGACGTCGCCTCCCCTTCATCCGCCT*CTCTCTCTCACCGCGCCGCTCCCGCCTCCTCATTCTGCGTTGCGGGCTCA	UTR/exonic(no-known-CDS)
+NPEPPS	TBC1D3	banned,known,oncogene,bodymap2,hpa,gtex,18cancers,tumor,m0,multi,exon-exon	0	47	15	30	BOWTIE;BOWTIE+STAR	17:47592545:+	17:38191030:-	ENSG00000141279	ENSG00000274611	ENSE00003785912	ENSE00003731868	AACATGTATTTAACCAAGTTCCAACAAAAGAATGCTGCCACAG*GATGGACGTGGTAGAGGTCGCGGGCAGTTGGTGGGCACAAGAG	CDS(complete)/UTR
+BANK1	IGH@	banned,known,hpa,m3	0	31	3	41	BOWTIE+BLAT	4:101841881:+	14:106819539:-	ENSG00000153064	ENSG09000001018			GTTCTCATTGTTCAATTCCCACCTATGAGTGAGAATATGCGGTGTTTGGT*TTTTTTGTTCTTGCGATAGTTTACTGAGAATGATGATTTCCAGTTTCATC	intronic/---
+TNFRSF17	SNX29	adjacent,tcga,cell_lines,non_tumor_cells,chimerdb3seq,cancer,tumor,tcga-cancer,1K<gap<10K,readthrough	0	30	14	41	BOWTIE+BLAT;BOWTIE+STAR	16:11967843:+	16:11999297:+	ENSG00000048462	ENSG00000048471			CTGCCAGCTGCTTTGAGTGCTACGGAGATAGAGAAATCAATTTCTGCTAG*GATCACAGAACAATGACAAAAGACAATTTCTGCTGGAGCGACTGCTGGAT	out-of-frame
+TNFRSF17	SNX29	adjacent,tcga,cell_lines,non_tumor_cells,chimerdb3seq,cancer,tumor,tcga-cancer,1K<gap<10K,readthrough	0	30	3	28	BOWTIE+BLAT;BOWTIE+STAR	16:11967843:+	16:11983566:+	ENSG00000048462	ENSG00000048471			CTGCCAGCTGCTTTGAGTGCTACGGAGATAGAGAAATCAATTTCTGCTAG*GCACTGGATTGATCTATGATGGTGGAATGCAAGAAGTACATGTTCTACCA	CDS(truncated)/intronic
+MIR100HG	SPN	oncogene,m6	457	29	20	39	BOWTIE+STAR	11:122506443:-	16:29666553:+	ENSG00000255248	ENSG00000197471			AACACACACAGACACACACACACACACACACACACACACACACACACACG*CGCGCGCGCGCGCGCGCTCTCCTGCGAACAGAGGCAGGGGGAGAGGGGTT	intronic/UTR
+MIR100HG	SPN	oncogene,m6	457	29	3	36	BOWTIE+STAR	11:122363024:-	16:29665301:+	ENSG00000255248	ENSG00000197471			TCGCCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCTGC*CTGCCTCAGCCTCCCAAAGTGCTGAGATTACAGACATGAGCCTCCGCGCC	intronic/UTR
+MIR100HG	SPN	oncogene,m6	457	29	2	40	BOWTIE+STAR	11:122398748:-	16:29665326:+	ENSG00000255248	ENSG00000197471			TAACTCCTGACCTCAGGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGG*GATTACAGACATGAGCCTCCGCGCCTTGCCTCCTCACCCACCTCTTCACT	intronic/UTR
+SPN	MIR100HG	oncogene,m6	457	29	7	37	BOWTIE+STAR	16:29665227:+	11:122568387:-	ENSG00000197471	ENSG00000255248			CCGAGTAACTGAGATTACAGGCACCCACCACCATGCCCAGCTGCTTTTTT*TGTATTTTTGGTAGAGATGGGGTTTCACCATATTGGCCAGGTTGGTCTTG	UTR/intergenic
+SPN	MIR100HG	oncogene,m6	457	29	3	34	BOWTIE+STAR	16:29666518:+	11:122506476:-	ENSG00000197471	ENSG00000255248			AAGTCTCCATTTCTGCAGTACACATGCATGTGCGCTCTCTCTCTCTCTCT*CACACACACACACACACACACACACACACACACGAAAGAAAAGAAAAGAA	UTR/intronic
+SPN	MIR100HG	oncogene,m6	457	29	2	26	BOWTIE+STAR	16:29666361:+	11:122587592:-	ENSG00000197471	ENSG00000255248			AAGTGAGCTGTGATTGCACCACTGCACTTCAGCCTGGGCAACAGAGTGAG*AAAAAAAAAAAAAAAAGAAAAGAAAAGAAAAAAATAATCTTTGAAAGCGG	UTR/intergenic
+SPN	MIR100HG	oncogene,m6	457	29	2	25	BOWTIE+STAR	16:29665223:+	11:122576939:-	ENSG00000197471	ENSG00000255248			CTTCCCGAGTAACTGAGATTACAGGCACCCACCACCATGCCCAGCTGCTT*TTTTTGTATTTTTGGTAGAGATGGGTTTCACCATGTTGACCAGGCTGGTC	UTR/intergenic
+SPN	MIR100HG	oncogene,m6	457	29	2	22	BOWTIE+STAR	16:29667392:+	11:122127457:-	ENSG00000197471	ENSG00000255248			TTGACAGATATGGTTGTTTTCTAAGCCAGGACTGGTTTTAGTCAGGTCCT*AAAAAAAAAAAAAAAAAGAGTTAAACTAACTGGTAGATGAGAGCGGGT	UTR/intronic
+SPN	MIR100HG	oncogene,m6	457	29	2	20	BOWTIE+STAR	16:29665286:+	11:122398786:-	ENSG00000197471	ENSG00000255248			GTAGAGATGGGGTTTCACCATGTTGGCTAGGCTGGTCTCAAACTCCTGAC*CTCAGGTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCA	UTR/intronic
+SPN	MIR100HG	oncogene,m6	457	29	2	20	BOWTIE+STAR	16:29667392:+	11:121947699:-	ENSG00000197471	ENSG00000255248			GACAGATATGGTTGTTTTCTAAGCCAGGACTGGTTTTAGTCAGGTCCT*AAAAAAAAAAAAAAAAAGAGAGAAAAGAAAGAGTGCACCCAACACAGAAT	UTR/intergenic
+AC092718.8	AC016769.2	pseudogene,m14	390	28	3	21	BOWTIE+STAR	16:81090618:-	7:64582903:+	ENSG00000284512	ENSG00000224669			AGAGAAACACGAATGGGTAACAACAGAAAATGGCATTGGAACAGTGGGAA*TCAGCAGTTTTGCACAGGAAACGTTGGGAGATATTGTTTACTGTAGTCTG	CDS(truncated)/exonic(no-known-CDS)
+AL021368.4	WDR70	m7,exon-exon	230	28	6	30	BOWTIE	6:57961321:-	5:37379489:+	ENSG00000283352	ENSG00000082068	ENSE00003792457	ENSE00003478369	TGCGGTGTGCGGCCAGCCATGGAGCGCTCTGGGCCCAGCGAAG*TGACAGGCTCAGACGCGTCGGGACCGGACCCGCAGCTTGCGGT	exonic(no-known-CDS)/CDS(truncated)
+WDR70	AL021368.4	m7,exon-exon	230	28	10	29	BOWTIE	5:37379554:+	6:57959111:-	ENSG00000082068	ENSG00000283352	ENSE00003478369	ENSE00003801048	CCGGACCCGCAGCTTGCGGTCACCATGGGCTTCACGGGGTTCG*GTAAAAAAGCTCACACATTTGACTTGGAAGCAATGTTTGAACA	CDS(truncated)/exonic(no-known-CDS)
+WDR70	AL021368.4	m7,exon-exon	230	28	6	30	BOWTIE	5:37379392:+	6:57961224:-	ENSG00000082068	ENSG00000283352	ENSE00002056284	ENSE00003800756	TGGGGTGTGCGGCCAGCCATGGAGCGCTCTGGGCCCAGCGAAG*TGACAGGCTCAGACGCATCGGGACCGGACCCGCAGCTTGTGGT	CDS(truncated)/exonic(no-known-CDS)
+WDR70	AL021368.4	m7	230	28	3	39	BOWTIE+BLAT;BOWTIE+STAR	5:37381685:+	6:57947177:-	ENSG00000082068	ENSG00000283352			GTTTGAACAAACTCGAAGGACAGCTGTGGAAAGAAGTCGCAAAACACTGG*TTCAACCCAATGGAAATGGATTGAAAGAGTGAAAGTCACTGGTCTCAAAG	CDS(truncated)/intronic
+WDR70	AL021368.4	m7	230	28	3	31	BOWTIE+STAR	5:37381685:+	6:57953043:-	ENSG00000082068	ENSG00000283352			GTTTGAACAAACTCGAAGGACAGCTGTGGAAAGAAGTCGCAAAACACTGG*TGTTCTTCCAGACTTGGCAATTAGATTACATTTAGACCGTCGAATTAAAA	CDS(truncated)/intronic
+WDR70	AL021368.4	m7	230	28	2	41	BOWTIE+STAR	5:37379474:+	6:57961235:-	ENSG00000082068	ENSG00000283352			GGCCGTGTCGGGGGAGCTGGGGCGCCGCACTAACTAGGCCGCCTCTCTTT*TCTTGCTCCAGTGACAGGCTCAGACGCATCGGGACCGGACCCGCAGCTTG	intronic/intronic
+ABBA01031661.1	ABBA01031663.1	pseudogene,m6,10K<gap<100K	54	26	12	41	BOWTIE+STAR	20:29410456:-	20:29448626:-	ENSG00000282935	ENSG00000282911			CCTGTTAGATGAGCTCCTGTCAACCCCAGAGTTTCAGCAAAAGGCACAAA*CTTTCCTAGATCCGGCGCCACTGGGGGAGCTGAAGGACGTGGAAGAGCCC	exonic(no-known-CDS)/exonic(no-known-CDS)
+CCDC88C	NAP1L1	known,ambiguous,non_cancer_tissues,m2,multi	918	26	2	39	BOWTIE+STAR	14:91384500:-	12:76053301:-	ENSG00000015133	ENSG00000187109			ATGGGGTGATGATTATGATGAAGAAGGTGAAGAAGCAGATGAGGAAGGGG*AAGAAGAAGCAGAAACACAAGGGACGTGGGACAGTTCGTACTGTGACTAA	UTR/CDS(truncated)
+LSP1	IGK@	m14	0	26	2	34	BOWTIE+STAR	11:1886928:+	2:89581054:-	ENSG00000130592	ENSG09000000011			CCAGGGGCAAGGCTGGGCTGCAGAGCCAGCGCCTGGGAGTTTAGTAGCAG*GACGGGTGAGGTACAGGCTCAGTCTGCGGCCAAGACTCCGTCCTGCAAGG	intronic/---
+NAP1L1	CCDC88C	known,ambiguous,non_cancer_tissues,m2,multi	918	26	3	19	BOWTIE+STAR	12:76053767:-	14:91278920:-	ENSG00000187109	ENSG00000015133			TCTGATCCCTTTTCTTTTGATGGACCAGAAATTATGGGTTGTACAGGGTG*TCAGAAAAAAAAAAAAAAAAAAAAAAAAAAAGTGTATTTGGTGGTTTGGC	intronic/intronic
+PPIP5K1	CATSPER2	banned,known,bodymap2,hpa,1000genomes,m1,multi,10K<gap<100K,exon-exon	126	22	6	29	BOWTIE	15:43564867:-	15:43640496:-	ENSG00000168781	ENSG00000166762	ENSE00003788646	ENSE00003500886	CATGGCAAAAAGCTACCACCTGCCAGTCTGAAGCACCGAGATG*AATTGCTGGAATCCACAAATACCAAACTATGGCCATTGAAGCT	in-frame
+PPIP5K1	CATSPER2	banned,known,bodymap2,hpa,1000genomes,m1,multi,10K<gap<100K,exon-exon	126	22	4	28	BOWTIE	15:43571412:-	15:43668118:-	ENSG00000168781	ENSG00000166762	ENSE00003586891	ENSE00003522972	GAGATCACCCCTCATTCGTAACCGAAAAGCTGGTTCCATGGAG*GTACTTTCTGAGACTTCATCCTCGAGGCCTGGTGGCTACCGGC	CDS(truncated)/UTR
+PPIP5K1	CATSPER2	banned,known,bodymap2,hpa,1000genomes,m1,multi,10K<gap<100K,exon-exon	126	22	2	23	BOWTIE	15:43564867:-	15:43649879:-	ENSG00000168781	ENSG00000166762	ENSE00003788646	ENSE00001581568	CATGGCAAAAAGCTACCACCTGCCAGTCTGAAGCACCGAGATG*AGGTATCTGCCGGTACTGAGATGTAAGTGATCAGCTGAAGGGA	CDS(complete)/UTR
+TRB@	SYNE3	cancer,m19	0	22	2	29	BOWTIE+STAR	7:142662667:-	14:95427145:-	ENSG09000001076	ENSG00000176438			ACCAAGGAGCCCTCTGATGGACCTGTCCGGGCATAACAGAAGGCTCGCAC*TCTTCTGGTCACTTCTCACTATGTCCCCTCAGCTCCTATCTCTGTATGGC	---/intronic
+AC092821.3	AC141557.1	pseudogene,m3,exon-exon	113	21	3	29	BOWTIE	12:9569202:+	12:9402556:-	ENSG00000284634	ENSG00000256673	ENSE00003813482	ENSE00002277932	TCTGAGCTTTCTTCTTGGACACCTAATACCCACAGTCCTCCAG*CAGTGAAGGATCCAGTGAGATTTTCCAGGTTAACGGTCATAAT	exonic(no-known-CDS)/exonic(no-known-CDS)
+AC141557.1	AC092821.3	pseudogene,m3	113	21	6	34	BOWTIE+BLAT;BOWTIE+STAR	12:9398382:-	12:9575643:+	ENSG00000256673	ENSG00000284634			GTTCAACATTCAGCCAGCCCCAGCCATGGTCTACGATTACTATGAAAAAG*AAGAATATGCCCTAGCTTTTTACAACATCGACAGTAGTTCAGTTTCCCAG	exonic(no-known-CDS)/intronic
+AC141557.1	AC092821.3	pseudogene,m3,exon-exon	113	21	2	22	BOWTIE	12:9402427:-	12:9571461:+	ENSG00000256673	ENSG00000284634	ENSE00002277932	ENSE00003812092	ATACACAGTAGATGTGGAAGGACACGGTTGTACATTTATCCAG*GCCACCCTTAAGTACAATGTTCTCCTACCTAAGAAGGCATCTG	exonic(no-known-CDS)/exonic(no-known-CDS)
+SLC45A3	BRAF	known,oncogene,cosmic,ticdb,chimerdb3kb,chimerdb3pub,chimerdb3seq,cancer,tumor,exon-exon	0	20	25	30	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	1:205680394:-	7:140794467:-	ENSG00000158715	ENSG00000157764	ENSE00001443628	ENSE00003569635	CTGCACGCGCTGGCTCCGGGTGACAGCCGCGCGCCTCGGCCAG*GCCCCAAATTCTCACCAGTCCGTCTCCTTCAAAATCCATTCCA	UTR/CDS(truncated)
+AC009093.10	BANP	oncogene,tumor,m7,exon-exon	309	19	6	30	BOWTIE	16:29328624:+	16:88037973:+	ENSG00000284685	ENSG00000172530	ENSE00003811530	ENSE00003683876	GGTGCCACAGGGGGAGCAAGTCCAGATCACGCAGGACAGCGAG*GGCAACCTCCAGATCCATCACGTGGGGCAGGACGGTCAGCTTC	exonic(no-known-CDS)/CDS(truncated)
+AC009093.10	BANP	oncogene,tumor,m7,exon-exon	309	19	3	29	BOWTIE	16:29312589:+	16:88033109:+	ENSG00000284685	ENSG00000172530	ENSE00003813161	ENSE00002215853	GGGAAGAAGCAGCTGGACCCGCTCACCATCTATGGCATCCGGT*AGCCGATGATGAGCACCCCACCTCCTGCCAGCGAGCTCCCGCA	exonic(no-known-CDS)/CDS(truncated)
+BANP	AC009093.10	oncogene,tumor,m7,exon-exon	309	19	7	30	BOWTIE	16:88038011:+	16:29320928:+	ENSG00000172530	ENSG00000284685	ENSE00003683876	ENSE00003811895	CGAGGGCAACCTCCAGATCCATCACGTGGGGCAGGACGGTCAG*GTCACCTTTTCTATAAATTTGGCATCACAGAATCTGACTGGTA	CDS(truncated)/exonic(no-known-CDS)
+BANP	AC009093.10	oncogene,tumor,m7,exon-exon	309	19	6	30	BOWTIE	16:88035394:+	16:29331237:+	ENSG00000172530	ENSG00000284685	ENSE00001659025	ENSE00003812137	GGTGCCGCAGGGGGAGCAAGTCCAGATCACGCAGGACAGCGAG*GGCAACCTCCAGATCCATCACGTGGGGCAGGATGGTCAGCTGA	CDS(truncated)/exonic(no-known-CDS)
+BANP	AC009093.10	oncogene,tumor,m7,exon-exon	309	19	6	30	BOWTIE	16:88038011:+	16:29312350:+	ENSG00000172530	ENSG00000284685	ENSE00003683876	ENSE00003813161	CGAGGGCAACCTCCAGATCCATCACGTGGGGCAGGACGGTCAG*AGGATTACCCCAATGGCACCTGGCTGGGCGACGAGAACAACCC	CDS(truncated)/exonic(no-known-CDS)
+BANP	AC009093.10	oncogene,tumor,m7,exon-exon	309	19	4	29	BOWTIE	16:87984259:+	16:29251479:+	ENSG00000172530	ENSG00000284685	ENSE00001188809	ENSE00003812336	GCTCCCCTCTCGGGGCAACCCAGACGTGCAACAAAGTGCGATG*GGATCTGAAGGGAGCCGAGCTCCTTCTGCAGAAGGGCGCTGAG	CDS(truncated)/exonic(no-known-CDS)
+BANP	AC009093.10	oncogene,tumor,m7,exon-exon	309	19	3	29	BOWTIE	16:88018667:+	16:29326347:+	ENSG00000172530	ENSG00000284685	ENSE00003510744	ENSE00003812990	GGGAAGAAGCAGCTGGACCCGCTCACCATCTACGGCATCCGGT*AGCCGATGATGAGCACCCCACCTCCTGCCAGAGAGCTCCTGCA	CDS(truncated)/exonic(no-known-CDS)
+BANP	AC009093.10	oncogene,tumor,m7,exon-exon	309	19	2	30	BOWTIE	16:88038011:+	16:29326347:+	ENSG00000172530	ENSG00000284685	ENSE00003683876	ENSE00003812990	CGAGGGCAACCTCCAGATCCATCACGTGGGGCAGGACGGTCAG*AGCCGATGATGAGCACCCCACCTCCTGCCAGAGAGCTCCTGCA	CDS(truncated)/exonic(no-known-CDS)
+IGH@	SYNE3	known,cancer,m3	0	19	2	22	BOWTIE+STAR	14:106495084:+	14:95475836:-	ENSG09000000017	ENSG00000176438			GCTGGGCATAGTGGCACATGCCTGTAATCCCAGCTACCCGGGAGGCTGAG*NNNNNNNNNNNNNNNNNNNNNGCCCCTGCAGGTGCCATGACTCAGCAGCCCCAGGACGACTTTGACAGGAG	---/UTR
+TOR1AIP1	EXOSC6	m3	8	18	6	28	BOWTIE+STAR	1:179921000:+	16:70247827:-	ENSG00000143337	ENSG00000223496			TCACCATGTTGGCCAGGCTGGTCTTGAACTCCTGACCTCAGGTGATCCAC*CTGCCTCGTCCTCCCAAAGTGCTGGGGTTACAGGTGTGAGCCACTGCACC	UTR/UTR
+TOR1AIP1	EXOSC6	m3	8	18	2	26	BOWTIE+STAR	1:179916828:+	16:70249290:-	ENSG00000143337	ENSG00000223496			CTGCAACCTCCGCCTCCCGTGTTCAAGTGATTCTCCTGCCTCAGCCTCCC*AAGTAGCTGGTATTACAGGAGTGTGCCATTAGCCTGGCTAATTTTTGTAT	intronic/UTR
+TRB@	ALK	oncogene,cancer,tumor,m3	0	18	2	40	BOWTIE+BLAT	7:142395093:-	2:29784590:-	ENSG09000001076	ENSG00000171094			CACAGCAACCTCTGCCTCCTGGGTTTAAGCAATTCTCCTGCCTCAGCTTC*CCAAGTAGCTGGGATTACAGGTGCCTGCCACCACACCTGGCTAATTTTTG	---/intronic
+ZNF549	KLHL6	m5	335	18	2	27	BOWTIE+STAR	19:57556882:+	3:183521175:-	ENSG00000121406	ENSG00000172578			CCCTAGCCCCAACCCTGTGCTCACAGAAACATGTGCTGTATTGACTCAAG*GTTTAATGGATTTAGGGCTGTGCAGGAGGTGCTTTGTTAAAAATGTGTTT	UTR/UTR
+ARPC2	AL022322.2	antisense,m6	74	17	8	31	BOWTIE+STAR	2:218236682:+	22:38132006:+	ENSG00000163466	ENSG00000279080			AAGTACAAAATTAGCCAGGCCTGGTGGCGCATGCCTGTAATCCCAGCTGC*TTGGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGGGAGGCGGAGGTTGT	UTR/exonic(no-known-CDS)
+NSF	LRRC37A3	banned,known,hpa,m0,multi,exon-exon	0	17	4	23	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	17:46704854:+	17:64898459:-	ENSG00000073969	ENSG00000176809	ENSE00003258850	ENSE00001540364	GAGAGGAGACTTCCTTGCTTCTTTGGAGAATGATATCAAACCA*GCTGAAGTGCAATGTTGTGATCTCGGCTCACTGCAACCTCTGC	CDS(truncated)/UTR
+NSF	LRRC37A3	banned,known,hpa,m0,multi,exon-exon	0	17	3	21	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	17:46704854:+	17:64892600:-	ENSG00000073969	ENSG00000176809	ENSE00003258850	ENSE00003547821	GAGAGGAGACTTCCTTGCTTCTTTGGAGAATGATATCAAACCA*AAATTTCCAAGGAAACTATATTTCTTACATTGATGGAAATGTA	in-frame
+AC009299.2	MXRA7	pseudogene,m3	128	16	5	29	BOWTIE+STAR	2:161340829:-	17:76680881:-	ENSG00000232320	ENSG00000182534			GAAACCAGTACGAGAAGATGATGACCAAAGAGGAGCTGGAGGAGGAGCAG*AGAACTGAAGAATAACGAAGTTATCCTTAGCGTCCTCCTAAAGGCTTTTC	exonic(no-known-CDS)/intronic
+AL592310.1	FAM133B	pseudogene,m10	104	16	3	34	BOWTIE+STAR	1:227598469:-	7:92562326:-	ENSG00000225934	ENSG00000234545			AAGCAACAGAAAAAAAACAAAAAAAGAAAAAGAAGCATAAGAAACATAGT*AAGAAGAAAAAGAAGGCTGCTAGTTCAAGTCCTGACTCACCATAACATTA	exonic(no-known-CDS)/CDS(truncated)
+AL592310.1	FAM133B	pseudogene,m10	104	16	2	40	BOWTIE+BLAT;BOWTIE+STAR	1:227598694:-	7:92577174:-	ENSG00000225934	ENSG00000234545			CTTCTGAAAGCTCCATGTCATAATCTGAATCAGACAGTAAGGATAGTTTA*AGAAAGAAAAAGAAGAACCGTTCACATAAATCTTCTGAAAGCTCCATGTC	exonic(no-known-CDS)/CDS(truncated)
+AL592310.1	FAM133B	pseudogene,m10	104	16	2	39	BOWTIE+STAR	1:227598953:-	7:92578395:-	ENSG00000225934	ENSG00000234545			GAAAAGAAAGGCTCCAAGGTTTTGGCTGAATTTGAAGAAAAAAATGAATG*AGAACTGGAAGAAAGAACTGGAAAAACACAGGGAGAAATTGTTAAGTGGA	exonic(no-known-CDS)/intronic
+CCDC32	CBX3	banned,known,healthy,tcga,hpa,gtex,gliomas,chimerdb3pub,chimerdb3seq,tcga-normal,oesophagus	0	16	6	41	BOWTIE+BLAT;BOWTIE+STAR	15:40561981:-	7:26201745:+	ENSG00000128891	ENSG00000122565			AGATTCATTCTGCCTCTTGTTAGCCATTTTATTTTAAAAATATTTCCTGA*CTTCGGATGTGGCTTGAGCTGTAGGCGCGGAGGGCCGGAGACGCTGCAGA	UTR/UTR
+CTSC	RAB38	banned,known,adjacent,healthy,non_tumor_cells,hpa,1000genomes,chimerdb3pub,chimerdb3seq,100K<gap<200K,readthrough,exon-exon	0	14	2	27	BOWTIE	11:88300530:-	11:88149955:-	ENSG00000109861	ENSG00000123892	ENSE00000743350	ENSE00000989348	AATGTTCATGGTATCAATTTTGTCAGTCCTGTTCGAAACCAAG*GTCAAGAAAGATTTGGAAACATGACGAGGGTCTATTACCGAGA	in-frame
+CCDC50	MALT1	oncogene,cancer,m3	30	13	2	20	BOWTIE+STAR	3:191392749:+	18:58698080:+	ENSG00000152492	ENSG00000172175			ATGAAGTGGTCATCTATCCAAGTATTTGGTTTTTGTTTTGTTTTGTTTTG*TTTTTTGAGATGGAGTCTCGCTCTGTCGCCAGGCTGGAGTGCAGTGGCGC	UTR/intronic
+SERF1A	SMN1	banned,known,adjacent,bodymap2,hpa,m0,multi,1K<gap<10K,readthrough,exon-exon	0	13	5	28	BOWTIE	5:70901933:+	5:70938839:+	ENSG00000172058	ENSG00000172062	ENSE00003460559	ENSE00001688516	AGAGGAAAGAGGATAGCTTGACTGCCTCTCAGAGAAAGCAGAG*AGCGATGATTCTGACATTTGGGATGATACAGCACTGATAAAAG	out-of-frame
+SERF1A	SMN1	banned,known,adjacent,bodymap2,hpa,m0,multi,1K<gap<10K,readthrough,exon-exon	0	13	2	28	BOWTIE	5:70917048:+	5:70938839:+	ENSG00000172058	ENSG00000172062	ENSE00002077354	ENSE00001688516	GGGAGAATCCTTGCTTTCCTCTTCCAGCTGCTGGTGGCTCCAG*AGCGATGATTCTGACATTTGGGATGATACAGCACTGATAAAAG	out-of-frame
+SMN1	SERF1A	banned,known,adjacent,bodymap2,hpa,m0,multi,1K<gap<10K	0	13	2	17	BOWTIE+STAR	5:70952640:+	5:70913385:+	ENSG00000172062	ENSG00000172058			TTGATTAAAAGTTATGTAATAACCAAATGCAATGTGAAATATTTTACTGG*AAAAAAAAAAAAAAAACATACAAACCGAATTTCCATTCCACATACTACTC	UTR/intronic
+AC004951.1	POLR2J3	lincrna,m3	0	12	3	38	BOWTIE+STAR	7:44004380:-	7:102557705:-	ENSG00000228434	ENSG00000168255			GATCATGCCACTGCACTCCATCCTGGGGAGCAGAGCTAGATTCTGTCTCA*AAAAAAAAAATTTGTGGGTGCCAAGACTCAAGACCATGGGAGCTGGTC	exonic(no-known-CDS)/intronic
+IGH@	EBF1	banned,known,cancer,m0,multi	0	12	3	21	BOWTIE+STAR	14:106880684:+	5:158930263:-	ENSG09000000018	ENSG00000164330			TTCTTCTCACCTCCATTGGCAAAAAAAAAAAAAAACAAAAAAAAAAAAAC*AAACAAAAAAACTAGAAAACAGCTCGAGCCTTTTTTGCTCAAGTTTTGTT	---/intronic
+MND1	TMEM131L	adjacent,tcga,non_cancer_tissues,chimerdb3seq,tcga-cancer,10K<gap<100K,readthrough,exon-exon	0	12	3	26	BOWTIE	4:153409015:+	4:153467211:+	ENSG00000121211	ENSG00000121210	ENSE00003485440	ENSE00001713308	CAAGCAAATAAAGTAGCCAAAGAAGCTGCTAACAGATGGACTG*CGATTGAGCCGTTGCCGAACGTGGTGGAGCTGTGGCAGGCAGA	in-frame
+MRPS33	TMPO	ribosomal,m5	144	12	8	40	BOWTIE+STAR	7:141008967:-	12:98548967:+	ENSG00000090263	ENSG00000120802			CAAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATAC*AAAAATTAGCTGGGCGTGATGATGTGCGCCTGTAGTCCTGTCTACTAG	intronic/UTR
+MRPS33	TMPO	ribosomal,m5	144	12	8	40	BOWTIE+STAR	7:141009304:-	12:98548967:+	ENSG00000090263	ENSG00000120802			TGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATAC*AAAAATTAGCTGGGCGTGATGATGTGCGCCTGTAGTCCTGTCTACTAG	intronic/UTR
+MRPS33	TMPO	ribosomal,m5	144	12	6	29	BOWTIE+STAR	7:141004968:-	12:98548918:+	ENSG00000090263	ENSG00000120802			CCCAGCACTTTGGGAGGTCGAGGCGGGTGGATCACGAGGTCAGGGGATCG*AGACCATCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAA	UTR/UTR
+POLR2J3	AC004951.1	lincrna,m3	0	12	8	41	BOWTIE+STAR	7:102557707:-	7:44004378:-	ENSG00000168255	ENSG00000228434			TCATGCCACTGCACTCCATCCTGGGGAGCAGAGCTAGACTCTGTCTCA*AAAAAAAAAATTTGTGGGTGCCAAGACTCAAGACCATGGGAGCTGGTCGG	intronic/exonic(no-known-CDS)
+POLR2J3	AC004951.1	lincrna,m3,exon-exon	0	12	6	24	BOWTIE	7:102566997:-	7:44005335:-	ENSG00000168255	ENSG00000228434	ENSE00003643284	ENSE00001805426	CTGAGGCCTCCCCAGCCACGCTTCCTGTACAGCCTGCAGAACT*AGTAAAGAGGACAAATAGGTGAAAGAATAAATGAAAGGCTGGA	CDS(truncated)/exonic(no-known-CDS)
+THAP11	ATXN3	m4	0	12	8	41	BOWTIE+BLAT;BOWTIE+STAR	16:67842947:+	14:92071034:-	ENSG00000168286	ENSG00000066427			AGCAACAGCAGCAGCAGCAGCAACAGCAGCAGCAGCAGCAGCAGCAGCAG*CAGCAGCAGCAGCAGCAGCAGCAGGGGGACCTATCAGGACAGAGTTCACA	in-frame
+THAP11	ATXN3	m4	0	12	3	40	BOWTIE+STAR	16:67842948:+	14:92071009:-	ENSG00000168286	ENSG00000066427			GCAACAGCAGCAGCAGCAGCAACAGCAGCAGCAGCAGCAGCAGCAGCAGC*GGGACCTATCAGGACAGAGTTCACATCCATGTGAAAGGCCAGCCACCAGT	in-frame
+TMPO	MRPS33	ribosomal,m5	144	12	6	39	BOWTIE+STAR	12:98548905:+	7:141004979:-	ENSG00000120802	ENSG00000090263			TACGCCTGTAATCCCAGCACTTTGGGAGGCCAGGGCGGGTGGATCACGAG*GTCAGGGGATCGAGACCATCCTGGCTAACATGGTGAAACCCCGTCTCTAC	UTR/UTR
+AL512306.2	ENTPD1-AS1	lincrna,antisense,m6,exon-exon	0	11	2	24	BOWTIE	1:204629712:+	10:95875666:-	ENSG00000240219	ENSG00000226688	ENSE00001628090	ENSE00001648768	CATGTGGAACTATAAGTCCAATTAAACCTCTTTTTCTTCCCGG*TCTCAGGTATGTCTTTATCAGCAGCATGAAAATGGACTAATAG	exonic(no-known-CDS)/exonic(no-known-CDS)
+DCAF10	ZNF587B	m10	0	11	5	38	BOWTIE+STAR	9:37863096:+	19:57844270:+	ENSG00000122741	ENSG00000269343			GCCAGGCGCAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGAG*GTGGGTGGATCACAAGGACAGGAAGGAGTTCAAGACCAGCCTGGGCAATA	UTR/CDS(truncated)
+FCRL4	ZNF587B	cancer	0	11	6	41	BOWTIE+STAR	1:157570818:-	19:57844270:+	ENSG00000163518	ENSG00000269343			GCCAGGAGCGGTGGCTCATGCCTGTAATCCCAGCGCTTTGGGAGGCCGAG*GTGGGTGGATCACAAGGACAGGAAGGAGTTCAAGACCAGCCTGGGCAATA	intergenic/CDS(truncated)
+KANSL1	LRRC37A3	banned,known,hpa,non_cancer_tissues,m0,multi,exon-exon	0	11	12	28	BOWTIE	17:46152904:-	17:64869166:-	ENSG00000120071	ENSG00000176809	ENSE00002635959	ENSE00003526242	ATAAAGAAAGAAAGGGATAGATGGAATGAGTTTCATCGTGATT*AATTCTCAATCACAATCCTCTGACAACTGTTGAAGATCCATAT	UTR/CDS(truncated)
+KANSL1	LRRC37A3	banned,known,hpa,non_cancer_tissues,m0,multi,exon-exon	0	11	4	29	BOWTIE	17:46152904:-	17:64892600:-	ENSG00000120071	ENSG00000176809	ENSE00002635959	ENSE00003547821	ATAAAGAAAGAAAGGGATAGATGGAATGAGTTTCATCGTGATT*AAATTTCCAAGGAAACTATATTTCTTACATTGATGGAAATGTA	UTR/CDS(truncated)
+KANSL1	LRRC37A3	banned,known,hpa,non_cancer_tissues,m0,multi,exon-exon	0	11	2	24	BOWTIE	17:46170855:-	17:64869166:-	ENSG00000120071	ENSG00000176809	ENSE00003614505	ENSE00003526242	AAGAACTGACCAGAGCTGATCCCGAGCAGCGTCATGTACCCCT*AATTCTCAATCACAATCCTCTGACAACTGTTGAAGATCCATAT	in-frame
+KANSL1	LRRC37A3	banned,known,hpa,non_cancer_tissues,m0,multi	0	11	2	18	BOWTIE+STAR	17:46030419:-	17:64910653:-	ENSG00000120071	ENSG00000176809			TTGTTCCCCCTTCACCCTTCTCTGTTAACCTTGTGCCTGTCTCCTGTATG*AAAAAAAAAAAAAAAAGGGGGGACAATGTTACTCTTCTTCCTTCTAGAAA	UTR/intronic
+RAB8A	IL31RA	oncogene,cancer,tumor,m3	0	11	3	28	BOWTIE+STAR	19:16126779:+	5:55854527:+	ENSG00000167461	ENSG00000164509			GCTTATGCCTGTAATCCCAGCAGTTTGGGAGGCTGAGGCGGGCAGATCAC*CTGAGGTCAGAAGTTTGAGACCAGCCTGGCCAACATGGCGAAACTCTGTC	UTR/intronic
+SAMD5	SASH1	banned,known,adjacent,hpa,gtex,18cancers,chimerdb3kb,chimerdb3pub,tumor,readthrough,exon-exon	0	11	3	25	BOWTIE	6:147509387:+	6:148390134:+	ENSG00000203727	ENSG00000111961	ENSE00001444606	ENSE00001612635	CCGTGACGGCATCCACCTGAGCAAGCCCCCGTACTCCCGCAAG*GACGGTTCACTGGGAAACATCGATGACCTGGCGCAGCAGTATG	in-frame
+AC026412.3	AC233280.1	lincrna,m3	124	10	2	23	BOWTIE+BLAT	5:1610965:+	3:195656096:-	ENSG00000271119	ENSG00000229178			CAGTTATTGGCAATGAGGAATGAGGCCTAAACACTTGTGTGCAGGTTTGT*GTGTGCAGGTTTGTGTGTGCACGTTTAAGTTTTCCCTTGGGGGACATTTC	exonic(no-known-CDS)/exonic(no-known-CDS)
+AC233280.1	AC026412.3	lincrna,m3	124	10	3	31	BOWTIE+STAR	3:195655760:-	5:1611285:+	ENSG00000229178	ENSG00000271119			GAGCCTCTTTCTCGGGCAATATGCCCTCCTTATACCTTCTTTGATGAGGC*CTCCATTCAAATATCTGCCCTCTCTTTAATACTGGGGTTTTTACTTTCTT	exonic(no-known-CDS)/exonic(no-known-CDS)
+AC233280.1	AC026412.3	lincrna,m3	124	10	2	27	BOWTIE+STAR	3:195655905:-	5:1611144:+	ENSG00000229178	ENSG00000271119			TCCCACTGCAGCTTATGAGGGTCCCAGTTCCTCTGCATCATCACTAGAAC*CTGGGTCGGCCTGTGGTTTTTGTCTGTTTTTAGCCATTTTAATAGATTTG	exonic(no-known-CDS)/exonic(no-known-CDS)
+C19MC	RPLP1	banned,known,ribosomal,bodymap2,non_tumor_cells,hpa,m2,multi	0	10	3	32	BOWTIE+STAR	19:53775834:+	15:69455458:+	ENSG09000000001	ENSG00000137818			TAGAGGCTGGTGGAGCTGCTCCAGCTGAGGAGAAGAAAGTGGAAGCAAAG*AAGAATCCGAGGAGTCTGATGATGACATGGGCTTTGGTCTTTTTGAC	---/CDS(truncated)
+IKZF3	SPG7	cancer,m3	88	10	5	30	BOWTIE+STAR	17:39761083:-	16:89503471:+	ENSG00000161405	ENSG00000197912			TAGGCCCATACTCAAAATCGGCCAGATATAAAATGACCTCAGATTTTGAT*CTCCTGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGGT	UTR/intronic
+IKZF3	SPG7	cancer,m3	88	10	4	39	BOWTIE+STAR	17:39760938:-	16:89547844:+	ENSG00000161405	ENSG00000197912			GCAGAGATGGCGTTTCGCCATACTGCCCAGGCTAGTCTCAAAATCCTGGG*CTCAGGTGATCTGCCCACCTCAGCCTCCCAAAGTGCTGGGATTACAGGGG	UTR/intronic
+IKZF3	SPG7	cancer,m3	88	10	3	21	BOWTIE+STAR	17:39764953:-	16:89533075:+	ENSG00000161405	ENSG00000197912			GTTGGCAGAGCAAGGGTATAAAGGGGGAAATTGTTTGGCAGCACCAACAG*AAAAAAAAAAAAAAAAAAACACAGCTTTTAATTAAAAAAAGGTCAAGACA	UTR/UTR
+IKZF3	SPG7	cancer,m3	88	10	3	21	BOWTIE+STAR	17:39764953:-	16:89501013:+	ENSG00000161405	ENSG00000197912			GTTGGCAGAGCAAGGGTATAAAGGGGGAAATTGTTTGGCAGCACCAACAG*AAAAAAAAAAAAAAAAAAACATAAAACAACAGGGACAGGCCGGGTGTGGT	UTR/intronic
+IKZF3	SPG7	cancer,m3	88	10	2	26	BOWTIE+BLAT	17:39761083:-	16:89499937:+	ENSG00000161405	ENSG00000197912			TAGGCCCATACTCAAAATCGGCCAGATATAAAATGACCTCAGATTTTGAT*CTCCTGGGCTCAAGCAATCCTTCTGCCTTTGCCTCCCAAATTGCTGGGTT	UTR/intronic
+IKZF3	SPG7	cancer,m3	88	10	2	24	BOWTIE+STAR	17:39761712:-	16:89533075:+	ENSG00000161405	ENSG00000197912			CTGCACTCCAGCCTCGGCAACAAGAGCGAAACTCTGTCTAAAACAAAAAC*AAAAAAAAAAAAAAAAAAACACAGCTTTTAATTAAAAAAAGGTCAAGACA	UTR/UTR
+IKZF3	SPG7	cancer,m3	88	10	2	22	BOWTIE+STAR	17:39763595:-	16:89501016:+	ENSG00000161405	ENSG00000197912			GTGTTAAGTTTTTCCTTTAACCACTGAGTTGTGAATGTGAAGAAGGTGGT*AAAAAAAAAAAAAAAACATAAAACAACAGGGACAGGCCGGGTGTGGTGGC	UTR/intronic
+IKZF3	SPG7	cancer,m3	88	10	2	21	BOWTIE+STAR	17:39764863:-	16:89498537:+	ENSG00000161405	ENSG00000197912			AAGAGTTCATGGAGAATTGGTGATACAGACCCAAAGCAAATTTGCCAATG*AAAAAAAAAAAAAAAAAAGTCGCCGGGCGCGGTGGCTCATGCCTGTAATC	UTR/UTR
+IKZF3	SPG7	cancer,m3	88	10	2	20	BOWTIE+STAR	17:39759901:-	16:89533076:+	ENSG00000161405	ENSG00000197912			GCTCAAGAAGAGAAAAGGGAGTGGTGAAACTTTGTCCTAAAAGTTAGCAC*AAAAAAAAAAAAAAAAAACACAGCTTTTAATTAAAAAAAGGTCAAGACA	UTR/UTR
+IKZF3	SPG7	cancer,m3	88	10	2	20	BOWTIE+STAR	17:39759901:-	16:89501014:+	ENSG00000161405	ENSG00000197912			GCTCAAGAAGAGAAAAGGGAGTGGTGAAACTTTGTCCTAAAAGTTAGCAC*AAAAAAAAAAAAAAAAAACATAAAACAACAGGGACAGGCCGGGTGTGGT	UTR/intronic
+RPLP1	C19MC	banned,known,ribosomal,bodymap2,non_tumor_cells,hpa,m2,multi	0	10	7	34	BOWTIE+BLAT	15:69455201:+	19:53775643:+	ENSG00000137818	ENSG09000000001			CATGCACATGTATTGCAGGCCCTGGCCAACGTCAACATTGGGAGCCTCAT*TCTGCACGACGATGAGGTGACCGTCACGGAGGATAAGATCAATGCTCTCA	CDS(truncated)/---
+RPLP1	C19MC	banned,known,ribosomal,bodymap2,non_tumor_cells,hpa,m2,multi	0	10	2	36	BOWTIE+BLAT;BOWTIE+STAR	15:69455499:+	19:53775591:+	ENSG00000137818	ENSG09000000001			AAAGAAAGAAGAATCCGAGGAGTCTGATGATGACATGGGCTTTGGTCTTT*CCCGCGCCATGGCCTCTGTCTCCGAGCTCGCCTGCATCTACTAGGCGCTC	CDS(truncated)/---
+RPLP1	C19MC	banned,known,ribosomal,bodymap2,non_tumor_cells,hpa,m2,multi	0	10	2	33	BOWTIE+STAR	15:69452986:+	19:53775638:+	ENSG00000137818	ENSG09000000001			CTCGCCCGCGCCATGGCCTCTGTCTCCGAGCTCGCCTGCATCTACTCGGC*CTCATTCTGCACGACGATGAGGTGACCGTCACGGAGGATAAGATCAATGC	CDS(truncated)/---
+RPLP1	C19MC	banned,known,ribosomal,bodymap2,non_tumor_cells,hpa,m2,multi	0	10	2	21	BOWTIE+STAR	15:69455518:+	19:53780199:+	ENSG00000137818	ENSG09000000001			GAGTCTGATGATGACATGGGCTTTGGTCTTTTTGACTAAACCTCTTTTAT*AAAAAAAAAAAAAAAAAGCTGGCCCCGAGCACGGTGGCTCACTCCTGTAA	UTR/---
+RPLP1	C19MC	banned,known,ribosomal,bodymap2,non_tumor_cells,hpa,m2,multi	0	10	2	21	BOWTIE+STAR	15:69455518:+	19:53720714:+	ENSG00000137818	ENSG09000000001			GAGTCTGATGATGACATGGGCTTTGGTCTTTTTGACTAAACCTCTTTTAT*AAAAAAAAAAAAAAAAAGCTGGGCGCTGTGGCTCACGCCTGTAATCCCAC	UTR/---
+SIDT2	TAGLN	banned,known,adjacent,conjoing,healthy,cacg,bodymap2,hpa,cancer,1K<gap<10K,readthrough,exon-exon	0	10	5	27	BOWTIE	11:117195915:+	11:117203002:+	ENSG00000149577	ENSG00000149591	ENSE00001678826	ENSE00000991541	CTGGCACTTCCTCTCCTCCATCGCCATGTTCGGGTCCTTCCTG*CTTTCCCCAGACATGGCCAACAAGGGTCCTTCCTATGGCATGA	CDS(truncated)/UTR
+ZMYM5	PSPC1	banned,known,healthy,bodymap2,hpa,chimerdb3seq,10K<gap<100K	0	10	4	35	BOWTIE+BLAT;BOWTIE+STAR	13:19824920:-	13:19782791:-	ENSG00000132950	ENSG00000121390			GAAAAATTTTGAAGACTCCATTGTACCAGTTGTGCTTTCCGCAGATCCAG*GCCTGTATCCGGTGTCCGAGGCGAACTCAGTAAGATGATGTTAAGAGGAA	CDS(truncated)/UTR
+ZMYM5	PSPC1	banned,known,healthy,bodymap2,hpa,chimerdb3seq,10K<gap<100K	0	10	3	37	BOWTIE+BLAT;BOWTIE+STAR	13:19824920:-	13:19772543:-	ENSG00000132950	ENSG00000121390			GAAAAATTTTGAAGACTCCATTGTACCAGTTGTGCTTTCCGCAGATCCAG*GAATCCAGAACCCTGGCTGAAATTGCAAAAGCAGAGCTGGACGGCACCAT	out-of-frame
+AC011455.2	SDHA	lincrna,tumor,m4	125	9	8	37	BOWTIE+STAR	19:38938046:-	5:246263:+	ENSG00000269486	ENSG00000073578			TAATTAACAAGAGAAGCTGAGGCAGAGCTGCGGCTAATCGAAAAGCAAGT*CCATAAAGCCCAAGTAAATAGAATCGATCCAGAGAAGACTCGAGCTGCAG	exonic(no-known-CDS)/intronic
+AC011455.2	SDHA	lincrna,tumor,m4	125	9	6	40	BOWTIE+STAR	19:38938043:-	5:246400:+	ENSG00000269486	ENSG00000073578			TTAACAAGAGAAGCTGAGGCAGAGCTGCGGCTAATCGAAAAGCAAGTCCA*TAAAGCCCAAATCAATAGAATCGATCCAGAGAAGACTCGAGCTGCGGCTG	exonic(no-known-CDS)/intronic
+AL683807.1	AL683807.2	adjacent,lincrna,10K<gap<100K,readthrough,exon-exon	0	9	6	28	BOWTIE	X:1755985:+	X:1768435:+	ENSG00000223511	ENSG00000234622	ENSE00001667046	ENSE00001632295	AACCCAGAAGTATCCCCCAGGACAAATCATTAAGGGGGAAAAG*AGATGGGGTCTCAGTCTGTGGCCCAGGCTGCAGTGAAGTGGAA	exonic(no-known-CDS)/exonic(no-known-CDS)
+ARGFX	AC093267.1	pseudogene,m5	97	9	4	31	BOWTIE+STAR	3:121586233:+	5:122676389:+	ENSG00000186103	ENSG00000240058			TCCCTTCCATCTCAGCCCTTAGACCCTTCCAATTGGGCATGGAACTCTAC*CTTCACTGAGAGTCCCACAGGTGACTTCCAAATGCAAGATACTCAATGGG	CDS(truncated)/exonic(no-known-CDS)
+C19MC	SPN	m6	0	9	7	31	BOWTIE+STAR	19:53776871:+	16:29665331:+	ENSG09000000001	ENSG00000197471			CTCCTGACCTAGTGATTCACCTGCCTCGGCCTCCCAAGGTGCTGGGATTG*CAGACATGAGCCTCCGCGCCTTGCCTCCTCACCCACCTCTTCACTCTGAA	---/UTR
+C19MC	SPN	m6	0	9	3	28	BOWTIE+STAR	19:53776871:+	16:29669476:+	ENSG09000000001	ENSG00000197471			CTCCTGACCTAGTGATTCACCTGCCTCGGCCTCCCAAGGTGCTGGGATTG*CATGCGTGAGCCACCGTGCCCGACAATAGATGTCTTTTAATTTTCTGGAG	---/UTR
+C19MC	SPN	m6	0	9	2	22	BOWTIE+STAR	19:53776860:+	16:29668319:+	ENSG09000000001	ENSG00000197471			CTGGTCTCGAACTCCTGACCTAGTGATTCACCTGCCTCGGCCTCCCAAGG*TGCTGGGACTATAGGCATGAGCCACTGCACCCAGCCACTGCTTCATTCCT	---/UTR
+CDH8	SPN	m3	0	9	6	30	BOWTIE+STAR	16:61942224:-	16:29668716:+	ENSG00000150394	ENSG00000197471			TGATCCTTTCACCTTGATCTCCCAAGTACTGGGATTACAGGTGTGAGCCA*GGGATTATAGGTGTGAGCCACTGTGCCCAGGCTTGCCTCAGATATTTGAA	intronic/UTR
+CDH8	SPN	m3	0	9	2	31	BOWTIE+STAR	16:61650674:-	16:29666561:+	ENSG00000150394	ENSG00000197471			CTCTCTCTCTCTCTCTCACACACACACACACACACACACACACACACACA*CGCGCGCGCTCTCCTGCGAACAGAGGCAGGGGGAGAGGGGTTTGCCCTGG	UTR/UTR
+CDH8	SPN	m3	0	9	2	23	BOWTIE+STAR	16:61650696:-	16:29666525:+	ENSG00000150394	ENSG00000197471			CTCTTTCTCTCTTTCTCTCTCTCTCTCTCTCTCTCTCTCACACACACACA*CACACACACACACACACACACACACACACGCGCGCGCGCGCGCGCTCTCC	UTR/UTR
+CHST11	IGH@	banned,known,hpa,m0,multi	0	9	2	29	BOWTIE+BLAT;BOWTIE+STAR	12:104665831:+	14:106880683:-	ENSG00000171310	ENSG09000001018			TTTCCCTTTCTCTTTCTCTCTCTCTGTCTCTTTTTTTTTTTTTTTTTTT*TTTTTTTTTTTTTGTTTTTTTTTTTTTTTGCCAATGGAGGTGAGAAGAAA	intronic/---
+FAM86B3P	LINC02014	pseudogene,lincrna,m4	41	9	3	28	BOWTIE+STAR	8:8242000:+	3:130090663:-	ENSG00000173295	ENSG00000248243			TTCCGTGGAAAGGTAAGTTATTGGAGTTTATGCAGTCTGGTATATTAATT*GAGAGCTTTACTTCAAAGAATGTCACGTTTACGATTCAGTCTCACAGACT	exonic(no-known-CDS)/intronic
+FAM86B3P	LINC02014	pseudogene,lincrna,m4	41	9	2	39	BOWTIE+STAR	8:8241754:+	3:130090903:-	ENSG00000173295	ENSG00000248243			CCTGGTTCTGTTATTGTGTGGAGGAATGTCTTAGTCTGTTGCTCCTGTGG*GTCGCTAAAGCACGAACCAGGGGTCTTCATCCTTAGAAAGCAGTTAGACA	exonic(no-known-CDS)/exonic(no-known-CDS)
+HACL1	COLQ	banned,known,adjacent,healthy,cacg,hpa,1000genomes,chimerdb3seq,10K<gap<100K,readthrough,exon-exon	0	9	4	26	BOWTIE;BOWTIE+STAR	3:15563358:-	3:15489637:-	ENSG00000131373	ENSG00000206561	ENSE00003693947	ENSE00001498676	TATCAACATCATGATTGAGCCACAAGCCACACGGAAGGCCCAG*CCCTTCCCAGCCTGGATCAGAAGAAGCGTGGTGGCCACAAAGC	out-of-frame
+LGALS17A	AC093063.1	adjacent,pseudogene,1K<gap<10K,readthrough,exon-exon	0	9	4	29	BOWTIE	19:39685652:+	19:39694464:+	ENSG00000226025	ENSG00000268088	ENSE00001592376	ENSE00003004107	AAATTGGAGAGTGTCACTTTTCTATGACATTATATTGATTAAG*CAATGACCCAGAGCTGCGGGTGGAATTCTACACTGGGACAAAT	exonic(no-known-CDS)/exonic(no-known-CDS)
+RASAL3	WIZ	adjacent,1K<gap<10K,readthrough	0	9	3	34	BOWTIE+BLAT;BOWTIE+STAR	19:15451768:-	19:15448367:-	ENSG00000105122	ENSG00000011451			GCCTCAATGGAGACACCACCTGAGCTGCCCATCCTGCCTCATCACACGTG*GCCCACAATGCCCCGCTGAGCCGGTGCAGCAGCTGAGTGGATCCAAGCAG	UTR/UTR
+RNF168	BCL9	oncogene,cancer,tumor,m3	0	9	3	35	BOWTIE+STAR	3:196471230:-	1:147537912:+	ENSG00000163961	ENSG00000116128			AGACGGAGTTTCAAAATTTGGATTCATATTCATATGTGGGAAATCCCAGA*TTTTTTTTTTTTTTTTTTTGGAGACAGAGTCTTGCTCTGTCACCCAGGCT	UTR/intergenic
+SDHA	AC011455.2	lincrna,tumor,m4	125	9	5	38	BOWTIE+STAR	5:245760:+	19:38938215:-	ENSG00000073578	ENSG00000269486			TTGTGCTACAGAAAGTAGCCATACGTAGGGATCAATTGAAAACATTAAAT*GACTTTCAAAAATTACTAGGGGACATTAATTGGATACGACCTGCTCTAGG	intronic/exonic(no-known-CDS)
+SFT2D2	DA750114	m4	0	9	2	36	BOWTIE+BLAT;BOWTIE+STAR	1:168247507:+	9:106086630:+	ENSG00000213064	ENSG09000000004			TGATGGTTTCCAGCTTCATCCATGTCCCTACAAAGGACATGAACTCATCC*TTATGGCTGCATGGTATTCCATGGTGTATATGTGCCACATTTTCTTTATT	UTR/---
+SPN	C19MC	m6	0	9	9	30	BOWTIE+BLAT;BOWTIE+STAR	16:29668735:+	19:53779381:+	ENSG00000197471	ENSG09000000001			GGTCCTCCCACCTAAGCTTCCCCAAATACTGGGATTATAGGTGTGAGCCA*GGGATTACAGGTGTGAGCCACTGTGCCCAGCCCTGTCCACATTTTCTAGC	UTR/---
+SPN	C19MC	m6	0	9	3	39	BOWTIE+STAR	16:29667546:+	19:53668663:+	ENSG00000197471	ENSG09000000001			GCCGGGTGCGGTGGCTCACGCCTGTAATCTCAGCACTTTGGGAGGCCAAG*GTGGGCGGATCATGAGGTCGGGAGTTCGAGACCAGCCTGGCCAATATGGT	UTR/---
+SPN	C19MC	m6	0	9	3	34	BOWTIE+STAR	16:29665291:+	19:53740131:+	ENSG00000197471	ENSG09000000001			GATGGGGTTTCACCATGTTGGCTAGGCTGGTCTCAAACTCCTGACCTCAG*GTGATCTGCCTGCCTCGGCCTCCCAAAGTGCTGAGAATACAGGCGTGAGC	UTR/---
+SPN	C19MC	m6	0	9	3	28	BOWTIE+STAR	16:29667562:+	19:53652059:+	ENSG00000197471	ENSG09000000001			CACGCCTGTAATCTCAGCACTTTGGGAGGCCAAGGTGGGCAGATCATGAG*GTCGGGAGTTTGAGACCAGCCTGGCCAACATGGTGAAACTCTGTCTCTAC	UTR/---
+SPN	C19MC	m6	0	9	2	27	BOWTIE+BLAT	16:29668299:+	19:53776842:+	ENSG00000197471	ENSG09000000001			TCACCATGTTGTCCCGGCTGGTCTCAAACTCCCGGGCACAAGAGATCCAC*CTGCCTCGGCCTCCCAAGGTGCTGGGATTGCAGTGTGAGCCACCACGCCT	UTR/---
+SPN	C19MC	m6	0	9	2	20	BOWTIE+STAR	16:29669033:+	19:53774977:+	ENSG00000197471	ENSG09000000001			CAAAAACACAAAAAATTAGCCAGGCGTGGTGGTGCATGCCTGTAGTACCA*GCTGCTTGGGAGGCTGAGGCGGGAGAATCGCTTGAACCTGGAAGATGGTG	UTR/---
+SPN	CDH8	m3	0	9	2	21	BOWTIE+STAR	16:29670643:+	16:61917985:-	ENSG00000197471	ENSG00000150394			CATGTTTTTGAAAATTATGTAATGACTTTGGAAAATACCAGCAATATAAT*AAAAAAAAAAAAAAAAAGCAAATAACTTTGACCTCCTGGTTGCCTGGGAA	UTR/intronic
+AC022144.1	NEAT1	lincrna,antisense,oncogene,m3	9	8	2	34	BOWTIE+STAR	19:38739449:+	11:65440843:+	ENSG00000267892	ENSG00000245532			GGTCGTGCCACTGCACTCCAGCCTGGGTGACAGAGCAAGATTCCGTCTCA*AAAAAAAAAAAAAAAAAAAAAACCAAGAAGAAAAGGAATGAATTAGAACT	exonic(no-known-CDS)/exonic(no-known-CDS)
+ADAMTSL3	IGH@	cancer,m3	0	8	2	40	BOWTIE+STAR	15:83862771:+	14:105960131:-	ENSG00000156218	ENSG09000001014			AATAACCAGCTAACATCATAATGACAGGATCAAATTCACACATAACAATA*TAACCTTTAATGTAAATGGGCTAAATGCTCCAATTAAAAGACACAGACTG	UTR/---
+AGAP5	BMS1P4	banned,known,adjacent,hpa,m0,multi,gap<1K,exon-exon	0	8	2	26	BOWTIE	10:73692043:-	10:73713332:-	ENSG00000172650	ENSG00000271816	ENSE00003612965	ENSE00002439611	AACAGATGTTGTAGAAATAAGAAGAAGCAACTGTACAAACCAT*TGTCAGAGATGGCTTTGGATTCCCCGTTCTGTGTGCTGCTGTC	CDS(truncated)/exonic(no-known-CDS)
+CD53	PRPF8	cancer	0	8	7	26	BOWTIE+STAR	1:110899760:+	17:1652728:-	ENSG00000143119	ENSG00000174231			CAAGATCTCATTTCAATTTCTTTATTAGAGGGCCTTATTGATGTGTTCTA*AAAAAAAAAAAAAAAACTAGCCAGGCATGGTGGCATACACCTGTAATC	UTR/intronic
+CD53	PRPF8	cancer	0	8	4	29	BOWTIE+STAR	1:110898247:+	17:1683142:-	ENSG00000143119	ENSG00000174231			AAACCCCGTCTCTACTAAAAATACAAAAAATTAACCAGACGTAGTGGCAG*GTGTCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATGGCGTGA	intronic/UTR
+DSCC1	TAF2	adjacent,tcga,non_tumor_cells,non_cancer_tissues,18cancers,chimerdb3seq,tcga-cancer,1K<gap<10K,readthrough	0	8	3	30	BOWTIE+BLAT;BOWTIE+STAR	8:119838259:-	8:119832598:-	ENSG00000136982	ENSG00000064313			TTCTCTCTAAGGGAGAAGTGGACAGAAGAAGATATTGCTCCATATATTCA*GAAGCCGGGAAGCCAAGCTCCGCGGGACCGCTTCATGCCGCTGACTGGTG	CDS(truncated)/UTR
+MALAT1	CCND2	lincrna,oncogene,cancer	0	8	3	18	BOWTIE+STAR	11:65499951:+	12:4296706:+	ENSG00000251562	ENSG00000118971			AATGAAAAACAAGCTAAGACAAGTATTGGAGAAGTATAGAAGATAGAAAA*AAAAAAAAAAAAAAATTGTAAACATCCCCTTCAGCAGCTTCTCAAGTCCC	exonic(no-known-CDS)/intronic
+MALAT1	CCND2	lincrna,oncogene,cancer	0	8	2	38	BOWTIE+STAR	11:65499662:+	12:4303079:+	ENSG00000251562	ENSG00000118971			TGGGAGTGGTAGGATGAAACAATTTGGAGAAGATAGAAGTTTGAAGTGGA*AAACAGAAGCAGCAAATGAAAGAACCGGACAAATAAGGAAGGGCACAAGC	exonic(no-known-CDS)/UTR
+NEAT1	AC022144.1	lincrna,antisense,oncogene,m3	9	8	2	21	BOWTIE+STAR	11:65428347:+	19:38739453:+	ENSG00000245532	ENSG00000267892			TGAGGCTCTTCAGTATGCCCAGGTTGGCAGCACTGAGAACCGCAGGAA*AAAAAAAAAAAAAAAAAAAAGATGGAAAGAGTAGAGAGACAGAGACAGAG	exonic(no-known-CDS)/exonic(no-known-CDS)
+PMS2	POLR2J3	banned,known,hpa,cancer,tumor,m0,multi	0	8	4	31	BOWTIE+STAR	7:5996374:-	7:102564294:-	ENSG00000122512	ENSG00000168255			ACCACCATGCCCGGCTGATTTTTTGTATTTTTAGTGGAGACGGGGTTTCG*CCATGTTGGCCAGGCTGGTCTCAAACTCCTGCTGGGATCATGGGCGTGAG	intronic/UTR
+PMS2	POLR2J3	banned,known,hpa,cancer,tumor,m0,multi	0	8	2	30	BOWTIE+STAR	7:6003690:-	7:102567085:-	ENSG00000122512	ENSG00000168255			GAAACTTTTGGCTTTCGGGGGGAAGCTCTGAGCTCACTTTGTGCACTGAG*AGACGTGCCTGCTTCCCCTTCGCCTTCTGCCGTGATTGTCAGTTTCCTGA	CDS(truncated)/UTR
+PMS2	POLR2J3	banned,known,hpa,cancer,tumor,m0,multi,exon-exon	0	8	2	28	BOWTIE+BLAT	7:6003690:-	7:102567083:-	ENSG00000122512	ENSG00000168255			GAAACTTTTGGCTTTCGGGGGGAAGCTCTGAGCTCACTTTGTGCACTGAG*ACGTGCCTGCTTCCCCTTCGCCTTCTGCCGTGATTGTCAGTTTCCTGAGG	in-frame
+PMS2	POLR2J3	banned,known,hpa,cancer,tumor,m0,multi,exon-exon	0	8	2	26	BOWTIE	7:6002453:-	7:102567083:-	ENSG00000122512	ENSG00000168255	ENSE00003715574	ENSE00003643284	ACTACCTGTGCGCCATAAGGAATTTCAAAGGAATATTAAGAAG*ACGTGCCTGCTTCCCCTTCGCCTTCTGCCGTGATTGTCAGTTT	in-frame
+POLR2J3	PMS2	banned,known,hpa,cancer,tumor,m0,multi	0	8	4	40	BOWTIE+STAR	7:102564328:-	7:5996404:-	ENSG00000168255	ENSG00000122512			AGCCTCCCAAATAGCTGGGATTATAGGCGTGCACCACCACGCCTGCCT*TTTTTGTATTTTTAGTGGAGACGGGGTTTCGCCATGTTGCCCAGGCTGGT	UTR/intronic
+RPL23AP53	AL732372.2	pseudogene,ribosomal,m3,exon-exon	0	8	3	26	BOWTIE	8:232129:-	1:485208:-	ENSG00000223508	ENSG00000237094	ENSE00003701152	ENSE00001758832	GTGTGCCGTGGGTCCGCGCTGCTTCCACCCAACTTCCTGTTAG*CTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGG	exonic(no-known-CDS)/exonic(no-known-CDS)
+RPL23AP53	AL732372.2	pseudogene,ribosomal,m3,exon-exon	0	8	2	29	BOWTIE	8:219290:-	1:485208:-	ENSG00000223508	ENSG00000237094	ENSE00003696815	ENSE00001758832	TAAGACTGACCTGGGGAGACGTGGCTGCAGGCCATTGAGGAAG*CTGCAGGACAAGTTCGAGCATCTTAAAATGATTCAACAGGAGG	exonic(no-known-CDS)/exonic(no-known-CDS)
+TPM4	HDGFL3	cancer,m7	41	8	2	25	BOWTIE+STAR	19:16080686:+	15:83188393:-	ENSG00000167460	ENSG00000166503			CCTGTCTCTACTAAAAATACAAAAAAATAGCAAGGCGTGGTGGTGTCCGT*CTGTAGTCCCAGCTACTCAGGAGGCTGAGGCGCGAGAATTGCTTGAACCA	intronic/intronic
+TPM4	HDGFL3	cancer,m7	41	8	2	21	BOWTIE+STAR	19:16089114:+	15:83191951:-	ENSG00000167460	ENSG00000166503			AAGAACTCAAGAATGTTACTAACAATCTGAAATCTCTGGAGGCTGCATCT*AAAAAAAAAAAAAAAAGGAGAGATAAGTTACAATAGGATGCATTTAACA	CDS(truncated)/intronic
+WWOX	IGK@	cancer,tumor,m4	0	8	2	39	BOWTIE+STAR	16:78195671:+	2:90240727:+	ENSG00000186153	ENSG09000001012			GACCAGCCTGAGCAACATGACGAAACCCTGTCTCTACTAAAAATACAAAA*AAAAAATTAGCCAGGCATAGTGGCAAGAGCCTGTAAACCCAGCTACTAGG	intronic/---
+AC078883.1	AC078883.3	adjacent,antisense,10K<gap<100K,readthrough,exon-exon	0	7	3	25	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	2:172495375:-	2:172464574:-	ENSG00000225205	ENSG00000232788	ENSE00001655316	ENSE00001663512	AGCCTTGTTCTGTCTTGTCACACCAACTGTTACTGATGAACAG*GTTTACAATCTTTGAGGCCTGGCTGACTCATGGAACAACTAGG	exonic(no-known-CDS)/exonic(no-known-CDS)
+AC141586.1	AC093525.4	pseudogene,antisense	0	7	2	39	BOWTIE+STAR	16:2629390:+	16:2570800:-	ENSG00000215154	ENSG00000261140			CATTTTTACATATCAAACAGTATGGTATTGTGGTTACGCTTTTAAAAAGA*TTGTATTTTAGCGACACATACTGGAATTTTTACAGGGAAAACCCACTGAG	exonic(no-known-CDS)/intronic
+AC141586.1	AC093525.4	pseudogene,antisense	0	7	2	38	BOWTIE+STAR	16:2629498:+	16:2570692:-	ENSG00000215154	ENSG00000261140			GGCATCAGGTCACTAGCACCGAGTGAATTTTTTACAATAAGCAGTAATTC*CCCCAGCCCCATGCATCATAGCCAATCCTAAATGAATGCCAATCTGGAAA	exonic(no-known-CDS)/intronic
+CBFA2T3	SPN	cancer,m5	98	7	3	40	BOWTIE+STAR	16:88959808:-	16:29665292:+	ENSG00000129993	ENSG00000197471			GATGGAGTTTCACCATGTTGGCTAGGCTGGTCTCGAACTCCTGACCTCGG*GTGATCTACCTGCCTCAGCCTCCCAAAGTGCTGAGATTACAGACATGAGC	intronic/UTR
+CBFA2T3	SPN	cancer,m5	98	7	2	26	BOWTIE+STAR	16:88878788:-	16:29668230:+	ENSG00000129993	ENSG00000197471			AGTAGCTGGGATTACAGGCGCCCGCCACCACACCTGGCTAATTTTTGTAT*TTTTTTGTAGAGATGGGGTTTCACCATGTTGTCCCGGCTGGTCTCAAACT	intronic/UTR
+CHST11	IGH@	banned,known,hpa,m1,multi	0	7	5	39	BOWTIE+STAR	12:104541122:+	14:106023254:+	ENSG00000171310	ENSG09000000015			TCAGAACATGACTCGTATGTTTGTATTCTGCAGAATCTCTCCCTCTCTCT*CACACACACACACACACACACACACACACACACACACATAGATTCCCACA	intronic/---
+IGH@	CHST11	banned,known,hpa,m1,multi	0	7	2	26	BOWTIE+STAR	14:106190946:+	12:104546481:+	ENSG09000000015	ENSG00000171310			GAGGTGGAAGTTGCAGTGAGTCTGGACCCTGTCTCAAAAAAAAAAAAAAA*ATTTAAATTTCCCTTTTAAGTTTTTCTTAGCTAGCATAATAAGGGATAGT	---/intronic
+KIR3DX1	AL161457.2	antisense,m3	11	7	3	34	BOWTIE+STAR	19:54538525:+	9:68394341:-	ENSG00000104970	ENSG00000233178			GGGTTCAAGCGATTTTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAG*GCATGCGTCATCATACCTAGCTAATTTGTCCCATAGAATCTCATCTTCTG	UTR/exonic(no-known-CDS)
+PTGES2	SLC25A25-AS1	adjacent,antisense,18cancers,1K<gap<10K,readthrough	0	7	3	31	BOWTIE+BLAT;BOWTIE+STAR	9:128122362:-	9:128118118:-	ENSG00000148334	ENSG00000234771			GCAAGGACCGGCCCTTCATGGGGGGCCAGAAGCCGAATCTCGCTGATTTG*AAGGAGATCCGGTTGGCAGCTAAACCGCGCTGGGAACAGGGGCCTGAGTC	CDS(truncated)/exonic(no-known-CDS)
+ZNF468	ZNF160	m6	114	7	4	41	BOWTIE+STAR	19:52854084:-	19:53086085:-	ENSG00000204604	ENSG00000170949			CCTTCAGTCCCTCTCATCTCGCTTAGGTTCCGTCTCTCGTGACCCAGTGT*CATGAACTTGGGAAGAGGCTGCACTGGGCATGGTCCTGGGAAGGGCTCAC	CDS(truncated)/UTR
+ZNF468	ZNF160	m6,exon-exon	114	7	2	25	BOWTIE	19:52854258:-	19:53075183:-	ENSG00000204604	ENSG00000170949	ENSE00003644211	ENSE00003471812	GAGGAAGAGGAAAGCAAAGGAGTCAGGGATGGCTCTTCCTCAG*GTACGGTTGACATTTAGGGATGTGGCCATAGAATTCTCTCAGG	in-frame
+ANKRD36	WWOX	cancer,tumor,m3	45	6	2	21	BOWTIE+STAR	2:97245798:+	16:78229254:+	ENSG00000135976	ENSG00000186153			AAATGAAGGACATTGAAAAAATGTACAAAAGTGGATACAATACAATGGAA*AAATGAAAAAAAAAACAGGAATAAATAAAAAGAATAAAAAGAGCCAGGAC	CDS(truncated)/intronic
+ANKRD36	WWOX	cancer,tumor,m3	45	6	2	20	BOWTIE+STAR	2:97241479:+	16:79157416:+	ENSG00000135976	ENSG00000186153			CAGTTAAAGCATGAAATACTTGAATTGGAAAAAGAACTCTGTAGTTTGAG*AATGGCCAAAAAAAAAAAAAAGAAAGAAACCACAAAACCATACCTTCTTT	CDS(truncated)/intronic
+COQ8B	NUMBL	banned,known,adjacent,healthy,bodymap2,fragments,hpa,gap<1K,readthrough,exon-exon	0	6	5	25	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	19:40692951:-	19:40686995:-	ENSG00000123815	ENSG00000105245	ENSE00003529650	ENSE00003482506	GAAGTCCAGGGACCTCAAATTCCTCACAGGCTTTGAAACCAAG*GGCGGACCCCGGAGGCCTGAGCGGCACCTGCCCCCAGCCCCCT	in-frame
+GAS2	AC006299.1	adjacent,antisense,10K<gap<100K,readthrough,exon-exon	0	6	4	28	BOWTIE;BOWTIE+BLAT;BOWTIE+STAR	11:22755953:+	11:22834907:+	ENSG00000148935	ENSG00000246225	ENSE00003594907	ENSE00001970824	CCAAGGAAGATACCGAGTGGGAGAAAAGATCCTCTTCATTAGG*AGTTCCACTCTTTTTGTCGACAATAATATGGCGCCCAAACACG	CDS(truncated)/exonic(no-known-CDS)
+OR2T2	AC098483.1	antisense,m3,10K<gap<100K	0	6	7	41	BOWTIE+BLAT;BOWTIE+STAR	1:248449392:+	1:248548764:+	ENSG00000196240	ENSG00000224521			TCCCTGGTAGAAGAACCTGTGACTTCTCTTGGAGATCTCTTCAATTTTCG*GAAGCTGATGCTTACAACAATGGAGATTAGCAATACCTGGACAGAGTTGA	UTR/exonic(no-known-CDS)
+OR2T2	AC098483.1	antisense,m3,10K<gap<100K,exon-exon	0	6	2	30	BOWTIE	1:248449392:+	1:248562600:+	ENSG00000196240	ENSG00000224521	ENSE00003813036	ENSE00002382273	TAGAAGAACCTGTGACTTCTCTTGGAGATCTCTTCAATTTTCG*ATGATAAAGAAAAAATGAGGAAATGCTGTGAAGAACTAATAGA	UTR/exonic(no-known-CDS)
+STAT6	NRDE2	cancer,m4	0	6	2	20	BOWTIE+STAR	12:57099361:-	14:90271909:-	ENSG00000166888	ENSG00000119720			TCTCTGCCAAAGACCTGTCCATTCGCTCACTGGGGGACCGAATCCGGGAT*AAAAAAAAAAAAAAAAATCTGTTCTGAAATAAAGCATGAGACACCTAGAC	CDS(truncated)/UTR
+TMEM161B	AC061975.7	lincrna,m4	71	6	4	40	BOWTIE+STAR	5:88189654:-	17:28233412:-	ENSG00000164180	ENSG00000267259			TTAGGTGAAATTGAGTTACAGCGACAGCTACAGGCTTTAGGAAAAATTAC*ACAATCTCAGCCTGGGTAAATGAGAGATGCCCTGTTAGCTTATTCTCCCC	UTR/exonic(no-known-CDS)
+WWOX	ANKRD36	cancer,tumor,m3	45	6	2	22	BOWTIE+STAR	16:78186741:+	2:97163244:+	ENSG00000186153	ENSG00000135976			GAGGCAGAGGTTGCAGTGAGCCAAGATCGTGCCACTGCACTCCAGCCTGG*GCAACAGAGCAAGACTCCATGTCAAAAATAAAAAAAAGAAAGTTGATTGT	intronic/CDS(truncated)
+WWOX	ANKRD36	cancer,tumor,m3	45	6	2	22	BOWTIE+STAR	16:78188464:+	2:97163244:+	ENSG00000186153	ENSG00000135976			GAGGCGGAGATTGCAGTGAGCTGGGATTGTGCCACTGCACTCCAGCCTGG*GCAACAGAGCAAGACTCCATGTCAAAAATAAAAAAAAGAAAGTTGATTGT	intronic/CDS(truncated)
+WWOX	ANKRD36	cancer,tumor,m3	45	6	2	22	BOWTIE+STAR	16:78458053:+	2:97163244:+	ENSG00000186153	ENSG00000135976			GAGGCGGAGGTTGCAGTGAGCCAAGATGGTGCCACTGCACTCCAGCCTGG*GCAACAGAGCAAGACTCCATGTCAAAAATAAAAAAAAGAAAGTTGATTGT	intronic/CDS(truncated)
+AC008770.1	SDHA	tumor,m4	28	5	2	37	BOWTIE+STAR	19:12027244:+	5:249223:+	ENSG00000219665	ENSG00000073578			TTGGAGGCTCTGTGATTTCAATGATGATTGTGCTTTTAATCTGTGTTGTC*TTTGTATAGTCTGCAGATGTGGATCCTGACTCTGCAAGAAGTAGCTCACC	intronic/UTR
+AC008770.1	SDHA	tumor,m4	28	5	2	36	BOWTIE+STAR	19:12026071:+	5:248055:+	ENSG00000219665	ENSG00000073578			TTTTTGAGCCTGCCTAAAGGCCAGATGCTATCAGCAGCTGAACAATATCT*AGAAACCAGCTGCAAAGACAGAAGCAGAACAACTGGTTTGGTGGAGAGAC	exonic(no-known-CDS)/intronic
+AC023813.2	RSL24D1	pseudogene,m4	84	5	2	40	BOWTIE+STAR	16:48600281:+	15:55192792:-	ENSG00000260033	ENSG00000137876			ACGACTGAAAGGTGTTCAGATTTTGTAAATCTAAATGTCATAAAAACTTT*AAAGAAGCGCAATCCTCGCAAAGTTAGGTGGACCAAAGCATTCCGGAAAG	exonic(no-known-CDS)/CDS(truncated)
+AC060780.1	MYOCOS	lincrna,m3	13	5	2	36	BOWTIE+STAR	17:43163262:-	1:171603744:+	ENSG00000267002	ENSG00000283683			CTATGTGTCAGAAAGAGAAAAAATGGCAGTTGGAGTTTTAACCCATACTG*AGGCCCTGGCCAAGGCCAGTGGCCTATCTCTCAAAACAACTAGACAGCGT	exonic(no-known-CDS)/intronic
+AC135012.1	IKZF3	lincrna,cancer,m3	11	5	2	22	BOWTIE+BLAT	16:86084612:-	17:39856402:-	ENSG00000261177	ENSG00000161405			AGCTGGGTGTGGTGGTGCATGCCTGTGGTCCCAGCTACTCGGGAGGCTGA*GCAGGAGAATTGCTTGAACCTGCGAGGCCGAGGTTGCAGTGAGTGGAGAT	exonic(no-known-CDS)/intronic
+AC135012.1	IKZF3	lincrna,cancer,m3	11	5	2	21	BOWTIE+BLAT	16:86084628:-	17:39760287:-	ENSG00000261177	ENSG00000161405			AAAAAATACAAAAATTAGCTGGGTGTGGTGGTGCATGCCTGTGGTCCCAG*CTACTCGGGAGGCTGACGCAGGAGAATTGCTTGAACCTGGGAGGTGGAGG	exonic(no-known-CDS)/UTR
+ARHGEF39	AC008993.1	m3	0	5	7	37	BOWTIE+STAR	9:35675026:-	19:68663:+	ENSG00000137135	ENSG00000282542			GAGGTTGCAGTGAGCCGAGATCGCGCCATTGCACTCCAGCCTGGGCAACA*AGGGTGAACCAGGTCCAGGAAGAAGGTGCAAAGACAGCATTCCAGGTAAA	UTR/exonic(no-known-CDS)
+ATP6V0D1	NUP43	m3	23	5	3	33	BOWTIE+STAR	16:67439795:-	6:149725366:-	ENSG00000159720	ENSG00000120253			GGGTTTCACCATCTTGACCAGGCTGGTCTTGAACTGACCTCGTGATCCAC*CTGCCTCAGCCTCCCAAAATGCTGGGATTACAGGCTTGAGCCACTGCTCC	UTR/UTR
+C19MC	STAG3		0	5	2	33	BOWTIE+BLAT	19:53779051:+	7:100203759:+	ENSG09000000001	ENSG00000066923			TCACCATGTTGGTCAGGCTGGTCTCGAACTCCTGACCTCAAGTGATCTGC*CCGTCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGCACC	---/intronic
+CCDC9	SLFN5	m4	0	5	25	40	BOWTIE+BLAT;BOWTIE+STAR	19:47271736:+	17:35266178:+	ENSG00000105321	ENSG00000166750			CTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGCGC*GCGCGCGCGCACGTGCACATGTGTGTAGGTAGATGGAGGGGGTGATTATT	UTR/UTR
+CDK2	RAB5B	banned,known,adjacent,healthy,fragments,hpa,18cancers,cancer,1K<gap<10K,readthrough,exon-exon	0	5	2	25	BOWTIE	12:55971247:+	12:55986869:+	ENSG00000123374	ENSG00000111540	ENSE00003566576	ENSE00003588839	AGTTGTACCTCCCCTGGATGAAGATGGACGGAGCTTGTTATCG*GAGTGTTGAAGCCTGGAAATCCCCTCCCCTTCCCCCTCCCCCC	out-of-frame
+CHST11	IGH@	banned,known,hpa,m0,multi	0	5	2	39	BOWTIE+STAR	12:104464072:+	14:106561885:+	ENSG00000171310	ENSG09000000017			TGGAAGCAATGTAGCAAGATTGAAAATGAAGTCTCCTACTTGCACTTTTT*TTTTTTTTTTTTTTGAGATGGAGTCTTGCTCTGTCACCCAGGCTGCAGTG	intronic/---
+CHST11	IGH@	banned,known,hpa,m0,multi	0	5	2	24	BOWTIE+STAR	12:104550737:+	14:106564033:+	ENSG00000171310	ENSG09000000017			TTAAAAGTCAGTGTGCTGTTTGGCTAAGAGACACTTCCTCACCTGAAATC*AAAAAAAAAAAAAAAAAGGGAGAGGACGACTTGATGCACCTACATATGGT	intronic/---
+CHST11	IGH@	banned,known,hpa,m0,multi	0	5	2	20	BOWTIE+STAR	12:104466441:+	14:106461039:+	ENSG00000171310	ENSG09000000017			CTATTGAGTTCAAGAACAAATTGATGGCAAAGCCGGAGAGTGGACCCTAT*AAAAAAAAAAAAAAAAACCCCGGCACTGCTCCGGCGGCCACTCCGACCCG	intronic/---
+CRLF2	CSF2RA	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	2	33	BOWTIE+STAR	X:1136995:-	X:1258239:+	ENSG00000205755	ENSG00000198223			GCCGGGCGTGGTGGCGGGCGCCTGTGGTCCCAGCTACTCGGGAGGCTGAG*GCAGAATGGCGTGAACCCAGGAGGCAGAGCTTGCAGTGAGCCAAGATCGT	intergenic/intergenic
+CSF2RA	CRLF2	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	5	31	BOWTIE+STAR	X:1282502:+	X:1146505:-	ENSG00000198223	ENSG00000205755			ATTGACCATCTTGAGATGCCAAAGATGTCCACGTCCTAATCCCATGTGGG*AGACAGAATAATGTCCCCAAAGATGTCCACGTCCTAATCCCCATGTGATA	UTR/intergenic
+CSF2RA	CRLF2	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	5	31	BOWTIE+STAR	X:1282502:+	X:1147190:-	ENSG00000198223	ENSG00000205755			ATTGACCATCTTGAGATGCCAAAGATGTCCACGTCCTAATCCCATGTGGG*AGACAGAATAATGTCCCCAAAGATGTCCACGTCCTAATCCCCATGTGATA	UTR/intergenic
+CSF2RA	CRLF2	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	5	31	BOWTIE+STAR	X:1282502:+	X:1146701:-	ENSG00000198223	ENSG00000205755			ATTGACCATCTTGAGATGCCAAAGATGTCCACGTCCTAATCCCATGTGGG*AGACAGAATAATGTCCCCAAAGATGTCCACGTCCTAATCCCCATGTGATA	UTR/intergenic
+CSF2RA	CRLF2	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	5	31	BOWTIE+STAR	X:1282502:+	X:1146946:-	ENSG00000198223	ENSG00000205755			ATTGACCATCTTGAGATGCCAAAGATGTCCACGTCCTAATCCCATGTGGG*AGACAGAATAATGTCCCCAAAGATGTCCACGTCCTAATCCCCATGTGATA	UTR/intergenic
+CSF2RA	CRLF2	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	5	31	BOWTIE+STAR	X:1282502:+	X:1146897:-	ENSG00000198223	ENSG00000205755			ATTGACCATCTTGAGATGCCAAAGATGTCCACGTCCTAATCCCATGTGGG*AGACAGAATAATGTCCCCAAAGATGTCCACGTCCTAATCCCCATGTGATA	UTR/intergenic
+CSF2RA	CRLF2	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	5	31	BOWTIE+STAR	X:1282502:+	X:1146554:-	ENSG00000198223	ENSG00000205755			ATTGACCATCTTGAGATGCCAAAGATGTCCACGTCCTAATCCCATGTGGG*AGACAGAATAATGTCCCCAAAGATGTCCACGTCCTAATCCCCATGTGATA	UTR/intergenic
+CSF2RA	CRLF2	banned,known,similar_reads,oncogene,healthy,ambiguous,chimerdb3pub,cancer,m0,multi	16792	5	3	38	BOWTIE+STAR	X:1282515:+	X:1146296:-	ENSG00000198223	ENSG00000205755			AGATGCCAAAGATGTCCACGTCCTAATCCCATGTGGGAGACAGAATAATG*TCCCCAAAGATGTCCACGTCCTAATCCCCATGTGATAGACAGAATAATGT	UTR/intergenic
+DUS4L	BCAP29	banned,known,adjacent,healthy,bodymap2,hpa,chimerdb3pub,1K<gap<10K,readthrough,exon-exon	0	5	2	23	BOWTIE	7:107576592:+	7:107580759:+	ENSG00000105865	ENSG00000075790	ENSE00003520906	ENSE00003577399	TTAAAGGAAGCAGAAAATGTGTGGCGGATTACTGGGACAGATG*GTGTGAAGAAAAAAATGACACTCCAATGGGCTGCAGTGGCAAC	CDS(truncated)/UTR
+FOXP1	PAX5	known,similar_reads,oncogene,chimerdb2,cgp,ticdb,ambiguous,fragments,chimerdb3pub,chimerdb3seq,cancer,tumor	7942	5	2	37	BOWTIE+STAR	3:71507762:-	9:36860192:-	ENSG00000114861	ENSG00000196092			AGGTCAGGAGATCGAGACCATCCTGGCTAACATGGTGAAACCCCATCTCT*GCTAATTTTTCTATTTTTAGTAGAGATAGGATTTCACCATGTTGGTCAGG	intronic/intronic
+HNRNPC	KIAA0586	gtex	0	5	3	40	BOWTIE+BLAT	14:21269298:-	14:58521163:+	ENSG00000092199	ENSG00000100578			ACCCGGGAGTAGGAGACTCAGAATCGAATCTCTTCTCCCTCCCCTTCTTG*AGATTTTTTTGATCTTCAGCTACATTTTTGGCTTTGTGAGAAGCCTCA	UTR/UTR
+HNRNPC	KIAA0586	gtex	0	5	2	36	BOWTIE+STAR	14:21211222:-	14:58521995:+	ENSG00000092199	ENSG00000100578			GGAAGGAGAGGATGACAGAGACAGCGCCAATGGCGAGGATGACTCTTAAG*CATAGTGGGGTTTAGAAATCTTATCCCATCATTTCTTTACGTAGGTGCTT	UTR/intronic
+HNRNPC	KIAA0586	gtex	0	5	2	27	BOWTIE+STAR	14:21210649:-	14:58522562:+	ENSG00000092199	ENSG00000100578			ATCCTCTAGAAAAATAATTAGTGTTATAGTCTTAAGATTTGTTTTCTAAA*TTGATACTGTGGGTTATTTTTGTGAACAGCCTGATGTTTGGGACCTTTTT	UTR/intronic
+IGH@	CHST11	banned,known,hpa,m0,multi	0	5	2	26	BOWTIE+STAR	14:106561933:+	12:104549853:+	ENSG09000000017	ENSG00000171310			CTTTTTTTTTTTTTTGAGATGGAGTCTTGCTCTGTCACCCAGGCTGCAGT*GCTGTGATCTCACTATTGCATTCCAGCCTGGGCGACAGAGTGAGACCTCA	---/intronic
+IGH@	BACH2	banned,known,cancer,m1,multi	0	5	5	19	BOWTIE+STAR	14:105862206:-	6:90016823:-	ENSG09000001014	ENSG00000112182			TCCGGCACCCACAGCAGGTGGCAGGAAGCAGGTCACCGCGAGAGTCTATT*AAAGAAAAAAAAAAAACACCTGTCAACCTGGAATCTATACCCTTCAAAGA	---/intronic
+IGH@	BACH2	banned,known,cancer,m1,multi	0	5	2	25	BOWTIE+STAR	14:105862068:-	6:89930243:-	ENSG09000001014	ENSG00000112182			ACTGAGCAAAACAACACCTGGACAATTTGCGTTTCTAAAATAAGGCGAGG*AAAAAAAAAAAAAAAGGCTTTTCTTTTTAAACAGTTCCACTTTTAAAA	---/UTR
+IGK@	ATM	cancer,tumor,m3	0	5	2	20	BOWTIE+STAR	2:89163082:+	11:108222500:+	ENSG09000001010	ENSG00000149311			CCGAGATCGCGCCACTGCACTCCAGCCTGGGCAACAGAGTGAGACTCTGT*CCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACA	---/UTR
+KIAA0586	HNRNPC	gtex	0	5	2	38	BOWTIE+STAR	14:58522649:+	14:21210557:-	ENSG00000100578	ENSG00000092199			GGACCTTTTTTCCTCAAAATAAACAAGTCCTTATTAAACCAGGAAAAA*AGAAAAAAAAAACCCTGGTTTTTTATTTTTGTATTTTATTATTGTTTACT	intronic/UTR
+KIAA0586	HNRNPC	gtex	0	5	2	36	BOWTIE+STAR	14:58522354:+	14:21210852:-	ENSG00000100578	ENSG00000092199			CAGGGAGTACAGTTCTTTTCATTCATACGTAAGTTCAGTAGTTGCTTCCC*CTGCAAAGGCAATCTCATTTAGTTGAGTAGCTCTTGAAAGCAGCTTTGAG	intronic/UTR
+MYOCOS	AC060780.1	lincrna,m3	13	5	3	33	BOWTIE+STAR	1:171604377:+	17:43162629:-	ENSG00000283683	ENSG00000267002			ACCTTCCTCAGACTGAGGGCTGTTCCCAGTATATACATCAAGTCACTGAG*GTAGGACAAAAGGTTGCTACAGTCCTATTATTTTATGGTTATTATAAGTG	UTR/exonic(no-known-CDS)
+SDHA	AC008770.1	tumor,m4	28	5	2	41	BOWTIE+STAR	5:249199:+	19:12027220:+	ENSG00000073578	ENSG00000219665			AATCCATTAAAATGGATAAAAACACTTGGAAGCTCTGTGATTTCAATGAA*GATTGTGCTTTTAATCTGTGTTGTCTTTGTATAGTCTGCAGATGTGGATC	UTR/intronic
+SLFN5	CCDC9	m4	0	5	10	39	BOWTIE+STAR	17:35266146:+	19:47271690:+	ENSG00000166750	ENSG00000105321			TGGTAAAAAGGGATATCAGTAATTAGAGGACCGTGAGACTCAGAGATGTG*TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGCGCGCG	UTR/UTR
+STAG3	C19MC		0	5	8	41	BOWTIE+BLAT;BOWTIE+STAR	7:100199599:+	19:53763105:+	ENSG00000066923	ENSG09000000001			ATGTGCAGGAGAGCACACTGATAGAAATCCTTGTGTCCAGTGCCCGGCAA*CTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGTGCCCGCCACCAC	CDS(truncated)/---
+STAG3	C19MC		0	5	7	38	BOWTIE+BLAT;BOWTIE+STAR	7:100199599:+	19:53730201:+	ENSG00000066923	ENSG09000000001			ATGTGCAGGAGAGCACACTGATAGAAATCCTTGTGTCCAGTGCCCGGCAA*CTCCTGCCTCAGCCTCCCGAGCAGCTGGGATTACAGGTATGTGCCACCAC	CDS(truncated)/---
+STAG3	C19MC		0	5	5	38	BOWTIE+BLAT;BOWTIE+STAR	7:100199599:+	19:53706607:+	ENSG00000066923	ENSG09000000001			ATGTGCAGGAGAGCACACTGATAGAAATCCTTGTGTCCAGTGCCCGGCAA*CTCCTGCCTCAGCCTCCCGAGCAGCTGGGATTACAGGCATATGCCACCAC	CDS(truncated)/---
+STAG3	C19MC		0	5	3	24	BOWTIE+BLAT;BOWTIE+STAR	7:100199599:+	19:53725794:+	ENSG00000066923	ENSG09000000001			ATGTGCAGGAGAGCACACTGATAGAAATCCTTGTGTCCAGTGCCCGGCAA*CTCCTGCCTCAGCCTCCCGAGCAGGGTTATAGGCATGTGCCACCACACCT	CDS(truncated)/---
+STAG3	C19MC		0	5	3	24	BOWTIE+STAR	7:100199599:+	19:53763683:+	ENSG00000066923	ENSG09000000001			ATGTGCAGGAGAGCACACTGATAGAAATCCTTGTGTCCAGTGCCCGGCAA*CTCCTGCCTCAGCCTCCCGAGCAGCTGGGATTACAGGCATGCATCACCAC	CDS(truncated)/---
+AC005829.1	C17ORF58	pseudogene,m3	31	4	3	38	BOWTIE+STAR	17:46267371:+	17:67993174:-	ENSG00000261575	ENSG00000186665			GCGATGAACGGGATAGTGCATGACGTGGACGTGCTGGGCGCGGGCATCTG*GCTGGTGGATCGGGACGGGCTGTACAAGATGAACCGCCTGTACCTCACCC	exonic(no-known-CDS)/CDS(truncated)
+AC009163.5	TMEM170A	pseudogene,10K<gap<100K,exon-exon	0	4	3	29	BOWTIE	16:75516203:-	16:75451839:-	ENSG00000262583	ENSG00000166822	ENSE00002602871	ENSE00003646655	TATGTGCCCAGGGATTTTCCTGGACGGGATTGTGAAGTAGCGG*AGATGTGGTATGGTGTATTCCTGTGGGCACTGGTGTCTTCTCT	exonic(no-known-CDS)/CDS(truncated)
+AC018630.1	TAS2R19	pseudogene,m3,10K<gap<100K	66	4	7	37	BOWTIE+STAR	12:11048458:-	12:11021772:-	ENSG00000256019	ENSG00000212124			GAATCCTAGGACTCAGCAGAGCAAACTTGTATTCCTGCTTTACCAAACTC*TTGCAATCATGTATCCTTCATTCCACTCATTCATCCTGATTATGGGAAGT	exonic(no-known-CDS)/CDS(truncated)
+FBXO25	FAM157B	banned,known,pseudogene,bodymap2,hpa,m0,multi,exon-exon	0	4	7	30	BOWTIE	8:435707:+	9:138243620:+	ENSG00000147364	ENSG00000233013	ENSE00003682552	ENSE00002337959	AAGTGCAATTCAAGATATCCGAAGGTTCAATTATGTGGTCAAA*ATGATGATTATTCCCCACCTTCTAAGAGACAAAGACCAACGAG	CDS(truncated)/exonic(no-known-CDS)
+LINC01566	ESD	lincrna,m3	0	4	2	39	BOWTIE+STAR	16:35389990:+	13:46771348:-	ENSG00000259841	ENSG00000139684			AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA*AAAAAAAAAAAAAATTCAAAACATTGGATTTTATAGTGCTAAAAGGGCTT	exonic(no-known-CDS)/UTR
+MSANTD2	ESAM	known,adjacent,gliomas,1K<gap<10K,readthrough,exon-exon	0	4	2	18	BOWTIE;BOWTIE+STAR	11:124772994:-	11:124756742:-	ENSG00000120458	ENSG00000149564	ENSE00003565346	ENSE00000991328	AGAGAACATTAAAAATAAAACAAGAGTCTTCTGAAGAAGCACA*GTGTTGTCCTACATCAATGGGGTCACAACAAGCAAACCTGGAG	out-of-frame
+NUTM2A	NUTM2B	banned,paralogs,cgp,ambiguous,cancer,m0,multi,exon-exon	2818	4	3	26	BOWTIE	10:87231048:+	10:79710382:+	ENSG00000184923	ENSG00000188199	ENSE00002441741	ENSE00002519104	CGAGCAACTTTGACCGGATGATCTTCTACGAGATGGCGGAAAA*TGTACCTTCCCAGCAAGGCCGGCCCCAAGGCCCCGACTGCCTG	in-frame
+PEX26	FGF13	cancer,m3	0	4	2	40	BOWTIE+STAR	22:18089731:+	X:138624824:-	ENSG00000215193	ENSG00000129682			AGTTTTCTTTTGTTTTGTTTGTTTGTTTGTTTGTTTGTTTTTGAGATGGA*GTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGTGTGATCATGTCTCAC	UTR/UTR
+ACTR3	PTMA	oncogene,cancer	0	3	3	21	BOWTIE+STAR	2:113949408:+	2:231713284:+	ENSG00000115091	ENSG00000187514			GATCGAGACTCGGTCTCAAAAAAAAAAAAAAAAGAAAAAAAAAAAGAA*AAAAAAAGCAAAAATGACAACAGAAAAACAATCTTATTCCGAGCATTCCA	intronic/UTR
+ATP6V0C	AMDHD2	known,adjacent,conjoing,cacg,cell_lines,fragments,gap<1K,readthrough,exon-exon	0	3	2	25	BOWTIE	16:2514182:+	16:2520769:+	ENSG00000185883	ENSG00000162066	ENSE00001309635	ENSE00003563063	TTTTTCGCCGTCATGGGCGCCTCGGCCGCCATGGTCTTCAGCG*GGAGGATCTGTGGGTGCGCGGAGGCCGCATCTTGGACCCAGAG	out-of-frame
+AZIN1	RCSD1		0	3	2	21	BOWTIE+STAR	8:102833064:-	1:167668738:+	ENSG00000155096	ENSG00000198771			CTCGCAGTTAATATCATAGCAAAGAAAGTTGTTGAAAATGATAAATTTCC*AAAAAAAAAAAAAAAAGAGGGAGCTGCCAATTCTGCCTGAATGGGGGAA	CDS(truncated)/intronic
+AZIN1	RCSD1		0	3	2	19	BOWTIE+STAR	8:102833065:-	1:167670359:+	ENSG00000155096	ENSG00000198771			CGCAGTTAATATCATAGCAAAGAAAGTTGTTGAAAATGATAAATTTC*AAAAAAAAAAAAAAACCACTCGAGCTGTTCAGAGGCTGCCAGACTGACAA	CDS(truncated)/intronic
+DST	IGH@	cancer	0	3	2	24	BOWTIE+STAR	6:56553461:-	14:105865661:+	ENSG00000151914	ENSG09000000014			GTCATTAATTAAAGATCATAAAGACTTTAGTAAAACTTTGACCGCTCAGT*AAAAAAAAAAAAAAAAAACCAGTGCTCTCCCTTCCGCTGTTAGCCCCAAA	CDS(truncated)/---
+DST	IGH@	cancer	0	3	2	21	BOWTIE+STAR	6:56497915:-	14:105863448:+	ENSG00000151914	ENSG09000000014			AGAAGCAAGCAACCTTCAGAACAAGCTAGAGGTTTTAAATCAACGCTGGC*AAAAAAAAAAAAAAAAAACAGACGGCTGCTAAGTCACCCCCAGGAGTCCA	CDS(truncated)/---
+DST	IGH@	cancer	0	3	2	21	BOWTIE+STAR	6:56572829:-	14:105864836:+	ENSG00000151914	ENSG09000000014			CTAAAAACCAAAGTAGAACTGTTTGAGAACCTCTCAGAAAAGCTCCAGAC*AAAAAAAAAAAAAAAAACACACTAAAGCGCAGGCCTGGTCCCCGGCACAT	CDS(truncated)/---
+HNRNPA2B1	KIF2A	cancer	0	3	4	23	BOWTIE+STAR	7:26191932:-	5:62485853:+	ENSG00000122566	ENSG00000068796			AAAGTTTTGAAAAGCTATTAGCCAGGATCATGGTGTAATAAGACATAA*AAAAAAAAAAAAAAAAATTAAAGATTCCCAACATTAAGTCAGACAGATAA	UTR/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	4	21	BOWTIE+STAR	7:26191931:-	5:62518144:+	ENSG00000122566	ENSG00000068796			AAAGTTTTGAAAAGCTATTAGCCAGGATCATGGTGTAATAAGACATAAC*AAAAAAAAAAAAAAAAATTTGGCCAGGCGCAGTGACTCACGCCTGTAATC	UTR/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	19	BOWTIE+STAR	7:26191930:-	5:62401504:+	ENSG00000122566	ENSG00000068796			AAGTTTTGAAAAGCTATTAGCCAGGATCATGGTGTAATAAGACATAACG*AAAAAAAAAAAAAAAATTCCCTGTTGAAATACCTAATGTGGTTTCTGTTT	UTR/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	19	BOWTIE+STAR	7:26191930:-	5:62352154:+	ENSG00000122566	ENSG00000068796			AAAGTTTTGAAAAGCTATTAGCCAGGATCATGGTGTAATAAGACATAACG*AAAAAAAAAAAAAAAATTATATTTTACACACATATTTTAGGAGGATTTC	UTR/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	19	BOWTIE+STAR	7:26196923:-	5:62518146:+	ENSG00000122566	ENSG00000068796			TGGCGGAATTAAAGAAGATACTGAGGAACATCACCTTAGAGATTACTT*AAAAAAAAAAAAAAATTTGGCCAGGCGCAGTGACTCACGCCTGTAATCCT	CDS(truncated)/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	19	BOWTIE+STAR	7:26196923:-	5:62352155:+	ENSG00000122566	ENSG00000068796			TGGCGGAATTAAAGAAGATACTGAGGAACATCACCTTAGAGATTACTT*AAAAAAAAAAAAAAATTATATTTTACACACATATTTTAGGAGGATTTCTT	CDS(truncated)/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	19	BOWTIE+STAR	7:26196923:-	5:62401505:+	ENSG00000122566	ENSG00000068796			GTTGGCGGAATTAAAGAAGATACTGAGGAACATCACCTTAGAGATTACTT*AAAAAAAAAAAAAAATTCCCTGTTGAAATACCTAATGTGGTTTCTGTT	CDS(truncated)/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	19	BOWTIE+STAR	7:26196923:-	5:62485855:+	ENSG00000122566	ENSG00000068796			TGGCGGAATTAAAGAAGATACTGAGGAACATCACCTTAGAGATTACTT*AAAAAAAAAAAAAAATTAAAGATTCCCAACATTAAGTCAGACAGATAAGA	CDS(truncated)/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	18	BOWTIE+STAR	7:26191929:-	5:62366831:+	ENSG00000122566	ENSG00000068796			AAGTTTTGAAAAGCTATTAGCCAGGATCATGGTGTAATAAGACATAACGT*AAAAAAAAAAAAAAAATTAATATTTGATAGGGTAAATCTTACTACATTGT	UTR/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	17	BOWTIE+STAR	7:26196921:-	5:62366832:+	ENSG00000122566	ENSG00000068796			TGGCGGAATTAAAGAAGATACTGAGGAACATCACCTTAGAGATTACTTTG*AAAAAAAAAAAAAAATTAATATTTGATAGGGTAAATCTTACTACATTGTT	CDS(truncated)/intronic
+HNRNPA2B1	KIF2A	cancer	0	3	2	17	BOWTIE+STAR	7:26196921:-	5:62331411:+	ENSG00000122566	ENSG00000068796			TGGCGGAATTAAAGAAGATACTGAGGAACATCACCTTAGAGATTACTTTG*AAAAAAAAAAAAAAATTATGTTTTCTGGGATTGCAGGCACGGTGGCATAT	CDS(truncated)/intronic
+HSPE1	MOB4	banned,known,adjacent,conjoing,bodymap2,fragments,hpa,1000genomes,m2,multi,10K<gap<100K,readthrough,exon-exon	0	3	2	26	BOWTIE	2:197503128:+	2:197523624:+	ENSG00000115541	ENSG00000115540	ENSE00003580189	ENSE00003583379	TCTCCCAGAATATGGAGGCACCAAAGTAGTTCTAGATGACAAG*GATTTCTATAATTGGCCTGATGAATCCTTTGATGAAATGGACA	in-frame
+IGH@	BCL2	known,oncogene,chimerdb2,fragments,chimerdb3kb,cancer,tumor,m0,multi	0	3	2	33	BOWTIE+STAR	14:105685170:+	18:63198947:-	ENSG09000000013	ENSG00000171791			TGTGTGTGAGGTATATCTGTGTTTGTGTGTCTCTGTGTCTGTGTGTGTCT*NNNNNNNNNNNNNNNNNNNNNTGTGTCAGTGTGTGTCTCTGTGTCTGTGTGTGTCTGTATATGTCTGTG	---/intronic
+IGH@	BCL2	known,oncogene,chimerdb2,fragments,chimerdb3kb,cancer,tumor,m0,multi	0	3	2	23	BOWTIE+STAR	14:105677824:+	18:63242056:-	ENSG09000000013	ENSG00000171791			GAGTGCACCTGCCTGGGCCGGCCCTGACGGATTTTTTTTTTTTTTTTGAT*NNNNNNNNNNNNNNNNNNNNNAGATCTACACCTTGGAGTCACCCATCTCACAAAATGTCAGCCGGGGTGCT	---/intronic
+IGH@	DST	cancer	0	3	3	31	BOWTIE+STAR	14:105960629:+	6:56586163:-	ENSG09000000014	ENSG00000151914			GAATCTAACAATTATGTGTCTTGGAGTTGCTCTTCTCGAGGAGTATCTTT*CAGAAAGTTAACAAGGATACCCAGGAATTGAACTCAGCTCTACACCAAGC	---/intronic
+LINC02363	CRLF2	lincrna,oncogene,cancer	21	3	3	20	BOWTIE+BLAT	4:184340757:-	X:1120532:-	ENSG00000180712	ENSG00000205755			AGATCACGCCACTGCACTCCAGCATGGACAACAGAGCGAGACTCCGTCCC*AAAAAAAAAAAAAAAAAAGATACAGAGATTCTGAAAGCAAGAAAGAGAA	exonic(no-known-CDS)/intergenic
+MALAT1	LIG3	lincrna,oncogene,cancer	0	3	5	24	BOWTIE+STAR	11:65502811:+	17:34988201:+	ENSG00000251562	ENSG00000005156			TTGGGGAATAAGCATAACCCTGAGATTCTTACTACTGATGAGAACATTAT*AAAAAAAAAAAAAAAATTCTGAGGTTGGTTGGTTTGTTGTTTGCTGGTA	exonic(no-known-CDS)/intronic
+MALAT1	LIG3	lincrna,oncogene,cancer	0	3	3	20	BOWTIE+STAR	11:65505913:+	17:34988203:+	ENSG00000251562	ENSG00000005156			AACTTGTAGACTGGAGAAGATAGGCATTTGAGTGGCTGAGAGGGCTTTTG*AAAAAAAAAAAAAATTCTGAGGTTGGTTGGTTTGTTGTTTGCTGGTAG	exonic(no-known-CDS)/intronic
+MALAT1	UBE2B	lincrna,oncogene,cancer	0	3	21	20	BOWTIE+STAR	11:65504861:+	5:134376334:+	ENSG00000251562	ENSG00000119048			ATGCAGGAACACTCAGCAGACACACGTATGCGAAGGGCCAGAGAAGCC*AAAAAAAAAAAAAAAATATATATATATATATACACATATGTACAAAATGA	exonic(no-known-CDS)/intronic
+MALAT1	UBE2B	lincrna,oncogene,cancer	0	3	5	20	BOWTIE+STAR	11:65499936:+	5:134376336:+	ENSG00000251562	ENSG00000119048			GAGTTTCAGATAGAAAATGAAAAACAAGCTAAGACAAGTATTGGAGAAGT*AAAAAAAAAAAAAATATATATATATATATACACATATGTACAAAATGA	exonic(no-known-CDS)/intronic
+MALAT1	UBE2B	lincrna,oncogene,cancer	0	3	4	19	BOWTIE+STAR	11:65504863:+	5:134378931:+	ENSG00000251562	ENSG00000119048			ATGCAGGAACACTCAGCAGACACACGTATGCGAAGGGCCAGAGAAGCCAG*AAAAAAAAAAAAAAAACAGTGGGGGGATCTAAGTGGGTTGTTAGGTTAAG	exonic(no-known-CDS)/intronic
+MALAT1	UBE2B	lincrna,oncogene,cancer	0	3	3	23	BOWTIE+STAR	11:65499318:+	5:134376333:+	ENSG00000251562	ENSG00000119048			AGGAAAAGATAAAAGGTTTCTAAAACATGACGGAGGTTGAGATGAAGC*AAAAAAAAAAAAAAAAATATATATATATATATACACATATGTACAAAATG	exonic(no-known-CDS)/intronic
+MALAT1	UBE2B	lincrna,oncogene,cancer	0	3	3	19	BOWTIE+STAR	11:65500595:+	5:134379650:+	ENSG00000251562	ENSG00000119048			CGATTTGGTGAAGGAAGCTAGGAAGAAGGAAGGAGCGCTAACGATTTGGT*AAAAAAAAAAAAAAAGGAAGAAAGAAAGAAAGTACTCCTTTTTTCAAG	exonic(no-known-CDS)/intronic
+MALAT1	UBE2B	lincrna,oncogene,cancer	0	3	2	19	BOWTIE+STAR	11:65500178:+	5:134375810:+	ENSG00000251562	ENSG00000119048			AGAAGGGGAAGTTGGTTAAAAATCACATCAAAAAGCTACTAAAAGGACTG*AAAAAAAAAAAAAAAAACTTTATGCTGCTGCTTAGGAAAGATGAAATTGG	exonic(no-known-CDS)/intronic
+MANBAL	SRC	banned,known,adjacent,oncogene,tcga,fragments,hpa,non_cancer_tissues,chimerdb3seq,cancer,tumor,10K<gap<100K,readthrough,exon-exon	0	3	3	28	BOWTIE	20:37301413:+	20:37365204:+	ENSG00000101363	ENSG00000197122	ENSE00001461016	ENSE00001460974	TGTGCTGGCCATCATCGTACCCATTCCCAAGTCCCACGAGGCG*AACAGAGAACAGAAGCTCAGAGAAGTGAAGCAACTTGCCCAGC	CDS(truncated)/UTR
+NSD2	IGK@	known,oncogene,fragments,cancer,tumor,m0,multi	0	3	2	18	BOWTIE+STAR	4:1951174:+	2:89645485:+	ENSG00000109685	ENSG09000001011			CGAAACACAAACTGAAGTATCTGTCTCATCCAAAAAGTCTGAGCGAGGAG*AAAAAAAAAAAAAGGAGTTTTTATATGTAGGGTACAAAATACTATCTGAC	CDS(truncated)/---
+NUP153	NUPL2		0	3	3	19	BOWTIE+STAR	6:17615279:-	7:23189913:+	ENSG00000124789	ENSG00000136243			GATTTTATTATGTAAATCCCATTATTCAAAGTTGCCTAAATCCATTTGGA*AAAAAAAAAAAAAAAAATTTTTTTGATGTTGTATAATAACGTGCCAATGT	UTR/intronic
+PTMA	ACTR3	oncogene,cancer	0	3	4	21	BOWTIE+STAR	2:231713036:+	2:113914616:+	ENSG00000187514	ENSG00000115091			CCGCAGATGACACGCGCTCTCCACCACCCAACCCAAACCATGAGAATTTG*AAAAAAAAAAAAAAAAAAGAAAGAAAAAGAAAAGAAATACACTTTCAAAA	UTR/intronic
+PTMA	ACTR3	oncogene,cancer	0	3	2	18	BOWTIE+STAR	2:231713259:+	2:113949386:+	ENSG00000187514	ENSG00000115091			CTCTGTCCTACTTCTGACTTTACTTGTGGTGTGACCATGTTCATTATAAT*AAAAAAAAGAAAAAAAAAAAGAAAAAATTGACACATGCCTAATAATTAT	UTR/intronic
+PTMA	QRICH1	oncogene,cancer	0	3	4	19	BOWTIE+STAR	2:231713259:+	3:49047497:-	ENSG00000187514	ENSG00000198218			CTCTGTCCTACTTCTGACTTTACTTGTGGTGTGACCATGTTCATTATAAT*AAAAAAAAAAAAAAAAAACATTTAAAAGTGCAGCCAAGGCTGAGAAGCAC	UTR/intronic
+PTMA	QRICH1	oncogene,cancer	0	3	4	19	BOWTIE+STAR	2:231713260:+	3:49088634:-	ENSG00000187514	ENSG00000198218			CTGTCCTACTTCTGACTTTACTTGTGGTGTGACCATGTTCATTATAATC*AAAAAAAAAAAAAAAACAGACAAAAGCCTGGTGCAGTGGCTCAGGCATGT	UTR/intronic
+PTMA	QRICH1	oncogene,cancer	0	3	3	18	BOWTIE+STAR	2:231713041:+	3:49058344:-	ENSG00000187514	ENSG00000198218			GATGACACGCGCTCTCCACCACCCAACCCAAACCATGAGAATTTGCAACA*AAAAAAAAAAGAACCAAAGTTTTGATATTTCCTTGTTAGACTCCAGC	UTR/intronic
+PTMA	QRICH1	oncogene,cancer	0	3	2	18	BOWTIE+STAR	2:231712871:+	3:49078710:-	ENSG00000187514	ENSG00000198218			AAGAAGCAGAAGACCGACGAGGATGACTAGACAGCAAAAAAGGAAAAGTT*AAAAAAAAAAAAAAAAGGACAGGTGTGGTGGCTCACGCCTGTAATCTCAG	UTR/intronic
+SLC6A20	KLHL6		0	3	2	19	BOWTIE+STAR	3:45757526:-	3:183521147:-	ENSG00000163817	ENSG00000172578			ACATGCGCTGTAATGAATCAAAGTTTAATGGATTTAGGGCTGTGCAGGAT*GTGCTTTGTTAAAAATGTGTTTGCAGGCAGTATGCTTGGTAAAAGTCATC	UTR/UTR
+TAF15	AC005261.2	lincrna,oncogene,cancer,tumor	6	3	4	41	BOWTIE+STAR	17:35827576:+	19:57304911:+	ENSG00000270647	ENSG00000268205			AAACCCCGTCTCTACTAAAAATACAAAAAATTAGCTGGACACGATGGTGG*GCGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGACAGGAGAATGGCATGA	intronic/exonic(no-known-CDS)
+TBCEL	TECTA	banned,known,adjacent,cosmic,healthy,bodymap2,fragments,hpa,18cancers,chimerdb3kb,cancer,10K<gap<100K,readthrough,exon-exon	0	3	2	22	BOWTIE	11:121060085:+	11:121102665:+	ENSG00000154114	ENSG00000109927	ENSE00003505369	ENSE00001513132	TTCGTTACTATGTGGATGTTCCACAGGAAGAAGTGCCATTCAG*GATGAATTATTCATCATTCCTTAGAATTTGGGTCTCTTTCATC	CDS(truncated)/UTR
+UBA2	WTIP	banned,known,adjacent,fragments,hpa,non_cancer_tissues,1000genomes,18cancers,m0,multi,10K<gap<100K,readthrough,exon-exon	0	3	3	25	BOWTIE	19:34467014:+	19:34490376:+	ENSG00000126261	ENSG00000142279	ENSE00003701944	ENSE00001160005	ACCAATGGCAGTGATGATGGAGCTCAGCCCTCCACCTCCACAG*GCATTTGCATCAAGTGTGGGCTTGGCATCTACGGAGCCCAGCA	in-frame
+UBE2B	MALAT1	lincrna,oncogene,cancer	0	3	2	19	BOWTIE+STAR	5:134383489:+	11:65504323:+	ENSG00000119048	ENSG00000251562			GCTGGGACTGCAGATGTGTGCCACCATACCCAGCTTTTTTTTTTTTTTTT*CAGACTTCACAGAGAATGCAGTTGTCTTGACTTCAGGTCTGTCTGTTCTG	intronic/exonic(no-known-CDS)
+USP7	BTN2A3P	pseudogene,cancer	0	3	2	21	BOWTIE+STAR	16:8917060:-	6:26428224:+	ENSG00000187555	ENSG00000124549			CCCTTTAGCATTACAAAGAGTGTTCTATGAATTACAGCATAGTGATAAAC*AAAAAAAAAAAAAAAAGTAAAAGAGTAGGAGGACAGTTCACCATTATAGA	CDS(truncated)/intronic
+BPTF	LRRC37A2	banned,known,fragments,hpa,m0,multi,exon-exon	0	2	5	27	BOWTIE	17:67826337:+	17:46511511:+	ENSG00000171634	ENSG00000238083	ENSE00002689718	ENSE00002680088	GAAAGCAGCTTCAGGAGCCATAGTACCTACAGCAGCACTCCAG*GCTGAAGTGCAATGTTGTGATCTCGGCTCACTGCAACCTCTGC	CDS(truncated)/UTR
+BPTF	LRRC37A2	banned,known,fragments,hpa,m0,multi,exon-exon	0	2	3	27	BOWTIE	17:67826337:+	17:46517362:+	ENSG00000171634	ENSG00000238083	ENSE00002689718	ENSE00002373231	GAAAGCAGCTTCAGGAGCCATAGTACCTACAGCAGCACTCCAG*AAATTTCCAAGGAAACTATATTTCTTACATTGATGGAAATGTA	out-of-frame
+NCOR2	UBC	banned,known,healthy,tcga,fragments,hpa,gtex,chimerdb3seq,cancer,oesophagus,exon-exon	0	2	4	30	BOWTIE	12:124567308:-	12:124913774:-	ENSG00000196498	ENSG00000150991	ENSE00001798415	ENSE00001406973	AGCGCGCCCCAAGCCCGGGCGCCACCGCTGCCACCTCCGCGAG*ACAATGCAGATCTTCGTGAAGACTCTGACTGGTAAGACCATCA	UTR/UTR
+NFATC3	PLA2G15	banned,known,adjacent,healthy,bodymap2,fragments,hpa,chimerdb3seq,cancer,10K<gap<100K,readthrough,exon-exon	0	2	2	23	BOWTIE	16:68191775:+	16:68249290:+	ENSG00000072736	ENSG00000103066	ENSE00003583006	ENSE00003644749	AACTTTGCAACCATTGGTCTGCAGGACATCACTTTAGATGATG*TCCCTGGTGATTTGGGTAACCAACTGGAAGCCAAGCTGGACAA	in-frame
+RRM2	C2ORF48	banned,known,adjacent,lincrna,conjoing,cacg,non_tumor_cells,fragments,hpa,1000genomes,1K<gap<10K,readthrough,exon-exon	0	2	2	28	BOWTIE	2:10129154:+	2:10141854:+	ENSG00000171848	ENSG00000163009	ENSE00003659027	ENSE00001489842	GTTTGTGGCAGACAGACTTATGCTGGAACTGGGTTTTAGCAAG*GTGCTGGGAGACCGTGAAGTGCAAAGCAGATGGAGTCCAGGCC	CDS(truncated)/exonic(no-known-CDS)
+SAV1	GYPB	banned,known,hpa,chimerdb3seq,cancer,m0,multi,exon-exon	0	1	6	29	BOWTIE	14:50665179:-	4:144001283:-	ENSG00000151748	ENSG00000250361	ENSE00001246769	ENSE00003580166	CTCTTCCAAAGAATGCCACAGAATCAGGGGAGGCATGCTTCAG*AAATTGTGAGCATATCAGCATTAAGTACCACTGAGGTGGCAAT	in-frame
+VPS13B	STK3	banned,known,gtex,gliomas,chimerdb3seq,exon-exon	0	1	2	29	BOWTIE	8:99275254:+	8:98579789:-	ENSG00000132549	ENSG00000104375	ENSE00001285840	ENSE00003461113	ACAATCCAAGTTCCACAATATATTGACTACTGCCACAATTCCG*CATCCTTTTATCAAGAATGCCAAACCTGTATCAATATTAAGAG	out-of-frame
diff --git a/tests/fixtures/fusions_arriba_test.tsv b/tests/fixtures/fusions_arriba_test.tsv
new file mode 100644
index 0000000..a02afc9
--- /dev/null
+++ b/tests/fixtures/fusions_arriba_test.tsv
@@ -0,0 +1,2 @@
+#gene1	gene2	strand1(gene/fusion)	strand2(gene/fusion)	breakpoint1	breakpoint2	site1	site2	type	split_reads1	split_reads2	discordant_mates	coverage1	coverage2	confidence	reading_frame	tags	retained_protein_domains	closest_genomic_breakpoint1	closest_genomic_breakpoint2	gene_id1	gene_id2	transcript_id1	transcript_id2	direction1	direction2	filters	fusion_transcript	peptide_sequence	read_identifiers
+BCR	ABL1	+/+	+/+	22:23632600	9:133729451	CDS/splice-site	CDS/splice-site	translocation	4	7	0	4	12	high	in-frame	Mitelman	Bcr-Abl_oncoprotein_oligomerisation_domain(100%),C2_domain(100%),PH_domain(100%),RhoGEF_domain(100%)|F-actin_binding(100%),Protein_kinase_domain(100%),SH2_domain(100%),SH3_domain(100%),Variant_SH3_domain(100%)	.	.	ENSG00000186716.15	ENSG00000097007.13	ENST00000305877.8	ENST00000372348.2	downstream	upstream	.	AGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAG___ATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAA|AAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAG___GTGAAAAGCTCCGGG	SFSLTSVELQMLTNSCVKLQTVHSIPLTINKEDDESPGLYGFLNVIVHSATGFKQSS|kALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLR	BCR-ABL1-10,BCR-ABL1-2,BCR-ABL1-24,BCR-ABL1-28,BCR-ABL1-58,BCR-ABL1-60,BCR-ABL1-76,BCR-ABL1-12,BCR-ABL1-18,BCR-ABL1-4,BCR-ABL1-66
diff --git a/tests/fixtures/jaffa_results.csv b/tests/fixtures/jaffa_results.csv
new file mode 100644
index 0000000..b908e98
--- /dev/null
+++ b/tests/fixtures/jaffa_results.csv
@@ -0,0 +1,492 @@
+"sample","fusion genes","chrom1","base1","strand1","chrom2","base2","strand2","gap (kb)","spanning pairs","spanning reads","inframe","aligns","rearrangement","contig","contig break","classification","known"
+"1_Neat_A_Exp2SeracareV2","RP4-777O23.3:AC005154.6","chr7",30550636,"-","chr7",30574881,"-",24.243,1602,7,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:11464:22274/2/2",75,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LMNA:NTRK1","chr1",156130773,"+","chr1",156874906,"+",744.135,75,159,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1101:19918:20181/1/1",106,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FGFR3:TACC3","chr4",1806934,"+","chr4",1739702,"+",67.23,72,161,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:5345:3512/1/1",77,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","NCOA4:RET","chr10",46012883,"-","chr10",43116584,"+",2896.301,74,144,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:17736:19601/1/1",97,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","TPM3:NTRK1","chr1",154170400,"-","chr1",156874571,"+",2704.169,61,147,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:18396:45361/1/1",69,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","EML4:ALK","chr2",42295516,"+","chr2",29223528,"-",13071.985,80,91,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:9465:1402/1/1",75,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","ETV6:NTRK3","chr12",11869969,"+","chr15",87940753,"-",Inf,46,122,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:5345:2563/1/1",65,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","KIF5B:RET","chr10",32017143,"-","chr10",43114480,"+",11097.335,95,65,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:14549:39049/1/1",85,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","FGFR3:BAIAP2L1","chr4",1806934,"+","chr7",98362432,"-",Inf,37,113,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:20791:45256/2/2",49,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","PAX8:PPARG","chr2",113235394,"-","chr3",12379704,"+",Inf,20,109,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:2432:14291/1/1",57,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","SLC34A2:ROS1","chr4",25664330,"+","chr6",117324415,"-",Inf,45,32,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:16467:25281/2/2",66,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","VRK2:FANCL","chr2",58146474,"+","chr2",58165874,"-",19.407,55,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1208:30929:31206/1/1",61,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CD74:ROS1","chr5",150404680,"-","chr6",117324415,"-",Inf,12,43,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:31933:31294/1/1",71,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","NAIP:OCLN","chr5_GL339449v2_alt",427054,"-","chr5_GL339449v2_alt",317638,"+",109.407,10,5,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1222:24677:18511/1/1",101,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","C14orf37:PSMA3","chr14",58291635,"-","chr14",58270418,"+",21.216,7,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1209:11606:23751/2/2",57,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-96H19.1:RP11-446N19.1","chr12",46387972,"+","chr12",46652390,"+",264.428,6,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1122:11495:42020/1/1",100,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GKAP1:KIF27","chr9",83816996,"-","chr9",83908652,"-",91.654,4,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2121:24292:26318/2/2",65,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SAMD5:SASH1","chr6",147509387,"+","chr6",148390134,"+",880.75,3,3,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1225:19847:46223/1/1",85,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF181:ZNF302","chr19",34739609,"+","chr19",34678737,"+",60.868,4,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1115:29995:25369/1/1",36,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NSF:AC243627.1","chr17_KI270908v1_alt",1224279,"+","chr17_KI270908v1_alt",1037054,"+",187.222,1,3,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:25986:37976/1/1",63,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EGFR:RP11-34P13.7","chr7",55200413,"+","chr1",129223,"-",Inf,1,3,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:32045:28956/1/1",67,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NAIP:OCLN","chr5_GL339449v2_alt",430962,"-","chr5_GL339449v2_alt",317638,"+",113.315,2,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2104:20608:31962/1/1",107,"HighConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","AC005062.2:MACC1","chr7",20028919,"-","chr7",20161870,"-",132.95,1,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1107:13474:9825/1/1",57,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SAMD5:RP11-307P5.1","chr6",147509387,"+","chr6",147733729,"+",224.345,2,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1123:28432:8242/1/1",71,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LINC00158:AP000233.4","chr21",25420708,"-","chr21",25175445,"-",245.267,2,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2201:29163:18441/1/1",65,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TXNDC11:SNX29","chr16",11730645,"-","chr16",12524702,"+",794.054,1,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1116:5903:36763/1/1",57,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CTD-2215E18.1:LINC01340","chr5",97431900,"+","chr5",97668551,"+",236.654,1,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2125:15392:8189/2/2",88,"HighConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EGFR:FO082796.1","chr7",55200413,"+","chr9",138286248,"+",Inf,0,47,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:1407:12937/1/1",95,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-444D3.1:SOX5","chr12",24213343,"-","chr12",23896024,"-",317.324,0,35,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1101:7090:29290/1/1",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-977G19.10:NPM1","chr12",56314851,"-","chr5",171407700,"+",Inf,0,3,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2223:13971:34670/1/1",93,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PLEC:TSPAN4","chr8",143950184,"-","chr11",850288,"+",Inf,0,3,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2119:29082:43234/1/1",102,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DBN1:GRK6","chr5",177460432,"-","chr5",177441737,"+",18.695,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1217:13382:34213/1/1",108,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AC005062.2:MACC1","chr7",20129120,"-","chr7",20161870,"-",32.748,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1226:12784:19777/1/1",86,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PPIA:H2AFV","chr7",44799874,"+","chr7",44834562,"-",34.69,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:24637:33703/1/1",114,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SEC24D:RP11-384K6.6","chr4",118797683,"-","chr4",118595912,"+",201.773,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:13139:6044/1/1",75,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FOXO1:LINC00598","chr13",40665583,"-","chr13",40460406,"-",205.181,0,2,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2125:12459:33985/1/1",58,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EFCAB2:DESI2","chr1",244970443,"+","chr1",244686597,"+",283.844,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1122:29711:48368/1/1",115,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SDCCAG8:AKT3","chr1_KI270763v1_alt",501248,"+","chr1_KI270763v1_alt",855380,"-",354.137,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1105:5345:39699/1/1",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NDUFB5:PIK3CA","chr3",179604939,"+","chr3",179198750,"+",406.185,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:10744:10616/1/1",107,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-440L14.1:CTBP1","chr4",781772,"-","chr4",1241519,"-",459.743,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1113:26758:15961/1/1",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARHGAP31:GSK3B","chr3",119295004,"+","chr3",119876508,"-",581.506,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1118:27549:6150/1/1",95,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","VPS13B:STK3","chr8",99275254,"+","chr8",98579789,"-",695.463,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2221:28615:27338/1/1",80,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CTPS2:AP1S2","chrX",16609541,"-","chrX",15846011,"-",763.533,0,2,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2226:15696:29466/1/1",66,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RNF19A:VPS13B","chr8",100309867,"-","chr8",99520899,"+",788.972,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2209:17289:13095/1/1",49,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MRPL1:CCNI","chr4",77909372,"+","chr4",77058635,"-",850.736,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2220:25195:31013/1/1",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RNF219:SLAIN1","chr13",78659049,"-","chr13",77719532,"+",939.518,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1223:30462:36376/1/1",51,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NRROS:LRCH3","chr3",196654647,"+","chr3",197865423,"+",1210.784,0,2,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1228:8694:39453/1/1",103,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TMPRSS2:ERG","chr21",41508081,"-","chr21",38584945,"-",2923.142,0,2,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1203:24373:3389/1/1",72,"MediumConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","SETD3:PAPOLA","chr14",99458279,"-","chr14",96520055,"+",2938.226,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2127:25601:36042/1/1",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SNRNP70:PPP2R1A","chr19",49086561,"+","chr19",52201944,"+",3115.386,0,2,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2223:7507:47243/1/1",66,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HEATR5A:PPP2R3C","chr14",31420472,"-","chr14",35085778,"-",3665.301,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1116:19025:40297/1/1",81,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BICD1:ITPR2","chr12",32107544,"+","chr12",26443650,"-",5663.891,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1219:8247:38504/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CD80:SPICE1","chr3",119557629,"-","chr3",113494142,"-",6063.49,0,2,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1118:27387:12058/1/1",94,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TNFAIP8:MCC","chr5",119268907,"+","chr5",113071234,"-",6197.672,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2220:22090:29993/1/1",98,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ITGAV:ANKRD44","chr2",186602151,"+","chr2",197001840,"-",10399.69,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:25702:10405/1/1",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SEH1L:THOC1","chr18",12948232,"+","chr18",265530,"-",12682.701,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1116:21684:43181/1/1",99,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RAB18:PRPF18","chr10",27504437,"+","chr10",13597458,"+",13906.977,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1113:13504:29501/1/1",61,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CCDC22:TAF1","chrX",49237263,"+","chrX",71392569,"+",22155.308,0,2,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2217:20121:30204/1/1",90,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-383H13.1:STK3","chr8",71844422,"+","chr8",98548161,"-",26703.74,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2108:8846:33035/1/1",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EVI2B:BZRAP1-AS1","chr17",31313979,"-","chr17",58337448,"+",27023.466,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2106:28564:30380/1/1",98,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TRIO:CAST","chr5",14369523,"+","chr5",96675539,"+",82306.019,0,2,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1222:15788:12357/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TMCO1:PUM1","chr1",165759525,"-","chr1",31028864,"-",134730.664,0,2,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2228:10561:18458/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PDIA6:OLA1","chr2",10787281,"-","chr2",174229451,"-",163442.167,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1216:13240:5270/1/1",80,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ADARB2:RPL12","chr10",1233694,"-","chr9",127448423,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1206:17005:7679/1/1",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TIMM23B:ZNF236","chr10",49942300,"+","chr18",76849526,"+",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1118:15554:46768/1/1",82,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RNU6-1318P:RNU4-1","chr1",100000637,"-","chr12",120293237,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2104:26839:13746/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FLI1:ZNF330","chr11",128694276,"+","chr4",141224487,"+",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:4807:23821/1/1",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PRDM2:SETD3","chr1",13773188,"+","chr14",99458535,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2103:17787:9139/1/1",86,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RELT:MGAT3","chr11",73376499,"+","chr22",39487347,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1112:31862:4901/1/1",66,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EP400:TACC1","chr12",131994956,"+","chr8",38825308,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1208:29518:34002/1/1",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TMEM117:BRCA1","chr12",44211387,"+","chr17",43095922,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1209:12124:27074/2/2",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","WASF2:AURKA","chr1",27489986,"-","chr20",56384324,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:25784:13007/1/1",67,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:EIF4G2","chr1",28508160,"+","chr11",10801087,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1213:6532:18722/1/1",57,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:TCHP","chr1",28508160,"+","chr12",109912991,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:9049:13025/1/1",58,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:EPSTI1","chr1",28508160,"+","chr13",42926429,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:26271:10194/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:SF3B3","chr1",28508160,"+","chr16",70538323,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1215:15402:18335/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:MRPS23","chr1",28508160,"+","chr17",57849410,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2209:16305:41106/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:SMARCA4","chr1",28508160,"+","chr19",11012949,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:30462:20480/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:PTBP3","chr1",28508160,"+","chr9",112297916,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1125:25601:37871/1/1",59,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLAIN1:UBE2V1","chr13",77698539,"+","chr20",50084254,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2202:10713:14889/1/1",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RAB2B:MRPS21","chr14",21468357,"-","chr1",150308048,"+",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1209:6187:7750/1/1",74,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AKR1A1:RP11-468E2.1","chr1",45567020,"+","chr14",24210768,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:23967:13447/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLC39A9:PPA1","chr14",69399465,"+","chr10",70217931,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:8724:37044/1/1",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLC8A3:PBX3","chr14",70166639,"-","chr9",125748550,"+",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1110:8674:21096/1/1",37,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NDC1:ZNF292","chr1",53793229,"-","chr6",87233325,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:20395:18441/1/1",93,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CLPX:DNAJC15","chr15",65178934,"-","chr13",43085768,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:23734:17474/1/1",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RRN3:RMDN1","chr16_KI270853v1_alt",578361,"-","chr8",86507112,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2202:24545:8822/1/1",89,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARHGAP17:MS4A1","chr16",24930784,"-","chr11",60462185,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2224:23135:42601/1/1",57,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RABEP1:HINT1","chr17",5346925,"+","chr5",131159611,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1215:20943:42108/1/1",51,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DHX33:CDK17","chr17",5448809,"-","chr12",96300360,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:21542:25703/1/1",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","P4HB:HINT1","chr17",81855142,"-","chr5",131159611,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1121:5152:10985/1/1",49,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SBNO2:CIRBP","chr19_KI270865v1_alt",11598,"-","chr19",1270928,"+",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2228:5507:42759/1/1",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CTD-2006C1.2:SMCHD1","chr19",11987823,"+","chr18",2673281,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1109:14884:1261/1/1",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBA52:RFTN1","chr19",18573748,"+","chr3",16434037,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1108:25337:19654/1/1",103,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CARD8:NUP153","chr19",48249523,"-","chr6",17675033,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:13169:23716/1/1",74,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RUVBL2:ACAP3","chr19",49010066,"+","chr1",1298113,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1112:4036:26670/1/1",67,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","C19orf48:NDUFS4","chr19",50802217,"-","chr5",53646233,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1201:27123:23276/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF814:PRKDC","chr19",57888767,"-","chr8",47954446,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:23246:45678/1/1",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZFAS1:KTN1","chr20",49279208,"+","chr14",55601757,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2205:8907:44922/1/1",79,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MCM3AP:SNRNP70","chr21",46266982,"-","chr19",49104634,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2121:27539:7433/1/1",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","C21orf58:ZNF335","chr21",46302485,"-","chr20",45953948,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:24454:12497/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","STAT1:COPA","chr2",190995061,"-","chr1",160332557,"-",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2111:16792:40719/1/1",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IKZF2:XPOT","chr2",213021993,"-","chr12",64409962,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2213:20730:11249/1/1",88,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ATL2:RPLP0","chr2",38377143,"-","chr12",120197462,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1118:12043:37519/1/1",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SNX4:VRK1","chr3",125504623,"-","chr14",96876030,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2101:30797:4426/1/1",91,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","COPG1:GAS5","chr3",129267099,"+","chr1",173864704,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2217:14478:45889/1/1",80,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CPOX:POLE","chr3",98590632,"-","chr12",132659509,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2104:29356:29747/1/1",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBE2K:FAM208A","chr4",39755739,"+","chr3",56628614,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1128:7009:26617/2/2",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DANCR:CPSF3L","chr4",52712959,"+","chr1",1321093,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2201:26037:21360/1/1",89,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DANCR:ATAD5","chr4",52712959,"+","chr17",30892607,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2127:26443:5183/1/1",75,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBA6-AS1:KIAA1429","chr4",67701484,"+","chr8",94495891,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1225:25347:39436/1/1",115,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","C4orf36:SMC4","chr4",86935988,"-","chr3",160420740,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2211:25134:24859/1/1",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BRD8:ZDHHC4","chr5",138177571,"-","chr7",6581607,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1125:19319:25298/2/2",37,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBE2D2:TP53BP1","chr5",139614617,"+","chr15",43470066,"-",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1205:22008:16489/1/1",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RASA1:PHF14","chr5",87268990,"+","chr7",11051612,"+",Inf,0,2,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2208:16386:1402/1/1",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PRRC2B:DAP3","chr9",131394263,"+","chr1",155709773,"+",Inf,0,2,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2223:31517:4971/1/1",89,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-686D22.8:SLFN12","chr17",35409465,"-","chr17",35420381,"-",10.914,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2204:21521:8506/1/1",83,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GAMT:MUM1","chr19",1398916,"-","chr19",1376519,"+",22.396,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2208:8450:24823/2/2",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PNO1:HZGJ","chr2",68158529,"+","chr2",68188690,"-",30.161,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2116:28422:25773/1/1",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GART:SON","chr21",33532345,"-","chr21",33567157,"+",34.809,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:3447:33387/1/1",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","URGCP-MRPS24:UBE2D4","chr7",43887415,"-","chr7",43942826,"+",55.41,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1128:22577:20955/2/2",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CNPPD1:ANKZF1","chr2",219174778,"-","chr2",219233715,"+",58.938,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2222:21197:27742/1/1",105,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF738:ZNF708","chr19",21361858,"+","chr19",21294739,"-",67.118,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1119:7446:5569/2/2",61,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","KIAA0100:SUPT6H","chr17",28618791,"-","chr17",28686339,"+",67.545,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2217:21988:28657/1/1",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HSP90AA1:DYNC1H1","chr14",102083034,"-","chr14",102000954,"+",82.085,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1214:7689:26248/1/1",66,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZBTB17:SPEN","chr1",15975983,"-","chr1",15872816,"+",103.169,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:31304:42935/1/1",39,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ORC2:FAM126B","chr2",200925836,"-","chr2",201045593,"-",119.753,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2106:30594:2879/1/1",39,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","INIP:KIAA1958","chr9",112717987,"-","chr9",112574057,"+",143.933,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2111:2412:41757/1/1",101,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MKL1:SLC25A17","chr22",40636478,"-","chr22",40799083,"-",162.603,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:17858:46188/1/1",61,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","THOC2:XIAP","chrX",123703454,"-","chrX",123888619,"+",185.162,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1202:17086:29554/2/2",56,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HLA-DQA1:TAP2","chr6_GL000256v2_alt",4043417,"+","chr6_GL000256v2_alt",4229694,"-",186.279,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1206:20314:38521/1/1",37,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CNOT1:GOT2","chr16",58534147,"-","chr16",58722278,"-",188.125,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:16650:15996/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PRIM2:ZNF451","chr6",57326045,"+","chr6",57124734,"+",201.308,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:21115:26582/2/2",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","VTI1A:RP11-57H14.3","chr10",112668998,"+","chr10",112888735,"+",219.739,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2108:23246:2914/1/1",111,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GRK5:TIAL1","chr10",119326611,"+","chr10",119588248,"-",261.64,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:26666:41651/1/1",80,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","C7orf55-LUC7L2:ZC3HAV1","chr7",139360322,"+","chr7",139089759,"-",270.562,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:28483:38679/1/1",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-141M1.3:STARD13","chr13",33439691,"-","chr13",33167622,"-",272.072,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1220:22749:42073/1/1",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-680G10.1:GSE1","chr16",85357643,"+","chr16",85633914,"+",276.274,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2103:15879:35655/2/2",58,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PAPSS1:RP11-286E11.1","chr4",107631631,"-","chr4",107910549,"-",278.915,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2102:14509:39400/2/2",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FIP1L1:SCFD2","chr4",53428183,"+","chr4",53145582,"-",282.599,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2209:15554:29079/1/1",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CASK:DDX3X","chrX",41626604,"-","chrX",41341484,"+",285.122,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1105:9831:27989/1/1",113,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UQCC1:CPNE1","chr20",35347164,"-","chr20",35632923,"-",285.754,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2217:22059:41370/1/1",59,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TTC27:BIRC6","chr2",32666768,"+","chr2",32377588,"+",289.178,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2102:10247:11618/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TSPAN5:RAP1GDS1","chr4",98658146,"-","chr4",98343139,"+",315.008,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2115:19207:10159/1/1",77,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NCOR2:UBC","chr12",124567308,"-","chr12",124913774,"-",346.463,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2105:15148:40754/1/1",99,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PIK3R6:NDEL1","chr17",8819083,"-","chr17",8466930,"+",352.154,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2114:10845:22467/1/1",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NOP14:RNF4","chr4",2963125,"-","chr4",2596084,"+",367.043,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1227:20862:45942/1/1",74,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PICALM:C11orf73","chr11",85974708,"-","chr11",86344603,"+",369.891,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1222:15128:34213/1/1",53,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARHGDIA:CSNK1D","chr17",81869741,"-","chr17",82252604,"-",382.86,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2210:14651:26037/1/1",71,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","XRCC5:RPL37A","chr2",216113129,"+","chr2",216499949,"+",386.823,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1209:15940:13148/1/1",110,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SSFA2:ITGA4","chr2",181909971,"+","chr2",181493325,"+",416.641,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:30320:7961/2/2",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RHPN2:ANKRD27","chr19",33064784,"-","chr19",32643484,"-",421.302,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1210:9475:11653/1/1",81,"MediumConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","BSG:CNN2","chr19",572701,"+","chr19",1036130,"+",463.432,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2102:11251:42618/1/1",86,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SIPA1L1:PCNX","chr14",71414014,"+","chr14",70946915,"+",467.09,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2112:8034:21254/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EIF3A:TIAL1","chr10",119049801,"-","chr10",119578834,"-",529.028,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:15168:6044/1/1",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TMLHE:FUNDC2","chrX",155612792,"-","chrX",155051670,"+",561.126,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2216:6299:21236/1/1",86,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLC12A2:PRRC1","chr5",128135808,"+","chr5",127547819,"+",587.988,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:26697:25210/2/2",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TAF3:SFMBT2","chr10",7824560,"+","chr10",7220537,"-",604.025,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2219:17706:13746/2/2",80,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF267:FUS","chr16",31885256,"+","chr16",31188325,"+",696.927,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1223:31507:25914/1/1",104,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TCF12:RFX7","chr15",56921098,"+","chr15",56179303,"-",741.793,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2210:1570:20850/1/1",38,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TLK1:SLC25A12","chr2",171028339,"-","chr2",171855949,"-",827.607,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1207:12317:2123/2/2",93,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPS3A:LRBA","chr4",151103079,"+","chr4",150265812,"-",837.267,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:20395:44816/1/1",39,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GTF2B:RP11-302M6.4","chr1",88887261,"-","chr1",89843638,"+",956.377,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2223:25246:45695/2/2",88,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FUT8:ZBTB1","chr14",65669480,"+","chr14",64521487,"+",1147.989,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1220:4178:23751/1/1",108,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARHGAP31:NDUFB4","chr3",119409776,"+","chr3",120601111,"+",1191.339,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1201:26484:13447/2/2",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FBXW7:RPS3A","chr4",152337802,"-","chr4",151102871,"+",1234.934,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1110:6177:7802/1/1",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ESYT2:DNAJB6","chr7",158788004,"-","chr7",157358547,"+",1429.458,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2102:29528:28076/1/1",77,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ODF2L:MCOLN2","chr1",86396033,"-","chr1",84965708,"-",1430.327,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1206:4868:34336/1/1",73,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UTRN:HIVEP2","chr6",144344304,"+","chr6",142837047,"-",1507.253,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1202:5934:15434/1/1",90,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LEKR1:RSRC1","chr3",156829377,"+","chr3",158460935,"+",1631.56,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1127:17969:39945/1/1",96,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PPP6R3:RCE1","chr11",68519651,"+","chr11",66843759,"+",1675.888,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2119:12753:18493/1/1",110,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EXTL3:RBPMS","chr8",28743214,"+","chr8",30504286,"+",1761.074,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1122:29609:36165/1/1",37,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CDK6:CDK14","chr7",92833091,"-","chr7",90955697,"+",1877.394,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2223:16214:48614/1/1",56,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HMG20A:PTPN9","chr15",77479321,"+","chr15",75527261,"-",1952.054,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1226:14945:22854/1/1",60,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DLGAP4:ITCH","chr20",36500611,"+","chr20",34369394,"+",2131.215,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1218:21460:22291/2/2",99,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CDK14:ADAM22","chr7",90726812,"+","chr7",87978336,"+",2748.474,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2226:13413:11583/1/1",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CHST11:CHPT1","chr12",104601991,"+","chr12",101714090,"+",2887.897,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1118:15382:38732/1/1",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HINT1:LINC01184","chr5",131162572,"-","chr5",128024298,"-",3138.276,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1201:26788:18863/2/2",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IKBKAP:TMEM38B","chr9",108933864,"-","chr9",105748073,"+",3185.792,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1111:13717:37712/1/1",56,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ANKFY1:ZBTB4","chr17",4242256,"-","chr17",7467327,"-",3225.068,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:24759:31664/2/2",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CPNE8:YAF2","chr12",38730283,"-","chr12",42199235,"-",3468.947,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1227:27813:15961/1/1",81,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GPR108:ZNF57","chr19",6736592,"-","chr19",2915522,"+",3821.07,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1221:8623:14818/2/2",77,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GPR108:ZNF555","chr19",6736592,"-","chr19",2850587,"+",3886.007,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1221:8674:15715/1/1",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DEPDC1B:SKIV2L2","chr5",60686962,"-","chr5",55322327,"+",5364.635,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2210:20709:17790/2/2",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NSUN4:NFYC","chr1",46353028,"+","chr1",40738836,"+",5614.19,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1225:25733:43972/2/2",67,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ENTPD1:KIF20B","chr10",95756255,"+","chr10",89709167,"+",6047.086,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1126:5365:10405/1/1",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TCEA1:PRKDC","chr8",54022063,"-","chr8",47930787,"-",6091.278,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2203:5771:8646/2/2",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FMR1:CMC4","chrX",147912230,"+","chrX",155064033,"-",7151.805,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2212:9414:16155/1/1",54,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARHGEF9:FTX","chrX",63754315,"-","chrX",74281848,"-",10527.527,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2220:10977:18476/1/1",56,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LINC00240:ZFAND3","chr6",26968522,"+","chr6",37929959,"+",10961.44,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1103:20851:30837/1/1",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ADK:JMJD1C","chr10",74224591,"+","chr10",63219983,"-",11004.608,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1112:21765:31681/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARHGEF7:UBAC2","chr13",111210002,"+","chr13",99238427,"+",11971.573,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1123:6167:16260/1/1",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ANKRD12:MIB1","chr18",9221999,"+","chr18",21765772,"+",12543.778,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2226:3955:30820/1/1",44,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PTPN1:RPN2","chr20",50510590,"+","chr20",37223878,"+",13286.709,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1113:5315:31488/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARFIP1:NAA15","chr4",152829726,"+","chr4",139370211,"+",13459.513,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2220:4249:6466/1/1",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ADAM9:CDCA2","chr8",39014043,"+","chr8",25466175,"+",13547.864,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1122:20283:9737/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ELL:APBA3","chr19",18472835,"-","chr19",3752990,"-",14719.848,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2223:10287:32965/1/1",51,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZFP36:ZNF254","chr19",39406928,"+","chr19",24126254,"+",15280.669,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1121:21958:44957/1/1",61,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SNX25:NEK1","chr4",185320864,"+","chr4",169590809,"-",15730.053,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1218:6066:40842/1/1",112,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TBCA:MCTP1","chr5",77776205,"-","chr5",95017484,"-",17241.277,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2212:12134:41721/2/2",71,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DGKH:DIAPH3","chr13",42129632,"+","chr13",60016145,"-",17886.515,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2108:6329:13025/1/1",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BABAM1:USF2","chr19",17267527,"+","chr19",35278946,"+",18011.422,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1209:2493:31910/1/1",60,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MBNL1:FNDC3B","chr3",152300367,"+","chr3",172112452,"+",19812.087,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2208:5964:46504/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF382:RAB8A","chr19",36605730,"+","chr19",16132212,"+",20473.514,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1116:11190:21869/2/2",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TMA16:SNX25","chr4",163494804,"+","chr4",185247294,"+",21752.492,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1123:31801:31523/1/1",107,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TET2:CCNI","chr4",105190505,"+","chr4",77066405,"-",28124.103,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2212:10896:41440/2/2",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TCF12:AKAP13","chr15",57091891,"+","chr15",85485710,"+",28393.821,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1224:17584:44025/1/1",95,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARID4B:ELK4","chr1",235326914,"-","chr1",205623891,"-",29703.026,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1126:13271:21395/1/1",71,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ITSN2:RPS27A","chr2",24298665,"-","chr2",55234119,"+",30935.452,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1126:11769:5991/1/1",99,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","POU2AF1:CLNS1A","chr11",111379162,"-","chr11",77625818,"-",33753.345,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1104:21156:33158/1/1",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLC16A1:ZRANB2","chr1",112929092,"-","chr1",71072548,"-",41856.548,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1207:5944:43726/1/1",104,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CPSF6:ITPR2","chr12",69262562,"+","chr12",26655852,"-",42606.709,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1203:31365:26406/1/1",77,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","THOP1:PRKD2","chr19",2785678,"+","chr19",46701112,"-",43915.437,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2212:31121:32912/1/1",51,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CCDC9:AES","chr19",47260839,"+","chr19",3056356,"-",44204.482,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2202:6614:27654/2/2",39,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RABGAP1L:ABCB10","chr1",174159657,"+","chr1",229549434,"-",55389.78,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2119:21024:6941/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NPSR1-AS1:CCDC132","chr7",34569712,"-","chr7",93291703,"+",58721.992,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1205:24901:42987/2/2",58,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CCDC18:C1orf43","chr1",93207398,"+","chr1",154214574,"-",61007.177,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1108:30502:22344/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HSD17B4:GPBP1","chr5",119452633,"+","chr5",57214074,"+",62238.557,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2121:17432:11372/2/2",53,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BRAF:YWHAG","chr7",140924566,"-","chr7",76330233,"-",64594.337,0,1,TRUE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1224:6786:29747/2/2",77,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","VOPP1:SND1","chr7",55521072,"-","chr7",127721287,"+",72200.216,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:5406:11319/1/1",108,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPTOR:TP53","chr17",80885148,"+","chr17",7674290,"-",73210.858,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1108:13524:8471/2/2",52,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FAM110B:EFR3A","chr8",57994806,"+","chr8",131940499,"+",73945.696,0,1,FALSE,TRUE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1203:13240:24823/2/2",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SVIL:SHOC2","chr10",29657969,"-","chr10",111000415,"+",81342.444,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1112:4787:13482/1/1",49,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PSMD5:POLR1E","chr9",120842737,"-","chr9",37489315,"+",83353.424,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2206:26301:39119/1/1",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UMPS:CLASP2","chr3",124742266,"+","chr3",33644903,"-",91097.365,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2217:14722:6044/1/1",62,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BIRC6:NCKAP5","chr2",32549481,"+","chr2",133213779,"-",100664.301,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1219:15605:16436/1/1",89,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UHRF2:DENND1A","chr9",6460791,"+","chr9",123879021,"-",117418.23,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1214:18335:41458/1/1",68,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SP110:C2orf15","chr2",230177538,"-","chr2",99215998,"+",130961.542,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:18873:45449/1/1",71,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SNX25:EVC","chr4",185288082,"+","chr4",5729307,"+",179558.773,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2115:10480:48386/2/2",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ASXL2:SP100","chr2",25799385,"-","chr2",230494416,"+",204695.03,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:5132:30187/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CEP170:WDR37","chr1_KI270763v1_alt",267148,"-","chr10",1072116,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2120:19776:27707/1/1",73,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ALOX5:DEGS1","chr10_GL383546v1_alt",269595,"+","chr1",224192332,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:21907:9772/2/2",54,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DHTKD1:SLC25A3","chr10",12120267,"+","chr12",98597856,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1221:11272:23030/1/1",68,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RAB18:KIAA0922","chr10",27509930,"+","chr4",153580826,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1107:11119:39225/1/1",82,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF33A:AGER","chr10",38012350,"+","chr6_GL000256v2_alt",3497684,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1207:16346:47682/1/1",75,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MAPK8:ANKRD17","chr10",48410168,"+","chr4",73177533,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2202:5406:4215/1/1",96,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SGMS1-AS1:TAF1D","chr10",50629178,"+","chr11",93732291,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2204:8978:36499/1/1",108,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FAM208B:LRRC28","chr10",5685175,"+","chr15",99333923,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1208:30391:32560/1/1",42,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","JMJD1C:GFPT1","chr10",63380318,"-","chr2",69329424,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1223:2057:15504/1/1",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ENTPD1:CCNI","chr10",95756255,"+","chr4",77066405,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2210:13088:33352/1/1",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BLNK:ZNF236","chr10",96271352,"-","chr18",76849526,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:27285:10475/1/1",99,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RAP1A:NAA50","chr1",111627563,"+","chr3",113724095,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1105:20811:9772/2/2",57,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CHEK1:FAM98B","chr11",125653847,"+","chr15",38465269,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2123:24200:24401/2/2",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BCAS2:LARS","chr1",114575590,"-","chr5",146172774,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1208:9181:6044/2/2",60,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PRDM2:FAM228B","chr1",13773188,"+","chr2",24095141,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2128:2849:8822/1/1",68,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CELF1:HMG20A","chr11",47482695,"-","chr15",77482970,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2115:30817:8365/2/2",83,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PRCC:RP11-706O15.1","chr1",156787174,"+","chrX",3817778,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1107:9435:48016/1/1",76,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLAMF6:PLEKHA2","chr1",160523144,"-","chr8",38917907,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2210:13484:23768/1/1",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EML3:GUK1","chr11",62603149,"-","chr1",228148665,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1121:6654:12673/1/1",95,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SNHG1:ADIPOR2","chr11",62855133,"-","chr12",1754258,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2223:16224:7978/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLC25A45:KIAA0100","chr11",65381915,"-","chr17",28642335,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2108:8339:16893/1/1",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","POU2F1:COTL1","chr1",167220958,"+","chr16",84590262,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2221:21846:18845/1/1",38,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","C1orf112:IPO5","chr1",169795213,"+","chr13",98014042,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2122:28392:45660/1/1",89,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GAS5:CIT","chr1",173865857,"-","chr12",119710620,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2123:25479:36358/1/1",113,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC2:NCOR2","chr1",17439545,"-","chr12",124356806,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1222:28321:42935/2/2",100,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UVRAG:CLDND1","chr11",75912039,"+","chr3",98521442,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1212:17959:37044/2/2",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SWT1:LRRC16A","chr1",185160925,"+","chr6",25537855,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2215:2656:20304/2/2",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPL27A:ZXDC","chr11",8684892,"+","chr3",126462220,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2213:6624:41071/2/2",36,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CTSC:DAPK1","chr11",88300530,"-","chr9",87645895,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1116:6187:22942/2/2",71,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IPO7:MAVS","chr11",9425262,"+","chr20",3861332,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2113:12165:30028/2/2",66,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FAM60A:BICD1","chr12_KI270835v1_alt",216818,"-","chr12",32216247,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2212:16031:9930/1/1",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EIF4G3:C12orf49","chr1",20849415,"-","chr12",116717893,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2222:16752:26336/1/1",38,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RP11-689B22.2:SLC15A2","chr12",108636455,"+","chr3",121931639,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1107:17076:2281/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARPC3:RERE","chr12",110436665,"-","chr1",8423247,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1217:15432:14062/2/2",98,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DTL:BANP","chr1",212035942,"+","chr16",88018428,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1115:20446:31189/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RSRC2:ZNF367","chr12",122526848,"-","chr9",96398314,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1105:17198:1930/1/1",68,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CHFR:GNAS","chr12",132841518,"-","chr20",58895612,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1213:13636:14748/1/1",93,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF84:SET","chr12",133048848,"+","chr9",128691170,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2220:5781:24911/2/2",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","USP48:EFR3A","chr1",21747067,"-","chr8",131953818,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1119:30421:37079/2/2",66,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LYPLAL1:TERF1","chr1",219179246,"+","chr8",73026940,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2106:23825:32543/1/1",53,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARID2:ASCC3","chr12",45731314,"+","chr6",100607088,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2125:12083:48421/1/1",68,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SMYD3:DENND5B","chr1",245764041,"-","chr12",31499671,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2116:20080:44728/1/1",57,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARID2:C18orf8","chr12",45821487,"+","chr18",23515856,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2110:10866:11143/1/1",109,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MYL6:HMGN1","chr12",56159622,"+","chr21",39348577,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1219:8247:35268/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MYL6:CCNB3","chr12",56159622,"+","chrX",50204563,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1123:26179:12286/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CPSF6:TAF10","chr12",69262562,"+","chr11",6611452,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1218:29528:38978/2/2",54,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PTPN6:GLTSCR1","chr12",6960239,"+","chr19",47670443,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1228:12439:9754/1/1",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZFC3H1:NUPL2","chr12",71629609,"-","chr7",23195839,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2215:27965:39647/2/2",60,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:GBA","chr1",28508160,"+","chr1_GL383519v1_alt",46203,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1114:30015:42108/1/1",40,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:RUFY2","chr1",28508160,"+","chr10",68404844,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2201:21065:17878/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:RAB38","chr1",28508160,"+","chr11",88149955,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1127:29741:41915/1/1",60,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:GPHN","chr1",28508160,"+","chr14",66879939,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1111:23520:6132/1/1",59,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:TRPM7","chr1",28508160,"+","chr15",50648885,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1222:9029:37044/1/1",67,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:ANKDD1A","chr1",28508160,"+","chr15",64943484,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2110:8085:20814/1/1",57,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:CNOT1","chr1",28508160,"+","chr16",58547682,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1112:32004:36200/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:NCOA3","chr1",28508160,"+","chr20",47622229,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2226:8998:44236/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:PPIG","chr1",28508160,"+","chr2",169603642,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1218:10521:27285/1/1",59,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:TBC1D22A","chr1",28508160,"+","chr22",46997634,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2104:2666:12867/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:MRPL3","chr1",28508160,"+","chr3",131471279,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2221:20385:1859/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:HSPA9","chr1",28508160,"+","chr5",138560091,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2119:26859:16313/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:MATR3","chr1",28508160,"+","chr5",139315697,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2221:27813:12620/1/1",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:AP3B1","chr1",28508160,"+","chr5",78181662,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2228:13677:33176/1/1",59,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:CHD1","chr1",28508160,"+","chr5",98926534,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2125:28422:7591/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:CNR1","chr1",28508160,"+","chr6",88145337,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2107:22709:21676/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:SPTAN1","chr1",28508160,"+","chr9",128608130,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1216:30797:17720/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RCC1:IARS","chr1",28508160,"+","chr9",92247551,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1209:2706:1648/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GAS6-AS1:CD74","chr13",113833579,"+","chr5",150406321,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:4888:18933/1/1",89,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PSPC1:CKAP5","chr13",19730239,"-","chr11",46780319,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1221:25570:46504/1/1",56,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LNX2:FER","chr13",27581297,"-","chr5",108867767,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1221:21582:20322/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AK2:TBC1D13","chr1",33013207,"-","chr9",128790735,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1205:11566:30573/1/1",111,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HSPH1:IFNGR2","chr13",31140184,"-","chr21_GL383581v2_alt",9460,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1225:3884:31892/1/1",79,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF362:PDCD11","chr1",33280457,"+","chr10",103438015,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2104:15442:29624/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","VWA8:PSMD13","chr13",41787437,"-","chr11",244426,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2215:29751:4303/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RBM26:LSM14A","chr13",79405704,"-","chr19_GL383574v1_alt",69249,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2121:25195:16243/2/2",77,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FHL3:RPS21","chr1",38005439,"-","chr20",62387321,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:9354:45977/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RLF:COG5","chr1",40161636,"+","chr7",107230691,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:17685:29536/1/1",67,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HSP90AA1:COX6B1","chr14",102083034,"-","chr19",35658594,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2103:4442:5851/1/1",93,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","APOPT1:PPP1CB","chr14",103563379,"+","chr2",28776851,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1226:28615:30398/1/1",98,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ACIN1:ANKFY1","chr14",23069476,"-","chr17",4206486,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:17655:44746/1/1",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TMX1:RPS6KC1","chr14",51240444,"+","chr1",213077696,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2108:13453:3494/1/1",68,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DDHD1:BMPR2","chr14",53103683,"-","chr2",202513719,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2118:28909:14941/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RBM25:ZCCHC6","chr14",73088161,"+","chr9",86288744,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1122:5172:29835/2/2",90,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RBM25:KIAA1324L","chr14",73099750,"+","chr7",86942104,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2108:18142:47946/1/1",94,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ABCD4:KANSL3","chr14",74302875,"-","chr2",96619762,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1222:17929:46100/1/1",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARHGAP11B:SNURF","chr15_KI270905v1_alt",2931829,"+","chr15",24962114,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1108:14722:18951/2/2",44,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RASGRP1:INPP4A","chr15",38559821,"-","chr2",98559463,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1223:31101:7591/1/1",98,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PDIA3:PSMA3","chr15",43746706,"+","chr14",58260948,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1123:24505:22643/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TEX9:MKL2","chr15",56365670,"+","chr16",14140544,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2107:6238:25386/2/2",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HERC1:MAST2","chr15",63674342,"-","chr1",46010730,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1216:2808:13746/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PKM:MX1","chr15",72206728,"-","chr21",41435837,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2220:14702:2703/2/2",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CSK:DDX6","chr15",74782720,"+","chr11",118786518,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2217:13606:6712/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MEF2A:PHF20","chr15",99565669,"+","chr20",35801491,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1123:20963:15768/1/1",82,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MEF2A:ATIC","chr15",99633173,"+","chr2",215336035,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:31010:16999/1/1",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NOMO3:RPP30","chr16_KI270853v1_alt",1910520,"+","chr10",90895454,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:25540:24472/1/1",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NOMO1:RPP30","chr16_KI270853v1_alt",932814,"+","chr10",90895454,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:25540:24472/2/2",85,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CORO7-PAM16:TAPBP","chr16_KI270855v1_alt",53556,"-","chr6_GL000255v2_alt",4499818,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1227:16741:15803/1/1",37,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","C16orf59:SLC3A2","chr16",2461800,"+","chr11",62881893,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1122:22272:43040/2/2",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SH2B1:MAP4K1","chr16",28867432,"+","chr19",38610025,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1127:11495:20392/1/1",101,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MAZ:RECQL4","chr16",29806180,"+","chr8",144513138,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1125:27996:39875/1/1",74,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LA16c-306E5.2:FUT10","chr16",3404648,"+","chr8",33461725,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1210:27641:15909/2/2",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CNOT1:RP4-545C24.1","chr16",58525179,"-","chr7",144207693,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2107:27032:12603/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CYB5B:RPL27","chr16",69424857,"+","chr17",42999933,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:25012:23979/1/1",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GSE1:EIF4A1","chr16",85668424,"+","chr17",7575119,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1202:14864:5165/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PMM2:RNGTT","chr16",8813106,"+","chr6",88941180,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1118:22191:21799/1/1",52,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PSMB3:SSR2","chr17_KI270857v1_alt",2791309,"+","chr1",156011887,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1221:14681:18247/2/2",86,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ABR:IGJ","chr17_KI270862v1_alt",169261,"-","chr4",70666514,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2206:27488:13851/1/1",76,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PITPNA:HELLS","chr17",1553004,"-","chr10",94562691,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:4553:44201/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LRRC75A-AS1:WDR1","chr17",16439414,"+","chr4",10116234,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:20923:26107/1/1",93,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","KIAA0100:TRIM22","chr17",28618791,"-","chr11",5698315,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:12987:9051/2/2",36,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NUFIP2:RPN2","chr17",29285992,"-","chr20",37184180,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1208:17766:47471/2/2",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLFN11:DTYMK","chr17",35366947,"-","chr2",241676237,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2114:27113:44922/1/1",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PSMD3:SFPQ","chr17",39995295,"+","chr1",35190995,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2105:14560:25562/1/1",95,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ASB16-AS1:PIK3CD","chr17",44181623,"-","chr1",9716440,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2216:4259:14010/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CRYZ:KMT2C","chr1",74710098,"-","chr7",152230321,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2126:30279:22133/1/1",82,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","KIAA0195:STX17","chr17",75486426,"+","chr9",99915178,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1114:28087:37572/1/1",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CYTH1:TSC2","chr17",78700331,"-","chr16",2056196,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1218:17462:34987/1/1",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPTOR:PCM1","chr17",80791509,"+","chr8",17956604,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2201:7162:27584/1/1",41,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TSPAN10:GABPA","chr17",81637406,"+","chr21",25741573,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2207:21876:45097/1/1",44,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LRRC45:IRF9","chr17",82025507,"+","chr14",24165847,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:9485:14344/2/2",90,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CSNK1D:SUN1","chr17",82248875,"-","chr7",848418,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1125:30411:1824/2/2",57,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IFI44L:SNX5","chr1",78620571,"+","chr20",17961326,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1201:1955:27356/1/1",101,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NTN1:CDC14A","chr17",9182969,"+","chr1",100377546,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1212:20933:36710/2/2",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RERE:SMARCC1","chr1",8422727,"-","chr3",47662592,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:24657:16682/1/1",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PIAS2:CELF1","chr18",46827959,"-","chr11",47489024,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1213:10622:25246/1/1",99,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","WDR7:GSPT1","chr18",56651576,"+","chr16",11898035,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1121:2564:43673/1/1",101,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TYMS:DHCR24","chr18",671451,"+","chr1",54883773,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1206:6441:38468/1/1",41,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF627:UPF2","chr19",11597630,"+","chr10",11959356,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:21450:7574/1/1",103,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DAZAP1:C12orf4","chr19",1422396,"+","chr12",4518880,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:20232:33211/1/1",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PBX4:DIP2B","chr19",19599292,"-","chr12",50714395,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:20212:45273/2/2",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EML2:MED24","chr19",45638845,"-","chr17",40036154,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2206:9161:8822/2/2",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","BSG:SLC38A10","chr19",572701,"+","chr17",81253240,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2108:4898:1930/2/2",59,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LINC01237:PTBP3","chr2_KI270776v1_alt",94733,"+","chr9",112297916,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1107:15544:15381/1/1",60,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","KIZ:TPT1","chr20",21163159,"+","chr13",45337388,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:25784:6396/1/1",53,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PCED1A:AC246787.8","chr20",2839192,"-","chr14_KI270846v1_alt",237755,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1114:8105:2774/1/1",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PANK2:CEP350","chr20",3910830,"+","chr1",180041138,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2201:5172:35040/1/1",106,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CTSA:ATAD3B","chr20",45895133,"+","chr1",1487863,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:5954:7838/2/2",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NCOA3:DIAPH2","chr20",47583261,"+","chrX",96738586,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2101:17959:45801/1/1",100,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GYPC:PARPBP","chr2",126656312,"+","chr12",102153869,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1102:25489:43761/1/1",64,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ERCC3:NPIPB3","chr2",127292610,"-","chr16",21415085,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1201:28869:42056/2/2",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CHAF1B:SNX14","chr21",36399605,"+","chr6",85526237,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2214:28625:43708/2/2",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ITGB2:TDP2","chr21",44920821,"-","chr6",24666611,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2204:31304:42372/1/1",102,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NBAS:CORO1C","chr2",15415546,"-","chr12",108701323,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2117:23257:5112/2/2",88,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AC009948.5:MLH3","chr2",178433475,"+","chr14",75049718,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2114:26108:18564/1/1",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AAMP:GART","chr2",218269382,"-","chr21",33539356,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2206:27052:30573/1/1",79,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GUSBP11:COMMD1","chr22",23705429,"-","chr2",62000701,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2211:32116:45572/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LRRFIP1:DTD1","chr2",237708630,"+","chr20",18744100,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2107:15767:24630/2/2",51,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPS19BP1:TMC8","chr22",39532395,"-","chr17",78140834,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2219:6390:12568/1/1",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DTYMK:CCZ1B","chr2",241680229,"-","chr7",6822364,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2221:5660:1701/2/2",62,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PPP1CB:SLC38A1","chr2",28752176,"+","chr12",46243314,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1127:26382:42987/2/2",61,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GEMIN6:ERAP2","chr2",38779118,"+","chr5",96902274,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2120:25266:18476/2/2",96,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PIGF:BRCA2","chr2",46592475,"-","chr13",32376670,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2201:15716:30661/2/2",84,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CHMP3:ERGIC3","chr2",86563304,"-","chr20",35547412,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1104:17939:13974/1/1",76,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TOPBP1:MADD","chr3",133652463,"-","chr11",47323671,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2217:10927:26336/2/2",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EPHB1:NFYB","chr3",134795689,"+","chr12",104135532,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1123:10368:7468/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","COMMD2:CDC123","chr3",149750678,"-","chr10",12215740,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:5406:5200/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TBL1XR1:STAM","chr3",177032971,"-","chr10",17714543,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2213:26951:16682/2/2",60,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBE2E2:ELMO1","chr3",23217312,"+","chr7",37133234,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2102:12053:10106/1/1",111,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPL15:CEP104","chr3",23917173,"+","chr1",3839776,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1122:3051:38820/1/1",35,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TRANK1:NUCKS1","chr3",36908323,"-","chr1",205729621,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2221:16924:31699/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RBM6:SNAP47","chr3",49962685,"+","chr1",227780527,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2113:11576:23768/1/1",38,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GNL3:SUMO2","chr3",52687615,"+","chr17",75174823,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1105:25926:19126/1/1",79,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PTPRG:TARS","chr3",61562372,"+","chr5",33448541,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1107:15351:11214/2/2",39,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBE2D3:FAM177A1","chr4",102868715,"-","chr14",35102681,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:24353:46891/1/1",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SNHG8:ATP9B","chr4",118279137,"+","chr18",79126267,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:24931:48421/2/2",70,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SNHG8:TBC1D5","chr4",118279137,"+","chr3",17508605,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2225:19015:7398/1/1",52,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LRBA:UBAP2","chr4",150415438,"-","chr9",33948587,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1215:10389:6027/1/1",93,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TRAPPC11:ZCCHC7","chr4",183706940,"+","chr9",37327799,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:6400:32842/2/2",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IRF2:SET","chr4",184474379,"-","chr9",128691170,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2114:14164:4338/1/1",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DANCR:UBE2J2","chr4",52712959,"+","chr1",1267992,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2105:5792:27215/1/1",111,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DANCR:MANEA","chr4",52712959,"+","chr6",95604827,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1206:3518:44799/1/1",75,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DANCR:NCALD","chr4",52712959,"+","chr8",102020289,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1209:23399:37114/1/1",68,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","JAKMIP1:PBX3","chr4",6112722,"-","chr9",125915686,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:12398:34618/1/1",110,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBA6-AS1:PDCD6IP","chr4",67790511,"+","chr3",33864006,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1226:19674:17860/1/1",81,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ANKRD17:FAM73A","chr4",73177380,"-","chr1",77858938,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2226:11677:7662/2/2",76,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SMN1:TXN","chr5_KI270897v1_alt",494609,"+","chr9",110251462,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:22465:11653/1/1",77,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLC25A46:AP3M1","chr5",110743787,"+","chr10",74129992,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2116:7517:20146/1/1",95,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AC116366.5:RRNAD1","chr5",132426282,"-","chr1",156735725,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2105:29325:36657/1/1",105,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MATR3:SND1","chr5",139293805,"+","chr7",127807484,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1207:3802:11143/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBE2D2:PSMA1","chr5",139614617,"+","chr11",14513699,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1212:10318:28270/1/1",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EBF1:NEIL3","chr5",159073396,"-","chr4",177335688,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2102:28168:24560/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","FBXW11:PMS1","chr5",171868610,"-","chr2",189791790,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2224:15321:3530/1/1",104,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NDUFS6:WDR37","chr5",1802374,"+","chr10",1072116,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2109:13210:11372/1/1",44,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DHFR:LRRC28","chr5",80649389,"-","chr15",99386030,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1202:7882:25773/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MEF2C-AS1:NUBPL","chr5",88905270,"+","chr14",31787780,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2227:8126:44552/1/1",55,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CAST:ATIC","chr5",96746425,"+","chr2",215349536,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2210:24667:42513/1/1",94,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HLA-DQA1:ZBED1","chr6_GL000256v2_alt",4039471,"+","chrY",2490772,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:13575:9192/1/1",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NT5DC1:GNB1L","chr6",116117945,"+","chr22",19854539,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2211:3528:8031/1/1",41,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZBTB2:TLK1","chr6",151391420,"-","chr2",171117857,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1119:3011:3196/1/1",65,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ARID1B:ANP32B","chr6",156901525,"+","chr9",98004964,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1212:20456:35497/1/1",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TRIM38:CHD9","chr6",25962855,"+","chr16",53155926,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1117:30005:15363/1/1",54,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPS10-NUDT3:ABCF1","chr6",34421730,"-","chr6_GL000256v2_alt",1897327,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1124:32471:26177/2/2",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SRPK1:RABGGTB","chr6",35874233,"-","chr1",75794510,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2109:26260:45942/1/1",69,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LINC00680:KIAA1551","chr6",57959028,"-","chr12",31980878,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1201:3397:29114/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","F13A1:AIDA","chr6",6195797,"-","chr1",222670250,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2119:5609:26934/1/1",105,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HMGN3:TMEM165","chr6",79234546,"-","chr4",55424538,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2204:12743:6660/1/1",114,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DNAJC2:NKTR","chr7",103327332,"-","chr3",42617570,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:15564:49036/1/1",80,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SND1:MTA3","chr7",127844424,"+","chr2",42753374,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2205:9983:25966/1/1",90,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AGK:SNX1","chr7",141555567,"+","chr15",64112573,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1126:27438:38134/2/2",53,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RBM33:NCAPD3","chr7",155644919,"+","chr11",134194738,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1121:14397:12902/2/2",51,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RBM33:TMEM245","chr7",155665253,"+","chr9",109108570,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2117:26778:29079/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","HIBADH:DENND1A","chr7",27649473,"-","chr9",123671371,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1106:8542:13799/1/1",88,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CARD11:B2M","chr7",3043667,"-","chr15_KI270849v1_alt",38769,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1205:9526:32525/1/1",86,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NPSR1-AS1:TCF4","chr7",34728737,"-","chr18",55279656,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2117:31994:15821/1/1",82,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LIMK1:CBWD3","chr7",74085844,"+","chr9",68264468,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1209:23896:27496/1/1",41,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SUN1:XPOT","chr7",872562,"+","chr12",64409962,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2212:9648:6114/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CTB-13L3.1:STAT5B","chr7",90403549,"+","chr17",42232137,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:30513:48983/1/1",62,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EEF1D:RCC2","chr8_KI270816v1_alt",87388,"-","chr1",17416646,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2220:13727:43884/1/1",41,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","YWHAZ:RPL22","chr8",100953269,"-","chr1",6197756,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1109:26474:27813/1/1",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","ZNF706:STAG1","chr8",101205435,"-","chr3",136521417,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1105:18548:47665/1/1",45,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NCALD:SRP9","chr8",101915809,"-","chr1",225777935,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2102:12855:27180/1/1",88,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","OXR1:ACAP2","chr8",106713985,"+","chr3",195345317,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2206:24962:47665/1/1",47,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EMC2:ATIC","chr8",108470121,"+","chr2",215325241,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2219:7649:31804/2/2",92,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PVT1:CDKAL1","chr8",128010444,"+","chr6",20649293,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:18314:33369/1/1",72,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PCM1:ZMYND8","chr8",17923188,"+","chr20",47249439,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2105:27417:26494/1/1",54,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PPP2R2A:MED13L","chr8",26293740,"+","chr12",116096752,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2102:16985:17175/1/1",50,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GTF2E2:LRRC28","chr8",30607057,"-","chr15",99287257,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2119:14143:48474/1/1",71,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","SLC20A2:PARP14","chr8",42540915,"-","chr3",122727812,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1221:17036:42653/2/2",78,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PCMTD1:PCNX","chr8",51845661,"-","chr14",70962226,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1228:26189:43708/2/2",66,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","LYN:RNF213","chr8",55880103,"+","chr17",80294720,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1110:9120:6712/1/1",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PEX2:ALDH3A2","chr8",76999990,"-","chr17",19656366,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1126:4371:12972/1/1",67,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RMDN1:MKI67","chr8",86508492,"-","chr10",128119319,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:21876:11231/1/1",73,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","MFHAS1:SNTB2","chr8",8890061,"-","chr16",69245602,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1218:10003:12743/2/2",40,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","TMEM38B:ETFA","chr9",105694772,"+","chr15",76292513,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1126:14844:23804/2/2",63,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","DENND1A:SUCLG1","chr9",123878951,"-","chr2",84449752,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1219:10399:18634/1/1",106,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PBX3:IRF2BP2","chr9",125748623,"+","chr1",234607852,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2112:12297:48825/2/2",97,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RPL12:FUT10","chr9",127449281,"-","chr8",33389798,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1211:11495:28797/1/1",87,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","UBAP2:CCT6A","chr9",33953285,"-","chr7",56060269,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:7547:29167/2/2",80,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","CBWD3:TCF12","chr9",68242185,"+","chr15",57251350,"+",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2224:6360:24366/1/1",102,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IARS:PMS1","chr9",92293441,"-","chr2",189785346,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2103:16437:10774/1/1",107,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RAP2C-AS1:GBE1","chrX",132239711,"+","chr3",81594023,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2104:26118:31417/2/2",79,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IDS:ADK","chrX",149529367,"-","chr10",74525256,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2224:1499:10422/1/1",48,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","WDR45:SP3","chrX",49100582,"-","chr2",173918785,"-",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2110:8034:38803/1/1",46,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","GNL3L:FDPS","chrX",54532585,"+","chr1",155317941,"+",Inf,0,1,FALSE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2216:29538:14941/1/1",76,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","OGT:SNRNP200","chrX",71544635,"+","chr2",96284004,"-",Inf,0,1,TRUE,TRUE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1106:11150:2738/2/2",43,"MediumConfidence","-"
+"1_Neat_A_Exp2SeracareV2","EIF3CL:EIF3C","chr16",28383455,"-","chr16",28731854,"+",348.399,885,3,NA,FALSE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:1221:26839:34424/1/1",31,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AL627171.3:AL139099.3","chr14",49862670,"-","chr14",49586724,"+",275.946,609,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1106:6796:8383/1/1",36,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP63:RN7SKP221","chr22",21389382,"+","chr22",21545552,"-",156.17,516,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1116:16752:7064/1/1",96,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP221:RN7SKP63","chr22",21545550,"-","chr22",21389341,"+",156.209,410,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1205:19715:41317/1/1",76,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SL712P:RN7SL5P","chrX",115992662,"-","chr9",9442188,"+",Inf,409,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2110:25581:7521/1/1",40,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP203:RN7SK","chr2",76445316,"-","chr6",52995770,"+",Inf,300,5,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1103:13930:25492/2/2",76,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP154:RN7SKP93","chr2",133391386,"-","chr2",133596128,"+",204.742,238,2,NA,FALSE,FALSE,"190626_K00400_0121_BH7C5GBBXY:1:2203:27468:12093/1/1",107,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP203:RN7SK","chr2",76445291,"-","chr6",52995771,"+",Inf,138,3,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2203:1742:3670/1/1",51,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP80:RN7SK","chr22",42565180,"-","chr6",52995713,"+",Inf,129,5,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1204:21826:48280/1/1",62,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","OSBPL10:GPD1L","chr3",32030289,"-","chr3",32151852,"+",121.563,85,15,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:29721:34776/1/1",58,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP80:RN7SK","chr22",42565158,"-","chr6",52995717,"+",Inf,81,5,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2213:4706:48685/2/2",52,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP203:RN7SK","chr2",76445301,"-","chr6",52995797,"+",Inf,31,6,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1123:27082:48456/1/1",62,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP80:RN7SK","chr22",42565153,"-","chr6",52995724,"+",Inf,25,4,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1120:11637:30310/1/1",60,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP203:RN7SK","chr2",76445305,"-","chr6",52995797,"+",Inf,19,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2115:9475:13904/2/2",65,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP203:RN7SK","chr2",76445312,"-","chr6",52995792,"+",Inf,15,3,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1104:19410:21588/1/1",55,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP80:RN7SK","chr22",42565153,"-","chr6",52995728,"+",Inf,11,3,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1202:14407:30187/2/2",60,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP80:RN7SK","chr22",42565168,"-","chr6",52995681,"+",Inf,12,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2211:1905:1560/1/1",53,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","PARP8:RP11-353N4.6","chr5",50671287,"+","chr1",149611641,"+",Inf,5,3,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1127:29193:31962/1/1",47,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP80:RN7SK","chr22",42565179,"-","chr6",52995738,"+",Inf,5,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1207:26250:13570/2/2",88,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SKP203:RN7SK","chr2",76445317,"-","chr6",52995810,"+",Inf,5,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1113:4340:5112/2/2",68,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","IGLV2-14:RN7SL4P","chr22",22758937,"+","chr3",15738565,"+",Inf,4,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2210:9607:48772/1/1",50,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AL627171.3:IGLV2-14","chr14",49862669,"-","chr22",22758773,"+",Inf,3,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1106:18649:28147/1/1",79,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AC010149.4:NTRK1","chr2",230573249,"-","chr1",156874573,"+",Inf,2,3,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1101:18578:43533/2/2",65,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","AC010149.4:NTRK1","chr2",230573250,"-","chr1",156874573,"+",Inf,2,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1107:25591:22485/2/2",63,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","RN7SL2:AC246787.8","chr14",49862635,"-","chr14_KI270846v1_alt",237248,"-",Inf,1,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1105:31558:45379/1/1",56,"LowConfidence","-"
+"1_Neat_A_Exp2SeracareV2","NTRK3:ETV6","chr15",87933100,"-","chr12",11869825,"+",Inf,1,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:1115:18223:9860/2/2",66,"LowConfidence","Yes"
+"1_Neat_A_Exp2SeracareV2","RN7SL5P:AC246787.8","chr9",9442109,"+","chr14_KI270846v1_alt",239021,"-",Inf,1,2,NA,FALSE,TRUE,"190626_K00400_0121_BH7C5GBBXY:1:2124:17503:34424/1/1",76,"LowConfidence","-"
diff --git a/tests/fixtures/star-fusion.fusion_predictions.abridged.tsv b/tests/fixtures/star-fusion.fusion_predictions.abridged.tsv
new file mode 100644
index 0000000..231459f
--- /dev/null
+++ b/tests/fixtures/star-fusion.fusion_predictions.abridged.tsv
@@ -0,0 +1,38 @@
+#FusionName	JunctionReadCount	SpanningFragCount	SpliceType	LeftGene	LeftBreakpoint	RightGene	RightBreakpoint	LargeAnchorSupport	FFPM	LeftBreakDinuc	LeftBreakEntropy	RightBreakDinuc	RightBreakEntropy	annots
+KIF5B--RET	286	27	ONLY_REF_SPLICE	KIF5B^ENSG00000170759.10	chr10:32017143:-	RET^ENSG00000165731.18	chr10:43114480:+	YES_LDAS	3.2247	GT	1.9656	AG	1.9656	["ChimerKB","ChimerPub","Cosmic","ChimerSeq","INTRACHROMOSOMAL[chr10:11.02Mb]"]
+EML4--ALK	219	39	ONLY_REF_SPLICE	EML4^ENSG00000143924.18	chr2:42295516:+	ALK^ENSG00000171094.17	chr2:29223528:-	YES_LDAS	2.6581	GT	1.6895	AG	1.9086	["Klijn_CellLines","GUO2018CR_TCGA","HaasMedCancer","TCGA_StarF2019","CCLE_StarF2019","chimerdb_pubmed","ChimerSeq","ChimerKB","TumorFusionsNAR2018","Cosmic","YOSHIHARA_TCGA","ChimerPub","INTRACHROMOSOMAL[chr2:12.25Mb]"]
+ETV6--NTRK3	212	26	ONLY_REF_SPLICE	ETV6^ENSG00000139083.10	chr12:11869969:+	NTRK3^ENSG00000140538.16	chr15:87940753:-	YES_LDAS	2.452	GT	1.7819	AG	1.9656	["TCGA_StarF2019","HaasMedCancer","GUO2018CR_TCGA","chimerdb_omim","Cosmic","YOSHIHARA_TCGA","TumorFusionsNAR2018","ChimerPub","Larsson_TCGA","ChimerKB","ChimerSeq","chimerdb_pubmed","INTERCHROMOSOMAL[chr12--chr15]"]
+LMNA--NTRK1	196	34	ONLY_REF_SPLICE	LMNA^ENSG00000160789.19	chr1:156130773:+	NTRK1^ENSG00000198400.11	chr1:156874906:+	YES_LDAS	2.3696	GT	1.7465	AG	1.5058	["ChimerKB","Cosmic","TCGA_StarF2019","ChimerPub","INTRACHROMOSOMAL[chr1:0.68Mb]"]
+FGFR3--TACC3	196	25	ONLY_REF_SPLICE	FGFR3^ENSG00000068078.18	chr4:1806934:+	TACC3^ENSG00000013810.18	chr4:1739702:+	YES_LDAS	2.2769	GT	1.8892	AG	1.7819	["Cosmic","ChimerPub","YOSHIHARA_TCGA","TumorFusionsNAR2018","ChimerKB","ChimerSeq","CCLE_StarF2019","TCGA_StarF2019","GUO2018CR_TCGA","Klijn_CellLines","INTRACHROMOSOMAL[chr4:0.05Mb]","LOCAL_REARRANGEMENT:+:[48117]"]
+NCOA4--RET	183	20	ONLY_REF_SPLICE	NCOA4^ENSG00000266412.5	chr10:46012883:-	RET^ENSG00000165731.18	chr10:43116584:+	YES_LDAS	2.0915	GT	1.9086	AG	1.8323	["GUO2018CR_TCGA","HaasMedCancer","TCGA_StarF2019","ChimerSeq","ChimerKB","YOSHIHARA_TCGA","Cosmic","ChimerPub","TumorFusionsNAR2018","chimerdb_pubmed","INTRACHROMOSOMAL[chr10:2.87Mb]"]
+TPM3--NTRK1	135	16	ONLY_REF_SPLICE	TPM3^ENSG00000143549.19	chr1:154170400:-	NTRK1^ENSG00000198400.11	chr1:156874571:+	YES_LDAS	1.5556	GT	1.9329	AG	1.6895	["ChimerSeq","ChimerPub","Cosmic","TumorFusionsNAR2018","ChimerKB","CCLE_StarF2019","chimerdb_pubmed","Klijn_CellLines","TCGA_StarF2019","HaasMedCancer","INTRACHROMOSOMAL[chr1:2.62Mb]"]
+PAX8--PPARG	116	15	ONLY_REF_SPLICE	PAX8^ENSG00000125618.16	chr2:113235394:-	PPARG^ENSG00000132170.20	chr3:12379704:+	YES_LDAS	1.3496	GT	1.8295	AG	1.9329	["chimerdb_omim","GUO2018CR_TCGA","HaasMedCancer","TCGA_StarF2019","chimerdb_pubmed","ChimerSeq","ChimerKB","Cosmic","Mitelman","YOSHIHARA_TCGA","ChimerPub","TumorFusionsNAR2018","INTERCHROMOSOMAL[chr2--chr3]"]
+SLC45A3--BRAF	119	0	ONLY_REF_SPLICE	SLC45A3^ENSG00000158715.5	chr1:205680394:-	BRAF^ENSG00000157764.13	chr7:140794467:-	YES_LDAS	1.226	GT	1.5628	AG	1.7465	["ChimerSeq","ChimerPub","Cosmic","ChimerKB","INTERCHROMOSOMAL[chr1--chr7]"]
+FGFR3--BAIAP2L1	111	13	ONLY_REF_SPLICE	FGFR3^ENSG00000068078.18	chr4:1806934:+	BAIAP2L1^ENSG00000006453.13	chr7:98362432:-	YES_LDAS	1.2775	GT	1.8892	AG	1.8062	["CCLE_StarF2019","Klijn_CellLines","ChimerPub","Cosmic","ChimerKB","INTERCHROMOSOMAL[chr4--chr7]"]
+SLC34A2--ROS1	77	26	ONLY_REF_SPLICE	SLC34A2^ENSG00000157765.12	chr4:25664330:+	ROS1^ENSG00000047936.10	chr6:117324415:-	YES_LDAS	1.0612	GT	1.8892	AG	1.7465	["Cosmic","ChimerPub","ChimerKB","ChimerSeq","chimerdb_pubmed","CCLE_StarF2019","TCGA_StarF2019","GUO2018CR_TCGA","Klijn_CellLines","INTERCHROMOSOMAL[chr4--chr6]"]
+CD74--ROS1	58	6	ONLY_REF_SPLICE	CD74^ENSG00000019582.14	chr5:150404680:-	ROS1^ENSG00000047936.10	chr6:117324415:-	YES_LDAS	0.6593	GT	1.7819	AG	1.7465	["ChimerPub","Cosmic","TumorFusionsNAR2018","ChimerKB","ChimerSeq","chimerdb_pubmed","TCGA_StarF2019","GUO2018CR_TCGA","Klijn_CellLines","INTERCHROMOSOMAL[chr5--chr6]"]
+RN7SKP80--RN7SKP118	33	0	INCL_NON_REF_SPLICE	RN7SKP80^ENSG00000202058.1	chr22:42565086:-	RN7SKP118^ENSG00000201201.1	chr16:67321742:-	YES_LDAS	0.34	TT	1.9656	CG	1.8256	["TCGA_StarF2019","CCLE_StarF2019","INTERCHROMOSOMAL[chr22--chr16]"]
+EGFR--SEPT14	9	13	ONLY_REF_SPLICE	EGFR^ENSG00000146648.17	chr7:55200413:+	SEPT14^ENSG00000154997.8	chr7:55796092:-	YES_LDAS	0.2266	GT	1.8892	AG	1.9899	["GUO2018CR_TCGA","TCGA_StarF2019","YOSHIHARA_TCGA","INTRACHROMOSOMAL[chr7:0.58Mb]"]
+AC021660.2--GPR15	8	0	INCL_NON_REF_SPLICE	AC021660.2^ENSG00000248839.1	chr3:98525247:+	GPR15^ENSG00000154165.4	chr3:98532009:+	YES_LDAS	0.0824	CT	1.8892	AC	1.9656	["INTRACHROMOSOMAL[chr3:0.01Mb]","NEIGHBORS[6565]"]
+EEF1A1P13--EFHC1	6	0	INCL_NON_REF_SPLICE	EEF1A1P13^ENSG00000250182.3	chr5:14651703:-	EFHC1^ENSG00000096093.15	chr6:52482342:+	YES_LDAS	0.0618	CA	1.7056	CT	0.3534	["INTERCHROMOSOMAL[chr5--chr6]"]
+AC098590.1--AC099789.1	5	0	INCL_NON_REF_SPLICE	AC098590.1^ENSG00000213851.3	chr4:43410174:-	AC099789.1^ENSG00000225475.1	chr1:56619540:-	YES_LDAS	0.0515	CT	1.8323	AC	1.6049	["CCLE_StarF2019","INTERCHROMOSOMAL[chr4--chr1]"]
+RPL14--USP27X-AS1	5	0	INCL_NON_REF_SPLICE	RPL14^ENSG00000188846.13	chr3:40462056:+	USP27X-AS1^ENSG00000234390.4	chrX:49877691:-	YES_LDAS	0.0515	CT	1.5850	TC	1.5850	["INTERCHROMOSOMAL[chr3--chrX]"]
+TMPRSS2--ERG	5	0	ONLY_REF_SPLICE	TMPRSS2^ENSG00000184012.11	chr21:41508081:-	ERG^ENSG00000157554.18	chr21:38584945:-	YES_LDAS	0.0515	GT	1.6895	AG	1.9219	["ChimerSeq","Cosmic","ChimerPub","YOSHIHARA_TCGA","TumorFusionsNAR2018","Larsson_TCGA","ChimerKB","CCLE_StarF2019","chimerdb_pubmed","GUO2018CR_TCGA","TCGA_StarF2019","INTRACHROMOSOMAL[chr21:2.80Mb]"]
+LINC00158--AP001341.1	3	1	ONLY_REF_SPLICE	LINC00158^ENSG00000185433.8	chr21:25420708:-	AP001341.1^ENSG00000222042.1	chr21:25175445:-	YES_LDAS	0.0412	GT	1.9086	AG	1.4295	["TCGA_StarF2019","INTRACHROMOSOMAL[chr21:0.05Mb]","NEIGHBORS[51995]"]
+AC021660.2--GPR15	3	0	INCL_NON_REF_SPLICE	AC021660.2^ENSG00000248839.1	chr3:98525260:+	GPR15^ENSG00000154165.4	chr3:98532011:+	YES_LDAS	0.0309	AT	1.6895	CA	1.8892	["INTRACHROMOSOMAL[chr3:0.01Mb]","NEIGHBORS[6565]"]
+AC021660.2--GPR15	3	0	INCL_NON_REF_SPLICE	AC021660.2^ENSG00000248839.1	chr3:98525337:+	GPR15^ENSG00000154165.4	chr3:98532012:+	YES_LDAS	0.0309	GA	1.5058	AG	1.8892	["INTRACHROMOSOMAL[chr3:0.01Mb]","NEIGHBORS[6565]"]
+CLSPN--C1orf216	3	0	INCL_NON_REF_SPLICE	CLSPN^ENSG00000092853.13	chr1:35720748:-	C1orf216^ENSG00000142686.7	chr1:35719497:-	YES_LDAS	0.0309	TG	1.5656	CG	1.6729	["INTRACHROMOSOMAL[chr1:0.00Mb]","NEIGHBORS[746]"]
+ENO1--PDE6A	3	0	INCL_NON_REF_SPLICE	ENO1^ENSG00000074800.15	chr1:8867988:-	PDE6A^ENSG00000132915.10	chr5:149867239:-	YES_LDAS	0.0309	GT	1.4256	TG	1.8295	["INTERCHROMOSOMAL[chr1--chr5]"]
+RF00100--RN7SKP76	3	0	INCL_NON_REF_SPLICE	RF00100^ENSG00000202198.1	chr6:52995864:+	RN7SKP76^ENSG00000201289.1	chr16:61743234:-	YES_LDAS	0.0309	TT	1.9656	AT	1.8256	["INTERCHROMOSOMAL[chr6--chr16]"]
+RN7SK--RN7SKP76	3	0	INCL_NON_REF_SPLICE	RN7SK^ENSG00000283293.1	chr6:52995864:+	RN7SKP76^ENSG00000201289.1	chr16:61743234:-	YES_LDAS	0.0309	TT	1.9656	AT	1.8256	["INTERCHROMOSOMAL[chr6--chr16]"]
+EPSTI1--TGS1	2	0	ONLY_REF_SPLICE	EPSTI1^ENSG00000133106.14	chr13:42991978:-	TGS1^ENSG00000137574.10	chr8:55790182:+	YES_LDAS	0.0206	GT	1.3996	AG	1.9086	["INTERCHROMOSOMAL[chr13--chr8]"]
+KLHL18--ELP6	2	0	ONLY_REF_SPLICE	KLHL18^ENSG00000114648.11	chr3:47322708:+	ELP6^ENSG00000163832.15	chr3:47511226:-	YES_LDAS	0.0206	GT	1.9899	AG	1.9656	["INTRACHROMOSOMAL[chr3:0.15Mb]"]
+MCOLN2--NEK7	2	0	ONLY_REF_SPLICE	MCOLN2^ENSG00000153898.12	chr1:84997008:-	NEK7^ENSG00000151414.14	chr1:198232553:+	YES_LDAS	0.0206	GT	1.5301	AG	1.8892	["INTRACHROMOSOMAL[chr1:113.16Mb]"]
+PLEC--TSPAN4	2	0	ONLY_REF_SPLICE	PLEC^ENSG00000178209.15	chr8:143950184:-	TSPAN4^ENSG00000214063.10	chr11:850288:+	YES_LDAS	0.0206	GT	1.6729	AG	1.9329	["INTERCHROMOSOMAL[chr8--chr11]"]
+SEPT2--SFI1	2	0	ONLY_REF_SPLICE	SEPT2^ENSG00000168385.17	chr2:241315982:+	SFI1^ENSG00000198089.15	chr22:31607937:+	YES_LDAS	0.0206	GT	1.4566	AG	1.8062	["INTERCHROMOSOMAL[chr2--chr22]"]
+SNX25--NEK1	2	0	ONLY_REF_SPLICE	SNX25^ENSG00000109762.15	chr4:185320864:+	NEK1^ENSG00000137601.16	chr4:169590809:-	YES_LDAS	0.0206	GT	1.7232	AG	1.6402	["INTRACHROMOSOMAL[chr4:15.59Mb]"]
+UBE2K--FAM208A	2	0	ONLY_REF_SPLICE	UBE2K^ENSG00000078140.13	chr4:39755739:+	FAM208A^ENSG00000163946.13	chr3:56628614:-	YES_LDAS	0.0206	GT	1.9219	AG	1.6895	["INTERCHROMOSOMAL[chr4--chr3]"]
+XXYLT1-AS2--RNF145	2	0	ONLY_REF_SPLICE	XXYLT1-AS2^ENSG00000230266.1	chr3:195148105:+	RNF145^ENSG00000145860.11	chr5:159174158:-	YES_LDAS	0.0206	GT	1.8256	AG	1.5656	["INTERCHROMOSOMAL[chr3--chr5]"]
+NSF--AC091132.5	1	1	ONLY_REF_SPLICE	NSF^ENSG00000073969.18	chr17:46704854:+	AC091132.5^ENSG00000267246.1	chr17:45551537:-	YES_LDAS	0.0206	GT	1.6729	AG	1.9329	["INTRACHROMOSOMAL[chr17:1.03Mb]"]
+TXNDC11--SNX29	1	1	ONLY_REF_SPLICE	TXNDC11^ENSG00000153066.12	chr16:11730645:-	SNX29^ENSG00000048471.13	chr16:12524702:+	YES_LDAS	0.0206	GT	1.9086	AG	1.9086	["INTRACHROMOSOMAL[chr16:0.23Mb]"]
+UBE2R2--WNK1	1	1	ONLY_REF_SPLICE	UBE2R2^ENSG00000107341.4	chr9:33817934:+	WNK1^ENSG00000060237.16	chr12:813642:+	YES_LDAS	0.0206	GT	1.9656	AG	1.7232	["INTERCHROMOSOMAL[chr9--chr12]"]
diff --git a/tests/test_extractions.py b/tests/test_extractions.py
new file mode 100644
index 0000000..451e187
--- /dev/null
+++ b/tests/test_extractions.py
@@ -0,0 +1,66 @@
+"""Module for testing extraction methods"""
+
+from pathlib import Path
+
+from fusor.extract import (
+    get_arriba_records,
+    get_cicero_records,
+    get_fusion_catcher_records,
+    get_jaffa_records,
+    get_star_fusion_records,
+)
+
+
+def test_get_jaffa_records(fixture_data_dir):
+    """Test that get_jaffa_records works correctly"""
+    path = fixture_data_dir / "jaffa_results.csv"
+    fusions_list = get_jaffa_records(Path(path))
+    assert len(fusions_list) == 491
+
+    path = fixture_data_dir / "jaffa_resultss.csv"
+    fusions_list = get_jaffa_records(Path(path))
+    assert fusions_list is None
+
+
+def test_get_star_fusion_records(fixture_data_dir):
+    """Test that get_star_fusion_records works correctly"""
+    path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv"
+    fusions_list = get_star_fusion_records(Path(path))
+    assert len(fusions_list) == 37
+
+    path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs"
+    fusions_list = get_star_fusion_records(Path(path))
+    assert fusions_list is None
+
+
+def test_get_fusion_catcher_records(fixture_data_dir):
+    """Test that get_fusion_catcher_records works correctly"""
+    path = fixture_data_dir / "final-list_candidate-fusion-genes.txt"
+    fusions_list = get_fusion_catcher_records(Path(path))
+    assert len(fusions_list) == 355
+
+    path = fixture_data_dir / "final-list_candidate-fusion-genes.txts"
+    fusions_list = get_fusion_catcher_records(Path(path))
+    assert fusions_list is None
+
+
+def test_get_arriba_records(fixture_data_dir):
+    """Test that get_arriba_records works correctly"""
+    path = fixture_data_dir / "fusions_arriba_test.tsv"
+    fusions_list = get_arriba_records(Path(path))
+    assert len(fusions_list) == 1
+
+    path = fixture_data_dir / "fusionsd_arriba_test.tsv"
+    fusions_list = get_arriba_records(Path(path))
+    assert fusions_list is None
+
+
+def test_get_cicero_records(fixture_data_dir):
+    """Test that get_cicero_records works correctly"""
+    path = fixture_data_dir / "annotated.fusion.txt"
+    fusions_list = get_cicero_records(Path(path))
+    assert len(fusions_list) == 1
+
+    path = fixture_data_dir / "annnotated.fusion.txt"
+    fusions_list = get_cicero_records(Path(path))
+    assert fusions_list is None

From cb4d2c0927c793a3ecd9f59a8f2ffe65335aa16f Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 17 Jan 2025 13:19:32 -0500
Subject: [PATCH 09/20] Add remaining extraction methods, change some
 attributes

---
 src/fusor/extract.py              | 100 ++++++++++++++++++++++++------
 src/fusor/fusion_caller_models.py |   3 +
 src/fusor/translator.py           |   2 +-
 tests/fixtures/enfusion_test.csv  |   2 +
 tests/fixtures/genie_test.txt     |   2 +
 tests/test_extractions.py         |  24 +++++++
 tests/test_translators.py         |   2 +-
 7 files changed, 115 insertions(+), 20 deletions(-)
 create mode 100644 tests/fixtures/enfusion_test.csv
 create mode 100644 tests/fixtures/genie_test.txt

diff --git a/src/fusor/extract.py b/src/fusor/extract.py
index c049bf5..32293c9 100644
--- a/src/fusor/extract.py
+++ b/src/fusor/extract.py
@@ -6,20 +6,39 @@
 import logging
 from pathlib import Path
 
-from fusor.fusion_caller_models import JAFFA, Arriba, Cicero, FusionCatcher, STARFusion
+from fusor.fusion_caller_models import (
+    JAFFA,
+    Arriba,
+    Cicero,
+    EnFusion,
+    FusionCatcher,
+    Genie,
+    STARFusion,
+)
 
 _logger = logging.getLogger(__name__)
 
 
+def _check_if_file_exists(path: Path) -> bool:
+    """Check if fusions file exists
+
+    :param path: The path to the file
+    :return ``True`` if the file exists, ``False`` if not
+    """
+    if not path.exists():
+        statement = f"{path!s} does not exist"
+        _logger.error(statement)
+        return False
+    return True
+
+
 def get_jaffa_records(path: Path) -> list[JAFFA] | None:
     """Load fusions from JAFFA csv file
 
     :param path: The path to the file of JAFFA fusions
     :return A list of JAFFA objects, or None if the specified file does not exist
     """
-    if not path.exists():
-        statement = f"{path!s} does not exist"
-        _logger.error(statement)
+    if not _check_if_file_exists(path):
         return None
     fusions_list: list[JAFFA] = []
     column_rename = {
@@ -41,9 +60,7 @@ def get_star_fusion_records(path: Path) -> list[STARFusion] | None:
     :param path: The path to the file of STAR-Fusion fusions
     :return A list of STAR-Fusion objects, or None if the specified file does not exist
     """
-    if not path.exists():
-        statement = f"{path!s} does not exist"
-        _logger.error(statement)
+    if not _check_if_file_exists(path):
         return None
     fusions_list: list[STARFusion] = []
     column_rename = {
@@ -68,9 +85,7 @@ def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
     :param path: The path to the file of FusionCatcher fusions
     :return A list of FusionCatcher objects, or None if the specified file does not exist
     """
-    if not path.exists():
-        statement = f"{path!s} does not exist"
-        _logger.error(statement)
+    if not _check_if_file_exists(path):
         return None
     fusions_list: list[FusionCatcher] = []
     column_rename = {
@@ -91,15 +106,13 @@ def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
     return fusions_list
 
 
-def get_arriba_records(path: Path) -> list[Arriba]:
+def get_arriba_records(path: Path) -> list[Arriba] | None:
     """Load fusions from Arriba tsv file
 
     :param path: The path to the file of Arriba fusions
     :return A list of Arriba objects, or None if the specified file does not exist
     """
-    if not path.exists():
-        statement = f"{path!s} does not exist"
-        _logger.error(statement)
+    if not _check_if_file_exists(path):
         return None
     fusions_list: list[Arriba] = []
     column_rename = {
@@ -117,15 +130,13 @@ def get_arriba_records(path: Path) -> list[Arriba]:
     return fusions_list
 
 
-def get_cicero_records(path: Path) -> list[Cicero]:
+def get_cicero_records(path: Path) -> list[Cicero] | None:
     """Load fusions from Cicero txt file
 
     :param path: The path to the file of Cicero fusions
     :return A list of Cicero objects, or None if the specified file does not exist
     """
-    if not path.exists():
-        statement = f"{path!s} does not exist"
-        _logger.error(statement)
+    if not _check_if_file_exists(path):
         return None
     fusions_list: list[Cicero] = []
     column_rename = {
@@ -147,3 +158,56 @@ def get_cicero_records(path: Path) -> list[Cicero]:
             row = {column_rename.get(key, key): value for key, value in row.items()}
             fusions_list.append(Cicero(**row))
     return fusions_list
+
+
+def get_enfusion_records(path: Path) -> list[EnFusion] | None:
+    """Load fusions from EnFusion tsv file
+
+    :param path: The path to the file of Enfusion fusions
+    :return A list of Enfusion objects, or None if the specified file does not exist
+    """
+    if not _check_if_file_exists(path):
+        return None
+    fusions_list: list[EnFusion] = []
+    column_rename = {
+        "Gene1": "gene_5prime",
+        "Gene2": "gene_3prime",
+        "Chr1": "chr_5prime",
+        "Chr2": "chr_3prime",
+        "Break1": "break_5prime",
+        "Break2": "break_3prime",
+        "FusionJunctionSequence": "fusion_junction_sequence",
+    }
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile, delimiter="\t")
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(EnFusion(**row))
+    return fusions_list
+
+
+def get_genie_records(path: Path) -> list[Genie] | None:
+    """Load fusions from Genie txt file
+
+    :param path: The path to the file of Genie structural variants
+    :return A list of Genie objects, or None if the specified file does not exist
+    """
+    if not _check_if_file_exists(path):
+        return None
+    fusions_list: list[Genie] = []
+    column_rename = {
+        "Site1_Hugo_Symbol": "site1_hugo",
+        "Site2_Hugo_Symbol": "site2_hugo",
+        "Site1_Chromosome": "site1_chrom",
+        "Site2_Chromosome": "site2_chrom",
+        "Site1_Position": "site1_pos",
+        "Site2_Position": "site2_pos",
+        "Site2_Effect_On_Frame": "reading_frame",
+        "Annotation": "annot",
+    }
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile, delimiter="\t")
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(Genie(**row))
+    return fusions_list
diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 45c56a7..d3b915c 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -223,6 +223,9 @@ class EnFusion(BaseModel):
     break_3prime: int = Field(
         ..., description="The 3' gene fusion partner genomic breakpoint"
     )
+    fusion_junction_sequence: str | None = Field(
+        None, description="The sequence near the fusion junction"
+    )
 
 
 class Genie(BaseModel):
diff --git a/src/fusor/translator.py b/src/fusor/translator.py
index 73a77cb..f22c460 100644
--- a/src/fusor/translator.py
+++ b/src/fusor/translator.py
@@ -498,7 +498,7 @@ async def from_arriba(
                 eventType=EventType("read-through"),
                 eventDescription=arriba.confidence,
             )
-            if "read_through" in arriba.event
+            if "read-through" in arriba.event_type
             else CausativeEvent(
                 eventType=EventType("rearrangement"),
                 eventDescription=arriba.confidence,
diff --git a/tests/fixtures/enfusion_test.csv b/tests/fixtures/enfusion_test.csv
new file mode 100644
index 0000000..eac8911
--- /dev/null
+++ b/tests/fixtures/enfusion_test.csv
@@ -0,0 +1,2 @@
+	Gene1	Gene2	Chr1	Chr2	Break1	Break2	FusionJunctionSequence
+0	TPM3	PDGFRB	1	5	154170465	150126612	ATATATCTAGAG|ATAGACAGATAGAT
diff --git a/tests/fixtures/genie_test.txt b/tests/fixtures/genie_test.txt
new file mode 100644
index 0000000..d5c1b1b
--- /dev/null
+++ b/tests/fixtures/genie_test.txt
@@ -0,0 +1,2 @@
+Site1_Hugo_Symbol	Site2_Hugo_Symbol	Site1_Chromosome	Site2_Chromosome	Site1_Position	Site2_Position	Site2_Effect_On_Frame	Annotation
+TPM3	PDGFRB	1	5	154170465	150126612	In_frame	TMP3 (NM_152263.4) - PDGFRB (NM_002609.4) fusion
diff --git a/tests/test_extractions.py b/tests/test_extractions.py
index 451e187..99c49c3 100644
--- a/tests/test_extractions.py
+++ b/tests/test_extractions.py
@@ -5,7 +5,9 @@
 from fusor.extract import (
     get_arriba_records,
     get_cicero_records,
+    get_enfusion_records,
     get_fusion_catcher_records,
+    get_genie_records,
     get_jaffa_records,
     get_star_fusion_records,
 )
@@ -64,3 +66,25 @@ def test_get_cicero_records(fixture_data_dir):
     path = fixture_data_dir / "annnotated.fusion.txt"
     fusions_list = get_cicero_records(Path(path))
     assert fusions_list is None
+
+
+def test_get_enfusion_records(fixture_data_dir):
+    """Test that get_enfusion_records works correctly"""
+    path = fixture_data_dir / "enfusion_test.csv"
+    fusions_list = get_enfusion_records(Path(path))
+    assert len(fusions_list) == 1
+
+    path = fixture_data_dir / "enfusions_test.csv"
+    fusions_list = get_enfusion_records(Path(path))
+    assert fusions_list is None
+
+
+def test_get_genie_records(fixture_data_dir):
+    """Test that get_genie_records works correctly"""
+    path = fixture_data_dir / "genie_test.txt"
+    fusions_list = get_genie_records(Path(path))
+    assert len(fusions_list) == 1
+
+    path = fixture_data_dir / "genie_tests.txt"
+    fusions_list = get_genie_records(Path(path))
+    assert fusions_list is None
diff --git a/tests/test_translators.py b/tests/test_translators.py
index 6811a5d..1ca6d9d 100644
--- a/tests/test_translators.py
+++ b/tests/test_translators.py
@@ -369,7 +369,7 @@ async def test_arriba(
         strand2="-/-",
         breakpoint1="1:154170465",
         breakpoint2="5:150126612",
-        event="translocation",
+        event_type="translocation",
         confidence="high",
         direction1="upstream",
         direction2="downstream",

From ff8df520cdb1963c07fee283fe6cfa33f649693c Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 23 Jan 2025 10:15:31 -0500
Subject: [PATCH 10/20] Allow extra parameters to be supplied

---
 src/fusor/fusion_caller_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 6953799..9c8d168 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -21,7 +21,7 @@ class Caller(str, Enum):
     GENIE = "GENIE"
 
 
-class FusionCaller(ABC, BaseModel, extra="forbid"):
+class FusionCaller(ABC, BaseModel):
     """ABC for fusion callers"""
 
     type: Caller

From 4fb55c603f0c16e6f78fbd5c9d516ecf7ca6fa29 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 23 Jan 2025 15:11:54 -0500
Subject: [PATCH 11/20] Refactor extraction methods

---
 src/fusor/extract.py      | 109 +++++++++++++-------------------------
 tests/test_extractions.py |  30 ++++++-----
 2 files changed, 54 insertions(+), 85 deletions(-)

diff --git a/src/fusor/extract.py b/src/fusor/extract.py
index 32293c9..bb707b1 100644
--- a/src/fusor/extract.py
+++ b/src/fusor/extract.py
@@ -3,7 +3,6 @@
 """
 
 import csv
-import logging
 from pathlib import Path
 
 from fusor.fusion_caller_models import (
@@ -11,15 +10,14 @@
     Arriba,
     Cicero,
     EnFusion,
+    FusionCaller,
     FusionCatcher,
     Genie,
     STARFusion,
 )
 
-_logger = logging.getLogger(__name__)
 
-
-def _check_if_file_exists(path: Path) -> bool:
+def _does_file_exist(path: Path) -> None:
     """Check if fusions file exists
 
     :param path: The path to the file
@@ -27,31 +25,42 @@ def _check_if_file_exists(path: Path) -> bool:
     """
     if not path.exists():
         statement = f"{path!s} does not exist"
-        _logger.error(statement)
-        return False
-    return True
+        raise ValueError(statement)
+    return
+
+
+def _process_fusion_caller_rows(
+    path: Path, caller: type[FusionCaller], column_rename: dict[str, str]
+) -> list[FusionCaller]:
+    """Convert rows of fusion caller output to Pydantic classes
+
+    :param path: The path to the fusions file
+    :param caller: The fusion caller model class to instantiate for each row
+    :param column_rename: A dictionary of column mappings
+    :return: A list of fusions, represented as Pydantic objects
+    """
+    fusions_list = []
+    with path.open() as csvfile:
+        reader = csv.DictReader(csvfile, delimiter="," if caller is JAFFA else "\t")
+        for row in reader:
+            row = {column_rename.get(key, key): value for key, value in row.items()}
+            fusions_list.append(caller(**row))
+    return fusions_list
 
 
-def get_jaffa_records(path: Path) -> list[JAFFA] | None:
+def get_jaffa_records(path: Path) -> list[JAFFA]:
     """Load fusions from JAFFA csv file
 
     :param path: The path to the file of JAFFA fusions
-    :return A list of JAFFA objects, or None if the specified file does not exist
+    :return A list of JAFFA objects (a ValueError is raised if the file does not exist)
     """
-    if not _check_if_file_exists(path):
-        return None
-    fusions_list: list[JAFFA] = []
+    _does_file_exist(path)
     column_rename = {
         "fusion genes": "fusion_genes",
         "spanning reads": "spanning_reads",
         "spanning pairs": "spanning_pairs",
     }
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile)
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(JAFFA(**row))
-    return fusions_list
+    return _process_fusion_caller_rows(path, JAFFA, column_rename)
 
 
 def get_star_fusion_records(path: Path) -> list[STARFusion] | None:
@@ -60,9 +69,7 @@ def get_star_fusion_records(path: Path) -> list[STARFusion] | None:
     :param path: The path to the file of STAR-Fusion fusions
     :return A list of STAR-Fusion objects, or None if the specified file does not exist
     """
-    if not _check_if_file_exists(path):
-        return None
-    fusions_list: list[STARFusion] = []
+    _does_file_exist(path)
     column_rename = {
         "LeftGene": "left_gene",
         "RightGene": "right_gene",
@@ -71,12 +78,7 @@ def get_star_fusion_records(path: Path) -> list[STARFusion] | None:
         "JunctionReadCount": "junction_read_count",
         "SpanningFragCount": "spanning_frag_count",
     }
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile, delimiter="\t")
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(STARFusion(**row))
-    return fusions_list
+    return _process_fusion_caller_rows(path, STARFusion, column_rename)
 
 
 def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
@@ -85,9 +87,7 @@ def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
     :param path: The path to the file of FusionCatcher fusions
     :return A list of FusionCatcher objects, or None if the specified file does not exist
     """
-    if not _check_if_file_exists(path):
-        return None
-    fusions_list: list[FusionCatcher] = []
+    _does_file_exist(path)
     column_rename = {
         "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
         "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
@@ -98,12 +98,7 @@ def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
         "Spanning_pairs": "spanning_reads",
         "Fusion_sequence": "fusion_sequence",
     }
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile, delimiter="\t")
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(FusionCatcher(**row))
-    return fusions_list
+    return _process_fusion_caller_rows(path, FusionCatcher, column_rename)
 
 
 def get_arriba_records(path: Path) -> list[Arriba] | None:
@@ -112,9 +107,7 @@ def get_arriba_records(path: Path) -> list[Arriba] | None:
     :param path: The path to the file of Arriba fusions
     :return A list of Arriba objects, or None if the specified file does not exist
     """
-    if not _check_if_file_exists(path):
-        return None
-    fusions_list: list[Arriba] = []
+    _does_file_exist(path)
     column_rename = {
         "#gene1": "gene1",
         "strand1(gene/fusion)": "strand1",
@@ -122,12 +115,7 @@ def get_arriba_records(path: Path) -> list[Arriba] | None:
         "type": "event_type",
         "reading_frame": "rf",
     }
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile, delimiter="\t")
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(Arriba(**row))
-    return fusions_list
+    return _process_fusion_caller_rows(path, Arriba, column_rename)
 
 
 def get_cicero_records(path: Path) -> list[Cicero] | None:
@@ -136,9 +124,7 @@ def get_cicero_records(path: Path) -> list[Cicero] | None:
     :param path: The path to the file of Cicero fusions
     :return A list of Cicero objects, or None if the specified file does not exist
     """
-    if not _check_if_file_exists(path):
-        return None
-    fusions_list: list[Cicero] = []
+    _does_file_exist(path)
     column_rename = {
         "geneA": "gene_5prime",
         "geneB": "gene_3prime",
@@ -152,12 +138,7 @@ def get_cicero_records(path: Path) -> list[Cicero] | None:
         "coverageA": "coverage_5prime",
         "coverageB": "coverage_3prime",
     }
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile, delimiter="\t")
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(Cicero(**row))
-    return fusions_list
+    return _process_fusion_caller_rows(path, Cicero, column_rename)
 
 
 def get_enfusion_records(path: Path) -> list[EnFusion] | None:
@@ -166,9 +147,7 @@ def get_enfusion_records(path: Path) -> list[EnFusion] | None:
     :param path: The path to the file of Enfusion fusions
     :return A list of Enfusion objects, or None if the specified file does not exist
     """
-    if not _check_if_file_exists(path):
-        return None
-    fusions_list: list[EnFusion] = []
+    _does_file_exist(path)
     column_rename = {
         "Gene1": "gene_5prime",
         "Gene2": "gene_3prime",
@@ -178,12 +157,7 @@ def get_enfusion_records(path: Path) -> list[EnFusion] | None:
         "Break2": "break_3prime",
         "FusionJunctionSequence": "fusion_junction_sequence",
     }
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile, delimiter="\t")
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(EnFusion(**row))
-    return fusions_list
+    return _process_fusion_caller_rows(path, EnFusion, column_rename)
 
 
 def get_genie_records(path: Path) -> list[Genie] | None:
@@ -192,9 +166,7 @@ def get_genie_records(path: Path) -> list[Genie] | None:
     :param path: The path to the file of Genie structural variants
     :return A list of Genie objects, or None if the specified file does not exist
     """
-    if not _check_if_file_exists(path):
-        return None
-    fusions_list: list[Genie] = []
+    _does_file_exist(path)
     column_rename = {
         "Site1_Hugo_Symbol": "site1_hugo",
         "Site2_Hugo_Symbol": "site2_hugo",
@@ -205,9 +177,4 @@ def get_genie_records(path: Path) -> list[Genie] | None:
         "Site2_Effect_On_Frame": "reading_frame",
         "Annotation": "annot",
     }
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile, delimiter="\t")
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(Genie(**row))
-    return fusions_list
+    return _process_fusion_caller_rows(path, Genie, column_rename)
diff --git a/tests/test_extractions.py b/tests/test_extractions.py
index 99c49c3..f9deb8b 100644
--- a/tests/test_extractions.py
+++ b/tests/test_extractions.py
@@ -2,6 +2,8 @@
 
 from pathlib import Path
 
+import pytest
+
 from fusor.extract import (
     get_arriba_records,
     get_cicero_records,
@@ -20,8 +22,8 @@ def test_get_jaffa_records(fixture_data_dir):
     assert len(fusions_list) == 491
 
     path = fixture_data_dir / "jaffa_resultss.csv"
-    fusions_list = get_jaffa_records(Path(path))
-    assert fusions_list is None
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert get_jaffa_records(path)
 
 
 def test_get_star_fusion_records(fixture_data_dir):
@@ -31,8 +33,8 @@ def test_get_star_fusion_records(fixture_data_dir):
     assert len(fusions_list) == 37
 
     path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs"
-    fusions_list = get_star_fusion_records(Path(path))
-    assert fusions_list is None
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert get_jaffa_records(path)
 
 
 def test_get_fusion_catcher_records(fixture_data_dir):
@@ -42,8 +44,8 @@ def test_get_fusion_catcher_records(fixture_data_dir):
     assert len(fusions_list) == 355
 
     path = fixture_data_dir / "final-list_candidate-fusion-genes.txts"
-    fusions_list = get_fusion_catcher_records(Path(path))
-    assert fusions_list is None
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert get_jaffa_records(path)
 
 
 def test_get_arriba_records(fixture_data_dir):
@@ -53,8 +55,8 @@ def test_get_arriba_records(fixture_data_dir):
     assert len(fusions_list) == 1
 
     path = fixture_data_dir / "fusionsd_arriba_test.tsv"
-    fusions_list = get_arriba_records(Path(path))
-    assert fusions_list is None
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert get_jaffa_records(path)
 
 
 def test_get_cicero_records(fixture_data_dir):
@@ -64,8 +66,8 @@ def test_get_cicero_records(fixture_data_dir):
     assert len(fusions_list) == 1
 
     path = fixture_data_dir / "annnotated.fusion.txt"
-    fusions_list = get_cicero_records(Path(path))
-    assert fusions_list is None
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert get_jaffa_records(path)
 
 
 def test_get_enfusion_records(fixture_data_dir):
@@ -75,8 +77,8 @@ def test_get_enfusion_records(fixture_data_dir):
     assert len(fusions_list) == 1
 
     path = fixture_data_dir / "enfusions_test.csv"
-    fusions_list = get_enfusion_records(Path(path))
-    assert fusions_list is None
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert get_jaffa_records(path)
 
 
 def test_get_genie_records(fixture_data_dir):
@@ -86,5 +88,5 @@ def test_get_genie_records(fixture_data_dir):
     assert len(fusions_list) == 1
 
     path = fixture_data_dir / "genie_tests.txt"
-    fusions_list = get_genie_records(Path(path))
-    assert fusions_list is None
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert get_jaffa_records(path)

From 7da8e76cffb4c8782464a5a7feb8f0497094cec4 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 23 Jan 2025 17:02:44 -0500
Subject: [PATCH 12/20] Store work

---
 src/fusor/extract.py              |  11 +-
 src/fusor/fusion_caller_models.py | 299 +++++++++++++++++++++++-------
 tests/test_extractions.py         |  20 +-
 3 files changed, 251 insertions(+), 79 deletions(-)

diff --git a/src/fusor/extract.py b/src/fusor/extract.py
index bb707b1..4418845 100644
--- a/src/fusor/extract.py
+++ b/src/fusor/extract.py
@@ -21,7 +21,8 @@ def _does_file_exist(path: Path) -> None:
     """Check if fusions file exists
 
     :param path: The path to the file
-    :return ``True`` if the file exists, ``False`` if not
+    :return None
+    :raise ValueError if the file does not exist at the specified path
     """
     if not path.exists():
         statement = f"{path!s} does not exist"
@@ -39,6 +40,7 @@ def _process_fusion_caller_rows(
     :param column_rename: A dictionary of column mappings
     :return: A list of fusions, represented as Pydantic objects
     """
+    _does_file_exist(path)
     fusions_list = []
     with path.open() as csvfile:
         reader = csv.DictReader(csvfile, delimiter="," if caller == JAFFA else "\t")
@@ -54,7 +56,6 @@ def get_jaffa_records(path: Path) -> list[JAFFA]:
     :param path: The path to the file of JAFFA fusions
     :return A list of JAFFA objects, or None if the specified file does not exist
     """
-    _does_file_exist(path)
     column_rename = {
         "fusion genes": "fusion_genes",
         "spanning reads": "spanning_reads",
@@ -69,7 +70,6 @@ def get_star_fusion_records(path: Path) -> list[STARFusion] | None:
     :param path: The path to the file of STAR-Fusion fusions
     :return A list of STAR-Fusion objects, or None if the specified file does not exist
     """
-    _does_file_exist(path)
     column_rename = {
         "LeftGene": "left_gene",
         "RightGene": "right_gene",
@@ -87,7 +87,6 @@ def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
     :param path: The path to the file of FusionCatcher fusions
     :return A list of FusionCatcher objects, or None if the specified file does not exist
     """
-    _does_file_exist(path)
     column_rename = {
         "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
         "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
@@ -107,7 +106,6 @@ def get_arriba_records(path: Path) -> list[Arriba] | None:
     :param path: The path to the file of Arriba fusions
     :return A list of Arriba objects, or None if the specified file does not exist
     """
-    _does_file_exist(path)
     column_rename = {
         "#gene1": "gene1",
         "strand1(gene/fusion)": "strand1",
@@ -124,7 +122,6 @@ def get_cicero_records(path: Path) -> list[Cicero] | None:
     :param path: The path to the file of Cicero fusions
     :return A list of Cicero objects, or None if the specified file does not exist
     """
-    _does_file_exist(path)
     column_rename = {
         "geneA": "gene_5prime",
         "geneB": "gene_3prime",
@@ -147,7 +144,6 @@ def get_enfusion_records(path: Path) -> list[EnFusion] | None:
     :param path: The path to the file of Enfusion fusions
     :return A list of Enfusion objects, or None if the specified file does not exist
     """
-    _does_file_exist(path)
     column_rename = {
         "Gene1": "gene_5prime",
         "Gene2": "gene_3prime",
@@ -166,7 +162,6 @@ def get_genie_records(path: Path) -> list[Genie] | None:
     :param path: The path to the file of Genie structural variants
     :return A list of Genie objects, or None if the specified file does not exist
     """
-    _does_file_exist(path)
     column_rename = {
         "Site1_Hugo_Symbol": "site1_hugo",
         "Site2_Hugo_Symbol": "site2_hugo",
diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 9c8d168..3afc903 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -1,7 +1,9 @@
 """Schemas for outputs provided by different fusion callers"""
 
-from abc import ABC
+import csv
+from abc import ABC, abstractmethod
 from enum import Enum
+from pathlib import Path
 from typing import Literal
 
 from pydantic import BaseModel, Field
@@ -26,45 +28,100 @@ class FusionCaller(ABC, BaseModel):
 
     type: Caller
 
+    class Config:
+        """Allow extra fields from fusion callers to be provided"""
+
+        extra = "allow"
+
+    def _does_file_exist(self, path: Path) -> None:
+        """Check if fusions file exists
+
+        :param path: The path to the file
+        :return None
+        :raise ValueError if the file does not exist at the specified path
+        """
+        if not path.exists():
+            statement = f"{path!s} does not exist"
+            raise ValueError(statement)
+        return
+
+    def _process_fusion_caller_rows(
+        self, path: Path, caller: Caller, column_rename: dict
+    ) -> list[Caller]:
+        """Convert rows of fusion caller output to Pydantic classes
+
+        :param path: The path to the fusions file
+        :param caller: The name of the fusion caller
+        :param column_rename: A dictionary of column mappings
+        :return: A list of fusions, represented as Pydantic objects
+        """
+        self._does_file_exist(path)
+        fusions_list = []
+        with path.open() as csvfile:
+            reader = csv.DictReader(
+                csvfile, delimiter="," if caller == Caller.JAFFA else "\t"
+            )
+            for row in reader:
+                row = {column_rename.get(key, key): value for key, value in row.items()}
+                fusions_list.append(caller(**row))
+        return fusions_list
+
+    @abstractmethod
+    def load_records(self, path: Path) -> list[Caller]:
+        """Abstract method to load records from a fusion caller file."""
+
 
 class JAFFA(FusionCaller):
     """Define parameters for JAFFA model"""
 
     type: Literal[Caller.JAFFA] = Caller.JAFFA
     fusion_genes: str = Field(
-        ..., description="A string containing the two fusion partners"
+        None, description="A string containing the two fusion partners"
     )
     chrom1: str = Field(
-        ..., description="The chromosome indicated in the chrom1 column"
+        None, description="The chromosome indicated in the chrom1 column"
     )
     base1: int = Field(
-        ..., description="The genomic position indicated in the base1 column"
+        None, description="The genomic position indicated in the base1 column"
     )
     chrom2: str = Field(
-        ..., description="The chromosome indicated in the chrom2 column"
+        None, description="The chromosome indicated in the chrom2 column"
     )
     base2: int = Field(
-        ..., description="The genomic position indicated in the base2 column"
+        None, description="The genomic position indicated in the base2 column"
     )
     rearrangement: bool = Field(
-        ..., description=" A boolean indicating if a rearrangement occurred"
+        None, description=" A boolean indicating if a rearrangement occurred"
     )
     classification: str = Field(
-        ..., description="The classification associated with the called fusion"
+        None, description="The classification associated with the called fusion"
     )
     inframe: bool | str = Field(
-        ...,
+        None,
         description="A boolean or string indicating if the fusion occurred in-frame",
     )
     spanning_reads: int = Field(
-        ...,
+        None,
         description="The number of detected reads that span the junction between the two transcript. Although described as spanning reads, this aligns with our definition of split reads i.e. reads that have sequence belonging to the two fusion partners",
     )
     spanning_pairs: int = Field(
-        ...,
+        None,
         description="The number of detected reads that align entirely on either side of the breakpoint",
     )
 
+    def load_records(self, path: Path) -> list["JAFFA"]:
+        """Load fusions from JAFFA csv file
+
+        :param path: The path to the file of JAFFA fusions
+        :return A list of JAFFA objects; raises ValueError if the file does not exist
+        """
+        column_rename = {
+            "fusion genes": "fusion_genes",
+            "spanning reads": "spanning_reads",
+            "spanning pairs": "spanning_pairs",
+        }
+        return self._process_fusion_caller_rows(path, JAFFA, column_rename)
+
 
 class STARFusion(BaseModel):
     """Define parameters for STAR-Fusion model"""
@@ -72,57 +129,91 @@ class STARFusion(BaseModel):
     type: Literal[Caller.STAR_FUSION] = Caller.STAR_FUSION
     left_gene: str = Field(..., description="The gene indicated in the LeftGene column")
     right_gene: str = Field(
-        ..., description="The gene indicated in the RightGene column"
+        None, description="The gene indicated in the RightGene column"
     )
     left_breakpoint: str = Field(
-        ..., description="The gene indicated in the LeftBreakpoint column"
+        None, description="The gene indicated in the LeftBreakpoint column"
     )
     right_breakpoint: str = Field(
-        ..., description="The gene indicated in the RightBreakpoint column"
+        None, description="The gene indicated in the RightBreakpoint column"
     )
-    annots: str = Field(..., description="The annotations associated with the fusion")
+    annots: str = Field(None, description="The annotations associated with the fusion")
     junction_read_count: int = Field(
-        ...,
+        None,
         description="The number of RNA-seq fragments that split the junction between the two transcript segments (from STAR-Fusion documentation)",
     )
     spanning_frag_count: int = Field(
-        ...,
+        None,
         description="The number of RNA-seq fragments that encompass the fusion junction such that one read of the pair aligns to a different gene than the other paired-end read of that fragment (from STAR-Fusion documentation)",
     )
 
+    def load_records(self, path: Path) -> list["STARFusion"]:
+        """Load fusions from STAR-Fusion tsv file
+
+        :param path: The path to the file of STAR-Fusion fusions
+        :return A list of STAR-Fusion objects; raises ValueError if the file does not exist
+        """
+        column_rename = {
+            "LeftGene": "left_gene",
+            "RightGene": "right_gene",
+            "LeftBreakpoint": "left_breakpoint",
+            "RightBreakpoint": "right_breakpoint",
+            "JunctionReadCount": "junction_read_count",
+            "SpanningFragCount": "spanning_frag_count",
+        }
+        return self._process_fusion_caller_rows(path, STARFusion, column_rename)
+
 
 class FusionCatcher(BaseModel):
     """Define parameters for FusionCatcher model"""
 
     type: Literal[Caller.FUSION_CATCHER] = Caller.FUSION_CATCHER
     five_prime_partner: str = Field(
-        ..., description="Gene symbol for the 5' fusion partner"
+        None, description="Gene symbol for the 5' fusion partner"
     )
     three_prime_partner: str = Field(
-        ..., description="Gene symbol for the 3' fusion partner"
+        None, description="Gene symbol for the 3' fusion partner"
     )
     five_prime_fusion_point: str = Field(
-        ...,
+        None,
         description="Chromosomal position for the 5' end of the fusion junction. This coordinate is 1-based",
     )
     three_prime_fusion_point: str = Field(
-        ...,
+        None,
         description="Chromosomal position for the 3' end of the fusion junction. This coordinate is 1-based",
     )
     predicted_effect: str = Field(
-        ...,
+        None,
         description="The predicted effect of the fusion event, created using annotation from the Ensembl database",
     )
     spanning_unique_reads: int = Field(
-        ..., description="The number of unique reads that map on the fusion junction"
+        None, description="The number of unique reads that map on the fusion junction"
     )
     spanning_reads: int = Field(
-        ..., description="The number of paired reads that support the fusion"
+        None, description="The number of paired reads that support the fusion"
     )
     fusion_sequence: str = Field(
-        ..., description="The inferred sequence around the fusion junction"
+        None, description="The inferred sequence around the fusion junction"
     )
 
+    def load_records(self, path: Path) -> list["FusionCatcher"]:
+        """Load fusions from FusionCatcher txt file
+
+        :param path: The path to the file of FusionCatcher fusions
+        :return A list of FusionCatcher objects; raises ValueError if the file does not exist
+        """
+        column_rename = {
+            "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
+            "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
+            "Fusion_point_for_gene_1(5end_fusion_partner)": "five_prime_fusion_point",
+            "Fusion_point_for_gene_2(3end_fusion_partner)": "three_prime_fusion_point",
+            "Predicted_effect": "predicted_effect",
+            "Spanning_unique_reads": "spanning_unique_reads",
+            "Spanning_pairs": "spanning_reads",
+            "Fusion_sequence": "fusion_sequence",
+        }
+        return self._process_fusion_caller_rows(path, FusionCatcher, column_rename)
+
 
 class Arriba(BaseModel):
     """Define parameters for Arriba model"""
@@ -131,117 +222,193 @@ class Arriba(BaseModel):
     gene1: str = Field(..., description="The 5' gene fusion partner")
     gene2: str = Field(..., description="The 3' gene fusion partner")
     strand1: str = Field(
-        ..., description="The strand information for the 5' gene fusion partner"
+        None, description="The strand information for the 5' gene fusion partner"
     )
     strand2: str = Field(
-        ..., description="The strand information for the 3' gene fusion partner"
+        None, description="The strand information for the 3' gene fusion partner"
+    )
+    breakpoint1: str = Field(
+        None, description="The chromosome and breakpoint for gene1"
+    )
+    breakpoint2: str = Field(
+        None, description="The chromosome and breakpoint for gene2"
     )
-    breakpoint1: str = Field(..., description="The chromosome and breakpoint for gene1")
-    breakpoint2: str = Field(..., description="The chromosome and breakpoint for gene2")
     event_type: str = Field(
-        ..., description=" An inference about the type of fusion event"
+        None, description=" An inference about the type of fusion event"
     )
     confidence: str = Field(
-        ..., description="A metric describing the confidence of the fusion prediction"
+        None, description="A metric describing the confidence of the fusion prediction"
     )
     direction1: str = Field(
-        ...,
+        None,
         description="A description that indicates if the transcript segment starts or ends at breakpoint1",
     )
     direction2: str = Field(
-        ...,
+        None,
         description="A description that indicates if the transcript segment starts or ends at breakpoint2",
     )
     rf: str = Field(
-        ...,
+        None,
         description="A description if the reading frame is preserved for the fusion",
     )
     split_reads1: int = Field(
-        ..., description="Number of supporting split fragments with anchor in gene1"
+        None, description="Number of supporting split fragments with anchor in gene1"
     )
     split_reads2: int = Field(
-        ..., description="Number of supporting split fragments with anchor in gene2"
+        None, description="Number of supporting split fragments with anchor in gene2"
     )
     discordant_mates: int = Field(
-        ..., description="Number of discordant mates supporting the fusion"
+        None, description="Number of discordant mates supporting the fusion"
     )
     coverage1: int = Field(
-        ..., description="Number of fragments retained near breakpoint1"
+        None, description="Number of fragments retained near breakpoint1"
     )
     coverage2: int = Field(
-        ..., description="Number of fragments retained near breakpoint2"
+        None, description="Number of fragments retained near breakpoint2"
     )
-    fusion_transcript: str = Field(..., description="The assembled fusion transcript")
+    fusion_transcript: str = Field(None, description="The assembled fusion transcript")
+
+    def load_records(self, path: Path) -> list["Arriba"]:
+        """Load fusions from Arriba tsv file
+
+        :param path: The path to the file of Arriba fusions
+        :return A list of Arriba objects; raises ValueError if the file does not exist
+        """
+        column_rename = {
+            "#gene1": "gene1",
+            "strand1(gene/fusion)": "strand1",
+            "strand2(gene/fusion)": "strand2",
+            "type": "event_type",
+            "reading_frame": "rf",
+        }
+        return self._process_fusion_caller_rows(path, Arriba, column_rename)
 
 
 class Cicero(BaseModel):
     """Define parameters for CICERO model"""
 
     type: Literal[Caller.CICERO] = Caller.CICERO
-    gene_5prime: str = Field(..., description="The gene symbol for the 5' partner")
-    gene_3prime: str = Field(..., description="The gene symbol for the 3' partner")
-    chr_5prime: str = Field(..., description="The chromosome for the 5' partner")
-    chr_3prime: str = Field(..., description="The chromosome for the 3' partner")
+    gene_5prime: str = Field(None, description="The gene symbol for the 5' partner")
+    gene_3prime: str = Field(None, description="The gene symbol for the 3' partner")
+    chr_5prime: str = Field(None, description="The chromosome for the 5' partner")
+    chr_3prime: str = Field(None, description="The chromosome for the 3' partner")
     pos_5prime: int = Field(
-        ..., description="The genomic breakpoint for the 5' partner"
+        None, description="The genomic breakpoint for the 5' partner"
     )
     pos_3prime: int = Field(
-        ..., description="The genomic breakpoint for the 3' partner"
+        None, description="The genomic breakpoint for the 3' partner"
     )
     sv_ort: str = Field(
-        ...,
+        None,
         description="Whether the mapping orientation of assembled contig (driven by structural variation) has confident biological meaning",
     )
     event_type: str = Field(
-        ..., description="The structural variation event that created the called fusion"
+        None,
+        description="The structural variation event that created the called fusion",
     )
     reads_5prime: int = Field(
-        ...,
+        None,
         description="The number of reads that support the breakpoint for the 5' partner",
     )
     reads_3prime: int = Field(
-        ...,
+        None,
         description="The number of reads that support the breakpoint for the 3' partner",
     )
     coverage_5prime: int = Field(
-        ..., description="The fragment coverage at the 5' breakpoint"
+        None, description="The fragment coverage at the 5' breakpoint"
     )
     coverage_3prime: int = Field(
-        ..., description="The fragment coverage at the 3' breakpoint"
+        None, description="The fragment coverage at the 3' breakpoint"
     )
     contig: str = Field(..., description="The assembled contig sequence for the fusion")
 
+    def load_records(self, path: Path) -> list["Cicero"]:
+        """Load fusions from Cicero txt file
+
+        :param path: The path to the file of Cicero fusions
+        :return A list of Cicero objects; raises ValueError if the file does not exist
+        """
+        column_rename = {
+            "geneA": "gene_5prime",
+            "geneB": "gene_3prime",
+            "chrA": "chr_5prime",
+            "chrB": "chr_3prime",
+            "posA": "pos_5prime",
+            "posB": "pos_3prime",
+            "type": "event_type",
+            "readsA": "reads_5prime",
+            "readsB": "reads_3prime",
+            "coverageA": "coverage_5prime",
+            "coverageB": "coverage_3prime",
+        }
+        return self._process_fusion_caller_rows(path, Cicero, column_rename)
+
 
 class EnFusion(BaseModel):
     """Define parameters for EnFusion model"""
 
     type: Literal[Caller.ENFUSION] = Caller.ENFUSION
-    gene_5prime: str = Field(..., description="The 5' gene fusion partner")
-    gene_3prime: str = Field(..., description="The 3' gene fusion partner")
-    chr_5prime: int = Field(..., description="The 5' gene fusion partner chromosome")
-    chr_3prime: int = Field(..., description="The 3' gene fusion partner chromosome")
+    gene_5prime: str = Field(None, description="The 5' gene fusion partner")
+    gene_3prime: str = Field(None, description="The 3' gene fusion partner")
+    chr_5prime: int = Field(None, description="The 5' gene fusion partner chromosome")
+    chr_3prime: int = Field(None, description="The 3' gene fusion partner chromosome")
     break_5prime: int = Field(
-        ..., description="The 5' gene fusion partner genomic breakpoint"
+        None, description="The 5' gene fusion partner genomic breakpoint"
     )
     break_3prime: int = Field(
-        ..., description="The 3' gene fusion partner genomic breakpoint"
+        None, description="The 3' gene fusion partner genomic breakpoint"
     )
     fusion_junction_sequence: str | None = Field(
         None, description="The sequence near the fusion junction"
     )
 
+    def load_records(self, path: Path) -> list["EnFusion"]:
+        """Load fusions from EnFusion tsv file
+
+        :param path: The path to the file of Enfusion fusions
+        :return A list of Enfusion objects; raises ValueError if the file does not exist
+        """
+        column_rename = {
+            "Gene1": "gene_5prime",
+            "Gene2": "gene_3prime",
+            "Chr1": "chr_5prime",
+            "Chr2": "chr_3prime",
+            "Break1": "break_5prime",
+            "Break2": "break_3prime",
+            "FusionJunctionSequence": "fusion_junction_sequence",
+        }
+        return self._process_fusion_caller_rows(path, EnFusion, column_rename)
+
 
 class Genie(BaseModel):
     """Define parameters for Genie model"""
 
     type: Literal[Caller.GENIE] = Caller.GENIE
-    site1_hugo: str = Field(..., description="The HUGO symbol reported at site 1")
-    site2_hugo: str = Field(..., description="The HUGO symbol reported at site 2")
-    site1_chrom: int = Field(..., description="The chromosome reported at site 1")
-    site2_chrom: int = Field(..., description="The chromosome reported at site 2")
-    site1_pos: int = Field(..., description="The breakpoint reported at site 1")
-    site2_pos: int = Field(..., description="The breakpoint reported at site 2")
-    annot: str = Field(..., description="The annotation for the fusion event")
+    site1_hugo: str = Field(None, description="The HUGO symbol reported at site 1")
+    site2_hugo: str = Field(None, description="The HUGO symbol reported at site 2")
+    site1_chrom: int = Field(None, description="The chromosome reported at site 1")
+    site2_chrom: int = Field(None, description="The chromosome reported at site 2")
+    site1_pos: int = Field(None, description="The breakpoint reported at site 1")
+    site2_pos: int = Field(None, description="The breakpoint reported at site 2")
+    annot: str = Field(None, description="The annotation for the fusion event")
     reading_frame: str = Field(
-        ..., description="The reading frame status of the fusion"
+        None, description="The reading frame status of the fusion"
     )
+
+    def load_records(self, path: Path) -> list["Genie"]:
+        """Load fusions from Genie txt file
+
+        :param path: The path to the file of Genie structural variants
+        :return A list of Genie objects; raises ValueError if the file does not exist
+        """
+        column_rename = {
+            "Site1_Hugo_Symbol": "site1_hugo",
+            "Site2_Hugo_Symbol": "site2_hugo",
+            "Site1_Chromosome": "site1_chrom",
+            "Site2_Chromosome": "site2_chrom",
+            "Site1_Position": "site1_pos",
+            "Site2_Position": "site2_pos",
+            "Site2_Effect_On_Frame": "reading_frame",
+            "Annotation": "annot",
+        }
+        return self._process_fusion_caller_rows(path, Genie, column_rename)
diff --git a/tests/test_extractions.py b/tests/test_extractions.py
index f9deb8b..7742e7f 100644
--- a/tests/test_extractions.py
+++ b/tests/test_extractions.py
@@ -13,17 +13,27 @@
     get_jaffa_records,
     get_star_fusion_records,
 )
+from fusor.fusion_caller_models import JAFFA
 
 
 def test_get_jaffa_records(fixture_data_dir):
     """Test that get_jaffa_records works correctly"""
-    path = fixture_data_dir / "jaffa_results.csv"
-    fusions_list = get_jaffa_records(Path(path))
-    assert len(fusions_list) == 491
+    path = Path(fixture_data_dir / "jaffa_results.csv")
+    jaffa_instance = JAFFA()
+    records = jaffa_instance.load_records(path)
+    assert len(records) == 491
 
-    path = fixture_data_dir / "jaffa_resultss.csv"
+    path = Path(fixture_data_dir / "jaffa_resultss.csv")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert get_jaffa_records(path)
+        assert jaffa_instance.load_records(path)
+
+    # path = fixture_data_dir / "jaffa_results.csv"
+    # fusions_list = get_jaffa_records(Path(path))
+    # assert len(fusions_list) == 491
+
+    # path = fixture_data_dir / "jaffa_resultss.csv"
+    # with pytest.raises(ValueError, match=f"{path} does not exist"):
+    #   assert get_jaffa_records(path)
 
 
 def test_get_star_fusion_records(fixture_data_dir):

From 3b2c5e7f27c4742af464939a5f083958c4ec9c78 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 23 Jan 2025 17:12:26 -0500
Subject: [PATCH 13/20] Add fusion caller models tests

---
 tests/test_fusion_caller_models.py | 107 +++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 tests/test_fusion_caller_models.py

diff --git a/tests/test_fusion_caller_models.py b/tests/test_fusion_caller_models.py
new file mode 100644
index 0000000..c2177cf
--- /dev/null
+++ b/tests/test_fusion_caller_models.py
@@ -0,0 +1,107 @@
+"""Module for testing extraction methods"""
+
+from pathlib import Path
+
+import pytest
+
+from fusor.fusion_caller_models import (
+    JAFFA,
+    Arriba,
+    Cicero,
+    EnFusion,
+    FusionCatcher,
+    Genie,
+    STARFusion,
+)
+
+
+def test_get_jaffa_records(fixture_data_dir):
+    """Test that get_jaffa_records works correctly"""
+    path = Path(fixture_data_dir / "jaffa_results.csv")
+    jaffa_instance = JAFFA()
+    records = jaffa_instance.load_records(path)
+    assert len(records) == 491
+
+    path = Path(fixture_data_dir / "jaffa_resultss.csv")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert jaffa_instance.load_records(path)
+
+
+def test_get_star_fusion_records(fixture_data_dir):
+    """Test that get_star_fusion_records works correctly"""
+    path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv")
+    sf_instance = STARFusion()
+    records = sf_instance.load_records(path)
+    assert len(records) == 37
+
+    path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert sf_instance.load_records(path)
+
+    # path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv"
+    # fusions_list = get_star_fusion_records(Path(path))
+    # assert len(fusions_list) == 37
+
+    # path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs"
+    # with pytest.raises(ValueError, match=f"{path} does not exist"):
+    #   assert get_jaffa_records(path)
+
+
+def test_get_fusion_catcher_records(fixture_data_dir):
+    """Test that get_fusion_catcher_records works correctly"""
+    path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txt")
+    fc_instance = FusionCatcher()
+    fusions_list = fc_instance.load_records(path)
+    assert len(fusions_list) == 355
+
+    path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txts")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert fc_instance.load_records(path)
+
+
+def test_get_arriba_records(fixture_data_dir):
+    """Test that get_arriba_records works correctly"""
+    path = Path(fixture_data_dir / "fusions_arriba_test.tsv")
+    arriba = Arriba()
+    fusions_list = arriba.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "fusionsd_arriba_test.tsv")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert arriba.load_records(path)
+
+
+def test_get_cicero_records(fixture_data_dir):
+    """Test that get_cicero_records works correctly"""
+    path = Path(fixture_data_dir / "annotated.fusion.txt")
+    cicero = Cicero()
+    fusions_list = cicero.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "annnotated.fusion.txt")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert cicero.load_records(path)
+
+
+def test_get_enfusion_records(fixture_data_dir):
+    """Test that get_enfusion_records works correctly"""
+    path = Path(fixture_data_dir / "enfusion_test.csv")
+    enfusion = EnFusion()
+    fusions_list = enfusion.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "enfusions_test.csv")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert enfusion.load_records(path)
+
+
+def test_get_genie_records(fixture_data_dir):
+    """Test that get_genie_records works correctly"""
+    path = Path(fixture_data_dir / "genie_test.txt")
+    genie = Genie()
+    fusions_list = genie.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "genie_tests.txt")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert genie.load_records(path)

From 8234e715c25737ede9b7ba4b12cb925687428861 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 23 Jan 2025 17:21:50 -0500
Subject: [PATCH 14/20] Move extraction methods into fusion caller classes

---
 src/fusor/extract.py              | 175 ------------------------------
 src/fusor/fusion_caller_models.py |  24 ++--
 tests/test_extractions.py         | 102 -----------------
 3 files changed, 14 insertions(+), 287 deletions(-)
 delete mode 100644 src/fusor/extract.py
 delete mode 100644 tests/test_extractions.py

diff --git a/src/fusor/extract.py b/src/fusor/extract.py
deleted file mode 100644
index 4418845..0000000
--- a/src/fusor/extract.py
+++ /dev/null
@@ -1,175 +0,0 @@
-"""Module for extracting data from fusion caller output and coverting to pydantic
-objects
-"""
-
-import csv
-from pathlib import Path
-
-from fusor.fusion_caller_models import (
-    JAFFA,
-    Arriba,
-    Cicero,
-    EnFusion,
-    FusionCaller,
-    FusionCatcher,
-    Genie,
-    STARFusion,
-)
-
-
-def _does_file_exist(path: Path) -> None:
-    """Check if fusions file exists
-
-    :param path: The path to the file
-    :return None
-    :raise ValueError if the file does not exist at the specified path
-    """
-    if not path.exists():
-        statement = f"{path!s} does not exist"
-        raise ValueError(statement)
-    return
-
-
-def _process_fusion_caller_rows(
-    path: Path, caller: FusionCaller, column_rename: dict
-) -> list[FusionCaller]:
-    """Convert rows of fusion caller output to Pydantic classes
-
-    :param path: The path to the fusions file
-    :param caller: The name of the fusion caller
-    :param column_rename: A dictionary of column mappings
-    :return: A list of fusions, represented as Pydantic objects
-    """
-    _does_file_exist(path)
-    fusions_list = []
-    with path.open() as csvfile:
-        reader = csv.DictReader(csvfile, delimiter="," if caller == JAFFA else "\t")
-        for row in reader:
-            row = {column_rename.get(key, key): value for key, value in row.items()}
-            fusions_list.append(caller(**row))
-    return fusions_list
-
-
-def get_jaffa_records(path: Path) -> list[JAFFA]:
-    """Load fusions from JAFFA csv file
-
-    :param path: The path to the file of JAFFA fusions
-    :return A list of JAFFA objects, or None if the specified file does not exist
-    """
-    column_rename = {
-        "fusion genes": "fusion_genes",
-        "spanning reads": "spanning_reads",
-        "spanning pairs": "spanning_pairs",
-    }
-    return _process_fusion_caller_rows(path, JAFFA, column_rename)
-
-
-def get_star_fusion_records(path: Path) -> list[STARFusion] | None:
-    """Load fusions from STAR-Fusion tsv file
-
-    :param path: The path to the file of STAR-Fusion fusions
-    :return A list of STAR-Fusion objects, or None if the specified file does not exist
-    """
-    column_rename = {
-        "LeftGene": "left_gene",
-        "RightGene": "right_gene",
-        "LeftBreakpoint": "left_breakpoint",
-        "RightBreakpoint": "right_breakpoint",
-        "JunctionReadCount": "junction_read_count",
-        "SpanningFragCount": "spanning_frag_count",
-    }
-    return _process_fusion_caller_rows(path, STARFusion, column_rename)
-
-
-def get_fusion_catcher_records(path: Path) -> list[FusionCatcher] | None:
-    """Load fusions from FusionCatcher txt file
-
-    :param path: The path to the file of FusionCatcher fusions
-    :return A list of FusionCatcher objects, or None if the specified file does not exist
-    """
-    column_rename = {
-        "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
-        "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
-        "Fusion_point_for_gene_1(5end_fusion_partner)": "five_prime_fusion_point",
-        "Fusion_point_for_gene_2(3end_fusion_partner)": "three_prime_fusion_point",
-        "Predicted_effect": "predicted_effect",
-        "Spanning_unique_reads": "spanning_unique_reads",
-        "Spanning_pairs": "spanning_reads",
-        "Fusion_sequence": "fusion_sequence",
-    }
-    return _process_fusion_caller_rows(path, FusionCatcher, column_rename)
-
-
-def get_arriba_records(path: Path) -> list[Arriba] | None:
-    """Load fusions from Arriba tsv file
-
-    :param path: The path to the file of Arriba fusions
-    :return A list of Arriba objects, or None if the specified file does not exist
-    """
-    column_rename = {
-        "#gene1": "gene1",
-        "strand1(gene/fusion)": "strand1",
-        "strand2(gene/fusion)": "strand2",
-        "type": "event_type",
-        "reading_frame": "rf",
-    }
-    return _process_fusion_caller_rows(path, Arriba, column_rename)
-
-
-def get_cicero_records(path: Path) -> list[Cicero] | None:
-    """Load fusions from Cicero txt file
-
-    :param path: The path to the file of Cicero fusions
-    :return A list of Cicero objects, or None if the specified file does not exist
-    """
-    column_rename = {
-        "geneA": "gene_5prime",
-        "geneB": "gene_3prime",
-        "chrA": "chr_5prime",
-        "chrB": "chr_3prime",
-        "posA": "pos_5prime",
-        "posB": "pos_3prime",
-        "type": "event_type",
-        "readsA": "reads_5prime",
-        "readsB": "reads_3prime",
-        "coverageA": "coverage_5prime",
-        "coverageB": "coverage_3prime",
-    }
-    return _process_fusion_caller_rows(path, Cicero, column_rename)
-
-
-def get_enfusion_records(path: Path) -> list[EnFusion] | None:
-    """Load fusions from EnFusion tsv file
-
-    :param path: The path to the file of Enfusion fusions
-    :return A list of Enfusion objects, or None if the specified file does not exist
-    """
-    column_rename = {
-        "Gene1": "gene_5prime",
-        "Gene2": "gene_3prime",
-        "Chr1": "chr_5prime",
-        "Chr2": "chr_3prime",
-        "Break1": "break_5prime",
-        "Break2": "break_3prime",
-        "FusionJunctionSequence": "fusion_junction_sequence",
-    }
-    return _process_fusion_caller_rows(path, EnFusion, column_rename)
-
-
-def get_genie_records(path: Path) -> list[Genie] | None:
-    """Load fusions from Genie txt file
-
-    :param path: The path to the file of Genie structural variants
-    :return A list of Genie objects, or None if the specified file does not exist
-    """
-    column_rename = {
-        "Site1_Hugo_Symbol": "site1_hugo",
-        "Site2_Hugo_Symbol": "site2_hugo",
-        "Site1_Chromosome": "site1_chrom",
-        "Site2_Chromosome": "site2_chrom",
-        "Site1_Position": "site1_pos",
-        "Site2_Position": "site2_pos",
-        "Site2_Effect_On_Frame": "reading_frame",
-        "Annotation": "annot",
-    }
-    return _process_fusion_caller_rows(path, Genie, column_rename)
diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 3afc903..13047dd 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -123,11 +123,13 @@ def load_records(self, path: Path) -> list["JAFFA"]:
         return self._process_fusion_caller_rows(path, JAFFA, column_rename)
 
 
-class STARFusion(BaseModel):
+class STARFusion(FusionCaller):
     """Define parameters for STAR-Fusion model"""
 
     type: Literal[Caller.STAR_FUSION] = Caller.STAR_FUSION
-    left_gene: str = Field(..., description="The gene indicated in the LeftGene column")
+    left_gene: str = Field(
+        None, description="The gene indicated in the LeftGene column"
+    )
     right_gene: str = Field(
         None, description="The gene indicated in the RightGene column"
     )
@@ -164,7 +166,7 @@ def load_records(self, path: Path) -> list["STARFusion"]:
         return self._process_fusion_caller_rows(path, STARFusion, column_rename)
 
 
-class FusionCatcher(BaseModel):
+class FusionCatcher(FusionCaller):
     """Define parameters for FusionCatcher model"""
 
     type: Literal[Caller.FUSION_CATCHER] = Caller.FUSION_CATCHER
@@ -215,12 +217,12 @@ def load_records(self, path: Path) -> list["FusionCatcher"]:
         return self._process_fusion_caller_rows(path, FusionCatcher, column_rename)
 
 
-class Arriba(BaseModel):
+class Arriba(FusionCaller):
     """Define parameters for Arriba model"""
 
     type: Literal[Caller.ARRIBA] = Caller.ARRIBA
-    gene1: str = Field(..., description="The 5' gene fusion partner")
-    gene2: str = Field(..., description="The 3' gene fusion partner")
+    gene1: str = Field(None, description="The 5' gene fusion partner")
+    gene2: str = Field(None, description="The 3' gene fusion partner")
     strand1: str = Field(
         None, description="The strand information for the 5' gene fusion partner"
     )
@@ -284,7 +286,7 @@ def load_records(self, path: Path) -> list["Arriba"]:
         return self._process_fusion_caller_rows(path, Arriba, column_rename)
 
 
-class Cicero(BaseModel):
+class Cicero(FusionCaller):
     """Define parameters for CICERO model"""
 
     type: Literal[Caller.CICERO] = Caller.CICERO
@@ -320,7 +322,9 @@ class Cicero(BaseModel):
     coverage_3prime: int = Field(
         None, description="The fragment coverage at the 3' breakpoint"
     )
-    contig: str = Field(..., description="The assembled contig sequence for the fusion")
+    contig: str = Field(
+        None, description="The assembled contig sequence for the fusion"
+    )
 
     def load_records(self, path: Path) -> list["Cicero"]:
         """Load fusions from Cicero txt file
@@ -344,7 +348,7 @@ def load_records(self, path: Path) -> list["Cicero"]:
         return self._process_fusion_caller_rows(path, Cicero, column_rename)
 
 
-class EnFusion(BaseModel):
+class EnFusion(FusionCaller):
     """Define parameters for EnFusion model"""
 
     type: Literal[Caller.ENFUSION] = Caller.ENFUSION
@@ -380,7 +384,7 @@ def load_records(self, path: Path) -> list["EnFusion"]:
         return self._process_fusion_caller_rows(path, EnFusion, column_rename)
 
 
-class Genie(BaseModel):
+class Genie(FusionCaller):
     """Define parameters for Genie model"""
 
     type: Literal[Caller.GENIE] = Caller.GENIE
diff --git a/tests/test_extractions.py b/tests/test_extractions.py
deleted file mode 100644
index 7742e7f..0000000
--- a/tests/test_extractions.py
+++ /dev/null
@@ -1,102 +0,0 @@
-"""Module for testing extraction methods"""
-
-from pathlib import Path
-
-import pytest
-
-from fusor.extract import (
-    get_arriba_records,
-    get_cicero_records,
-    get_enfusion_records,
-    get_fusion_catcher_records,
-    get_genie_records,
-    get_jaffa_records,
-    get_star_fusion_records,
-)
-from fusor.fusion_caller_models import JAFFA
-
-
-def test_get_jaffa_records(fixture_data_dir):
-    """Test that get_jaffa_records works correctly"""
-    path = Path(fixture_data_dir / "jaffa_results.csv")
-    jaffa_instance = JAFFA()
-    records = jaffa_instance.load_records(path)
-    assert len(records) == 491
-
-    path = Path(fixture_data_dir / "jaffa_resultss.csv")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert jaffa_instance.load_records(path)
-
-    # path = fixture_data_dir / "jaffa_results.csv"
-    # fusions_list = get_jaffa_records(Path(path))
-    # assert len(fusions_list) == 491
-
-    # path = fixture_data_dir / "jaffa_resultss.csv"
-    # with pytest.raises(ValueError, match=f"{path} does not exist"):
-    #   assert get_jaffa_records(path)
-
-
-def test_get_star_fusion_records(fixture_data_dir):
-    """Test that get_star_fusion_records works correctly"""
-    path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv"
-    fusions_list = get_star_fusion_records(Path(path))
-    assert len(fusions_list) == 37
-
-    path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs"
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert get_jaffa_records(path)
-
-
-def test_get_fusion_catcher_records(fixture_data_dir):
-    """Test that get_fusion_catcher_records works correctly"""
-    path = fixture_data_dir / "final-list_candidate-fusion-genes.txt"
-    fusions_list = get_fusion_catcher_records(Path(path))
-    assert len(fusions_list) == 355
-
-    path = fixture_data_dir / "final-list_candidate-fusion-genes.txts"
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert get_jaffa_records(path)
-
-
-def test_get_arriba_records(fixture_data_dir):
-    """Test that get_arriba_records works correctly"""
-    path = fixture_data_dir / "fusions_arriba_test.tsv"
-    fusions_list = get_arriba_records(Path(path))
-    assert len(fusions_list) == 1
-
-    path = fixture_data_dir / "fusionsd_arriba_test.tsv"
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert get_jaffa_records(path)
-
-
-def test_get_cicero_records(fixture_data_dir):
-    """Test that get_cicero_records works correctly"""
-    path = fixture_data_dir / "annotated.fusion.txt"
-    fusions_list = get_cicero_records(Path(path))
-    assert len(fusions_list) == 1
-
-    path = fixture_data_dir / "annnotated.fusion.txt"
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert get_jaffa_records(path)
-
-
-def test_get_enfusion_records(fixture_data_dir):
-    """Test that get_enfusion_records works correctly"""
-    path = fixture_data_dir / "enfusion_test.csv"
-    fusions_list = get_enfusion_records(Path(path))
-    assert len(fusions_list) == 1
-
-    path = fixture_data_dir / "enfusions_test.csv"
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert get_jaffa_records(path)
-
-
-def test_get_genie_records(fixture_data_dir):
-    """Test that get_genie_records works correctly"""
-    path = fixture_data_dir / "genie_test.txt"
-    fusions_list = get_genie_records(Path(path))
-    assert len(fusions_list) == 1
-
-    path = fixture_data_dir / "genie_tests.txt"
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert get_jaffa_records(path)

From 91bd5bc8e9db1f7ed0e507cf1fc99002d3e707e5 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Thu, 23 Jan 2025 17:24:52 -0500
Subject: [PATCH 15/20] Rename file and remove commented out code

---
 tests/test_fusion_caller_models.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/tests/test_fusion_caller_models.py b/tests/test_fusion_caller_models.py
index c2177cf..a3191a9 100644
--- a/tests/test_fusion_caller_models.py
+++ b/tests/test_fusion_caller_models.py
@@ -1,4 +1,4 @@
-"""Module for testing extraction methods"""
+"""Module for testing fusion caller classes"""
 
 from pathlib import Path
 
@@ -38,14 +38,6 @@ def test_get_star_fusion_records(fixture_data_dir):
     with pytest.raises(ValueError, match=f"{path} does not exist"):
         assert sf_instance.load_records(path)
 
-    # path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv"
-    # fusions_list = get_star_fusion_records(Path(path))
-    # assert len(fusions_list) == 37
-
-    # path = fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs"
-    # with pytest.raises(ValueError, match=f"{path} does not exist"):
-    #   assert get_jaffa_records(path)
-
 
 def test_get_fusion_catcher_records(fixture_data_dir):
     """Test that get_fusion_catcher_records works correctly"""

From 3b7a50a7a0926b11b0fed74d81cb3c563c27f219 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 24 Jan 2025 11:30:49 -0500
Subject: [PATCH 16/20] Add static and class methods

---
 src/fusor/fusion_caller_models.py  | 210 ++++++++++++++---------------
 tests/test_fusion_caller_models.py |  35 ++---
 2 files changed, 118 insertions(+), 127 deletions(-)

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 13047dd..4b448bb 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -6,7 +6,7 @@
 from pathlib import Path
 from typing import Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class Caller(str, Enum):
@@ -27,13 +27,10 @@ class FusionCaller(ABC, BaseModel):
     """ABC for fusion callers"""
 
     type: Caller
+    model_config = ConfigDict(extra="allow")
 
-    class Config:
-        """Allow extra fields from fusion callers to be provided"""
-
-        extra = "allow"
-
-    def _does_file_exist(self, path: Path) -> None:
+    @staticmethod
+    def _does_file_exist(path: Path) -> None:
         """Check if fusions file exists
 
         :param path: The path to the file
@@ -45,25 +42,27 @@ def _does_file_exist(self, path: Path) -> None:
             raise ValueError(statement)
         return
 
+    @classmethod
     def _process_fusion_caller_rows(
-        self, path: Path, caller: Caller, column_rename: dict
-    ) -> list[Caller]:
+        cls,
+        path: Path,
+        column_rename: dict,
+        delimeter: str,
+    ) -> list["FusionCaller"]:
         """Convert rows of fusion caller output to Pydantic classes
 
         :param path: The path to the fusions file
-        :param caller: The name of the fusion caller
         :param column_rename: A dictionary of column mappings
+        :param delimeter: The delimeter for the fusions file
         :return: A list of fusions, represented as Pydantic objects
         """
-        self._does_file_exist(path)
+        cls._does_file_exist(path)
         fusions_list = []
         with path.open() as csvfile:
-            reader = csv.DictReader(
-                csvfile, delimiter="," if caller == Caller.JAFFA else "\t"
-            )
+            reader = csv.DictReader(csvfile, delimiter=delimeter)
             for row in reader:
                 row = {column_rename.get(key, key): value for key, value in row.items()}
-                fusions_list.append(caller(**row))
+                fusions_list.append(cls(**row))
         return fusions_list
 
     @abstractmethod
@@ -76,40 +75,41 @@ class JAFFA(FusionCaller):
 
     type: Literal[Caller.JAFFA] = Caller.JAFFA
     fusion_genes: str = Field(
-        None, description="A string containing the two fusion partners"
+        ..., description="A string containing the two fusion partners"
     )
     chrom1: str = Field(
-        None, description="The chromosome indicated in the chrom1 column"
+        ..., description="The chromosome indicated in the chrom1 column"
     )
     base1: int = Field(
-        None, description="The genomic position indicated in the base1 column"
+        ..., description="The genomic position indicated in the base1 column"
     )
     chrom2: str = Field(
-        None, description="The chromosome indicated in the chrom2 column"
+        ..., description="The chromosome indicated in the chrom2 column"
     )
     base2: int = Field(
-        None, description="The genomic position indicated in the base2 column"
+        ..., description="The genomic position indicated in the base2 column"
     )
     rearrangement: bool = Field(
-        None, description=" A boolean indicating if a rearrangement occurred"
+        ..., description=" A boolean indicating if a rearrangement occurred"
     )
     classification: str = Field(
-        None, description="The classification associated with the called fusion"
+        ..., description="The classification associated with the called fusion"
     )
     inframe: bool | str = Field(
-        None,
+        ...,
         description="A boolean or string indicating if the fusion occurred in-frame",
     )
     spanning_reads: int = Field(
-        None,
+        ...,
         description="The number of detected reads that span the junction between the two transcript. Although described as spanning reads, this aligns with our definition of split reads i.e. reads that have sequence belonging to the two fusion partners",
     )
     spanning_pairs: int = Field(
-        None,
+        ...,
         description="The number of detected reads that align entirely on either side of the breakpoint",
     )
 
-    def load_records(self, path: Path) -> list["JAFFA"]:
+    @classmethod
+    def load_records(cls, path: Path) -> list["JAFFA"]:
         """Load fusions from JAFFA csv file
 
         :param path: The path to the file of JAFFA fusions
@@ -120,36 +120,35 @@ def load_records(self, path: Path) -> list["JAFFA"]:
             "spanning reads": "spanning_reads",
             "spanning pairs": "spanning_pairs",
         }
-        return self._process_fusion_caller_rows(path, JAFFA, column_rename)
+        return cls._process_fusion_caller_rows(path, column_rename, ",")
 
 
 class STARFusion(FusionCaller):
     """Define parameters for STAR-Fusion model"""
 
     type: Literal[Caller.STAR_FUSION] = Caller.STAR_FUSION
-    left_gene: str = Field(
-        None, description="The gene indicated in the LeftGene column"
-    )
+    left_gene: str = Field(..., description="The gene indicated in the LeftGene column")
     right_gene: str = Field(
-        None, description="The gene indicated in the RightGene column"
+        ..., description="The gene indicated in the RightGene column"
     )
     left_breakpoint: str = Field(
-        None, description="The gene indicated in the LeftBreakpoint column"
+        ..., description="The gene indicated in the LeftBreakpoint column"
     )
     right_breakpoint: str = Field(
-        None, description="The gene indicated in the RightBreakpoint column"
+        ..., description="The gene indicated in the RightBreakpoint column"
     )
-    annots: str = Field(None, description="The annotations associated with the fusion")
+    annots: str = Field(..., description="The annotations associated with the fusion")
     junction_read_count: int = Field(
-        None,
+        ...,
         description="The number of RNA-seq fragments that split the junction between the two transcript segments (from STAR-Fusion documentation)",
     )
     spanning_frag_count: int = Field(
-        None,
+        ...,
         description="The number of RNA-seq fragments that encompass the fusion junction such that one read of the pair aligns to a different gene than the other paired-end read of that fragment (from STAR-Fusion documentation)",
     )
 
-    def load_records(self, path: Path) -> list["STARFusion"]:
+    @classmethod
+    def load_records(cls, path: Path) -> list["STARFusion"]:
         """Load fusions from STAR-Fusion tsv file
 
         :param path: The path to the file of STAR-Fusion fusions
@@ -163,7 +162,7 @@ def load_records(self, path: Path) -> list["STARFusion"]:
             "JunctionReadCount": "junction_read_count",
             "SpanningFragCount": "spanning_frag_count",
         }
-        return self._process_fusion_caller_rows(path, STARFusion, column_rename)
+        return cls._process_fusion_caller_rows(path, column_rename, "\t")
 
 
 class FusionCatcher(FusionCaller):
@@ -171,34 +170,35 @@ class FusionCatcher(FusionCaller):
 
     type: Literal[Caller.FUSION_CATCHER] = Caller.FUSION_CATCHER
     five_prime_partner: str = Field(
-        None, description="Gene symbol for the 5' fusion partner"
+        ..., description="Gene symbol for the 5' fusion partner"
     )
     three_prime_partner: str = Field(
-        None, description="Gene symbol for the 3' fusion partner"
+        ..., description="Gene symbol for the 3' fusion partner"
     )
     five_prime_fusion_point: str = Field(
-        None,
+        ...,
         description="Chromosomal position for the 5' end of the fusion junction. This coordinate is 1-based",
     )
     three_prime_fusion_point: str = Field(
-        None,
+        ...,
         description="Chromosomal position for the 3' end of the fusion junction. This coordinate is 1-based",
     )
     predicted_effect: str = Field(
-        None,
+        ...,
         description="The predicted effect of the fusion event, created using annotation from the Ensembl database",
     )
     spanning_unique_reads: int = Field(
-        None, description="The number of unique reads that map on the fusion junction"
+        ..., description="The number of unique reads that map on the fusion junction"
     )
     spanning_reads: int = Field(
-        None, description="The number of paired reads that support the fusion"
+        ..., description="The number of paired reads that support the fusion"
     )
     fusion_sequence: str = Field(
-        None, description="The inferred sequence around the fusion junction"
+        ..., description="The inferred sequence around the fusion junction"
     )
 
-    def load_records(self, path: Path) -> list["FusionCatcher"]:
+    @classmethod
+    def load_records(cls, path: Path) -> list["FusionCatcher"]:
         """Load fusions from FusionCatcher txt file
 
         :param path: The path to the file of FusionCatcher fusions
@@ -214,63 +214,60 @@ def load_records(self, path: Path) -> list["FusionCatcher"]:
             "Spanning_pairs": "spanning_reads",
             "Fusion_sequence": "fusion_sequence",
         }
-        return self._process_fusion_caller_rows(path, FusionCatcher, column_rename)
+        return cls._process_fusion_caller_rows(path, column_rename, "\t")
 
 
 class Arriba(FusionCaller):
     """Define parameters for Arriba model"""
 
     type: Literal[Caller.ARRIBA] = Caller.ARRIBA
-    gene1: str = Field(None, description="The 5' gene fusion partner")
-    gene2: str = Field(None, description="The 3' gene fusion partner")
+    gene1: str = Field(..., description="The 5' gene fusion partner")
+    gene2: str = Field(..., description="The 3' gene fusion partner")
     strand1: str = Field(
-        None, description="The strand information for the 5' gene fusion partner"
+        ..., description="The strand information for the 5' gene fusion partner"
     )
     strand2: str = Field(
-        None, description="The strand information for the 3' gene fusion partner"
-    )
-    breakpoint1: str = Field(
-        None, description="The chromosome and breakpoint for gene1"
-    )
-    breakpoint2: str = Field(
-        None, description="The chromosome and breakpoint for gene2"
+        ..., description="The strand information for the 3' gene fusion partner"
     )
+    breakpoint1: str = Field(..., description="The chromosome and breakpoint for gene1")
+    breakpoint2: str = Field(..., description="The chromosome and breakpoint for gene2")
     event_type: str = Field(
-        None, description=" An inference about the type of fusion event"
+        ..., description=" An inference about the type of fusion event"
     )
     confidence: str = Field(
-        None, description="A metric describing the confidence of the fusion prediction"
+        ..., description="A metric describing the confidence of the fusion prediction"
     )
     direction1: str = Field(
-        None,
+        ...,
         description="A description that indicates if the transcript segment starts or ends at breakpoint1",
     )
     direction2: str = Field(
-        None,
+        ...,
         description="A description that indicates if the transcript segment starts or ends at breakpoint2",
     )
     rf: str = Field(
-        None,
+        ...,
         description="A description if the reading frame is preserved for the fusion",
     )
     split_reads1: int = Field(
-        None, description="Number of supporting split fragments with anchor in gene1"
+        ..., description="Number of supporting split fragments with anchor in gene1"
     )
     split_reads2: int = Field(
-        None, description="Number of supporting split fragments with anchor in gene2"
+        ..., description="Number of supporting split fragments with anchor in gene2"
     )
     discordant_mates: int = Field(
-        None, description="Number of discordant mates supporting the fusion"
+        ..., description="Number of discordant mates supporting the fusion"
     )
     coverage1: int = Field(
-        None, description="Number of fragments retained near breakpoint1"
+        ..., description="Number of fragments retained near breakpoint1"
     )
     coverage2: int = Field(
-        None, description="Number of fragments retained near breakpoint2"
+        ..., description="Number of fragments retained near breakpoint2"
     )
-    fusion_transcript: str = Field(None, description="The assembled fusion transcript")
+    fusion_transcript: str = Field(..., description="The assembled fusion transcript")
 
-    def load_records(self, path: Path) -> list["Arriba"]:
+    @classmethod
+    def load_records(cls, path: Path) -> list["Arriba"]:
         """Load fusions from Arriba tsv file
 
         :param path: The path to the file of Arriba fusions
@@ -283,50 +280,49 @@ def load_records(self, path: Path) -> list["Arriba"]:
             "type": "event_type",
             "reading_frame": "rf",
         }
-        return self._process_fusion_caller_rows(path, Arriba, column_rename)
+        return cls._process_fusion_caller_rows(path, column_rename, "\t")
 
 
 class Cicero(FusionCaller):
     """Define parameters for CICERO model"""
 
     type: Literal[Caller.CICERO] = Caller.CICERO
-    gene_5prime: str = Field(None, description="The gene symbol for the 5' partner")
-    gene_3prime: str = Field(None, description="The gene symbol for the 3' partner")
-    chr_5prime: str = Field(None, description="The chromosome for the 5' partner")
-    chr_3prime: str = Field(None, description="The chromosome for the 3' partner")
+    gene_5prime: str = Field(..., description="The gene symbol for the 5' partner")
+    gene_3prime: str = Field(..., description="The gene symbol for the 3' partner")
+    chr_5prime: str = Field(..., description="The chromosome for the 5' partner")
+    chr_3prime: str = Field(..., description="The chromosome for the 3' partner")
     pos_5prime: int = Field(
-        None, description="The genomic breakpoint for the 5' partner"
+        ..., description="The genomic breakpoint for the 5' partner"
     )
     pos_3prime: int = Field(
-        None, description="The genomic breakpoint for the 3' partner"
+        ..., description="The genomic breakpoint for the 3' partner"
     )
     sv_ort: str = Field(
-        None,
+        ...,
         description="Whether the mapping orientation of assembled contig (driven by structural variation) has confident biological meaning",
     )
     event_type: str = Field(
-        None,
+        ...,
         description="The structural variation event that created the called fusion",
     )
     reads_5prime: int = Field(
-        None,
+        ...,
         description="The number of reads that support the breakpoint for the 5' partner",
     )
     reads_3prime: int = Field(
-        None,
+        ...,
         description="The number of reads that support the breakpoint for the 3' partner",
     )
     coverage_5prime: int = Field(
-        None, description="The fragment coverage at the 5' breakpoint"
+        ..., description="The fragment coverage at the 5' breakpoint"
     )
     coverage_3prime: int = Field(
-        None, description="The fragment coverage at the 3' breakpoint"
-    )
-    contig: str = Field(
-        None, description="The assembled contig sequence for the fusion"
+        ..., description="The fragment coverage at the 3' breakpoint"
     )
+    contig: str = Field(..., description="The assembled contig sequence for the fusion")
 
-    def load_records(self, path: Path) -> list["Cicero"]:
+    @classmethod
+    def load_records(cls, path: Path) -> list["Cicero"]:
         """Load fusions from Cicero txt file
 
         :param path: The path to the file of Cicero fusions
@@ -345,28 +341,29 @@ def load_records(self, path: Path) -> list["Cicero"]:
             "coverageA": "coverage_5prime",
             "coverageB": "coverage_3prime",
         }
-        return self._process_fusion_caller_rows(path, Cicero, column_rename)
+        return cls._process_fusion_caller_rows(path, column_rename, "\t")
 
 
 class EnFusion(FusionCaller):
     """Define parameters for EnFusion model"""
 
     type: Literal[Caller.ENFUSION] = Caller.ENFUSION
-    gene_5prime: str = Field(None, description="The 5' gene fusion partner")
-    gene_3prime: str = Field(None, description="The 3' gene fusion partner")
-    chr_5prime: int = Field(None, description="The 5' gene fusion partner chromosome")
-    chr_3prime: int = Field(None, description="The 3' gene fusion partner chromosome")
+    gene_5prime: str = Field(..., description="The 5' gene fusion partner")
+    gene_3prime: str = Field(..., description="The 3' gene fusion partner")
+    chr_5prime: int = Field(..., description="The 5' gene fusion partner chromosome")
+    chr_3prime: int = Field(..., description="The 3' gene fusion partner chromosome")
     break_5prime: int = Field(
-        None, description="The 5' gene fusion partner genomic breakpoint"
+        ..., description="The 5' gene fusion partner genomic breakpoint"
     )
     break_3prime: int = Field(
-        None, description="The 3' gene fusion partner genomic breakpoint"
+        ..., description="The 3' gene fusion partner genomic breakpoint"
     )
     fusion_junction_sequence: str | None = Field(
         None, description="The sequence near the fusion junction"
     )
 
-    def load_records(self, path: Path) -> list["EnFusion"]:
+    @classmethod
+    def load_records(cls, path: Path) -> list["EnFusion"]:
         """Load fusions from EnFusion tsv file
 
         :param path: The path to the file of Enfusion fusions
@@ -381,25 +378,26 @@ def load_records(self, path: Path) -> list["EnFusion"]:
             "Break2": "break_3prime",
             "FusionJunctionSequence": "fusion_junction_sequence",
         }
-        return self._process_fusion_caller_rows(path, EnFusion, column_rename)
+        return cls._process_fusion_caller_rows(path, column_rename, "\t")
 
 
 class Genie(FusionCaller):
     """Define parameters for Genie model"""
 
     type: Literal[Caller.GENIE] = Caller.GENIE
-    site1_hugo: str = Field(None, description="The HUGO symbol reported at site 1")
-    site2_hugo: str = Field(None, description="The HUGO symbol reported at site 2")
-    site1_chrom: int = Field(None, description="The chromosome reported at site 1")
-    site2_chrom: int = Field(None, description="The chromosome reported at site 2")
-    site1_pos: int = Field(None, description="The breakpoint reported at site 1")
-    site2_pos: int = Field(None, description="The breakpoint reported at site 2")
-    annot: str = Field(None, description="The annotation for the fusion event")
+    site1_hugo: str = Field(..., description="The HUGO symbol reported at site 1")
+    site2_hugo: str = Field(..., description="The HUGO symbol reported at site 2")
+    site1_chrom: int = Field(..., description="The chromosome reported at site 1")
+    site2_chrom: int = Field(..., description="The chromosome reported at site 2")
+    site1_pos: int = Field(..., description="The breakpoint reported at site 1")
+    site2_pos: int = Field(..., description="The breakpoint reported at site 2")
+    annot: str = Field(..., description="The annotation for the fusion event")
     reading_frame: str = Field(
-        None, description="The reading frame status of the fusion"
+        ..., description="The reading frame status of the fusion"
     )
 
-    def load_records(self, path: Path) -> list["Genie"]:
+    @classmethod
+    def load_records(cls, path: Path) -> list["Genie"]:
         """Load fusions from Genie txt file
 
         :param path: The path to the file of Genie structural variants
@@ -415,4 +413,4 @@ def load_records(self, path: Path) -> list["Genie"]:
             "Site2_Effect_On_Frame": "reading_frame",
             "Annotation": "annot",
         }
-        return self._process_fusion_caller_rows(path, Genie, column_rename)
+        return cls._process_fusion_caller_rows(path, column_rename, "\t")
diff --git a/tests/test_fusion_caller_models.py b/tests/test_fusion_caller_models.py
index a3191a9..5a2cea5 100644
--- a/tests/test_fusion_caller_models.py
+++ b/tests/test_fusion_caller_models.py
@@ -18,82 +18,75 @@
 def test_get_jaffa_records(fixture_data_dir):
     """Test that get_jaffa_records works correctly"""
     path = Path(fixture_data_dir / "jaffa_results.csv")
-    jaffa_instance = JAFFA()
-    records = jaffa_instance.load_records(path)
+    records = JAFFA.load_records(path)
     assert len(records) == 491
 
     path = Path(fixture_data_dir / "jaffa_resultss.csv")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert jaffa_instance.load_records(path)
+        assert JAFFA.load_records(path)
 
 
 def test_get_star_fusion_records(fixture_data_dir):
     """Test that get_star_fusion_records works correctly"""
     path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv")
-    sf_instance = STARFusion()
-    records = sf_instance.load_records(path)
+    records = STARFusion.load_records(path)
     assert len(records) == 37
 
     path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert sf_instance.load_records(path)
+        assert STARFusion.load_records(path)
 
 
 def test_get_fusion_catcher_records(fixture_data_dir):
     """Test that get_fusion_catcher_records works correctly"""
     path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txt")
-    fc_instance = FusionCatcher()
-    fusions_list = fc_instance.load_records(path)
+    fusions_list = FusionCatcher.load_records(path)
     assert len(fusions_list) == 355
 
     path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txts")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert fc_instance.load_records(path)
+        assert FusionCatcher.load_records(path)
 
 
 def test_get_arriba_records(fixture_data_dir):
     """Test that get_arriba_records works correctly"""
     path = Path(fixture_data_dir / "fusions_arriba_test.tsv")
-    arriba = Arriba()
-    fusions_list = arriba.load_records(path)
+    fusions_list = Arriba.load_records(path)
     assert len(fusions_list) == 1
 
     path = Path(fixture_data_dir / "fusionsd_arriba_test.tsv")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert arriba.load_records(path)
+        assert Arriba.load_records(path)
 
 
 def test_get_cicero_records(fixture_data_dir):
     """Test that get_cicero_records works correctly"""
     path = Path(fixture_data_dir / "annotated.fusion.txt")
-    cicero = Cicero()
-    fusions_list = cicero.load_records(path)
+    fusions_list = Cicero.load_records(path)
     assert len(fusions_list) == 1
 
     path = Path(fixture_data_dir / "annnotated.fusion.txt")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert cicero.load_records(path)
+        assert Cicero.load_records(path)
 
 
 def test_get_enfusion_records(fixture_data_dir):
     """Test that get_enfusion_records works correctly"""
     path = Path(fixture_data_dir / "enfusion_test.csv")
-    enfusion = EnFusion()
-    fusions_list = enfusion.load_records(path)
+    fusions_list = EnFusion.load_records(path)
     assert len(fusions_list) == 1
 
     path = Path(fixture_data_dir / "enfusions_test.csv")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert enfusion.load_records(path)
+        assert EnFusion.load_records(path)
 
 
 def test_get_genie_records(fixture_data_dir):
     """Test that get_genie_records works correctly"""
     path = Path(fixture_data_dir / "genie_test.txt")
-    genie = Genie()
-    fusions_list = genie.load_records(path)
+    fusions_list = Genie.load_records(path)
     assert len(fusions_list) == 1
 
     path = Path(fixture_data_dir / "genie_tests.txt")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert genie.load_records(path)
+        assert Genie.load_records(path)

From 4e00a598b615a26a3f31aec7a90207b5d40fde6c Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 24 Jan 2025 11:33:37 -0500
Subject: [PATCH 17/20] Fix load_records return type annotation

---
 src/fusor/fusion_caller_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index 4b448bb..a331429 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -66,7 +66,7 @@ def _process_fusion_caller_rows(
         return fusions_list
 
     @abstractmethod
-    def load_records(self, path: Path) -> list[Caller]:
+    def load_records(self, path: Path) -> list["FusionCaller"]:
         """Abstract method to load records from a fusion caller file."""
 
 

From a722022e4061d5f10ef3bc91f0dad180afbf7557 Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 24 Jan 2025 16:28:06 -0500
Subject: [PATCH 18/20] Move record loading into harvester classes

---
 src/fusor/fusion_caller_models.py  | 169 +----------------------------
 src/fusor/harvester.py             | 166 ++++++++++++++++++++++++++++
 tests/test_fusion_caller_models.py |   7 +-
 tests/test_harvesters.py           |  99 +++++++++++++++++
 4 files changed, 270 insertions(+), 171 deletions(-)
 create mode 100644 src/fusor/harvester.py
 create mode 100644 tests/test_harvesters.py

diff --git a/src/fusor/fusion_caller_models.py b/src/fusor/fusion_caller_models.py
index a331429..faffb21 100644
--- a/src/fusor/fusion_caller_models.py
+++ b/src/fusor/fusion_caller_models.py
@@ -1,9 +1,7 @@
 """Schemas for outputs provided by different fusion callers"""
 
-import csv
-from abc import ABC, abstractmethod
+from abc import ABC
 from enum import Enum
-from pathlib import Path
 from typing import Literal
 
 from pydantic import BaseModel, ConfigDict, Field
@@ -29,46 +27,6 @@ class FusionCaller(ABC, BaseModel):
     type: Caller
     model_config = ConfigDict(extra="allow")
 
-    @staticmethod
-    def _does_file_exist(path: Path) -> None:
-        """Check if fusions file exists
-
-        :param path: The path to the file
-        :return None
-        :raise ValueError if the file does not exist at the specified path
-        """
-        if not path.exists():
-            statement = f"{path!s} does not exist"
-            raise ValueError(statement)
-        return
-
-    @classmethod
-    def _process_fusion_caller_rows(
-        cls,
-        path: Path,
-        column_rename: dict,
-        delimeter: str,
-    ) -> list["FusionCaller"]:
-        """Convert rows of fusion caller output to Pydantic classes
-
-        :param path: The path to the fusions file
-        :param column_rename: A dictionary of column mappings
-        :param delimeter: The delimeter for the fusions file
-        :return: A list of fusions, represented as Pydantic objects
-        """
-        cls._does_file_exist(path)
-        fusions_list = []
-        with path.open() as csvfile:
-            reader = csv.DictReader(csvfile, delimiter=delimeter)
-            for row in reader:
-                row = {column_rename.get(key, key): value for key, value in row.items()}
-                fusions_list.append(cls(**row))
-        return fusions_list
-
-    @abstractmethod
-    def load_records(self, path: Path) -> list["FusionCaller"]:
-        """Abstract method to load records from a fusion caller file."""
-
 
 class JAFFA(FusionCaller):
     """Define parameters for JAFFA model"""
@@ -108,20 +66,6 @@ class JAFFA(FusionCaller):
         description="The number of detected reads that align entirely on either side of the breakpoint",
     )
 
-    @classmethod
-    def load_records(cls, path: Path) -> list["JAFFA"]:
-        """Load fusions from JAFFA csv file
-
-        :param path: The path to the file of JAFFA fusions
-        :return A list of JAFFA objects, or None if the specified file does not exist
-        """
-        column_rename = {
-            "fusion genes": "fusion_genes",
-            "spanning reads": "spanning_reads",
-            "spanning pairs": "spanning_pairs",
-        }
-        return cls._process_fusion_caller_rows(path, column_rename, ",")
-
 
 class STARFusion(FusionCaller):
     """Define parameters for STAR-Fusion model"""
@@ -147,23 +91,6 @@ class STARFusion(FusionCaller):
         description="The number of RNA-seq fragments that encompass the fusion junction such that one read of the pair aligns to a different gene than the other paired-end read of that fragment (from STAR-Fusion documentation)",
     )
 
-    @classmethod
-    def load_records(cls, path: Path) -> list["STARFusion"]:
-        """Load fusions from STAR-Fusion tsv file
-
-        :param path: The path to the file of STAR-Fusion fusions
-        :return A list of STAR-Fusion objects, or None if the specified file does not exist
-        """
-        column_rename = {
-            "LeftGene": "left_gene",
-            "RightGene": "right_gene",
-            "LeftBreakpoint": "left_breakpoint",
-            "RightBreakpoint": "right_breakpoint",
-            "JunctionReadCount": "junction_read_count",
-            "SpanningFragCount": "spanning_frag_count",
-        }
-        return cls._process_fusion_caller_rows(path, column_rename, "\t")
-
 
 class FusionCatcher(FusionCaller):
     """Define parameters for FusionCatcher model"""
@@ -197,25 +124,6 @@ class FusionCatcher(FusionCaller):
         ..., description="The inferred sequence around the fusion junction"
     )
 
-    @classmethod
-    def load_records(cls, path: Path) -> list["FusionCatcher"]:
-        """Load fusions from FusionCatcher txt file
-
-        :param path: The path to the file of FusionCatcher fusions
-        :return A list of FusionCatcher objects, or None if the specified file does not exist
-        """
-        column_rename = {
-            "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
-            "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
-            "Fusion_point_for_gene_1(5end_fusion_partner)": "five_prime_fusion_point",
-            "Fusion_point_for_gene_2(3end_fusion_partner)": "three_prime_fusion_point",
-            "Predicted_effect": "predicted_effect",
-            "Spanning_unique_reads": "spanning_unique_reads",
-            "Spanning_pairs": "spanning_reads",
-            "Fusion_sequence": "fusion_sequence",
-        }
-        return cls._process_fusion_caller_rows(path, column_rename, "\t")
-
 
 class Arriba(FusionCaller):
     """Define parameters for Arriba model"""
@@ -266,22 +174,6 @@ class Arriba(FusionCaller):
     )
     fusion_transcript: str = Field(..., description="The assembled fusion transcript")
 
-    @classmethod
-    def load_records(cls, path: Path) -> list["Arriba"]:
-        """Load fusions from Arriba tsv file
-
-        :param path: The path to the file of Arriba fusions
-        :return A list of Arriba objects, or None if the specified file does not exist
-        """
-        column_rename = {
-            "#gene1": "gene1",
-            "strand1(gene/fusion)": "strand1",
-            "strand2(gene/fusion)": "strand2",
-            "type": "event_type",
-            "reading_frame": "rf",
-        }
-        return cls._process_fusion_caller_rows(path, column_rename, "\t")
-
 
 class Cicero(FusionCaller):
     """Define parameters for CICERO model"""
@@ -321,28 +213,6 @@ class Cicero(FusionCaller):
     )
     contig: str = Field(..., description="The assembled contig sequence for the fusion")
 
-    @classmethod
-    def load_records(cls, path: Path) -> list["Cicero"]:
-        """Load fusions from Cicero txt file
-
-        :param path: The path to the file of Cicero fusions
-        :return A list of Cicero objects, or None if the specified file does not exist
-        """
-        column_rename = {
-            "geneA": "gene_5prime",
-            "geneB": "gene_3prime",
-            "chrA": "chr_5prime",
-            "chrB": "chr_3prime",
-            "posA": "pos_5prime",
-            "posB": "pos_3prime",
-            "type": "event_type",
-            "readsA": "reads_5prime",
-            "readsB": "reads_3prime",
-            "coverageA": "coverage_5prime",
-            "coverageB": "coverage_3prime",
-        }
-        return cls._process_fusion_caller_rows(path, column_rename, "\t")
-
 
 class EnFusion(FusionCaller):
     """Define parameters for EnFusion model"""
@@ -362,24 +232,6 @@ class EnFusion(FusionCaller):
         None, description="The sequence near the fusion junction"
     )
 
-    @classmethod
-    def load_records(cls, path: Path) -> list["EnFusion"]:
-        """Load fusions from EnFusion tsv file
-
-        :param path: The path to the file of Enfusion fusions
-        :return A list of Enfusion objects, or None if the specified file does not exist
-        """
-        column_rename = {
-            "Gene1": "gene_5prime",
-            "Gene2": "gene_3prime",
-            "Chr1": "chr_5prime",
-            "Chr2": "chr_3prime",
-            "Break1": "break_5prime",
-            "Break2": "break_3prime",
-            "FusionJunctionSequence": "fusion_junction_sequence",
-        }
-        return cls._process_fusion_caller_rows(path, column_rename, "\t")
-
 
 class Genie(FusionCaller):
     """Define parameters for Genie model"""
@@ -395,22 +247,3 @@ class Genie(FusionCaller):
     reading_frame: str = Field(
         ..., description="The reading frame status of the fusion"
     )
-
-    @classmethod
-    def load_records(cls, path: Path) -> list["Genie"]:
-        """Load fusions from Genie txt file
-
-        :param path: The path to the file of Genie structural variants
-        :return A list of Genie objects, or None if the specified file does not exist
-        """
-        column_rename = {
-            "Site1_Hugo_Symbol": "site1_hugo",
-            "Site2_Hugo_Symbol": "site2_hugo",
-            "Site1_Chromosome": "site1_chrom",
-            "Site2_Chromosome": "site2_chrom",
-            "Site1_Position": "site1_pos",
-            "Site2_Position": "site2_pos",
-            "Site2_Effect_On_Frame": "reading_frame",
-            "Annotation": "annot",
-        }
-        return cls._process_fusion_caller_rows(path, column_rename, "\t")
diff --git a/src/fusor/harvester.py b/src/fusor/harvester.py
new file mode 100644
index 0000000..a4e3006
--- /dev/null
+++ b/src/fusor/harvester.py
@@ -0,0 +1,166 @@
+"""Harvester methods for output from different fusion callers"""
+
+import csv
+from abc import ABC
+from pathlib import Path
+from typing import ClassVar
+
+from fusor.fusion_caller_models import (
+    JAFFA,
+    Arriba,
+    Cicero,
+    EnFusion,
+    FusionCaller,
+    FusionCatcher,
+    Genie,
+    STARFusion,
+)
+
+
+class FusionCallerHarvester(ABC):
+    """ABC for fusion caller harvesters"""
+
+    fusion_caller: FusionCaller
+    column_rename: dict
+    delimeter: str
+
+    def load_records(
+        self,
+        fusion_path: Path,
+    ) -> list[FusionCaller]:
+        """Convert rows of fusion caller output to Pydantic classes
+
+        :param path: The path to the fusions file
+        :param column_rename: A dictionary of column mappings
+        :param delimeter: The delimeter for the fusions file
+        :raise ValueError: if the file does not exist at the specified path
+        :return: A list of fusions, represented as Pydantic objects
+        """
+        if not fusion_path.exists():
+            statement = f"{fusion_path!s} does not exist"
+            raise ValueError(statement)
+        fusions_list = []
+        fields_to_keep = self.fusion_caller.__annotations__.keys()
+        with fusion_path.open() as csvfile:
+            reader = csv.DictReader(csvfile, delimiter=self.delimeter)
+            for row in reader:
+                row = {
+                    self.column_rename.get(key, key): value
+                    for key, value in row.items()
+                }
+                filered_row = {
+                    key: value for key, value in row.items() if key in fields_to_keep
+                }
+                fusions_list.append(self.fusion_caller(**filered_row))
+        return fusions_list
+
+
+class JAFFAHarvester(FusionCallerHarvester):
+    """Class for harvesting JAFFA data"""
+
+    column_rename: ClassVar[dict] = {
+        "fusion genes": "fusion_genes",
+        "spanning reads": "spanning_reads",
+        "spanning pairs": "spanning_pairs",
+    }
+    delimeter = ","
+    fusion_caller = JAFFA
+
+
+class StarFusionHarvester(FusionCallerHarvester):
+    """Class for harvesting STAR-Fusion data"""
+
+    column_rename: ClassVar[dict] = {
+        "LeftGene": "left_gene",
+        "RightGene": "right_gene",
+        "LeftBreakpoint": "left_breakpoint",
+        "RightBreakpoint": "right_breakpoint",
+        "JunctionReadCount": "junction_read_count",
+        "SpanningFragCount": "spanning_frag_count",
+    }
+    delimeter = "\t"
+    fusion_caller = STARFusion
+
+
+class FusionCatcherHarvester(FusionCallerHarvester):
+    """Class for harvesting FusionCatcher data"""
+
+    column_rename: ClassVar[dict] = {
+        "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
+        "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
+        "Fusion_point_for_gene_1(5end_fusion_partner)": "five_prime_fusion_point",
+        "Fusion_point_for_gene_2(3end_fusion_partner)": "three_prime_fusion_point",
+        "Predicted_effect": "predicted_effect",
+        "Spanning_unique_reads": "spanning_unique_reads",
+        "Spanning_pairs": "spanning_reads",
+        "Fusion_sequence": "fusion_sequence",
+    }
+    delimeter = "\t"
+    fusion_caller = FusionCatcher
+
+
+class ArribaHarvester(FusionCallerHarvester):
+    """Class for harvesting Arriba data"""
+
+    column_rename: ClassVar[dict] = {
+        "#gene1": "gene1",
+        "strand1(gene/fusion)": "strand1",
+        "strand2(gene/fusion)": "strand2",
+        "type": "event_type",
+        "reading_frame": "rf",
+    }
+    delimeter = "\t"
+    fusion_caller = Arriba
+
+
+class CiceroHarvester(FusionCallerHarvester):
+    """Class for harvesting Cicero data"""
+
+    column_rename: ClassVar[dict] = {
+        "geneA": "gene_5prime",
+        "geneB": "gene_3prime",
+        "chrA": "chr_5prime",
+        "chrB": "chr_3prime",
+        "posA": "pos_5prime",
+        "posB": "pos_3prime",
+        "type": "event_type",
+        "readsA": "reads_5prime",
+        "readsB": "reads_3prime",
+        "coverageA": "coverage_5prime",
+        "coverageB": "coverage_3prime",
+    }
+    delimeter = "\t"
+    fusion_caller = Cicero
+
+
+class EnFusionHarvester(FusionCallerHarvester):
+    """Class for harvesting EnFusion data"""
+
+    column_rename: ClassVar[dict] = {
+        "Gene1": "gene_5prime",
+        "Gene2": "gene_3prime",
+        "Chr1": "chr_5prime",
+        "Chr2": "chr_3prime",
+        "Break1": "break_5prime",
+        "Break2": "break_3prime",
+        "FusionJunctionSequence": "fusion_junction_sequence",
+    }
+    delimeter = "\t"
+    fusion_caller = EnFusion
+
+
+class GenieHarvester(FusionCallerHarvester):
+    """Class for harvesting Genie data"""
+
+    column_rename: ClassVar[dict] = {
+        "Site1_Hugo_Symbol": "site1_hugo",
+        "Site2_Hugo_Symbol": "site2_hugo",
+        "Site1_Chromosome": "site1_chrom",
+        "Site2_Chromosome": "site2_chrom",
+        "Site1_Position": "site1_pos",
+        "Site2_Position": "site2_pos",
+        "Site2_Effect_On_Frame": "reading_frame",
+        "Annotation": "annot",
+    }
+    delimeter = "\t"
+    fusion_caller = Genie
diff --git a/tests/test_fusion_caller_models.py b/tests/test_fusion_caller_models.py
index 5a2cea5..4f46470 100644
--- a/tests/test_fusion_caller_models.py
+++ b/tests/test_fusion_caller_models.py
@@ -5,7 +5,6 @@
 import pytest
 
 from fusor.fusion_caller_models import (
-    JAFFA,
     Arriba,
     Cicero,
     EnFusion,
@@ -13,17 +12,19 @@
     Genie,
     STARFusion,
 )
+from fusor.harvester import JAFFAHarvester
 
 
 def test_get_jaffa_records(fixture_data_dir):
     """Test that get_jaffa_records works correctly"""
     path = Path(fixture_data_dir / "jaffa_results.csv")
-    records = JAFFA.load_records(path)
+    # records = JAFFA.load_records(path)
+    records = JAFFAHarvester.load_records(path)
     assert len(records) == 491
 
     path = Path(fixture_data_dir / "jaffa_resultss.csv")
     with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert JAFFA.load_records(path)
+        assert JAFFAHarvester.load_records(path)
 
 
 def test_get_star_fusion_records(fixture_data_dir):
diff --git a/tests/test_harvesters.py b/tests/test_harvesters.py
new file mode 100644
index 0000000..5860b9e
--- /dev/null
+++ b/tests/test_harvesters.py
@@ -0,0 +1,99 @@
+"""Module for testing fusion caller harvester classes"""
+
+from pathlib import Path
+
+import pytest
+
+from fusor.harvester import (
+    ArribaHarvester,
+    CiceroHarvester,
+    EnFusionHarvester,
+    FusionCatcherHarvester,
+    GenieHarvester,
+    JAFFAHarvester,
+    StarFusionHarvester,
+)
+
+
+def test_get_jaffa_records(fixture_data_dir):
+    """Test that get_jaffa_records works correctly"""
+    path = Path(fixture_data_dir / "jaffa_results.csv")
+    harvester = JAFFAHarvester()
+    records = harvester.load_records(path)
+    assert len(records) == 491
+
+    path = Path(fixture_data_dir / "jaffa_resultss.csv")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert harvester.load_records(path)
+
+
+def test_get_star_fusion_records(fixture_data_dir):
+    """Test that get_star_fusion_records works correctly"""
+    path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv")
+    harvester = StarFusionHarvester()
+    records = harvester.load_records(path)
+    assert len(records) == 37
+
+    path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert harvester.load_records(path)
+
+
+def test_get_fusion_catcher_records(fixture_data_dir):
+    """Test that get_fusion_catcher_records works correctly"""
+    path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txt")
+    harvester = FusionCatcherHarvester()
+    fusions_list = harvester.load_records(path)
+    assert len(fusions_list) == 355
+
+    path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txts")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert harvester.load_records(path)
+
+
+def test_get_arriba_records(fixture_data_dir):
+    """Test that get_arriba_records works correctly"""
+    path = Path(fixture_data_dir / "fusions_arriba_test.tsv")
+    harvester = ArribaHarvester()
+    fusions_list = harvester.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "fusionsd_arriba_test.tsv")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert harvester.load_records(path)
+
+
+def test_get_cicero_records(fixture_data_dir):
+    """Test that get_cicero_records works correctly"""
+    path = Path(fixture_data_dir / "annotated.fusion.txt")
+    harvester = CiceroHarvester()
+    fusions_list = harvester.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "annnotated.fusion.txt")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert harvester.load_records(path)
+
+
+def test_get_enfusion_records(fixture_data_dir):
+    """Test that get_enfusion_records works correctly"""
+    path = Path(fixture_data_dir / "enfusion_test.csv")
+    harvester = EnFusionHarvester()
+    fusions_list = harvester.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "enfusions_test.csv")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert harvester.load_records(path)
+
+
+def test_get_genie_records(fixture_data_dir):
+    """Test that get_genie_records works correctly"""
+    path = Path(fixture_data_dir / "genie_test.txt")
+    harvester = GenieHarvester()
+    fusions_list = harvester.load_records(path)
+    assert len(fusions_list) == 1
+
+    path = Path(fixture_data_dir / "genie_tests.txt")
+    with pytest.raises(ValueError, match=f"{path} does not exist"):
+        assert harvester.load_records(path)

From 3af24151b6f11b14c851269928f3789679cb713a Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Fri, 24 Jan 2025 16:30:49 -0500
Subject: [PATCH 19/20] Remove fusion caller model tests

---
 tests/test_fusion_caller_models.py | 93 ------------------------------
 1 file changed, 93 deletions(-)
 delete mode 100644 tests/test_fusion_caller_models.py

diff --git a/tests/test_fusion_caller_models.py b/tests/test_fusion_caller_models.py
deleted file mode 100644
index 4f46470..0000000
--- a/tests/test_fusion_caller_models.py
+++ /dev/null
@@ -1,93 +0,0 @@
-"""Module for testing fusion caller classes"""
-
-from pathlib import Path
-
-import pytest
-
-from fusor.fusion_caller_models import (
-    Arriba,
-    Cicero,
-    EnFusion,
-    FusionCatcher,
-    Genie,
-    STARFusion,
-)
-from fusor.harvester import JAFFAHarvester
-
-
-def test_get_jaffa_records(fixture_data_dir):
-    """Test that get_jaffa_records works correctly"""
-    path = Path(fixture_data_dir / "jaffa_results.csv")
-    # records = JAFFA.load_records(path)
-    records = JAFFAHarvester.load_records(path)
-    assert len(records) == 491
-
-    path = Path(fixture_data_dir / "jaffa_resultss.csv")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert JAFFAHarvester.load_records(path)
-
-
-def test_get_star_fusion_records(fixture_data_dir):
-    """Test that get_star_fusion_records works correctly"""
-    path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsv")
-    records = STARFusion.load_records(path)
-    assert len(records) == 37
-
-    path = Path(fixture_data_dir / "star-fusion.fusion_predictions.abridged.tsvs")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert STARFusion.load_records(path)
-
-
-def test_get_fusion_catcher_records(fixture_data_dir):
-    """Test that get_fusion_catcher_records works correctly"""
-    path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txt")
-    fusions_list = FusionCatcher.load_records(path)
-    assert len(fusions_list) == 355
-
-    path = Path(fixture_data_dir / "final-list_candidate-fusion-genes.txts")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert FusionCatcher.load_records(path)
-
-
-def test_get_arriba_records(fixture_data_dir):
-    """Test that get_arriba_records works correctly"""
-    path = Path(fixture_data_dir / "fusions_arriba_test.tsv")
-    fusions_list = Arriba.load_records(path)
-    assert len(fusions_list) == 1
-
-    path = Path(fixture_data_dir / "fusionsd_arriba_test.tsv")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert Arriba.load_records(path)
-
-
-def test_get_cicero_records(fixture_data_dir):
-    """Test that get_cicero_records works correctly"""
-    path = Path(fixture_data_dir / "annotated.fusion.txt")
-    fusions_list = Cicero.load_records(path)
-    assert len(fusions_list) == 1
-
-    path = Path(fixture_data_dir / "annnotated.fusion.txt")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert Cicero.load_records(path)
-
-
-def test_get_enfusion_records(fixture_data_dir):
-    """Test that get_enfusion_records works correctly"""
-    path = Path(fixture_data_dir / "enfusion_test.csv")
-    fusions_list = EnFusion.load_records(path)
-    assert len(fusions_list) == 1
-
-    path = Path(fixture_data_dir / "enfusions_test.csv")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert EnFusion.load_records(path)
-
-
-def test_get_genie_records(fixture_data_dir):
-    """Test that get_genie_records works correctly"""
-    path = Path(fixture_data_dir / "genie_test.txt")
-    fusions_list = Genie.load_records(path)
-    assert len(fusions_list) == 1
-
-    path = Path(fixture_data_dir / "genie_tests.txt")
-    with pytest.raises(ValueError, match=f"{path} does not exist"):
-        assert Genie.load_records(path)

From 270205b71f86da8b60c0e2137c83d146e708fc9a Mon Sep 17 00:00:00 2001
From: Jeremy Arbesfeld <jarbesfeld@gmail.com>
Date: Mon, 27 Jan 2025 09:59:10 -0500
Subject: [PATCH 20/20] Change docstrings, remove double loop

---
 src/fusor/harvester.py | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/src/fusor/harvester.py b/src/fusor/harvester.py
index a4e3006..20a42cb 100644
--- a/src/fusor/harvester.py
+++ b/src/fusor/harvester.py
@@ -30,7 +30,7 @@ def load_records(
     ) -> list[FusionCaller]:
         """Convert rows of fusion caller output to Pydantic classes
 
-        :param path: The path to the fusions file
+        :param fusion_path: The path to the fusions file
         :param column_rename: A dictionary of column mappings
         :param delimeter: The delimeter for the fusions file
         :raise ValueError: if the file does not exist at the specified path
@@ -40,25 +40,23 @@ def load_records(
             statement = f"{fusion_path!s} does not exist"
             raise ValueError(statement)
         fusions_list = []
-        fields_to_keep = self.fusion_caller.__annotations__.keys()
+        fields_to_keep = self.fusion_caller.__annotations__
         with fusion_path.open() as csvfile:
             reader = csv.DictReader(csvfile, delimiter=self.delimeter)
             for row in reader:
-                row = {
-                    self.column_rename.get(key, key): value
-                    for key, value in row.items()
-                }
-                filered_row = {
-                    key: value for key, value in row.items() if key in fields_to_keep
-                }
-                fusions_list.append(self.fusion_caller(**filered_row))
+                filtered_row = {}
+                for key, value in row.items():
+                    renamed_key = self.column_rename.get(key, key)
+                    if renamed_key in fields_to_keep:
+                        filtered_row[renamed_key] = value
+                fusions_list.append(self.fusion_caller(**filtered_row))
         return fusions_list
 
 
 class JAFFAHarvester(FusionCallerHarvester):
     """Class for harvesting JAFFA data"""
 
-    column_rename: ClassVar[dict] = {
+    column_rename: ClassVar[dict[str, str]] = {
         "fusion genes": "fusion_genes",
         "spanning reads": "spanning_reads",
         "spanning pairs": "spanning_pairs",
@@ -70,7 +68,7 @@ class JAFFAHarvester(FusionCallerHarvester):
 class StarFusionHarvester(FusionCallerHarvester):
     """Class for harvesting STAR-Fusion data"""
 
-    column_rename: ClassVar[dict] = {
+    column_rename: ClassVar[dict[str, str]] = {
         "LeftGene": "left_gene",
         "RightGene": "right_gene",
         "LeftBreakpoint": "left_breakpoint",
@@ -85,7 +83,7 @@ class StarFusionHarvester(FusionCallerHarvester):
 class FusionCatcherHarvester(FusionCallerHarvester):
     """Class for harvesting FusionCatcher data"""
 
-    column_rename: ClassVar[dict] = {
+    column_rename: ClassVar[dict[str, str]] = {
         "Gene_1_symbol(5end_fusion_partner)": "five_prime_partner",
         "Gene_2_symbol(3end_fusion_partner)": "three_prime_partner",
         "Fusion_point_for_gene_1(5end_fusion_partner)": "five_prime_fusion_point",
@@ -102,7 +100,7 @@ class FusionCatcherHarvester(FusionCallerHarvester):
 class ArribaHarvester(FusionCallerHarvester):
     """Class for harvesting Arriba data"""
 
-    column_rename: ClassVar[dict] = {
+    column_rename: ClassVar[dict[str, str]] = {
         "#gene1": "gene1",
         "strand1(gene/fusion)": "strand1",
         "strand2(gene/fusion)": "strand2",
@@ -116,7 +114,7 @@ class ArribaHarvester(FusionCallerHarvester):
 class CiceroHarvester(FusionCallerHarvester):
     """Class for harvesting Cicero data"""
 
-    column_rename: ClassVar[dict] = {
+    column_rename: ClassVar[dict[str, str]] = {
         "geneA": "gene_5prime",
         "geneB": "gene_3prime",
         "chrA": "chr_5prime",
@@ -136,7 +134,7 @@ class CiceroHarvester(FusionCallerHarvester):
 class EnFusionHarvester(FusionCallerHarvester):
     """Class for harvesting EnFusion data"""
 
-    column_rename: ClassVar[dict] = {
+    column_rename: ClassVar[dict[str, str]] = {
         "Gene1": "gene_5prime",
         "Gene2": "gene_3prime",
         "Chr1": "chr_5prime",
@@ -152,7 +150,7 @@ class EnFusionHarvester(FusionCallerHarvester):
 class GenieHarvester(FusionCallerHarvester):
     """Class for harvesting Genie data"""
 
-    column_rename: ClassVar[dict] = {
+    column_rename: ClassVar[dict[str, str]] = {
         "Site1_Hugo_Symbol": "site1_hugo",
         "Site2_Hugo_Symbol": "site2_hugo",
         "Site1_Chromosome": "site1_chrom",