Skip to content

Commit

Permalink
implement gene count (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Nov 20, 2024
1 parent 23b2fbf commit 9494eac
Show file tree
Hide file tree
Showing 9 changed files with 1,058 additions and 21 deletions.
4 changes: 2 additions & 2 deletions precellar/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ edition = "2021"

[dependencies]
anyhow = "1.0"
bed-utils = "0.5.1"
bed-utils = "0.6"
bwa-mem2 = { git = "https://github.com/regulatory-genomics/bwa-mem2-rust.git", rev = "8de06bcc0a2145fd819232ffb2bf100fb795db30" }
star-aligner = { git = "https://github.com/regulatory-genomics/star-aligner", rev = "faef1085eaf26e6e8d5875fcbc641c3af9444d89" }
bstr = "1.0"
Expand All @@ -14,7 +14,7 @@ itertools = "0.13"
indexmap = "2.5"
log = "0.4"
lexical = "6.1"
noodles = { version = "0.85", features = ["core", "fastq", "bam", "sam", "async"] }
noodles = { version = "0.85", features = ["core", "gtf", "fastq", "bam", "sam", "async"] }
kdam = "0.5.2"
rayon = "1.10"
smallvec = "1.13"
Expand Down
30 changes: 15 additions & 15 deletions precellar/src/align.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ pub trait Aligner {
fn align_reads(
&mut self,
num_threads: u16,
records: Vec<AnnotatedRecord>,
records: Vec<AnnotatedFastq>,
) -> Vec<Self::AlignOutput>;

fn align_read_pairs(
&mut self,
num_threads: u16,
records: Vec<AnnotatedRecord>,
records: Vec<AnnotatedFastq>,
) -> Vec<(Self::AlignOutput, Self::AlignOutput)>;
}

Expand All @@ -75,14 +75,14 @@ impl Aligner for DummyAligner {
sam::Header::default()
}

fn align_reads(&mut self, _: u16, _: Vec<AnnotatedRecord>) -> Vec<Self::AlignOutput> {
fn align_reads(&mut self, _: u16, _: Vec<AnnotatedFastq>) -> Vec<Self::AlignOutput> {
Vec::new()
}

fn align_read_pairs(
&mut self,
_: u16,
_: Vec<AnnotatedRecord>,
_: Vec<AnnotatedFastq>,
) -> Vec<(Self::AlignOutput, Self::AlignOutput)> {
Vec::new()
}
Expand All @@ -98,7 +98,7 @@ impl Aligner for BurrowsWheelerAligner {
fn align_reads(
&mut self,
num_threads: u16,
records: Vec<AnnotatedRecord>,
records: Vec<AnnotatedFastq>,
) -> Vec<Self::AlignOutput> {
let (info, mut reads): (Vec<_>, Vec<_>) = records
.into_iter()
Expand All @@ -124,7 +124,7 @@ impl Aligner for BurrowsWheelerAligner {
fn align_read_pairs(
&mut self,
num_threads: u16,
records: Vec<AnnotatedRecord>,
records: Vec<AnnotatedFastq>,
) -> Vec<(Self::AlignOutput, Self::AlignOutput)> {
let (info, mut reads): (Vec<_>, Vec<_>) = records
.into_iter()
Expand Down Expand Up @@ -167,7 +167,7 @@ impl Aligner for StarAligner {
fn align_reads(
&mut self,
num_threads: u16,
records: Vec<AnnotatedRecord>,
records: Vec<AnnotatedFastq>,
) -> Vec<Self::AlignOutput> {
let chunk_size = get_chunk_size(records.len(), num_threads as usize);

Expand All @@ -192,7 +192,7 @@ impl Aligner for StarAligner {
fn align_read_pairs(
&mut self,
num_threads: u16,
records: Vec<AnnotatedRecord>,
records: Vec<AnnotatedFastq>,
) -> Vec<(Self::AlignOutput, Self::AlignOutput)> {
let chunk_size = get_chunk_size(records.len(), num_threads as usize);

Expand Down Expand Up @@ -518,7 +518,7 @@ impl FromIterator<(FastqAnnotator, fastq::Reader<Box<dyn BufRead>>)> for Annotat
}

impl Iterator for AnnotatedFastqReader {
type Item = AnnotatedRecord;
type Item = AnnotatedFastq;

fn next(&mut self) -> Option<Self::Item> {
let mut missing = None;
Expand Down Expand Up @@ -604,7 +604,7 @@ impl FastqAnnotator {
}
}

fn annotate(&self, record: &fastq::Record) -> Result<AnnotatedRecord> {
fn annotate(&self, record: &fastq::Record) -> Result<AnnotatedFastq> {
let n = record.sequence().len();
if n < self.min_len || n > self.max_len {
bail!(
Expand Down Expand Up @@ -655,7 +655,7 @@ impl FastqAnnotator {
}
}
});
Ok(AnnotatedRecord {
Ok(AnnotatedFastq {
barcode,
umi,
read1,
Expand Down Expand Up @@ -685,14 +685,14 @@ impl Barcode {
pub type UMI = fastq::Record;

/// An annotated fastq record with barcode, UMI, and sequence.
pub struct AnnotatedRecord {
pub struct AnnotatedFastq {
pub barcode: Option<Barcode>,
pub umi: Option<UMI>,
pub read1: Option<fastq::Record>,
pub read2: Option<fastq::Record>,
}

impl AnnotatedRecord {
impl AnnotatedFastq {
/// The total number of bases, including read1 and read2, in the record.
pub fn len(&self) -> usize {
self.read1.as_ref().map_or(0, |x| x.sequence().len())
Expand All @@ -703,7 +703,7 @@ impl AnnotatedRecord {
}
}

impl AnnotatedRecord {
impl AnnotatedFastq {
pub fn join(&mut self, other: Self) {
if let Some(bc) = &mut self.barcode {
if let Some(x) = other.barcode.as_ref() {
Expand Down Expand Up @@ -750,7 +750,7 @@ impl<I> VectorChunk<I> {
}
}

impl<I: Iterator<Item = AnnotatedRecord>> Iterator for VectorChunk<I> {
impl<I: Iterator<Item = AnnotatedFastq>> Iterator for VectorChunk<I> {
type Item = Vec<I::Item>;

fn next(&mut self) -> Option<Self::Item> {
Expand Down
4 changes: 2 additions & 2 deletions precellar/src/fragment.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
mod deduplicate;
mod de_dups;

use anyhow::Result;
use bed_utils::{
bed::{BEDLike, ParseError, Strand},
extsort::ExternalSorterBuilder,
};
use deduplicate::{remove_duplicates, AlignmentInfo};
use de_dups::{remove_duplicates, AlignmentInfo};
use either::Either;
use itertools::Itertools;
use noodles::sam::{
Expand Down
File renamed without changes.
1 change: 1 addition & 0 deletions precellar/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
pub mod barcode;
pub mod align;
pub mod transcript;
pub mod fragment;
pub mod qc;
pub mod utils;
Loading

0 comments on commit 9494eac

Please sign in to comment.