Skip to content

Commit

Permalink
save key names as const variables
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Dec 5, 2024
1 parent 862b5af commit bd88f41
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 77 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,7 @@ _autosummary

# h5ad
*.h5ad
*.h5ads
*.h5ads
*.bdg
*.bam
*.bai
16 changes: 10 additions & 6 deletions snapatac2-core/src/feature_count/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ use polars::frame::DataFrame;

use crate::genome::ChromSizes;

pub const FRAGMENT_SINGLE: &str = "fragment_single";
pub const FRAGMENT_PAIRED: &str = "fragment_paired";
pub const BASE_VALUE: &str = "_values";

/// The `SnapData` trait represents an interface for reading and
/// manipulating single-cell assay data. It extends the `AnnDataOp` trait,
/// adding methods for reading chromosome sizes and genome-wide base-resolution coverage.
Expand Down Expand Up @@ -70,20 +74,20 @@ impl<B: Backend> SnapData for AnnData<B> {
fn get_fragment_iter(&self, chunk_size: usize) -> Result<FragmentData> {
let obsm = self.obsm();
let matrices: FragmentDataIter = if let Some(insertion) =
obsm.get_item_iter("fragment_single", chunk_size)
obsm.get_item_iter(FRAGMENT_SINGLE, chunk_size)
{
FragmentDataIter::FragmentSingle(Box::new(insertion))
} else if let Some(fragment) = obsm.get_item_iter("fragment_paired", chunk_size) {
FragmentDataIter::FragmentPaired(Box::new(fragment))
} else {
bail!("one of the following keys must be present in the '.obsm': 'fragment_single', 'fragment_paired'")
bail!("one of the following keys must be present in the '.obsm': '{}', '{}'", FRAGMENT_SINGLE, FRAGMENT_PAIRED)
};
Ok(FragmentData::new(self.read_chrom_sizes()?, matrices))
}

fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>>> {
let obsm = self.obsm();
if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>("_values", chunk_size) {
if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>(BASE_VALUE, chunk_size) {
Ok(BaseData::new(self.read_chrom_sizes()?, data))
} else {
bail!("key '_values' is not present in the '.obsm'")
Expand All @@ -96,21 +100,21 @@ impl<B: Backend> SnapData for AnnDataSet<B> {
let adatas = self.adatas().inner();
let obsm = adatas.get_obsm();
let matrices: FragmentDataIter = if let Some(insertion) =
obsm.get_item_iter("fragment_single", chunk_size)
obsm.get_item_iter(FRAGMENT_SINGLE, chunk_size)
{
FragmentDataIter::FragmentSingle(Box::new(insertion))
} else if let Some(fragment) = obsm.get_item_iter("fragment_paired", chunk_size) {
FragmentDataIter::FragmentPaired(Box::new(fragment))
} else {
bail!("one of the following keys must be present in the '.obsm': 'fragment_single', 'fragment_paired'")
bail!("one of the following keys must be present in the '.obsm': '{}', '{}'", FRAGMENT_SINGLE, FRAGMENT_PAIRED)
};
Ok(FragmentData::new(self.read_chrom_sizes()?, matrices))
}

fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>>>
{
let obsm = self.obsm();
if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>("_values", chunk_size) {
if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>(BASE_VALUE, chunk_size) {
Ok(BaseData::new(self.read_chrom_sizes()?, data))
} else {
bail!("key '_values' is not present in the '.obsm'")
Expand Down
9 changes: 4 additions & 5 deletions snapatac2-core/src/preprocessing/import.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::feature_count::ContactData;
use crate::feature_count::{ContactData, BASE_VALUE, FRAGMENT_PAIRED, FRAGMENT_SINGLE};
use crate::genome::{ChromSizes, GenomeBaseIndex};
use crate::preprocessing::qc::{Contact, Fragment, FragmentSummary, FragmentQC};

Expand Down Expand Up @@ -53,9 +53,9 @@ where
false
};
let obsm_key = if is_paired {
"fragment_paired"
FRAGMENT_PAIRED
} else {
"fragment_single"
FRAGMENT_SINGLE
};

let genome_index = GenomeBaseIndex::new(chrom_sizes);
Expand Down Expand Up @@ -336,7 +336,6 @@ where
)
.unwrap(),
);
let obsm_key = "_values";

let genome_index = GenomeBaseIndex::new(chrom_sizes);
let genome_size = genome_index.len();
Expand Down Expand Up @@ -381,7 +380,7 @@ where
CsrMatrix::try_from_csr_data(r, c, offset, ind, csr_data).unwrap()
});

anndata.obsm().add_iter(obsm_key, arrays)?;
anndata.obsm().add_iter(BASE_VALUE, arrays)?;
anndata
.uns()
.add("reference_sequences", chrom_sizes.to_dataframe())?;
Expand Down
120 changes: 60 additions & 60 deletions snapatac2-core/src/preprocessing/qc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,65 @@ use crate::feature_count::{FragmentDataIter, SnapData};

pub type CellBarcode = String;

pub trait QualityControl: SnapData {
/// [ATAC QC] Compute TSS enrichment.
fn tss_enrichment<'a>(&self, promoter: &'a TssRegions) -> Result<(Vec<f64>, TSSe<'a>)> {
let library_tsse = Arc::new(Mutex::new(TSSe::new(promoter)));
let scores = self
.get_fragment_iter(2000)?
.into_fragments()
.flat_map(|(list_of_fragments, _, _)| {
list_of_fragments
.into_par_iter()
.map(|fragments| {
let mut tsse = TSSe::new(promoter);
fragments.into_iter().for_each(|x| tsse.add(&x));
library_tsse.lock().unwrap().add_from(&tsse);
tsse.result().0
})
.collect::<Vec<_>>()
})
.collect();
Ok((
scores,
Arc::into_inner(library_tsse).unwrap().into_inner().unwrap(),
))
}

/// [ATAC QC] Compute the fragment size distribution.
fn fragment_size_distribution(&self, max_size: usize) -> Result<Vec<usize>> {
if let FragmentDataIter::FragmentPaired(fragments) =
self.get_fragment_iter(500)?.into_inner()
{
Ok(fragment_size_distribution(fragments.map(|x| x.0), max_size))
} else {
bail!("key 'fragment_paired' is not present in the '.obsm'")
}
}

/// [ATAC QC] Compute the fraction of reads in each region.
fn frip<D>(
&self,
regions: &Vec<GIntervalMap<D>>,
normalized: bool,
count_as_insertion: bool,
) -> Result<Array2<f64>> {
let vec = fraction_of_reads_in_region(
self.get_fragment_iter(2000)?.into_fragments(),
regions,
normalized,
count_as_insertion,
)
.map(|x| x.0)
.flatten()
.flatten()
.collect::<Vec<_>>();
Array2::from_shape_vec((self.n_obs(), regions.len()), vec).map_err(Into::into)
}
}

impl<T: SnapData> QualityControl for T {}

/// Fragments from single-cell ATAC-seq experiment. Each fragment is represented
/// by a genomic coordinate, cell barcode and a integer value.
#[derive(Serialize, Deserialize, Debug, Clone)]
Expand Down Expand Up @@ -501,63 +560,4 @@ where
.collect::<Vec<_>>();
(frac, start, end)
})
}

pub trait QualityControl: SnapData {
/// Compute TSS enrichment.
fn tss_enrichment<'a>(&self, promoter: &'a TssRegions) -> Result<(Vec<f64>, TSSe<'a>)> {
let library_tsse = Arc::new(Mutex::new(TSSe::new(promoter)));
let scores = self
.get_fragment_iter(2000)?
.into_fragments()
.flat_map(|(list_of_fragments, _, _)| {
list_of_fragments
.into_par_iter()
.map(|fragments| {
let mut tsse = TSSe::new(promoter);
fragments.into_iter().for_each(|x| tsse.add(&x));
library_tsse.lock().unwrap().add_from(&tsse);
tsse.result().0
})
.collect::<Vec<_>>()
})
.collect();
Ok((
scores,
Arc::into_inner(library_tsse).unwrap().into_inner().unwrap(),
))
}

/// Compute the fragment size distribution.
fn fragment_size_distribution(&self, max_size: usize) -> Result<Vec<usize>> {
if let FragmentDataIter::FragmentPaired(fragments) =
self.get_fragment_iter(500)?.into_inner()
{
Ok(fragment_size_distribution(fragments.map(|x| x.0), max_size))
} else {
bail!("key 'fragment_paired' is not present in the '.obsm'")
}
}

/// Compute the fraction of reads in each region.
fn frip<D>(
&self,
regions: &Vec<GIntervalMap<D>>,
normalized: bool,
count_as_insertion: bool,
) -> Result<Array2<f64>> {
let vec = fraction_of_reads_in_region(
self.get_fragment_iter(2000)?.into_fragments(),
regions,
normalized,
count_as_insertion,
)
.map(|x| x.0)
.flatten()
.flatten()
.collect::<Vec<_>>();
Array2::from_shape_vec((self.n_obs(), regions.len()), vec).map_err(Into::into)
}
}

impl<T: SnapData> QualityControl for T {}
}
10 changes: 5 additions & 5 deletions snapatac2-python/src/utils/anndata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use pyanndata::anndata::memory;
use pyanndata::{AnnData, AnnDataSet};
use pyo3::prelude::*;

use snapatac2_core::SnapData;
use snapatac2_core::{feature_count::{BASE_VALUE, FRAGMENT_PAIRED, FRAGMENT_SINGLE}, SnapData};
use snapatac2_core::feature_count::{BaseData, FragmentData, FragmentDataIter};

pub struct PyAnnData<'py>(memory::PyAnnData<'py>);
Expand Down Expand Up @@ -144,19 +144,19 @@ impl<'py> SnapData for PyAnnData<'py> {
fn get_fragment_iter(&self, chunk_size: usize) -> Result<FragmentData> {
let obsm = self.obsm();
let matrices: FragmentDataIter =
if let Some(insertion) = obsm.get_item_iter("fragment_single", chunk_size) {
if let Some(insertion) = obsm.get_item_iter(FRAGMENT_SINGLE, chunk_size) {
FragmentDataIter::FragmentSingle(Box::new(insertion))
} else if let Some(fragment) = obsm.get_item_iter("fragment_paired", chunk_size) {
} else if let Some(fragment) = obsm.get_item_iter(FRAGMENT_SINGLE, chunk_size) {
FragmentDataIter::FragmentPaired(Box::new(fragment))
} else {
bail!("one of the following keys must be present in the '.obsm': 'fragment_single', 'fragment_paired'")
bail!("one of the following keys must be present in the '.obsm': '{}', '{}'", FRAGMENT_SINGLE, FRAGMENT_PAIRED)
};
Ok(FragmentData::new(self.read_chrom_sizes()?, matrices))
}

fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>>> {
let obsm = self.obsm();
if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>("_values", chunk_size) {
if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>(BASE_VALUE, chunk_size) {
Ok(BaseData::new(self.read_chrom_sizes()?, data))
} else {
bail!("key '_values' is not present in the '.obsm'")
Expand Down

0 comments on commit bd88f41

Please sign in to comment.