Skip to content

Commit

Permalink
update document
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Nov 27, 2024
1 parent 5425c01 commit f82efb3
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 8 deletions.
3 changes: 3 additions & 0 deletions docs/_static/images/func+export_coverage.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
86 changes: 81 additions & 5 deletions snapatac2-core/src/export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,22 +294,22 @@ where
.map(|(k, v)| GenomicRange::new(k, 0, *v))
.collect();
let mut counter = SparseBinnedCoverage::new(&genome, bin_size);
let mut total_count = 0.0;
let mut total_reads = 0.0;
fragments.for_each(|frag| {
let not_in_blacklist = blacklist_regions.map_or(true, |bl| !bl.is_overlapped(&frag));
if not_in_blacklist {
if ignore_for_norm.map_or(true, |x| !x.contains(frag.chrom())) {
total_count += 1.0;
total_reads += 1.0;
}
counter.insert(&frag, 1.0);
}
});

let norm_factor = match normalization {
None => 1.0,
Some(Normalization::RPKM) => total_count * bin_size as f32 / 1e9,
Some(Normalization::CPM) => total_count / 1e6,
Some(Normalization::BPM) => todo!(),
Some(Normalization::RPKM) => total_reads * bin_size as f32 / 1e9,
Some(Normalization::CPM) => total_reads / 1e6,
Some(Normalization::BPM) => counter.get_coverage().values().sum::<f32>() / 1e6,
Some(Normalization::RPGC) => todo!(),
};

Expand Down Expand Up @@ -480,6 +480,82 @@ fn create_bigwig_from_bedgraph<P: AsRef<Path>>(
mod tests {
use super::*;

#[test]
fn test_bedgraph1() {
let fragments: Vec<Fragment> = vec![
Fragment::new("chr1", 0, 10),
Fragment::new("chr1", 3, 13),
Fragment::new("chr1", 5, 41),
Fragment::new("chr1", 8, 18),
Fragment::new("chr1", 15, 25),
Fragment::new("chr1", 22, 24),
Fragment::new("chr1", 23, 33),
Fragment::new("chr1", 29, 40),
];
let genome: ChromSizes = [("chr1", 50)].into_iter().collect();

let output: Vec<_> = create_bedgraph_from_fragments(
fragments.clone().into_iter(),
&genome,
3,
None,
None,
None,
None,
).into_iter().map(|x| x.value).collect();
let expected = vec![1.0, 3.0, 4.0, 3.0, 2.0, 4.0, 3.0, 2.0];
assert_eq!(output, expected);

let output: Vec<_> = create_bedgraph_from_fragments(
fragments.clone().into_iter(),
&genome,
5,
None,
None,
None,
None,
).into_iter().map(|x| x.value).collect();
let expected = vec![2.0, 4.0, 3.0, 4.0, 3.0, 2.0, 1.0];
assert_eq!(output, expected);
}

#[test]
fn test_bedgraph2() {
let reader = crate::utils::open_file_for_read("test/fragments.tsv.gz");
let mut reader = bed_utils::bed::io::Reader::new(reader, None);
let fragments: Vec<Fragment> = reader.records().map(|x| x.unwrap()).collect();

let reader = crate::utils::open_file_for_read("test/coverage.bdg.gz");
let mut reader = bed_utils::bed::io::Reader::new(reader, None);
let expected: Vec<BedGraph<f32>> = reader.records().map(|x| x.unwrap()).collect();

let output = create_bedgraph_from_fragments(
fragments.clone().into_iter(),
&[("chr1", 248956422), ("chr2", 242193529)].into_iter().collect(),
1,
None,
None,
None,
None,
);
assert_eq!(output, expected);

let output = create_bedgraph_from_fragments(
fragments.into_iter(),
&[("chr1", 248956422), ("chr2", 242193529)].into_iter().collect(),
1,
None,
None,
Some(Normalization::BPM),
None,
);
let scale_factor: f32 = expected.iter().map(|x| x.value * x.len() as f32).sum::<f32>() / 1e6;
assert_eq!(output, expected.into_iter().map(|mut x| {
x.value = x.value / scale_factor;
x
}).collect::<Vec<_>>());
}

#[test]
fn test_smoothing() {
let input = vec![
Expand Down
13 changes: 12 additions & 1 deletion snapatac2-core/src/preprocessing/qc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub type CellBarcode = String;

/// Fragments from single-cell ATAC-seq experiment. Each fragment is represented
/// by a genomic coordinate, cell barcode and a integer value.
#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Fragment {
pub chrom: String,
pub start: u64,
Expand All @@ -20,6 +20,17 @@ pub struct Fragment {
}

impl Fragment {
pub fn new(chrom: impl Into<String>, start: u64, end: u64) -> Self {
Self {
chrom: chrom.into(),
start,
end,
barcode: None,
count: 1,
strand: None,
}
}

pub fn to_insertions(&self) -> SmallVec<[GenomicRange; 2]> {
match self.strand {
None => smallvec![
Expand Down
10 changes: 8 additions & 2 deletions snapatac2-python/python/snapatac2/export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def export_coverage(
selections: list[str] | None = None,
bin_size: int = 10,
blacklist: Path | None = None,
normalization: Literal["RPKM", "CPM"] | None = "RPKM",
normalization: Literal["RPKM", "CPM", "BPM"] | None = "RPKM",
ignore_for_norm: list[str] | None = None,
min_frag_length: int | None = None,
max_frag_length: int | None = 2000,
Expand All @@ -108,6 +108,9 @@ def export_coverage(
normalizes by the total number of reads and the length of the region.
The normalization can be disabled by setting `normalization=None`.
.. image:: /_static/images/func+export_coverage.svg
:align: center
Parameters
----------
adata
Expand All @@ -123,7 +126,10 @@ def export_coverage(
blacklist
A BED file containing the blacklisted regions.
normalization
Normalization method. If `None`, no normalization is performed.
Normalization method. If `None`, no normalization is performed. Options:
- RPKM (per bin) = #reads per bin / (#mapped_reads (in millions) * bin length (kb)).
- CPM (per bin) = #reads per bin / #mapped_reads (in millions).
- BPM (per bin) = #reads per bin / sum of all reads per bin (in millions).
ignore_for_norm
A list of chromosomes to ignore for normalization.
min_frag_length
Expand Down

0 comments on commit f82efb3

Please sign in to comment.