From 99bfcc5dfd9d9af0a536481bd1372b4a5b611011 Mon Sep 17 00:00:00 2001
From: Kai Zhang <zhangkai33@westlake.edu.cn>
Date: Thu, 5 Dec 2024 22:20:46 +0800
Subject: [PATCH] store ratio instead of f32

---
 snapatac2-core/src/feature_count/data_iter.rs | 351 +++++++++++++++---
 snapatac2-core/src/feature_count/mod.rs       |  14 +-
 snapatac2-core/src/preprocessing/import.rs    |  17 +-
 snapatac2-core/src/preprocessing/mod.rs       |   2 +-
 snapatac2-core/src/preprocessing/qc.rs        |   2 +-
 snapatac2-python/Cargo.toml                   |   1 +
 snapatac2-python/src/preprocessing.rs         |  20 +-
 snapatac2-python/src/utils/anndata.rs         |   9 +-
 8 files changed, 316 insertions(+), 100 deletions(-)
diff --git a/snapatac2-core/src/feature_count/data_iter.rs b/snapatac2-core/src/feature_count/data_iter.rs
index ae06bf08..c3aa0ed4 100644
--- a/snapatac2-core/src/feature_count/data_iter.rs
+++ b/snapatac2-core/src/feature_count/data_iter.rs
@@ -1,10 +1,17 @@
+use crate::feature_count::{CountingStrategy, FeatureCounter};
 use crate::genome::{ChromSizes, GenomeBaseIndex};
 use crate::preprocessing::Fragment;
-use crate::feature_count::{CountingStrategy, FeatureCounter};
 
-use anndata::{data::{utils::to_csr_data, CsrNonCanonical}, ArrayData};
+use anndata::backend::{DataType, ScalarType};
+use anndata::data::DynCsrMatrix;
+use anndata::WriteData;
+use anndata::{
+    data::{utils::to_csr_data, CsrNonCanonical},
+    ArrayData,
+};
 use bed_utils::bed::{BEDLike, BedGraph, GenomicRange, Strand};
 use nalgebra_sparse::CsrMatrix;
+use num::rational::Ratio;
 use num::traits::{FromPrimitive, One, Zero};
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
 use std::collections::HashMap;
@@ -21,9 +28,8 @@ pub enum FragmentDataIter {
 fn single_to_fragments(
     index: GenomeBaseIndex,
     exclude_chroms: HashSet<String>,
-    data_iter: impl ExactSizeIterator<Item = (CsrNonCanonical<i32>, usize, usize)>
-) -> impl ExactSizeIterator<Item = (Vec<Vec<Fragment>>, usize, usize)>
-{
+    data_iter: impl ExactSizeIterator<Item = (CsrNonCanonical<i32>, usize, usize)>,
+) -> impl ExactSizeIterator<Item = (Vec<Vec<Fragment>>, usize, usize)> {
     data_iter.map(move |(mat, a, b)| {
         let row_offsets = mat.row_offsets();
         let col_indices = mat.col_indices();
@@ -73,9 +79,8 @@ fn pair_to_fragments(
     exclude_chroms: HashSet<String>,
     min_fragment_size: Option<u64>,
     max_fragment_size: Option<u64>,
-    data_iter: impl ExactSizeIterator<Item = (CsrNonCanonical<u32>, usize, usize)>
-) -> impl ExactSizeIterator<Item = (Vec<Vec<Fragment>>, usize, usize)>
-{
+    data_iter: impl ExactSizeIterator<Item = (CsrNonCanonical<u32>, usize, usize)>,
+) -> impl ExactSizeIterator<Item = (Vec<Vec<Fragment>>, usize, usize)> {
     data_iter.map(move |(mat, a, b)| {
         let row_offsets = mat.row_offsets();
         let col_indices = mat.col_indices();
@@ -128,8 +133,7 @@ pub struct FragmentData {
     counting_strategy: CountingStrategy,
 }
 
-impl FragmentData
-{
+impl FragmentData {
     pub fn new(chrom_sizes: ChromSizes, data_iter: FragmentDataIter) -> Self {
         Self {
             index: GenomeBaseIndex::new(&chrom_sizes),
@@ -207,8 +211,16 @@ impl FragmentData
         self,
     ) -> Box<dyn ExactSizeIterator<Item = (Vec<Vec<Fragment>>, usize, usize)>> {
         match self.data_iter {
-            FragmentDataIter::FragmentSingle(iter) => Box::new(single_to_fragments(self.index, self.exclude_chroms, iter)),
-            FragmentDataIter::FragmentPaired(iter) => Box::new(pair_to_fragments(self.index, self.exclude_chroms, self.min_fragment_size, self.max_fragment_size, iter)),
+            FragmentDataIter::FragmentSingle(iter) => {
+                Box::new(single_to_fragments(self.index, self.exclude_chroms, iter))
+            }
+            FragmentDataIter::FragmentPaired(iter) => Box::new(pair_to_fragments(
+                self.index,
+                self.exclude_chroms,
+                self.min_fragment_size,
+                self.max_fragment_size,
+                iter,
+            )),
         }
     }
 
@@ -237,8 +249,9 @@ impl FragmentData
     }
 
     /// Output the raw coverage matrix.
-    pub fn into_array_iter(self) -> Box<dyn ExactSizeIterator<Item = (CsrMatrix<u32>, usize, usize)>>
-    {
+    pub fn into_array_iter(
+        self,
+    ) -> Box<dyn ExactSizeIterator<Item = (CsrMatrix<u32>, usize, usize)>> {
         let index = self.get_gindex();
         let ori_index = self.index;
         match self.data_iter {
@@ -258,7 +271,8 @@ impl FragmentData
             }
             FragmentDataIter::FragmentSingle(mat_iter) => {
                 Box::new(mat_iter.map(move |(mat, i, j)| {
-                    let new_mat = gen_mat_single::<u32>(&ori_index, &index, &self.exclude_chroms, mat);
+                    let new_mat =
+                        gen_mat_single::<u32>(&ori_index, &index, &self.exclude_chroms, mat);
                     (new_mat, i, j)
                 }))
             }
@@ -481,7 +495,65 @@ where
 pub struct BaseValue {
     pub chrom: String,
     pub pos: u64,
-    pub value: f32,
+    ratio: Option<Ratio<u16>>,
+    float: f32,
+}
+
+impl From<(&str, u64, f32)> for BaseValue {
+    fn from((chrom, pos, value): (&str, u64, f32)) -> Self {
+        BaseValue::from_float(chrom, pos, value)
+    }
+}
+
+impl From<(&str, u64, i32)> for BaseValue {
+    fn from((chrom, pos, packed): (&str, u64, i32)) -> Self {
+        BaseValue::from_ratio_raw(chrom, pos, packed)
+    }
+}
+
+impl BaseValue {
+    /// Builds a value from a raw ratio and caches its floating-point form.
+    pub fn from_ratio(chrom: impl Into<String>, pos: u64, ratio: Ratio<u16>) -> Self {
+        // A zero numerator maps to 0.0 and a zero denominator with a non-zero
+        // numerator maps to 1.0, so the cached float is never NaN or infinite.
+        let float = match ratio.into_raw() {
+            (0, _) => 0.0,
+            (_, 0) => 1.0,
+            (numer, denom) => numer as f32 / denom as f32,
+        };
+        Self {
+            chrom: chrom.into(),
+            pos,
+            ratio: Some(ratio),
+            float,
+        }
+    }
+
+    pub fn from_float(chrom: impl Into<String>, pos: u64, float: f32) -> Self {
+        Self {
+            ratio: None,
+            float,
+            chrom: chrom.into(),
+            pos,
+        }
+    }
+
+    /// Builds a value from a ratio packed into an `i32`, decoded with `i32_to_ratio`.
+    pub fn from_ratio_raw(chrom: impl Into<String>, pos: u64, ratio: i32) -> Self {
+        Self::from_ratio(chrom, pos, i32_to_ratio(ratio))
+    }
+
+    pub fn numerator(&self) -> Option<u16> {
+        self.ratio.as_ref().map(Ratio::numer).copied()
+    }
+
+    pub fn denominator(&self) -> Option<u16> {
+        self.ratio.as_ref().map(Ratio::denom).copied()
+    }
+
+    pub fn value(&self) -> f32 {
+        self.float
+    }
+}
 
 pub struct BaseData<I> {
@@ -493,7 +565,7 @@ pub struct BaseData<I> {
 
 impl<I> BaseData<I>
 where
-    I: ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>,
+    I: ExactSizeIterator<Item = (DynCsrMatrix, usize, usize)>,
 {
     pub fn new(chrom_sizes: ChromSizes, data_iter: I) -> Self {
         Self {
@@ -525,47 +597,71 @@ where
     }
 
     /// Return an iterator of raw values.
-    pub fn into_values(
-        self,
-    ) -> impl ExactSizeIterator<Item = (Vec<Vec<BaseValue>>, usize, usize)> {
-        self.data_iter.map(move |(mat, a, b)| {
+    pub fn into_values(self) -> impl ExactSizeIterator<Item = (Vec<Vec<BaseValue>>, usize, usize)> {
+        fn helper<'a, T>(
+            mat: CsrMatrix<T>,
+            exclude_chroms: &'a HashSet<String>,
+            index: &'a GenomeBaseIndex,
+        ) -> Vec<Vec<BaseValue>>
+        where
+            T: Copy + Send + Sync,
+            BaseValue: From<(&'a str, u64, T)>,
+        {
             let row_offsets = mat.row_offsets();
             let col_indices = mat.col_indices();
             let values = mat.values();
-            let values = (0..(row_offsets.len() - 1))
+            (0..(row_offsets.len() - 1))
                 .into_par_iter()
                 .map(|i| {
                     let row_start = row_offsets[i];
                     let row_end = row_offsets[i + 1];
                     (row_start..row_end)
                         .flat_map(|j| {
-                            let (chrom, start) = self.index.get_position(col_indices[j]);
-                            if self.exclude_chroms.contains(chrom) {
+                            let (chrom, start) = index.get_position(col_indices[j]);
+                            if exclude_chroms.contains(chrom) {
                                 None
                             } else {
                                 let v = values[j];
-                                Some(BaseValue {
-                                    chrom: chrom.to_string(),
-                                    pos: start,
-                                    value: v,
-                                })
+                                Some(BaseValue::from((chrom, start, v)))
                             }
                         })
                         .collect()
                 })
-                .collect();
+                .collect()
+        }
+
+        let exclude_chroms = self.exclude_chroms;
+        let index = self.index;
+        self.data_iter.map(move |(mat, a, b)| {
+            let values = match mat.data_type() {
+                DataType::CsrMatrix(ScalarType::I32) => helper(
+                    CsrMatrix::<i32>::try_from(mat).unwrap(),
+                    &exclude_chroms,
+                    &index,
+                ),
+                DataType::CsrMatrix(ScalarType::F32) => helper(
+                    CsrMatrix::<f32>::try_from(mat).unwrap(),
+                    &exclude_chroms,
+                    &index,
+                ),
+                _ => panic!("Unsupported data type"),
+            };
             (values, a, b)
         })
     }
 
     /// Output the raw coverage matrix. Note the values belong to the same interval
     /// will be aggregated by the mean value.
-    pub fn into_array_iter(self) -> impl ExactSizeIterator<Item = (ArrayData, usize, usize)>
-    {
-        let index = self.get_gindex();
-        let ori_index = self.index;
-
-        self.data_iter.map(move |(mat, i, j)| {
+    pub fn into_array_iter(self) -> impl ExactSizeIterator<Item = (ArrayData, usize, usize)> {
+        fn helper<'a, T>(
+            mat: CsrMatrix<T>,
+            exclude_chroms: &'a HashSet<String>,
+            ori_index: &'a GenomeBaseIndex,
+            index: &'a GenomeBaseIndex,
+        ) -> CsrMatrix<f32>
+        where
+            T: Copy + Send + Sync + ToFloat,
+        {
             let row_offsets = mat.row_offsets();
             let col_indices = mat.col_indices();
             let values = mat.values();
@@ -578,52 +674,123 @@ where
 
                     for k in row_start..row_end {
                         let (chrom, pos) = ori_index.get_position(col_indices[k]);
-                        if self.exclude_chroms.is_empty() || !self.exclude_chroms.contains(chrom) {
+                        if exclude_chroms.is_empty() || !exclude_chroms.contains(chrom) {
                             let i = index.get_position_rev(chrom, pos);
                             let entry = count.entry(i).or_insert(Vec::new());
-                            entry.push(values[k]);
+                            entry.push(values[k].to_float());
                         }
                     }
-                    count.into_iter().map(|(k, v)| {
-                        let len = v.len();
-                        let sum: f32 = v.into_iter().sum();
-                        (k, sum / len as f32)
-                    }).collect::<Vec<_>>()
+                    count
+                        .into_iter()
+                        .map(|(k, v)| {
+                            let len = v.len();
+                            let sum: f32 = v.into_iter().sum();
+                            (k, sum / len as f32)
+                        })
+                        .collect::<Vec<_>>()
                 })
                 .collect::<Vec<_>>();
             let (r, c, offset, ind, data) = to_csr_data(vec, index.len());
-            let new_mat = CsrMatrix::try_from_csr_data(r, c, offset, ind, data).unwrap();
+            CsrMatrix::try_from_csr_data(r, c, offset, ind, data).unwrap()
+        }
+
+        let index = self.get_gindex();
+        let ori_index = self.index;
+
+        self.data_iter.map(move |(mat, i, j)| {
+            let new_mat = match mat.data_type() {
+                DataType::CsrMatrix(ScalarType::I32) => {
+                    helper(
+                        CsrMatrix::<i32>::try_from(mat).unwrap(),
+                        &self.exclude_chroms,
+                        &ori_index,
+                        &index,
+                    )
+                }
+                DataType::CsrMatrix(ScalarType::F32) => {
+                    helper(
+                        CsrMatrix::<f32>::try_from(mat).unwrap(),
+                        &self.exclude_chroms,
+                        &ori_index,
+                        &index,
+                    )
+                }
+                _ => panic!("Unsupported data type"),
+            };
             (new_mat.into(), i, j)
         })
     }
 
     /// Aggregate the coverage by a feature counter. Values belong to the same interval
     /// will be aggregated by the mean value.
-    pub fn into_aggregated_array_iter<C>(self, counter: C) -> impl ExactSizeIterator<Item = (ArrayData, usize, usize)>
+    pub fn into_aggregated_array_iter<C>(
+        self,
+        counter: C,
+    ) -> impl ExactSizeIterator<Item = (ArrayData, usize, usize)>
     where
-        C: FeatureCounter<Value=f32> + Clone + Sync,
+        C: FeatureCounter<Value = f32> + Clone + Sync,
     {
-        let n_col = counter.num_features();
-        self.data_iter.map(move |(data, i, j)| {
-            let vec = (0..data.nrows())
+        fn helper<'a, T, C>(
+            data: CsrMatrix<T>,
+            counter: C,
+            exclude_chroms: &'a HashSet<String>,
+            index: &'a GenomeBaseIndex,
+        ) -> Vec<Vec<(usize, f32)>>
+        where
+            T: Copy + Send + Sync + ToFloat,
+            C: FeatureCounter<Value = f32> + Clone + Sync,
+        {
+            (0..data.nrows())
                 .into_par_iter()
                 .map(|i| {
                     let mut coverage = counter.clone();
                     let row = data.get_row(i).unwrap();
-                    row.col_indices().into_iter().zip(row.values()).for_each(|(idx, val)| {
-                        let (chrom, pos) = self.index.get_position(*idx);
-                        if self.exclude_chroms.is_empty() || !self.exclude_chroms.contains(chrom) {
-                            coverage.insert(&GenomicRange::new(chrom, pos, pos+1), *val);
-                        }
-                    });
-                    coverage.get_values_and_counts().map(|(idx, (val, count))| {
-                        (idx, val / count as f32)
-                    }).collect::<Vec<_>>()
+                    row.col_indices()
+                        .into_iter()
+                        .zip(row.values())
+                        .for_each(|(idx, val)| {
+                            let (chrom, pos) = index.get_position(*idx);
+                            if exclude_chroms.is_empty()
+                                || !exclude_chroms.contains(chrom)
+                            {
+                                coverage.insert(&GenomicRange::new(chrom, pos, pos + 1), val.to_float());
+                            }
+                        });
+                    coverage
+                        .get_values_and_counts()
+                        .map(|(idx, (val, count))| (idx, val / count as f32))
+                        .collect::<Vec<_>>()
                 })
-                .collect::<Vec<_>>();
+                .collect::<Vec<_>>()
+        }
+ 
+        let n_col = counter.num_features();
+        self.data_iter.map(move |(data, i, j)| {
+            let vec = match data.data_type() {
+                DataType::CsrMatrix(ScalarType::I32) => {
+                    helper(
+                        CsrMatrix::<i32>::try_from(data).unwrap(),
+                        counter.clone(),
+                        &self.exclude_chroms,
+                        &self.index,
+                    )
+                }
+                DataType::CsrMatrix(ScalarType::F32) => {
+                    helper(
+                        CsrMatrix::<f32>::try_from(data).unwrap(),
+                        counter.clone(),
+                        &self.exclude_chroms,
+                        &self.index,
+                    )
+                }
+                _ => panic!("Unsupported data type"),
+            };
+
             let (r, c, offset, ind, data) = to_csr_data(vec, n_col);
             (
-                CsrMatrix::try_from_csr_data(r, c, offset, ind, data).unwrap().into(),
+                CsrMatrix::try_from_csr_data(r, c, offset, ind, data)
+                    .unwrap()
+                    .into(),
                 i,
                 j,
             )
@@ -678,7 +845,11 @@ where
                 })
                 .collect::<Vec<_>>();
             let (r, c, offset, ind, data) = to_csr_data(vec, n_col);
-            (CsrMatrix::try_from_csr_data(r,c,offset,ind, data).unwrap(), i, j)
+            (
+                CsrMatrix::try_from_csr_data(r, c, offset, ind, data).unwrap(),
+                i,
+                j,
+            )
         })
     }
 }
@@ -717,3 +888,76 @@ where
     }
 }
 
+// Helper functions for converting between `Ratio<u16>` and `i32`.
+//
+// Packed layout: numerator in the upper 16 bits, denominator in the lower 16 bits.
+
+/// Packs a raw (unreduced) ratio into an `i32`.
+fn ratio_to_i32(x: Ratio<u16>) -> i32 {
+    let (numer, denom) = x.into_raw();
+    // Build the bit pattern as `u32` so that a numerator >= 0x8000 plainly sets the
+    // top bit; the final cast only reinterprets those 32 bits.
+    ((u32::from(numer) << 16) | u32::from(denom)) as i32
+}
+
+/// Unpacks an `i32` produced by `ratio_to_i32` without reducing the ratio.
+fn i32_to_ratio(x: i32) -> Ratio<u16> {
+    // Reinterpret as `u32` so the shift is logical rather than sign-extending.
+    let bits = x as u32;
+    Ratio::new_raw((bits >> 16) as u16, bits as u16)
+}
+
+/// Converts a stored matrix value into the `f32` it represents.
+trait ToFloat {
+    fn to_float(self) -> f32;
+}
+
+impl ToFloat for f32 {
+    fn to_float(self) -> f32 {
+        self
+    }
+}
+
+impl ToFloat for i32 {
+    /// Decodes the packed ratio: a zero numerator gives 0.0, a zero denominator
+    /// with a non-zero numerator gives 1.0, anything else is numer / denom.
+    fn to_float(self) -> f32 {
+        match i32_to_ratio(self).into_raw() {
+            (0, _) => 0.0,
+            (_, 0) => 1.0,
+            (numer, denom) => numer as f32 / denom as f32,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_ratio_conversion() {
+        fn test(numer: u16, denom: u16) {
+            let packed = ratio_to_i32(Ratio::new_raw(numer, denom));
+            // Compare raw parts so the check does not rely on `Ratio`'s equality.
+            assert_eq!(i32_to_ratio(packed).into_raw(), (numer, denom));
+        }
+
+        test(3, 4);
+        test(3, 0);
+        test(0, 0);
+        test(0, 4);
+        // Values with the top bit set exercise the sign bit of the packed `i32`.
+        test(0x8000, 1);
+        test(1, 0x8000);
+        test(u16::MAX, u16::MAX);
+    }
+
+    #[test]
+    fn test_packed_to_float() {
+        let to_float = |numer, denom| ratio_to_i32(Ratio::new_raw(numer, denom)).to_float();
+        assert_eq!(to_float(3, 4), 0.75);
+        assert_eq!(to_float(0, 0), 0.0);
+        assert_eq!(to_float(3, 0), 1.0);
+        assert_eq!(to_float(u16::MAX, u16::MAX), 1.0);
+    }
+}
diff --git a/snapatac2-core/src/feature_count/mod.rs b/snapatac2-core/src/feature_count/mod.rs
index f9bbfa31..5eaaeec0 100644
--- a/snapatac2-core/src/feature_count/mod.rs
+++ b/snapatac2-core/src/feature_count/mod.rs
@@ -5,9 +5,9 @@ mod data_iter;
 use std::str::FromStr;
 
 use anyhow::{bail, Context, Result};
-use anndata::{AnnData, AnnDataOp, AnnDataSet, ArrayElemOp, AxisArraysOp, Backend, ElemCollectionOp};
+use anndata::{data::DynCsrMatrix, AnnData, AnnDataOp, AnnDataSet, ArrayElemOp, AxisArraysOp, Backend, ElemCollectionOp};
 use bed_utils::bed::GenomicRange;
-pub use data_iter::{ChromValueIter, BaseData, FragmentData, ContactData, FragmentDataIter};
+pub use data_iter::{BaseValue, ChromValueIter, BaseData, FragmentData, ContactData, FragmentDataIter};
 pub use counter::{FeatureCounter, CountingStrategy};
 pub use matrix::{create_gene_matrix, create_tile_matrix, create_peak_matrix};
 use nalgebra_sparse::CsrMatrix;
@@ -28,7 +28,7 @@ pub trait SnapData: AnnDataOp {
     fn get_fragment_iter(&self, chunk_size: usize) -> Result<FragmentData>;
 
     /// Read base values stored in the `.obsm` matrix.
-    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>>>;
+    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (DynCsrMatrix, usize, usize)>>>;
 
     /// Read counts stored in the `X` matrix.
     fn read_chrom_values(
@@ -85,9 +85,9 @@ impl<B: Backend> SnapData for AnnData<B> {
         Ok(FragmentData::new(self.read_chrom_sizes()?, matrices))
     }
 
-    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>>> {
+    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (DynCsrMatrix, usize, usize)>>> {
         let obsm = self.obsm();
-        if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>(BASE_VALUE, chunk_size) {
+        if let Some(data) = obsm.get_item_iter(BASE_VALUE, chunk_size) {
             Ok(BaseData::new(self.read_chrom_sizes()?, data))
         } else {
             bail!("key '_values' is not present in the '.obsm'")
@@ -111,10 +111,10 @@ impl<B: Backend> SnapData for AnnDataSet<B> {
         Ok(FragmentData::new(self.read_chrom_sizes()?, matrices))
     }
 
-    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>>>
+    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (DynCsrMatrix, usize, usize)>>>
     {
         let obsm = self.obsm();
-        if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>(BASE_VALUE, chunk_size) {
+        if let Some(data) = obsm.get_item_iter(BASE_VALUE, chunk_size) {
             Ok(BaseData::new(self.read_chrom_sizes()?, data))
         } else {
             bail!("key '_values' is not present in the '.obsm'")
diff --git a/snapatac2-core/src/preprocessing/import.rs b/snapatac2-core/src/preprocessing/import.rs
index 8408b6c7..51a48354 100644
--- a/snapatac2-core/src/preprocessing/import.rs
+++ b/snapatac2-core/src/preprocessing/import.rs
@@ -1,4 +1,4 @@
-use crate::feature_count::{ContactData, BASE_VALUE, FRAGMENT_PAIRED, FRAGMENT_SINGLE};
+use crate::feature_count::{BaseValue, ContactData, BASE_VALUE, FRAGMENT_PAIRED, FRAGMENT_SINGLE};
 use crate::genome::{ChromSizes, GenomeBaseIndex};
 use crate::preprocessing::qc::{Contact, Fragment, FragmentQC, FragmentQCBuilder};
 
@@ -312,13 +312,6 @@ where
     Ok(())
 }
 
-pub struct ChromValue {
-    pub chrom: String,
-    pub pos: u64,
-    pub value: f32,
-    pub barcode: String,
-}
-
 /// Import values
 pub fn import_values<A, I>(
     anndata: &A,
@@ -328,7 +321,7 @@ pub fn import_values<A, I>(
 ) -> Result<()>
 where
     A: AnnDataOp,
-    I: Iterator<Item = ChromValue>,
+    I: Iterator<Item = (String, BaseValue)>,
 {
     let spinner = ProgressBar::with_draw_target(None, ProgressDrawTarget::stderr_with_hz(1))
         .with_style(
@@ -343,7 +336,7 @@ where
     let mut scanned_barcodes = IndexSet::new();
 
     let mut qc_metrics = Vec::new();
-    let chunked_values = values.chunk_by(|x| x.barcode.clone());
+    let chunked_values = values.chunk_by(|x| x.0.clone());
     let chunked_values = chunked_values
         .into_iter()
         .progress_with(spinner)
@@ -365,12 +358,12 @@ where
                 let mut qc = BaseValueQC::new();
                 let mut count = cell_data
                     .into_iter()
-                    .flat_map(|value| {
+                    .flat_map(|(_, value)| {
                         let chrom = &value.chrom;
                         if genome_index.contain_chrom(chrom) {
                             qc.add();
                             let pos = genome_index.get_position_rev(chrom, value.pos);
-                            Some((pos, value.value))
+                            Some((pos, value.value()))
                         } else {
                             None
                         }
diff --git a/snapatac2-core/src/preprocessing/mod.rs b/snapatac2-core/src/preprocessing/mod.rs
index 29c388da..a641e683 100644
--- a/snapatac2-core/src/preprocessing/mod.rs
+++ b/snapatac2-core/src/preprocessing/mod.rs
@@ -4,7 +4,7 @@ mod import;
 mod qc;
 
 pub use bam::{make_fragment_file, BamQC, FlagStat};
-pub use import::{import_contacts, import_fragments, import_values, ChromValue};
+pub use import::{import_contacts, import_fragments, import_values};
 pub use qc::{
     SummaryType,
     get_barcode_count, make_promoter_map,
diff --git a/snapatac2-core/src/preprocessing/qc.rs b/snapatac2-core/src/preprocessing/qc.rs
index 9499355a..3b883a55 100644
--- a/snapatac2-core/src/preprocessing/qc.rs
+++ b/snapatac2-core/src/preprocessing/qc.rs
@@ -73,7 +73,7 @@ pub trait QualityControl: SnapData {
         } else if let Ok(values) = self.get_base_iter(2000) {
             values.into_values().for_each(|(data, s, _)| {
                 data.into_iter().enumerate().for_each(|(i, values)| {
-                    let values = values.into_iter().map(|x| (x.chrom.to_string(), x.value));
+                    let values = values.into_iter().map(|x| (x.chrom.to_string(), x.value()));
                     let stat = match mode {
                         SummaryType::Sum => sum(values),
                         SummaryType::Count => count(values),
diff --git a/snapatac2-python/Cargo.toml b/snapatac2-python/Cargo.toml
index 89e421c5..ba144a68 100644
--- a/snapatac2-python/Cargo.toml
+++ b/snapatac2-python/Cargo.toml
@@ -26,6 +26,7 @@ linfa = "0.6"
 linfa-clustering = "0.6"
 noodles = { version = "0.84", features = ["bam", "sam"] }
 numpy = "0.21.0"
+num = "0.4"
 nalgebra-sparse = "0.9"
 nalgebra = "0.32"
 ndarray = "0.15"
diff --git a/snapatac2-python/src/preprocessing.rs b/snapatac2-python/src/preprocessing.rs
index 722b0f33..5be65597 100644
--- a/snapatac2-python/src/preprocessing.rs
+++ b/snapatac2-python/src/preprocessing.rs
@@ -4,6 +4,7 @@ use itertools::Itertools;
 use pyo3::{prelude::*, pybacked::PyBackedStr};
 use anndata::Backend;
 use anndata_hdf5::H5;
+use num::rational::Ratio;
 use std::collections::HashMap;
 use std::io::{BufRead, BufReader};
 use std::path::PathBuf;
@@ -16,8 +17,8 @@ use anyhow::Result;
 use snapatac2_core::{
     QualityControl,
     genome::TranscriptParserOptions,
-    feature_count::{create_gene_matrix, create_tile_matrix, create_peak_matrix, CountingStrategy},
-    preprocessing::{Fragment, Contact, ChromValue},
+    feature_count::{BaseValue, create_gene_matrix, create_tile_matrix, create_peak_matrix, CountingStrategy},
+    preprocessing::{Fragment, Contact},
     preprocessing,
     utils,
 };
@@ -191,7 +192,7 @@ pub(crate) fn import_values(
     chunk_size: usize,
 ) -> Result<()>
 {
-    fn read_chrom_values(path: PathBuf) -> impl Iterator<Item = ChromValue> {
+    fn read_chrom_values(path: PathBuf) -> impl Iterator<Item = (String, BaseValue)> {
         let barcode = path.file_stem().unwrap().to_str().unwrap().to_string();
         let reader = BufReader::new(utils::open_file_for_read(&path));
         reader.lines().skip(1).map(move |line| {
@@ -199,15 +200,15 @@ pub(crate) fn import_values(
             let mut parts = line.split_whitespace();
             let chrom = parts.next().unwrap();
             let pos = parts.next().unwrap().parse().unwrap();
-            parts.next();
-            parts.next();
-            let value = parts.next().unwrap().parse().unwrap();
-            ChromValue {
-                chrom: chrom.to_string(),
-                pos,
-                value,
-                barcode: barcode.clone(),
-            }
+            let methyl: u16 = parts.next().unwrap().parse().unwrap();
+            let unmethyl: u16 = parts.next().unwrap().parse().unwrap();
+            // The denominator is the total coverage. A plain `+` would wrap silently in
+            // release builds once the sum exceeds `u16::MAX`, so fail loudly instead.
+            let total = methyl.checked_add(unmethyl).unwrap_or_else(|| {
+                panic!("coverage at {}:{} does not fit in u16", chrom, pos)
+            });
+            let value = BaseValue::from_ratio(chrom, pos, Ratio::new_raw(methyl, total));
+            (barcode.clone(), value)
         })
     }
 
diff --git a/snapatac2-python/src/utils/anndata.rs b/snapatac2-python/src/utils/anndata.rs
index e2ab2e4d..f56a10bb 100644
--- a/snapatac2-python/src/utils/anndata.rs
+++ b/snapatac2-python/src/utils/anndata.rs
@@ -1,9 +1,6 @@
 use anndata::{
-    data::{ArrayChunk, DataFrameIndex},
-    AnnDataOp, ArrayData, HasShape,
-    WriteArrayData, AxisArraysOp,
+    data::{ArrayChunk, DataFrameIndex, DynCsrMatrix}, AnnDataOp, ArrayData, AxisArraysOp, HasShape, WriteArrayData
 };
-use nalgebra_sparse::CsrMatrix;
 use anyhow::{Result, bail};
 use polars::prelude::DataFrame;
 use pyanndata::anndata::memory;
@@ -154,9 +151,9 @@ impl<'py> SnapData for PyAnnData<'py> {
         Ok(FragmentData::new(self.read_chrom_sizes()?, matrices))
     }
 
-    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (CsrMatrix<f32>, usize, usize)>>> {
+    fn get_base_iter(&self, chunk_size: usize) -> Result<BaseData<impl ExactSizeIterator<Item = (DynCsrMatrix, usize, usize)>>> {
         let obsm = self.obsm();
-        if let Some(data) = obsm.get_item_iter::<CsrMatrix<f32>>(BASE_VALUE, chunk_size) {
+        if let Some(data) = obsm.get_item_iter(BASE_VALUE, chunk_size) {
             Ok(BaseData::new(self.read_chrom_sizes()?, data))
         } else {
             bail!("key '_values' is not present in the '.obsm'")