Skip to content

Commit

Permalink
deprecate pp.import_data
Browse files Browse the repository at this point in the history
  • Loading branch information
kaizhang committed Dec 9, 2024
1 parent 5c4c49d commit 16913cb
Show file tree
Hide file tree
Showing 9 changed files with 60 additions and 29 deletions.
6 changes: 3 additions & 3 deletions docs/api/io.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
============
Input/Output
============
================================
AnnData(Set) object manipulation
================================
.. currentmodule:: snapatac2

Backed AnnData objects
Expand Down
4 changes: 2 additions & 2 deletions docs/api/preprocessing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ BAM/Fragment file processing
:toctree: _autosummary

pp.make_fragment_file
pp.import_data
pp.import_contacts
pp.import_fragments
pp.import_values
pp.import_contacts
pp.call_cells

Matrix operation
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
- Fix #364: logarithm of zero in `tl.diff_test`.
- Fix #366: float32 cannot hold large values in `ex.export_coverage`.

### Other Changes:

- Rename `pp.import_data` to `pp.import_fragments`. The deprecated `pp.import_data`
will be removed in v2.9.0.

## Release 2.7.1 (released October 29, 2024)

### Features:
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/atlas.ipynb
Git LFS file not shown
4 changes: 2 additions & 2 deletions docs/tutorials/integration.ipynb
Git LFS file not shown
4 changes: 2 additions & 2 deletions docs/tutorials/pbmc.ipynb
Git LFS file not shown
12 changes: 6 additions & 6 deletions snapatac2-python/python/snapatac2/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def tsse(
) -> np.ndarray | list[np.ndarray] | None:
""" Compute the TSS enrichment score (TSSe) for each cell.
:func:`~snapatac2.pp.import_data` must be ran first in order to use this function.
:func:`~snapatac2.pp.import_fragments` must be run first in order to use this function.
Parameters
----------
Expand Down Expand Up @@ -48,7 +48,7 @@ def tsse(
Examples
--------
>>> import snapatac2 as snap
>>> data = snap.pp.import_data(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> data = snap.pp.import_fragments(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> snap.metrics.tsse(data, snap.genome.hg38)
>>> print(data.obs['tsse'].head())
AAACTGCAGACTCGGA-1 32.129514
Expand Down Expand Up @@ -91,7 +91,7 @@ def frip(
) -> dict[str, list[float]] | list[dict[str, list[float]]] | None:
""" Add fraction of reads in peaks (FRiP) to the AnnData object.
:func:`~snapatac2.pp.import_data` must be ran first in order to use this function.
:func:`~snapatac2.pp.import_fragments` must be run first in order to use this function.
Parameters
----------
Expand Down Expand Up @@ -126,7 +126,7 @@ def frip(
Examples
--------
>>> import snapatac2 as snap
>>> data = snap.pp.import_data(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> data = snap.pp.import_fragments(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> snap.metrics.frip(data, {"peaks_frac": snap.datasets.cre_HEA()})
>>> print(data.obs['peaks_frac'].head())
AAACTGCAGACTCGGA-1 0.715930
Expand Down Expand Up @@ -174,7 +174,7 @@ def frag_size_distr(
The result is stored in a vector where each element represents the number of fragments
and the index represents the fragment length. The first position of the vector is
reserved for fragments with size larger than the `max_recorded_size` parameter.
:func:`~snapatac2.pp.import_data` must be ran first in order to use this function.
:func:`~snapatac2.pp.import_fragments` must be run first in order to use this function.
Parameters
----------
Expand Down Expand Up @@ -222,7 +222,7 @@ def summary_by_chrom(
) -> dict[str, np.ndarray]:
""" Compute the cell level summary statistics by chromosome.
:func:`~snapatac2.pp.import_data` must be ran first in order to use this function.
:func:`~snapatac2.pp.import_fragments` must be run first in order to use this function.
Parameters
----------
Expand Down
46 changes: 36 additions & 10 deletions snapatac2-python/python/snapatac2/preprocessing/_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
from snapatac2.genome import Genome
from snapatac2.preprocessing._cell_calling import filter_cellular_barcodes_ordmag

__all__ = ['make_fragment_file', 'import_data', 'import_contacts', 'import_values', 'add_tile_matrix',
'make_peak_matrix', 'call_cells', 'filter_cells', 'select_features', 'make_gene_matrix',
__all__ = ['make_fragment_file', 'import_data', 'import_fragments', 'import_contacts', 'import_values',
'add_tile_matrix', 'make_peak_matrix', 'make_gene_matrix',
'call_cells', 'filter_cells', 'select_features',
]

def make_fragment_file(
Expand Down Expand Up @@ -136,7 +137,7 @@ def make_fragment_file(
See Also
--------
import_data
import_fragments
"""
if barcode_tag is None and barcode_regex is None:
raise ValueError("Either barcode_tag or barcode_regex must be set.")
Expand Down Expand Up @@ -167,6 +168,31 @@ def import_data(
tempdir: Path | None = None,
backend: Literal['hdf5'] = 'hdf5',
n_jobs: int = 8,
) -> internal.AnnData:
from warnings import warn
warn("import_data is deprecated and will be removed in v2.9.0. Use import_fragments instead.", DeprecationWarning, stacklevel=2)
return import_fragments(
fragment_file, chrom_sizes, file=file, min_num_fragments=min_num_fragments,
sorted_by_barcode=sorted_by_barcode, whitelist=whitelist, chrM=chrM,
shift_left=shift_left, shift_right=shift_right, chunk_size=chunk_size,
tempdir=tempdir, backend=backend, n_jobs=n_jobs,
)

def import_fragments(
fragment_file: Path | list[Path],
chrom_sizes: Genome | dict[str, int],
*,
file: Path | list[Path] | None = None,
min_num_fragments: int = 200,
sorted_by_barcode: bool = True,
whitelist: Path | list[str] | None = None,
chrM: list[str] = ["chrM", "M"],
shift_left: int = 0,
shift_right: int = 0,
chunk_size: int = 2000,
tempdir: Path | None = None,
backend: Literal['hdf5'] = 'hdf5',
n_jobs: int = 8,
) -> internal.AnnData:
"""Import data from fragment files and compute basic QC metrics.
Expand Down Expand Up @@ -294,7 +320,7 @@ def import_data(
Examples
--------
>>> import snapatac2 as snap
>>> data = snap.pp.import_data(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> data = snap.pp.import_fragments(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> print(data)
AnnData object with n_obs × n_vars = 585 × 0
obs: 'n_fragment', 'frac_dup', 'frac_mito'
Expand Down Expand Up @@ -459,7 +485,7 @@ def add_tile_matrix(
This function is used to generate and add a cell by bin count matrix to the AnnData
object.
:func:`~snapatac2.pp.import_data` must be ran first in order to use this function.
:func:`~snapatac2.pp.import_fragments` must be run first in order to use this function.
Parameters
----------
Expand Down Expand Up @@ -527,7 +553,7 @@ def add_tile_matrix(
Examples
--------
>>> import snapatac2 as snap
>>> data = snap.pp.import_data(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> data = snap.pp.import_fragments(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> snap.pp.add_tile_matrix(data, bin_size=500)
>>> print(data)
AnnData object with n_obs × n_vars = 585 × 6062095
Expand Down Expand Up @@ -582,7 +608,7 @@ def make_peak_matrix(
This function will generate a cell by peak count matrix and store it in a
new .h5ad file.
:func:`~snapatac2.pp.import_data` must be ran first in order to use this function.
:func:`~snapatac2.pp.import_fragments` must be run first in order to use this function.
Parameters
----------
Expand Down Expand Up @@ -651,7 +677,7 @@ def make_peak_matrix(
Examples
--------
>>> import snapatac2 as snap
>>> data = snap.pp.import_data(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> data = snap.pp.import_fragments(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> peak_mat = snap.pp.make_peak_matrix(data, peak_file=snap.datasets.cre_HEA())
>>> print(peak_mat)
AnnData object with n_obs × n_vars = 585 × 1154611
Expand Down Expand Up @@ -721,7 +747,7 @@ def make_gene_matrix(
The result will be stored in a new file and a new AnnData object
will be created.
:func:`~snapatac2.pp.import_data` must be ran first in order to use this function.
:func:`~snapatac2.pp.import_fragments` must be run first in order to use this function.
Parameters
----------
Expand Down Expand Up @@ -788,7 +814,7 @@ def make_gene_matrix(
Examples
--------
>>> import snapatac2 as snap
>>> data = snap.pp.import_data(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> data = snap.pp.import_fragments(snap.datasets.pbmc500(downsample=True), chrom_sizes=snap.genome.hg38, sorted_by_barcode=False)
>>> gene_mat = snap.pp.make_gene_matrix(data, gene_anno=snap.genome.hg38)
>>> print(gene_mat)
AnnData object with n_obs × n_vars = 585 × 60606
Expand Down
4 changes: 2 additions & 2 deletions snapatac2-python/python/snapatac2/preprocessing/_recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ def recipe_10x_metrics(
qc["Mapping"]["Fraction_fragment_flanking_single_nucleosome"] = bam_qc["frac_fragment_flanking_single_nucleosome"]
qc["Library Complexity"]["Fraction_duplicates"] = bam_qc["frac_duplicates"]

adata = snapatac2.pp.import_data(
adata = snapatac2.pp.import_fragments(
output_fragment_file,
min_num_fragments=0,
file=output_h5ad_file,
**filter_kwargs(snapatac2.pp.import_data, kwargs),
**filter_kwargs(snapatac2.pp.import_fragments, kwargs),
)
snapatac2.metrics.tsse(adata, **filter_kwargs(snapatac2.metrics.tsse, kwargs))
qc["Targeting"]["TSS_enrichment_score"] = adata.uns['library_tsse']
Expand Down

0 comments on commit 16913cb

Please sign in to comment.