Skip to content

Commit

Permalink
templates and tutorials
Browse files Browse the repository at this point in the history
  • Loading branch information
UCDNJJ committed Oct 11, 2024
1 parent 37f2492 commit 9816d87
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/sciduck/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
__authors__ = ['Nelson Johansen', 'Matthew Schmitz', 'Dan Yuan']
__credits__ = 'Allen Institute for Brain Science'

from . import basic_qc as basic_qc
from . import basic_qc as qc
from . import plotting as pl
29 changes: 29 additions & 0 deletions src/sciduck/templates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np
import pandas as pd
from anndata import AnnData
import warnings

##
def allen_institute_basic(adata: AnnData):
    """
    Adds Allen Institute basic QC standards to the AnnData object. Does not perform the filtering.

    This function only registers range constraints by calling
    ``add_range_constraint`` once per metric; it never filters ``adata``
    itself.

    Args:
        adata: The AnnData object where constraints will be added.

    Returns:
        The ``adata`` object that was passed in.
    """
    # Imported here because this module's top-level imports do not bring
    # add_range_constraint into scope; without it every call below raises
    # NameError. The helper lives in the sibling basic_qc module.
    from .basic_qc import add_range_constraint

    ## Filter cells/nuclei on UMI and gene count thresholds, showing the default values.
    add_range_constraint(adata, column="counts", gt=2000, lt=100000)
    add_range_constraint(adata, column="genes", gt=1000, lt=13000)

    ## Filter cells/nuclei on doublet score, mitochondrial fraction and
    ## read-mapping metrics, showing the default values.
    add_range_constraint(adata, column="doublet_score", lt=0.3)
    add_range_constraint(adata, column="pct_counts_mt", lt=3.0)
    add_range_constraint(adata, column="GEX_Reads_mapped_confidently_to_genome", gt=0.0)
    add_range_constraint(adata, column="GEX_Reads_mapped_to_genome", gt=0.0)
    add_range_constraint(adata, column="GEX_Reads_with_TSO", lt=1.0)

    ## Neuron / Non-Neuron QC constraints
    add_range_constraint(
        adata,
        column="genes",
        gt=2000,
        subset="Class",
        subset_values=['Excitatory', 'Inhibitory'],
    )
    add_range_constraint(
        adata,
        column="genes",
        gt=1000,
        subset="Class",
        subset_values=['Astrocytes', 'Oligodendrocytes', 'Microglia', 'Endothelial', 'Pericytes'],
    )

    return adata
Empty file.
Empty file removed tutorials/cluster_centric_qc.ipynb
Empty file.
Empty file removed tutorials/entropy_qc.ipynb
Empty file.
45 changes: 28 additions & 17 deletions tutorials/standard_workflow.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"metadata": {},
"outputs": [],
"source": [
"adata = ad.read_h5ad(\"YOUR_RAW_DATA.h5ad\")"
"adata = ad.read_h5ad(\"/path/to/h5ad\")"
]
},
{
Expand Down Expand Up @@ -67,10 +67,11 @@
"outputs": [],
"source": [
"## Filter cells/nuclei on UMI and gene count thresholds, showing the default values.\n",
"sd.basic_qc.filter_on_counts_genes(adata, \n",
" min_counts = 2000, max_counts = 100000, \n",
" min_genes = 1000, max_genes = 13000,\n",
" inplace = True)\n",
"sd.qc.add_range_constraint(adata, column=\"counts\", gt=2000, lt=100000)\n",
"sd.qc.add_range_constraint(adata, column=\"genes\", gt=1000, lt=13000)\n",
"\n",
"##\n",
"sd.qc.apply_constraints(adata)\n",
"adata.obs.keeper_cells.value_counts()"
]
},
Expand All @@ -88,13 +89,14 @@
"outputs": [],
"source": [
"## Filter cells/nuclei on mitochondrial gene expression, showing the default values.\n",
"adata = sd.basic_qc.filter_on_precomputed_metrics(adata, \n",
" doublet_score = 0.3, \n",
" pct_counts_mt = 3.0,\n",
" GEX_Reads_mapped_confidently_to_genome = 0.0, \n",
" GEX_Reads_mapped_to_genome = 0.0, \n",
" GEX_Reads_with_TSO = 1.0, \n",
" inplace = False)\n",
"sd.qc.add_range_constraint(adata, column=\"doublet_score\", lt = 0.3)\n",
"sd.qc.add_range_constraint(adata, column=\"pct_counts_mt\", lt = 3.0)\n",
"sd.qc.add_range_constraint(adata, column=\"GEX_Reads_mapped_confidently_to_genome\", gt = 0.0)\n",
"sd.qc.add_range_constraint(adata, column=\"GEX_Reads_mapped_to_genome\", gt = 0.0)\n",
"sd.qc.add_range_constraint(adata, column=\"GEX_Reads_with_TSO\", lt = 1.0)\n",
"\n",
"##\n",
"sd.qc.apply_constraints(adata)\n",
"adata.obs.keeper_cells.value_counts()"
]
},
Expand All @@ -111,11 +113,20 @@
"metadata": {},
"outputs": [],
"source": [
"adata = sd.basic_qc.filter_utilizing_coarse_labels(adata, \n",
" coarse_label_column = \"Class\", \n",
" coarse_label_map = {'Neurons': ['Excitatory', 'Inhibitory'], \n",
" 'Non-Neurons': ['Astrocytes', 'Oligodendrocytes', 'Microglia', 'Endothelial', 'Pericytes']}, \n",
" coarse_label_gene_threshold = {'Neurons': 2000, 'Non-Neurons': 1000})\n",
"## Neuron / Non-Neuron QC constraints\n",
"sd.qc.add_range_constraint(adata, \n",
" column = \"genes\", \n",
" gt = 2000, \n",
" subset = \"Class\", \n",
" subset_values = ['Excitatory', 'Inhibitory'])\n",
"sd.qc.add_range_constraint(adata, \n",
" column = \"genes\", \n",
" gt = 1000, \n",
" subset = \"Class\", \n",
" subset_values = ['Astrocytes', 'Oligodendrocytes', 'Microglia', 'Endothelial', 'Pericytes'])\n",
"\n",
"##\n",
"sd.qc.apply_constraints(adata)\n",
"adata.obs.keeper_cells.value_counts()"
]
}
Expand Down

0 comments on commit 9816d87

Please sign in to comment.