Skip to content

Commit

Permalink
track cells filtered by each constraint
Browse files Browse the repository at this point in the history
  • Loading branch information
mtvector committed Nov 18, 2024
1 parent 13ba24a commit 36a216d
Showing 1 changed file with 25 additions and 17 deletions.
42 changes: 25 additions & 17 deletions src/sciduck/basic_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,28 +122,29 @@ def add_group_level_constraint(adata: AnnData,
constraint_key = f'constraint_{len(adata.uns["qc_constraints"][column]) + 1}'
adata.uns['qc_constraints'][column][constraint_key] = constraint

def apply_constraints(adata: AnnData,
inplace: bool = False
) -> AnnData:
def apply_constraints(
adata: AnnData,
inplace: bool = False
) -> tuple[AnnData, dict]:
"""
Applies all quality control (QC) constraints stored in 'adata.uns["qc_constraints"]' to 'adata.obs' and filtered indices in 'adata.uns["qc_filtered"]'.
Applies all quality control (QC) constraints stored in 'adata.uns["qc_constraints"]' to 'adata.obs'.
This function handles both individual cell-level constraints (range, exclude) and group-level constraints
(using a 'groupby' column). The resulting 'keeper_cells' column in 'adata.obs' indicates which cells passed the constraints.
Handles both individual cell-level constraints (range, exclude) and group-level constraints
(using a 'groupby' column). Adds a 'keeper_cells' column to 'adata.obs' indicating which cells passed
the constraints and returns a dictionary with filtered cell indices for each constraint.
Args:
adata: The AnnData object where constraints are applied.
inplace: If True, only the cells passing the constraints will be retained in 'adata'.
If False (default), the 'keeper_cells' column will be added to 'adata.obs'
to indicate which cells passed the filtering.
adata (AnnData): The AnnData object where constraints are applied.
inplace (bool): If True, only the cells passing the constraints will be retained in 'adata'.
If False (default), the 'keeper_cells' column will be added to 'adata.obs'
to indicate which cells passed the filtering.
Returns:
AnnData: The filtered AnnData object with the 'keeper_cells' column added to 'adata.obs'. If 'inplace=True',
the returned object will only contain the filtered cells.
tuple[AnnData, dict]: The filtered AnnData object and a dictionary with filtered cell indices for each constraint.
"""
if isinstance(adata, AnnData):
keeper_cells = pd.Series([True] * adata.n_obs, index=adata.obs.index)
adata.uns['qc_filtered'] = {}
filtered_indices = {}
agg_funcs = {'sum': np.sum, 'mean': np.mean, 'std': np.std, 'median': np.median}

for col, constraints_dict in adata.uns['qc_constraints'].items():
Expand All @@ -154,10 +155,10 @@ def apply_constraints(adata: AnnData,

column_data = adata.obs[col]
col_keeper = pd.Series([True] * adata.n_obs, index=adata.obs.index)
filtered_indices[col] = {}

# Iterate over the dictionary of constraints
for constraint_key, constraints in constraints_dict.items():
adata.uns['qc_filtered'][col] = {}
gt = pd.Series([True] * adata.n_obs, index=adata.obs.index)
lt = pd.Series([True] * adata.n_obs, index=adata.obs.index)
exclude = pd.Series([True] * adata.n_obs, index=adata.obs.index)
Expand All @@ -184,6 +185,7 @@ def apply_constraints(adata: AnnData,
groups_to_keep = group_keeper[group_keeper].index
cur_sub = adata.obs[groupby].isin(groups_to_keep)
col_keeper &= cur_sub
filtered_indices[col][constraint_key] = list(adata.obs.index[~cur_sub])
else:
if constraints['gt'] is not None:
gt = column_data >= constraints['gt']
Expand All @@ -200,15 +202,21 @@ def apply_constraints(adata: AnnData,
cur_sub = (gt & lt & exclude)
cur_sub |= ~subset_in
col_keeper &= cur_sub
filtered_indices[col][constraint_key] = list(adata.obs.index[~cur_sub])
else:
cur_sub = (gt & lt & exclude)
col_keeper &= cur_sub
adata.uns['qc_filtered'][col][constraint_key] = list(adata.obs.index[~cur_sub])
keeper_cells &= col_keeper
filtered_indices[col][constraint_key] = list(adata.obs.index[~cur_sub])

# Track cells failing this specific constraint
filtered_indices[col][constraint_key] = list(adata.obs.index[~col_keeper])

keeper_cells &= col_keeper

adata.obs['keeper_cells'] = keeper_cells

if inplace:
adata._inplace_subset_obs(adata.obs['keeper_cells'])

return adata
return adata, filtered_indices

0 comments on commit 36a216d

Please sign in to comment.