Skip to content

Commit

Permalink
optimize loss computation for scperturb (#42)
Browse files Browse the repository at this point in the history
* optimize loss computation for scperturb
  • Loading branch information
safiyecelik authored May 13, 2024
1 parent c2e965e commit 356610f
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions proxbias/scPerturb_processing_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,14 @@ def _compute_chromosomal_loss(

sorted_genes_on_chr = {}
start_block_of_chr = {}
end_block_of_chr = {}
for c in set(map(lambda chr_arm: chr_arm[0], pert_gene_chr_arm.values())):
sorted_genes_on_chr[c] = list(avar[avar.chromosome == c].sort_values("start").index)
start_block_of_chr[c] = anndat.uns["cnv"]["chr_pos"][c]
end_block_of_chr[c] = start_block_of_chr[c] + len(sorted_genes_on_chr[c]) // blocksize

ko_gene_ixs_dict = {ko_gene: anndat.obs.gene == ko_gene for ko_gene in pert_genes}
ko_gene_sum_dict = {ko_gene: sum(ixs) for ko_gene, ixs in ko_gene_ixs_dict.items()}

loss_5p_cells: List[List[str]] = [[]] * len(loss)
loss_5p_ko_cell_count = np.empty(len(loss))
Expand All @@ -78,6 +83,7 @@ def _compute_chromosomal_loss(
loss_3p_ko_cell_frac = np.empty(len(loss))
i5p = 0
i3p = 0

for aff_gene in tqdm(pert_genes_w_chr_info):
aff_chr = pert_gene_chr_arm[aff_gene][0]
# get all genes in the same chromosome with the KO gene, sorted
Expand All @@ -86,12 +92,10 @@ def _compute_chromosomal_loss(
aff_gene_ordpos = sorted_genes.index(aff_gene)
# get block position of the gene in the chromosome
aff_gene_blocknum_ = aff_gene_ordpos // blocksize
# get total number of blocks in the chromosome
total_chr_blocks = len(sorted_genes) // blocksize
# get start block of the chromosome gene is in
aff_chr_startblocknum = start_block_of_chr[aff_chr]
# get end block of the chromosome gene is in
aff_chr_endblocknum = aff_chr_startblocknum + total_chr_blocks
aff_chr_endblocknum = end_block_of_chr[aff_chr]
# get block position of the gene in the genome
aff_gene_blocknum = aff_chr_startblocknum + aff_gene_blocknum_

Expand All @@ -104,17 +108,17 @@ def _compute_chromosomal_loss(
for t, blocks in {"5p": blocks_5p, "3p": blocks_3p}.items():
low_frac = np.sum(cnvarr[:, blocks], axis=1) / len(blocks)
for ko_gene in pert_genes:
ko_gene_ixs = anndat.obs.gene == ko_gene
ko_gene_ixs = ko_gene_ixs_dict[ko_gene]
cells = list(anndat.obs.index[(low_frac >= frac_cutoff) & ko_gene_ixs])
if t == "5p":
loss_5p_cells[i5p] = cells
loss_5p_ko_cell_count[i5p] = len(cells)
loss_5p_ko_cell_frac[i5p] = len(cells) / sum(ko_gene_ixs)
loss_5p_ko_cell_frac[i5p] = len(cells) / ko_gene_sum_dict[ko_gene]
i5p += 1
elif t == "3p":
loss_3p_cells[i3p] = cells
loss_3p_ko_cell_count[i3p] = len(cells)
loss_3p_ko_cell_frac[i3p] = len(cells) / sum(ko_gene_ixs)
loss_3p_ko_cell_frac[i3p] = len(cells) / ko_gene_sum_dict[ko_gene]
i3p += 1

loss["loss5p_cells"] = loss_5p_cells
Expand Down

0 comments on commit 356610f

Please sign in to comment.