Skip to content

Commit

Permalink
extend gRNA and KO assignment functionality #11
Browse files Browse the repository at this point in the history
  • Loading branch information
sreichl committed Feb 14, 2024
1 parent f66dda3 commit a47dcd8
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 49 deletions.
8 changes: 0 additions & 8 deletions workflow/rules/process.smk
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,6 @@ rule prepare:
params:
partition=config.get("partition"),
metadata = lambda w: "" if pd.isna(annot.loc["{}".format(w.sample),'metadata']) else annot.loc["{}".format(w.sample),'metadata'],
sample = lambda w: "{}".format(w.sample),
ab_flag = config["modality_flags"]['Antibody_Capture'],
crispr_flag = config["modality_flags"]['CRISPR_Guide_Capture'],
custom_flag = config["modality_flags"]['Custom'],
crispr_umi_threshold = config["crispr_umi_threshold"],
grna_regex = config["grna_regex"],
percentage_regex = config["percentage_regex"],
metadata_eval = config["metadata_eval"],
script:
"../scripts/prepare.R"

Expand Down
105 changes: 64 additions & 41 deletions workflow/scripts/prepare.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@

#### load libraries & utility function
library(Seurat)
library("Seurat")

# source utility functions
# source("workflow/scripts/utils.R")
snakemake@source("./utils.R")

# helper function to assign each cell's gRNA and KO calls
assign_grna_KO <- function(col) {
non_zero_indices <- which(col != 0)
non_zero_guides <- names(non_zero_indices)
non_zero_kos <- unique(gsub(grna_regex,"",non_zero_guides))

if (length(non_zero_guides) > 0) {
gRNAcall <- paste(non_zero_guides, collapse = "_")
KOcall <- paste(non_zero_kos, collapse = "_")
} else {
# in case there are no non-zero values
gRNAcall <- NA
KOcall <- NA
}
gRNA_n <- length(non_zero_guides)
KO_n <- length(non_zero_kos)

return(c(gRNAcall = gRNAcall, gRNA_n = gRNA_n, KOcall = KOcall, KO_n = KO_n))
}

#### configs

# inputs
Expand All @@ -16,27 +36,22 @@ metadata_path <- snakemake@params[["metadata"]]
result_object <- snakemake@output[["sample_object"]]

# parameters
sample_name <- snakemake@params[["sample"]]
sample_name <- snakemake@wildcards[["sample"]]
result_dir <- dirname(result_object)
# 'flags' for modalities
ab_flag <- snakemake@params[["ab_flag"]]
crispr_flag <- snakemake@params[["crispr_flag"]]
custom_flag <- snakemake@params[["custom_flag"]]
ab_flag <- snakemake@config[["modality_flags"]][['Antibody_Capture']]
crispr_flag <- snakemake@config[["modality_flags"]][['CRISPR_Guide_Capture']]
custom_flag <- snakemake@config[["modality_flags"]][['Custom']]
# gRNA assignment threshold
crispr_umi_threshold <- snakemake@params[["crispr_umi_threshold"]]
crispr_umi_threshold <- snakemake@config[["crispr_umi_threshold"]]
# gRNA to gene REGEX
grna_regex <- snakemake@params[["grna_regex"]]
grna_regex <- snakemake@config[["grna_regex"]]
# REGEX-based metadata extension for Seurat::PercentageFeatureSet()
percentage_regex <- snakemake@params[["percentage_regex"]]
percentage_regex <- snakemake@config[["percentage_regex"]]
# eval based metadata column transformations
metadata_eval <- snakemake@params[["metadata_eval"]]
metadata_eval <- snakemake@config[["metadata_eval"]]


# make result directory if not exist
if (!dir.exists(result_dir)){
dir.create(result_dir, recursive = TRUE)
}

#### load data & Initialize the Seurat object with the raw data
if (dir.exists(file.path(sample_dir, "filtered_feature_bc_matrix"))){
print("Load 10X data")
Expand All @@ -63,7 +78,6 @@ if (dir.exists(file.path(sample_dir, "filtered_feature_bc_matrix"))){

print(seurat_obj)


if(ab_flag!=''){
# create a new assay to store Antibody information
seurat_obj[[ab_flag]] <- CreateAssayObject(counts = data$'Antibody Capture')
Expand Down Expand Up @@ -102,6 +116,7 @@ for (name in names(percentage_regex)){
seurat_obj[[name]] <- PercentageFeatureSet(seurat_obj, pattern = percentage_regex[[name]])
}

# assign gRNA and KO calls
if(crispr_flag!=''){
# gRNA and KO assignment
grna_data <- GetAssayData(object = seurat_obj, slot = "counts", assay = crispr_flag)
Expand All @@ -113,43 +128,51 @@ if(crispr_flag!=''){
# substitute '_' with '-' (R constraint: “Feature names cannot have underscores ('_'), replacing with dashes ('-')”)
rownames(thresholds) <- gsub("_", "-", rownames(thresholds))

# apply cellranger and configured thresholds
for (grna in rownames(grna_data)){

if (grna %in% rownames(thresholds)){
grna_data[grna, grna_data[grna,] < thresholds[grna,'UMI.threshold']] <- 0
grna_data[grna, grna_data[grna,] <= crispr_umi_threshold] <- 0
} else{
grna_data[grna, ] <- 0
sprintf("in sample %s 0 UMIs of guide RNA %s found", sample_name, grna)
sprintf("in sample %s no UMI threshold for guide RNA %s was found -> setting it to zero", sample_name, grna)
}
# apply configured UMI threshold
grna_data[grna, grna_data[grna,] <= crispr_umi_threshold] <- 0
}

# perfrom gRNA and KO target assignment
protospacer_calls_df <- data.frame(matrix(ncol=2,nrow=0, dimnames=list(c(),c("gRNAcall", "KOcall"))))

for (barcode in colnames(seurat_obj)){

# guide RNA and target gene can be assigned
if (sum(grna_data[,barcode]>0)==1){
protospacer_calls_df[barcode,"gRNAcall"] <- rownames(grna_data)[grna_data[,barcode]>0]
protospacer_calls_df[barcode,"KOcall"] <- gsub(grna_regex,"",protospacer_calls_df[barcode,"gRNAcall"])
}

# multiple guide RNAs have been detected
if (sum(grna_data[,barcode]>0)>1){
protospacer_calls_df[barcode,"gRNAcall"] <- "Multiplet"
protospacer_calls_df[barcode,"KOcall"] <- "Multiplet"
}

# no guide RNAs have been detected
if (sum(grna_data[,barcode]>0)==0){
protospacer_calls_df[barcode,"gRNAcall"] <- "Negative"
protospacer_calls_df[barcode,"KOcall"] <- "Negative"
}
}
protospacer_calls_df <- as.data.frame(t(apply(grna_data, 2, assign_grna_KO)))
protospacer_calls_df$gRNA_n <- as.numeric(protospacer_calls_df$gRNA_n)
protospacer_calls_df$KO_n <- as.numeric(protospacer_calls_df$KO_n)

# protospacer_calls_df <- data.frame(matrix(ncol=2,nrow=0, dimnames=list(c(),c("gRNAcall", "KOcall"))))

# for (barcode in colnames(seurat_obj)){

# # guide RNA and target gene can be assigned
# if (sum(grna_data[,barcode]>0)==1){
# protospacer_calls_df[barcode,"gRNAcall"] <- rownames(grna_data)[grna_data[,barcode]>0]
# protospacer_calls_df[barcode,"KOcall"] <- gsub(grna_regex,"",protospacer_calls_df[barcode,"gRNAcall"])
# }

# # multiple guide RNAs have been detected
# if (sum(grna_data[,barcode]>0)>1){
# protospacer_calls_df[barcode,"gRNAcall"] <- "Multiplet"
# protospacer_calls_df[barcode,"KOcall"] <- "Multiplet"
# }

# # no guide RNAs have been detected
# if (sum(grna_data[,barcode]>0)==0){
# protospacer_calls_df[barcode,"gRNAcall"] <- "Negative"
# protospacer_calls_df[barcode,"KOcall"] <- "Negative"
# }
# }

# add guide and KO assignment to metadata
seurat_obj[["gRNAcall"]] <- protospacer_calls_df[colnames(seurat_obj), 'gRNAcall']
seurat_obj[["KOcall"]] <- protospacer_calls_df[colnames(seurat_obj), 'KOcall']
for (col in colnames(protospacer_calls_df)){
seurat_obj[[col]] <- protospacer_calls_df[colnames(seurat_obj), col]
}
}

# add eval based metadata columns from config
Expand Down

0 comments on commit a47dcd8

Please sign in to comment.