From a47dcd852ad4caf18b3409f73dd95612428c7720 Mon Sep 17 00:00:00 2001 From: sreichl Date: Wed, 14 Feb 2024 14:35:34 +0100 Subject: [PATCH] extend gRNA and KO assignment functionality #11 --- workflow/rules/process.smk | 8 --- workflow/scripts/prepare.R | 105 ++++++++++++++++++++++--------------- 2 files changed, 64 insertions(+), 49 deletions(-) diff --git a/workflow/rules/process.smk b/workflow/rules/process.smk index 3ec907f..3fb349e 100644 --- a/workflow/rules/process.smk +++ b/workflow/rules/process.smk @@ -25,14 +25,6 @@ rule prepare: params: partition=config.get("partition"), metadata = lambda w: "" if pd.isna(annot.loc["{}".format(w.sample),'metadata']) else annot.loc["{}".format(w.sample),'metadata'], - sample = lambda w: "{}".format(w.sample), - ab_flag = config["modality_flags"]['Antibody_Capture'], - crispr_flag = config["modality_flags"]['CRISPR_Guide_Capture'], - custom_flag = config["modality_flags"]['Custom'], - crispr_umi_threshold = config["crispr_umi_threshold"], - grna_regex = config["grna_regex"], - percentage_regex = config["percentage_regex"], - metadata_eval = config["metadata_eval"], script: "../scripts/prepare.R" diff --git a/workflow/scripts/prepare.R b/workflow/scripts/prepare.R index 81fcc71..3604331 100644 --- a/workflow/scripts/prepare.R +++ b/workflow/scripts/prepare.R @@ -1,11 +1,31 @@ #### load libraries & utility function -library(Seurat) +library("Seurat") # source utility functions # source("workflow/scripts/utils.R") snakemake@source("./utils.R") +# helper function to assign each cell's gRNA and KO calls +assign_grna_KO <- function(col) { + non_zero_indices <- which(col != 0) + non_zero_guides <- names(non_zero_indices) + non_zero_kos <- unique(gsub(grna_regex,"",non_zero_guides)) + + if (length(non_zero_guides) > 0) { + gRNAcall <- paste(non_zero_guides, collapse = "_") + KOcall <- paste(non_zero_kos, collapse = "_") + } else { + # in case there are no non-zero values + gRNAcall <- NA + KOcall <- NA + } + gRNA_n <- length(non_zero_guides) + KO_n <- length(non_zero_kos) + + return(c(gRNAcall = gRNAcall, gRNA_n = gRNA_n, KOcall = KOcall, KO_n = KO_n)) +} + #### configs # inputs @@ -16,27 +36,22 @@ metadata_path <- snakemake@params[["metadata"]] result_object <- snakemake@output[["sample_object"]] # parameters -sample_name <- snakemake@params[["sample"]] +sample_name <- snakemake@wildcards[["sample"]] result_dir <- dirname(result_object) # 'flags' for modalities -ab_flag <- snakemake@params[["ab_flag"]] -crispr_flag <- snakemake@params[["crispr_flag"]] -custom_flag <- snakemake@params[["custom_flag"]] +ab_flag <- snakemake@config[["modality_flags"]][['Antibody_Capture']] +crispr_flag <- snakemake@config[["modality_flags"]][['CRISPR_Guide_Capture']] +custom_flag <- snakemake@config[["modality_flags"]][['Custom']] # gRNA assignment threshold -crispr_umi_threshold <- snakemake@params[["crispr_umi_threshold"]] +crispr_umi_threshold <- snakemake@config[["crispr_umi_threshold"]] # gRNA to gene REGEX -grna_regex <- snakemake@params[["grna_regex"]] +grna_regex <- snakemake@config[["grna_regex"]] # REGEX-based metadata extension for Seurat::PercentageFeatureSet() -percentage_regex <- snakemake@params[["percentage_regex"]] +percentage_regex <- snakemake@config[["percentage_regex"]] # eval based metadata column transformations -metadata_eval <- snakemake@params[["metadata_eval"]] +metadata_eval <- snakemake@config[["metadata_eval"]] -# make result directory if not exist -if (!dir.exists(result_dir)){ - dir.create(result_dir, recursive = TRUE) -} - #### load data & Initialize the Seurat object with the raw data if (dir.exists(file.path(sample_dir, "filtered_feature_bc_matrix"))){ print("Load 10X data") @@ -63,7 +78,6 @@ if (dir.exists(file.path(sample_dir, "filtered_feature_bc_matrix"))){ print(seurat_obj) - if(ab_flag!=''){ # create a new assay to store Antibody information seurat_obj[[ab_flag]] <- CreateAssayObject(counts = data$'Antibody Capture') @@ -102,6 +116,7 @@ for (name in names(percentage_regex)){ seurat_obj[[name]] <- PercentageFeatureSet(seurat_obj, pattern = percentage_regex[[name]]) } +# assign gRNA and KO calls if(crispr_flag!=''){ # gRNA and KO assignment grna_data <- GetAssayData(object = seurat_obj, slot = "counts", assay = crispr_flag) @@ -113,43 +128,51 @@ if(crispr_flag!=''){ # substitute '_' with '-' (R constraint: “Feature names cannot have underscores ('_'), replacing with dashes ('-')”) rownames(thresholds) <- gsub("_", "-", rownames(thresholds)) + # apply cellranger and configured thresholds for (grna in rownames(grna_data)){ + if (grna %in% rownames(thresholds)){ grna_data[grna, grna_data[grna,] < thresholds[grna,'UMI.threshold']] <- 0 - grna_data[grna, grna_data[grna,] <= crispr_umi_threshold] <- 0 } else{ grna_data[grna, ] <- 0 - sprintf("in sample %s 0 UMIs of guide RNA %s found", sample_name, grna) + sprintf("in sample %s no UMI threshold for guide RNA %s was found -> setting it to zero", sample_name, grna) } + # apply configured UMI threshold + grna_data[grna, grna_data[grna,] <= crispr_umi_threshold] <- 0 } # perfrom gRNA and KO target assignment - protospacer_calls_df <- data.frame(matrix(ncol=2,nrow=0, dimnames=list(c(),c("gRNAcall", "KOcall")))) - - for (barcode in colnames(seurat_obj)){ - - # guide RNA and target gene can be assigned - if (sum(grna_data[,barcode]>0)==1){ - protospacer_calls_df[barcode,"gRNAcall"] <- rownames(grna_data)[grna_data[,barcode]>0] - protospacer_calls_df[barcode,"KOcall"] <- gsub(grna_regex,"",protospacer_calls_df[barcode,"gRNAcall"]) - } - - # multiple guide RNAs have been detected - if (sum(grna_data[,barcode]>0)>1){ - protospacer_calls_df[barcode,"gRNAcall"] <- "Multiplet" - protospacer_calls_df[barcode,"KOcall"] <- "Multiplet" - } - - # no guide RNAs have been detected - if (sum(grna_data[,barcode]>0)==0){ - protospacer_calls_df[barcode,"gRNAcall"] <- "Negative" - protospacer_calls_df[barcode,"KOcall"] <- "Negative" - } - } + protospacer_calls_df <- as.data.frame(t(apply(grna_data, 2, assign_grna_KO))) + protospacer_calls_df$gRNA_n <- as.numeric(protospacer_calls_df$gRNA_n) + protospacer_calls_df$KO_n <- as.numeric(protospacer_calls_df$KO_n) + +# protospacer_calls_df <- data.frame(matrix(ncol=2,nrow=0, dimnames=list(c(),c("gRNAcall", "KOcall")))) + +# for (barcode in colnames(seurat_obj)){ + +# # guide RNA and target gene can be assigned +# if (sum(grna_data[,barcode]>0)==1){ +# protospacer_calls_df[barcode,"gRNAcall"] <- rownames(grna_data)[grna_data[,barcode]>0] +# protospacer_calls_df[barcode,"KOcall"] <- gsub(grna_regex,"",protospacer_calls_df[barcode,"gRNAcall"]) +# } + +# # multiple guide RNAs have been detected +# if (sum(grna_data[,barcode]>0)>1){ +# protospacer_calls_df[barcode,"gRNAcall"] <- "Multiplet" +# protospacer_calls_df[barcode,"KOcall"] <- "Multiplet" +# } + +# # no guide RNAs have been detected +# if (sum(grna_data[,barcode]>0)==0){ +# protospacer_calls_df[barcode,"gRNAcall"] <- "Negative" +# protospacer_calls_df[barcode,"KOcall"] <- "Negative" +# } +# } # add guide and KO assignment to metadata - seurat_obj[["gRNAcall"]] <- protospacer_calls_df[colnames(seurat_obj), 'gRNAcall'] - seurat_obj[["KOcall"]] <- protospacer_calls_df[colnames(seurat_obj), 'KOcall'] + for (col in colnames(protospacer_calls_df)){ + seurat_obj[[col]] <- protospacer_calls_df[colnames(seurat_obj), col] + } } # add eval based metadata columns from config