# classify_cell_types.R
#
# Creates several CSV files named 'cell_types_[ref]_[labels].csv', where [ref]
# indicates the celldex reference dataset and [labels] denotes whether the broad
# or fine labels from the reference dataset were used for classification. Each
# file contains the dataframe returned by SingleR::SingleR(), with added cell
# and sample information
#
# @DEPI rna_qcpassed.rds
# @DEPO cell_types_[ref]_[labels].csv
library(Seurat)
library(SingleR)
library(celldex)
library(tidyverse)
source("common_functions.R")
# Parameters --------------------------------------------------------------

# RDS file holding the QC-filtered datasets
filtered_datasets <- "data_generated/rna_qcpassed.rds"

# directory that receives the result tables
out_dir <- "data_generated"



# Load data ---------------------------------------------------------------

# `nb` is indexed like a list below: its first element is merged with all
# remaining elements, and the `counts` slot of the merged object's RNA assay
# becomes the test matrix for classification.
nb <- readRDS(filtered_datasets)
count_matrix <- slot(merge(x = nb[[1]], y = nb[-1])$RNA, "counts")

# subset cells for testing
# count_matrix <- count_matrix[, sample(colnames(count_matrix), 10)]

# Named list of celldex reference datasets; the names are later used as the
# [ref] part of the output file names.
reference_cell_types <- list(
  # general purpose (1/2)
  hpca = HumanPrimaryCellAtlasData(),

  # general purpose (2/2)
  blueprint = BlueprintEncodeData(),

  # comprehensive CD4+ subsets; only one B cell subset, no dendritic cells
  dice = DatabaseImmuneCellExpressionData(),

  # for bone marrow samples
  dmap = NovershternHematopoieticData(),

  # for PBMC
  monaco = MonacoImmuneData()
)
# Predict cell types ------------------------------------------------------

# Run SingleR() once for every combination of reference dataset and label
# column ("label.main" / "label.fine" of the reference's colData). Each element
# of the resulting list holds the SingleR() result (`table`) together with the
# reference name (`ref`) and the label column (`labels`) it was created with.
#
# expand_grid() replaces purrr::cross_df(), which is deprecated since
# purrr 1.0.0. `labels` is listed first so that `ref` varies fastest, i.e., the
# combinations are processed in the same order as before; pmap() matches the
# columns to the function arguments by name, so the column order is irrelevant.
predicted_cell_types <-
  expand_grid(
    labels = c("label.main", "label.fine"),
    ref = names(reference_cell_types)
  ) %>%
  pmap(
    function(ref, labels) {
      # info() is not defined in this file (presumably provided by
      # common_functions.R); the message uses glue-style placeholders
      info("Classifying with reference dataset '{ref}' and labels '{labels}'")

      # look up the reference once instead of twice per combination
      reference <- reference_cell_types[[ref]]

      results <- SingleR(
        test = count_matrix,
        ref = reference,
        labels = colData(reference)[, labels]
      )

      list(
        table = results,
        ref = ref,
        labels = labels
      )
    }
  )
# Save data ---------------------------------------------------------------
# Write one classification result to
# '{out_dir}/cell_types_{ref}_{labels}.csv'.
#
# @param results A list with the elements `table` (a SingleR() result),
#   `ref` (name of the reference dataset) and `labels` (name of the label
#   column), as assembled in the prediction step.
# @return The value returned by write_csv().
save_results <- function(results) {
  prediction <- results$table

  # one "score_<label>" column per column of the score matrix
  score_columns <- str_c("score_", colnames(prediction$scores))

  # row names are moved into the first column, which is named "cell"
  cell_table <- as_tibble(prediction, rownames = "cell")

  # NOTE(review): the names below are assigned purely by position, which
  # assumes the converted table consists of the cell column, the score
  # columns, and exactly these five columns in this order -- TODO confirm
  # against the installed SingleR version
  names(cell_table) <- c(
    "cell",
    score_columns,
    "first_labels",
    "tuning_scores_first",
    "tuning_scores_second",
    "labels",
    "pruned_labels"
  )

  target_file <- str_glue(
    "{out_dir}/cell_types_{results$ref}_{results$labels}.csv"
  )
  write_csv(cell_table, target_file)
}
# save every prediction result to its own CSV file
walk(predicted_cell_types, save_results)