-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathprepare_data_dong.R
64 lines (52 loc) · 1.5 KB
/
prepare_data_dong.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# @DEPI data_raw/GSE137804
# @DEPO tumor_data_dong.rds
library(Seurat)
library(tidyverse)
library(fs)
# Sample sheet describing the input files; lines starting with "#" are
# treated as comments and skipped.
files_dong <- read_csv(file = "metadata/samples_dong.csv", comment = "#")

# Per-cell annotation table. The `cellname` column is split at "_" into a
# `sample` part and a `cell` part so it can later be joined on both keys.
metadata_dong <- separate(
  read_csv("data_raw/GSE137804/GSE137804_tumor_dataset_annotation.csv.gz"),
  cellname,
  into = c("sample", "cell"),
  sep = "_"
)
# Build one Seurat object per high-risk sample, restricted to tumor cells.
#
# pmap() calls the function once per row of the filtered sample sheet and
# passes the columns `file`, `tumor_id`, and `bad_header` by name; any other
# columns are absorbed by `...`.
#
# Result: a list of Seurat objects, one per row with `high_risk` set.
data_dong <- pmap(
  files_dong %>% filter(high_risk),
  function(file, tumor_id, bad_header, ...) {
    # Location of this sample's count table (built once, used for every read).
    counts_path <- path_join(c("data_raw", "GSE137804", file))

    if (bad_header) {
      # Read only the header to get the column names, put "Symbol" in front of
      # them, then re-read the file without its first line using those names.
      # NOTE(review): presumably the header of these files lacks a name for
      # the leading gene-symbol column -- confirm against the raw data.
      header_names <- c(
        "Symbol",
        colnames(read_tsv(counts_path, n_max = 0))
      )
      counts <- read_tsv(counts_path, skip = 1, col_names = header_names)
    } else {
      # Well-formed files carry both Gene_ID and Symbol as character columns;
      # all remaining columns are parsed as integers. Only Symbol is kept as
      # the gene identifier.
      counts <-
        read_tsv(
          counts_path,
          col_types = cols(
            Gene_ID = "c",
            Symbol = "c",
            .default = col_integer()
          )
        ) %>%
        select(!Gene_ID)
    }

    # Row names must be unique, so duplicated symbols are disambiguated with
    # make.unique() before the table is converted to a gene x cell matrix.
    seurat_obj <-
      counts %>%
      mutate(Symbol = make.unique(Symbol)) %>%
      column_to_rownames("Symbol") %>%
      as.matrix() %>%
      CreateSeuratObject(project = tumor_id)

    # Attach the published per-cell annotation to the object's metadata.
    # The metadata table is read from the object, joined on (cell, sample),
    # and written back; without the write-back the `celltype` column used
    # below would not exist on the object.
    seurat_obj@meta.data <-
      seurat_obj@meta.data %>%
      as_tibble(rownames = "cell") %>%
      rename(sample = orig.ident) %>%
      left_join(metadata_dong, by = c("cell", "sample")) %>%
      column_to_rownames("cell")

    # Keep only the cells annotated as tumor.
    seurat_obj %>%
      subset(subset = celltype == "tumor")
  }
)
# Merge all per-sample objects into one (first object as `x`, the remaining
# ones as a list in `y`) and store the result as an RDS file.
saveRDS(
  merge(x = data_dong[[1]], y = data_dong[-1]),
  file = "data_generated/tumor_data_dong.rds"
)