diff --git a/analyses/molecular-subtyping-MB/06-mb-shh-umap.Rmd b/analyses/molecular-subtyping-MB/06-mb-shh-umap.Rmd new file mode 100644 index 0000000000..6ceb7fcf6c --- /dev/null +++ b/analyses/molecular-subtyping-MB/06-mb-shh-umap.Rmd @@ -0,0 +1,373 @@ +--- +title: 'Create MB SHH methylation UMAP' +output: + html_document: + toc: TRUE +toc_float: TRUE +author: Ryan Corbett +date: "2024" +--- + +Load libraries and set directory paths + +```{r} +suppressPackageStartupMessages({ + library(tidyverse) + library(umap) + library(ggplot2) + library(devtools) + library(gdata) +}) + +root_dir <- rprojroot::find_root(rprojroot::has_dir(".git")) + +data_dir <- file.path(root_dir, "data") +analysis_dir <- file.path(root_dir, "analyses", "molecular-subtyping-MB") +results_dir <- file.path(analysis_dir, "results") +input_dir <- file.path(analysis_dir, "input") +plots_dir <- file.path(analysis_dir, "plot") +``` + +Set file paths + +```{r} +hist_file <- file.path(data_dir, "histologies.tsv") +methyl_file <- file.path(data_dir, "v14", "methyl-beta-values.rds") +mb_shh_file <- file.path(results_dir, "mb_shh_subtypes_w_molecular_data.tsv") +``` + +Wrangle data. We will remove sample IDs from `mb_shh_subtypes` for which a high confidence methylation call indicates a non-MB tumor (to be further pathologically reviewed). + +```{r get methyl ids} +hist <- read_tsv(hist_file) + +mb_shh_subtypes <- read_tsv(mb_shh_file) %>% + dplyr::filter(!sample_id %in% c("7316-1884", "7316-1676", + "7316-1666", "7316-3202")) +``` + +Filter hist for mb shh methyl samples, and append to subtype df + +```{r} +hist_mb_methyl <- hist %>% + dplyr::filter(pathology_diagnosis == "Medulloblastoma", + experimental_strategy == "Methylation") %>% + dplyr::rename(Kids_First_Biospecimen_ID_methyl = Kids_First_Biospecimen_ID) + +mb_shh_subtypes <- mb_shh_subtypes %>% + left_join(hist_mb_methyl %>% + dplyr::select(match_id, Kids_First_Biospecimen_ID_methyl)) %>% + dplyr::filter(!is.na(Kids_First_Biospecimen_ID_methyl)) %>% + distinct(match_id, Kids_First_Biospecimen_ID_methyl, .keep_all = TRUE) %>% + # redefine un-subtyped samples as "unk" + dplyr::mutate(molecular_subtype = case_when( + molecular_subtype == "MB, SHH" ~ "MB, SHH unk", + TRUE ~ molecular_subtype + )) %>% + dplyr::mutate(molecular_subtype = fct_relevel(molecular_subtype, + c("MB, SHH alpha", "MB, SHH beta", + "MB, SHH gamma", "MB, SHH delta", + "MB, SHH unk"))) +``` + +Get number of samples by MB SHH subtype + +```{r} +table(mb_shh_subtypes$SHH_subtype) +``` + +Load methylation data and filter for ids in `mb_shh_subtypes` + +```{r load methyl} +methyl <- readRDS(methyl_file) + +mb_methyl <- methyl[,colnames(methyl) %in% c("Probe_ID", hist_mb_methyl$Kids_First_Biospecimen_ID_methyl)] + +mb_methyl <- mb_methyl %>% + distinct(Probe_ID, .keep_all = TRUE) %>% + column_to_rownames("Probe_ID") +``` + +Identify 20k most variable probes among MB samples + +```{r} +mb_methyl_var <- apply(mb_methyl, 1, var, na.rm = TRUE) + +mb_var_probes <- names(sort(mb_methyl_var, decreasing = TRUE)[1:20000]) +``` + +Generate UMAP results + +```{r} +set.seed(2024) + +mb_umap_results <- umap::umap(t(mb_methyl[mb_var_probes, ])) +mb_umap_plot_df <- data.frame(mb_umap_results$layout) %>% + tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>% + left_join(hist_mb_methyl) +``` + +Plot UMAP with molecular subtype and age range + +```{r} +mb_umap_plot_df %>% + ggplot(aes(x = X1, + y = X2, + color = molecular_subtype)) + + geom_point(alpha = 0.7) + + labs(color = "molecular subtype") + + theme_bw() + + xlab("UMAP1") + + ylab("UMAP2") + +ggsave(file.path(plots_dir, "umap_mb.pdf"), + width = 5.5, height = 3.5) +``` + +Identify 20k most variable probes among MB Group 3/4 samples + +```{r} +g34_samples <- hist_mb_methyl %>% + dplyr::filter(molecular_subtype %in% c("MB, Group3", "MB, Group4")) %>% + pull(Kids_First_Biospecimen_ID_methyl) + +mb_g34_methyl_var <- apply(mb_methyl[,colnames(mb_methyl) %in% g34_samples], 1, var, na.rm = TRUE) + +mb_g34_var_probes <- names(sort(mb_g34_methyl_var, decreasing = TRUE)[1:20000]) +``` + +Generate UMAP results + +```{r} +mb_g34_umap_results <- umap::umap(t(mb_methyl[mb_g34_var_probes, colnames(mb_methyl) %in% g34_samples])) +mb_g34_umap_plot_df <- data.frame(mb_g34_umap_results$layout) %>% + tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>% + left_join(hist_mb_methyl) +``` + +Plot UMAP with molecular subtype and methylation subtype + +```{r} +mb_g34_umap_plot_df %>% + dplyr::filter(grepl("MB_G34", dkfz_v12_methylation_subclass)) %>% + ggplot(aes(x = X1, + y = X2, + color = dkfz_v12_methylation_subclass, + shape = molecular_subtype)) + + geom_point(alpha = 0.7) + + labs(color = "methylation subtype", + shape = "molecular subtype") + + theme_bw() + + xlab("UMAP1") + + ylab("UMAP2") + +ggsave(file.path(plots_dir, "umap_mb_group34.pdf"), + width = 5.5, height = 3.5) +``` + +Identify 20k most variable probes among MB SHH samples + +```{r} +mb_shh_methyl_var <- apply(mb_methyl[,colnames(mb_methyl) %in% mb_shh_subtypes$Kids_First_Biospecimen_ID_methyl], 1, var, na.rm = TRUE) + +mb_shh_var_probes <- names(sort(mb_shh_methyl_var, decreasing = TRUE)[1:20000]) +``` + +Generate UMAP df + +```{r} +mb_shh_umap_results <- umap::umap(t(mb_methyl[mb_shh_var_probes, colnames(mb_methyl) %in% mb_shh_subtypes$Kids_First_Biospecimen_ID_methyl])) +mb_shh_umap_plot_df <- data.frame(mb_shh_umap_results$layout) %>% + tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>% + inner_join(mb_shh_subtypes) + +mb_shh_umap_plot_df <- mb_shh_umap_plot_df %>% + dplyr::mutate(age_range = case_when( + age_at_diagnosis_years < 5 ~ "[0,5)", + age_at_diagnosis_years < 10 ~ "[5,10)", + age_at_diagnosis_years < 15 ~ "[10,15)", + TRUE ~ ">=15" + )) %>% + dplyr::mutate(age_range = fct_relevel(age_range, + c("[0,5)", "[5,10)", + "[10,15)", ">=15"))) %>% + dplyr::mutate(consensus_CN_MYCN = case_when( + is.na(consensus_CN_MYCN) ~ "neutral", + TRUE ~ consensus_CN_MYCN + )) %>% + dplyr::mutate(consensus_CN_GLI2 = case_when( + is.na(consensus_CN_GLI2) ~ "neutral", + TRUE ~ consensus_CN_GLI2 + )) %>% + dplyr::mutate(consensus_CN_CCND2 = case_when( + is.na(consensus_CN_CCND2) ~ "neutral", + TRUE ~ consensus_CN_CCND2 + )) %>% + dplyr::mutate(consensus_CN_PTEN = case_when( + is.na(consensus_CN_PTEN) ~ "neutral", + TRUE ~ consensus_CN_PTEN + )) %>% + dplyr::mutate(classification_source = case_when( + classification_source == "Genomic/Expression" ~ "Molecular", + is.na(classification_source) ~ "Unavailable", + TRUE ~ classification_source + )) %>% + write_tsv(file.path(results_dir, "mb_shh_subtypes_w_molecular_umap_data.tsv")) +``` + +Plot UMAP with molecular subtype, classification source, and age range + +```{r} +mb_shh_umap_plot_df %>% + ggplot(aes(x = X1, + y = X2, + color = molecular_subtype, + size = age_range, + shape = classification_source)) + + geom_point(alpha = 0.7) + + labs(color = "molecular subtype", + size = "age range (years)", + shape = "classifcation source") + + theme_bw() + + xlab("UMAP1") + + ylab("UMAP2") + + # colors to match subtypes in Garcia-Lopez 2020 review + scale_color_manual(values = c("aquamarine3", "goldenrod2", + "royalblue1", "plum4", + "gray")) + + guides(color = guide_legend(order = 1), + shape = guide_legend(order = 2), + size = guide_legend(order = 3)) + +ggsave(file.path(plots_dir, "umap_mb_shh.pdf"), + width = 6.5, height = 4.5) +``` + +Plot UMAP with methylation subtype and methylation score + +```{r} +mb_shh_umap_plot_df %>% + dplyr::filter(grepl("MB", dkfz_v12_methylation_subclass_collapsed)) %>% + + ggplot(aes(x = X1, + y = X2, + color = dkfz_v12_methylation_subclass_collapsed, + alpha = dkfz_v12_methylation_subclass_score_mean)) + + geom_point(size = 3) + + labs(color = "methylation subtype", + alpha = "methylation subtype score") + + theme_bw() + + xlab("UMAP1") + + ylab("UMAP2") + + # colors to match subtypes in Garcia-Lopez 2020 review + scale_color_manual(values = c("goldenrod2", "royalblue1", + "aquamarine3", "plum4")) + +ggsave(file.path(plots_dir, "umap_mb_shh_methylation_subtype.pdf"), + width = 5.5, height = 3.5) +``` + +Plot UMAP with CN status for MYCN, GLI2, CCND2, and PTEN + +```{r} +umap_plot_cn_df <- mb_shh_umap_plot_df %>% + dplyr::select(molecular_subtype, X1, X2, + consensus_CN_MYCN, + consensus_CN_GLI2, + consensus_CN_CCND2, + consensus_CN_PTEN) %>% + dplyr::rename(MCYN = consensus_CN_MYCN, + GLI2 = consensus_CN_GLI2, + CCND2 = consensus_CN_CCND2, + PTEN = consensus_CN_PTEN) %>% + gather(key = "gene_name", value = "CN_status", + -molecular_subtype, -X1, -X2) + +umap_plot_cn_df %>% + ggplot(aes(x = X1, + y = X2, + color = molecular_subtype, + shape = CN_status)) + + geom_point(alpha = 0.7, size = 3) + + labs(color = "methylation subtype", + shape = "CN status") + + facet_wrap(~gene_name, nrow = 2) + + theme_bw() + + xlab("UMAP1") + + ylab("UMAP2") + + # colors to match subtypes in Garcia-Lopez 2020 review + scale_color_manual(values = c("aquamarine3", "goldenrod2", + "royalblue1", "plum4", + "gray")) + +ggsave(file.path(plots_dir, "umap_mb_shh_cn_status.pdf"), + width = 8, height = 5.5) +``` + +Plot UMAP with TP53 alteration status + +```{r} +mb_shh_umap_plot_df %>% + ggplot(aes(x = X1, + y = X2, + color = molecular_subtype, + shape = tp53_status)) + + geom_point(alpha = 0.7, size = 3) + + labs(color = "methylation subtype", + shape = "TP53 status") + + theme_bw() + + xlab("UMAP1") + + ylab("UMAP2") + + # colors to match subtypes in Garcia-Lopez 2020 review + scale_color_manual(values = c("aquamarine3", "goldenrod2", + "royalblue1", "plum4", + "gray")) + +ggsave(file.path(plots_dir, "umap_mb_shh_tp53_status.pdf"), + width = 5.5, height = 3.5) +``` + +Identify 20k most variable probes among MB WNT samples + +```{r} +wnt_samples <- hist_mb_methyl %>% + dplyr::filter(molecular_subtype %in% c("MB, WNT")) %>% + pull(Kids_First_Biospecimen_ID_methyl) + +mb_wnt_methyl_var <- apply(mb_methyl[,colnames(mb_methyl) %in% wnt_samples], 1, var, na.rm = TRUE) + +mb_wnt_var_probes <- names(sort(mb_wnt_methyl_var, decreasing = TRUE)[1:20000]) +``` + +Generate UMAP df + +```{r} +set.seed(2024) + +mb_wnt_umap_results <- umap::umap(t(mb_methyl[mb_wnt_var_probes, colnames(mb_methyl) %in% wnt_samples])) +mb_wnt_umap_plot_df <- data.frame(mb_wnt_umap_results$layout) %>% + tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>% + left_join(hist_mb_methyl) + +``` + +Plot UMAP with methylation subtype + +```{r} +mb_wnt_umap_plot_df %>% + ggplot(aes(x = X1, + y = X2, + color = dkfz_v12_methylation_subclass)) + + geom_point(alpha = 0.7, size = 3) + + labs(color = "methylation subtype") + + theme_bw() + + xlab("UMAP1") + + ylab("UMAP2") + +ggsave(file.path(plots_dir, "umap_mb_wnt.pdf"), + width = 5.5, height = 3.5) +``` + +```{r} +sessionInfo() +``` \ No newline at end of file diff --git a/analyses/molecular-subtyping-MB/06-mb-shh-umap.html b/analyses/molecular-subtyping-MB/06-mb-shh-umap.html new file mode 100644 index 0000000000..ee3ea6e9d2 --- /dev/null +++ b/analyses/molecular-subtyping-MB/06-mb-shh-umap.html @@ -0,0 +1,835 @@ + + + + +
+ + + + + + + + + + +Load libraries and set directory paths
+suppressPackageStartupMessages({
+ library(tidyverse)
+ library(umap)
+ library(ggplot2)
+ library(devtools)
+ library(gdata)
+})
+
+root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
+
+data_dir <- file.path(root_dir, "data")
+analysis_dir <- file.path(root_dir, "analyses", "molecular-subtyping-MB")
+results_dir <- file.path(analysis_dir, "results")
+input_dir <- file.path(analysis_dir, "input")
+plots_dir <- file.path(analysis_dir, "plot")
+Set file paths
+hist_file <- file.path(data_dir, "histologies.tsv")
+methyl_file <- file.path(data_dir, "v14", "methyl-beta-values.rds")
+mb_shh_file <- file.path(results_dir, "mb_shh_subtypes_w_molecular_data.tsv")
+Wrangle data. We will remove sample IDs from
+mb_shh_subtypes
for which a high confidence methylation
+call indicates a non-MB tumor (to be further pathologically
+reviewed).
hist <- read_tsv(hist_file)
+## Rows: 47895 Columns: 66
+## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+## Delimiter: "\t"
+## chr (45): Kids_First_Participant_ID, Kids_First_Biospecimen_ID, sample_id, a...
+## dbl (21): cell_line_passage, OS_days, EFS_days, age_at_diagnosis_days, age_a...
+##
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+mb_shh_subtypes <- read_tsv(mb_shh_file) %>%
+ dplyr::filter(!sample_id %in% c("7316-1884", "7316-1676",
+ "7316-1666", "7316-3202"))
+## Rows: 265 Columns: 79
+## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
+## Delimiter: "\t"
+## chr (42): Kids_First_Participant_ID, Kids_First_Biospecimen_ID, match_id, mo...
+## dbl (33): age_at_diagnosis_years, dkfz_v12_methylation_subclass_score_mean, ...
+## lgl (4): SMO_csq, TP53_csq, PTEN_exp_status, TP53_exp_status
+##
+## ℹ Use `spec()` to retrieve the full column specification for this data.
+## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
+Filter hist for mb shh methyl samples, and append to subtype df
+hist_mb_methyl <- hist %>%
+ dplyr::filter(pathology_diagnosis == "Medulloblastoma",
+ experimental_strategy == "Methylation") %>%
+ dplyr::rename(Kids_First_Biospecimen_ID_methyl = Kids_First_Biospecimen_ID)
+
+mb_shh_subtypes <- mb_shh_subtypes %>%
+ left_join(hist_mb_methyl %>%
+ dplyr::select(match_id, Kids_First_Biospecimen_ID_methyl)) %>%
+ dplyr::filter(!is.na(Kids_First_Biospecimen_ID_methyl)) %>%
+ distinct(match_id, Kids_First_Biospecimen_ID_methyl, .keep_all = TRUE) %>%
+ # redefine un-subtyped samples as "unk"
+ dplyr::mutate(molecular_subtype = case_when(
+ molecular_subtype == "MB, SHH" ~ "MB, SHH unk",
+ TRUE ~ molecular_subtype
+ )) %>%
+ dplyr::mutate(molecular_subtype = fct_relevel(molecular_subtype,
+ c("MB, SHH alpha", "MB, SHH beta",
+ "MB, SHH gamma", "MB, SHH delta",
+ "MB, SHH unk")))
+## Joining with `by = join_by(match_id)`
+## Warning in left_join(., hist_mb_methyl %>% dplyr::select(match_id, Kids_First_Biospecimen_ID_methyl)): Detected an unexpected many-to-many relationship between `x` and `y`.
+## ℹ Row 31 of `x` matches multiple rows in `y`.
+## ℹ Row 226 of `y` matches multiple rows in `x`.
+## ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
+Get number of samples by MB SHH subtype
+table(mb_shh_subtypes$SHH_subtype)
+##
+## SHH_alpha SHH_beta SHH_delta SHH_gamma
+## 8 13 12 12
+Load methylation data and filter for ids in
+mb_shh_subtypes
methyl <- readRDS(methyl_file)
+
+mb_methyl <- methyl[,colnames(methyl) %in% c("Probe_ID", hist_mb_methyl$Kids_First_Biospecimen_ID_methyl)]
+
+mb_methyl <- mb_methyl %>%
+ distinct(Probe_ID, .keep_all = TRUE) %>%
+ column_to_rownames("Probe_ID")
+Identify 20k most variable probes among MB samples
+mb_methyl_var <- apply(mb_methyl, 1, var, na.rm = TRUE)
+
+mb_var_probes <- names(sort(mb_methyl_var, decreasing = TRUE)[1:20000])
+set.seed(2024)
+
+mb_umap_results <- umap::umap(t(mb_methyl[mb_var_probes, ]))
+mb_umap_plot_df <- data.frame(mb_umap_results$layout) %>%
+ tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>%
+ left_join(hist_mb_methyl)
+## Joining with `by = join_by(Kids_First_Biospecimen_ID_methyl)`
+Plot UMAP with molecular subtype and age range
+mb_umap_plot_df %>%
+ ggplot(aes(x = X1,
+ y = X2,
+ color = molecular_subtype)) +
+ geom_point(alpha = 0.7) +
+ labs(color = "molecular subtype") +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2")
+ggsave(file.path(plots_dir, "umap_mb.pdf"),
+ width = 5.5, height = 3.5)
+mb_umap_plot_df <- mb_umap_plot_df %>%
+ dplyr::mutate(methyl_mismatch = case_when(
+ sample_id %in% c("7316-1884", "7316-1676",
+ "7316-1666", "7316-3202") ~ "Yes",
+ TRUE ~ "No"
+ ))
+
+mb_umap_plot_df %>%
+ ggplot(aes(x = X1,
+ y = X2,
+ color = molecular_subtype,
+ shape = methyl_mismatch)) +
+ geom_point(alpha = 0.7) +
+ labs(color = "molecular subtype",
+ shape = "Non-MB methylation") +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2")
+ggsave(file.path(plots_dir, "umap_mb_methylation_mismatch.pdf"),
+ width = 5.5, height = 3.5)
+Identify 20k most variable probes among MB Group 3/4 samples
+g34_samples <- hist_mb_methyl %>%
+ dplyr::filter(molecular_subtype %in% c("MB, Group3", "MB, Group4")) %>%
+ pull(Kids_First_Biospecimen_ID_methyl)
+
+mb_g34_methyl_var <- apply(mb_methyl[,colnames(mb_methyl) %in% g34_samples], 1, var, na.rm = TRUE)
+
+mb_g34_var_probes <- names(sort(mb_g34_methyl_var, decreasing = TRUE)[1:20000])
+mb_g34_umap_results <- umap::umap(t(mb_methyl[mb_g34_var_probes, colnames(mb_methyl) %in% g34_samples]))
+mb_g34_umap_plot_df <- data.frame(mb_g34_umap_results$layout) %>%
+ tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>%
+ left_join(hist_mb_methyl)
+## Joining with `by = join_by(Kids_First_Biospecimen_ID_methyl)`
+Plot UMAP with molecular subtype and age range
+mb_g34_umap_plot_df %>%
+ dplyr::filter(grepl("MB_G34", dkfz_v12_methylation_subclass)) %>%
+ ggplot(aes(x = X1,
+ y = X2,
+ color = dkfz_v12_methylation_subclass,
+ shape = molecular_subtype)) +
+ geom_point(alpha = 0.7) +
+ labs(color = "methylation subtype",
+ shape = "molecular subtype") +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2")
+ggsave(file.path(plots_dir, "umap_mb_group34.pdf"),
+ width = 5.5, height = 3.5)
+Identify 10k most variable probes among MB SHH samples
+mb_shh_methyl_var <- apply(mb_methyl[,colnames(mb_methyl) %in% mb_shh_subtypes$Kids_First_Biospecimen_ID_methyl], 1, var, na.rm = TRUE)
+
+mb_shh_var_probes <- names(sort(mb_shh_methyl_var, decreasing = TRUE)[1:20000])
+Generate UMAP df
+mb_shh_umap_results <- umap::umap(t(mb_methyl[mb_shh_var_probes, colnames(mb_methyl) %in% mb_shh_subtypes$Kids_First_Biospecimen_ID_methyl]))
+mb_shh_umap_plot_df <- data.frame(mb_shh_umap_results$layout) %>%
+ tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>%
+ inner_join(mb_shh_subtypes)
+## Joining with `by = join_by(Kids_First_Biospecimen_ID_methyl)`
+mb_shh_umap_plot_df <- mb_shh_umap_plot_df %>%
+ dplyr::mutate(age_range = case_when(
+ age_at_diagnosis_years < 5 ~ "[0,5)",
+ age_at_diagnosis_years < 10 ~ "[5,10)",
+ age_at_diagnosis_years < 15 ~ "[10,15)",
+ TRUE ~ ">=15"
+ )) %>%
+ dplyr::mutate(age_range = fct_relevel(age_range,
+ c("[0,5)", "[5,10)",
+ "[10,15)", ">=15"))) %>%
+ dplyr::mutate(consensus_CN_MYCN = case_when(
+ is.na(consensus_CN_MYCN) ~ "neutral",
+ TRUE ~ consensus_CN_MYCN
+ )) %>%
+ dplyr::mutate(consensus_CN_GLI2 = case_when(
+ is.na(consensus_CN_GLI2) ~ "neutral",
+ TRUE ~ consensus_CN_GLI2
+ )) %>%
+ dplyr::mutate(consensus_CN_CCND2 = case_when(
+ is.na(consensus_CN_CCND2) ~ "neutral",
+ TRUE ~ consensus_CN_CCND2
+ )) %>%
+ dplyr::mutate(consensus_CN_PTEN = case_when(
+ is.na(consensus_CN_PTEN) ~ "neutral",
+ TRUE ~ consensus_CN_PTEN
+ )) %>%
+ dplyr::mutate(classification_source = case_when(
+ classification_source == "Genomic/Expression" ~ "Molecular",
+ is.na(classification_source) ~ "Unavailable",
+ TRUE ~ classification_source
+ )) %>%
+ write_tsv(file.path(results_dir, "mb_shh_subtypes_w_molecular_umap_data.tsv"))
+Plot UMAP with molecular subtype, classification source, and age +range
+mb_shh_umap_plot_df %>%
+ ggplot(aes(x = X1,
+ y = X2,
+ color = molecular_subtype,
+ size = age_range,
+ shape = classification_source)) +
+ geom_point(alpha = 0.7) +
+ labs(color = "molecular subtype",
+ size = "age range (years)",
+ shape = "classifcation source") +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2") +
+ # colors to match subtypes in Garcia-Lopez 2020 review
+ scale_color_manual(values = c("aquamarine3", "goldenrod2",
+ "royalblue1", "plum4",
+ "gray")) +
+ guides(color = guide_legend(order = 1),
+ shape = guide_legend(order = 2),
+ size = guide_legend(order = 3))
+## Warning: Using size for a discrete variable is not advised.
+ggsave(file.path(plots_dir, "umap_mb_shh.pdf"),
+ width = 6.5, height = 4.5)
+## Warning: Using size for a discrete variable is not advised.
+Plot UMAP with methylation subtype and methylation score
+mb_shh_umap_plot_df %>%
+ dplyr::filter(grepl("MB", dkfz_v12_methylation_subclass_collapsed)) %>%
+
+ ggplot(aes(x = X1,
+ y = X2,
+ color = dkfz_v12_methylation_subclass_collapsed,
+ alpha = dkfz_v12_methylation_subclass_score_mean)) +
+ geom_point(size = 3) +
+ labs(color = "methylation subtype",
+ alpha = "methylation subtype score") +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2") +
+ # colors to match subtypes in Garcia-Lopez 2020 review
+ scale_color_manual(values = c("goldenrod2", "royalblue1",
+ "aquamarine3", "plum4"))
+ggsave(file.path(plots_dir, "umap_mb_shh_methylation_subtype.pdf"),
+ width = 5.5, height = 3.5)
+Plot UMAP with CN status for MYCN, GLI2, CCND2, and PTEN
+umap_plot_cn_df <- mb_shh_umap_plot_df %>%
+ dplyr::select(molecular_subtype, X1, X2,
+ consensus_CN_MYCN,
+ consensus_CN_GLI2,
+ consensus_CN_CCND2,
+ consensus_CN_PTEN) %>%
+ dplyr::rename(MCYN = consensus_CN_MYCN,
+ GLI2 = consensus_CN_GLI2,
+ CCND2 = consensus_CN_CCND2,
+ PTEN = consensus_CN_PTEN) %>%
+ gather(key = "gene_name", value = "CN_status",
+ -molecular_subtype, -X1, -X2)
+
+umap_plot_cn_df %>%
+ ggplot(aes(x = X1,
+ y = X2,
+ color = molecular_subtype,
+ shape = CN_status)) +
+ geom_point(alpha = 0.7, size = 3) +
+ labs(color = "methylation subtype",
+ shape = "CN status") +
+ facet_wrap(~gene_name, nrow = 2) +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2") +
+ # colors to match subtypes in Garcia-Lopez 2020 review
+ scale_color_manual(values = c("aquamarine3", "goldenrod2",
+ "royalblue1", "plum4",
+ "gray"))
+ggsave(file.path(plots_dir, "umap_mb_shh_cn_status.pdf"),
+ width = 8, height = 5.5)
+Plot UMAP with TP53 alteration status
+mb_shh_umap_plot_df %>%
+ ggplot(aes(x = X1,
+ y = X2,
+ color = molecular_subtype,
+ shape = tp53_status)) +
+ geom_point(alpha = 0.7, size = 3) +
+ labs(color = "methylation subtype",
+ shape = "TP53 status") +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2") +
+ # colors to match subtypes in Garcia-Lopez 2020 review
+ scale_color_manual(values = c("aquamarine3", "goldenrod2",
+ "royalblue1", "plum4",
+ "gray"))
+ggsave(file.path(plots_dir, "umap_mb_shh_tp53_status.pdf"),
+ width = 5.5, height = 3.5)
+Identify 20k most variable probes among MB samples
+wnt_samples <- hist_mb_methyl %>%
+ dplyr::filter(molecular_subtype %in% c("MB, WNT")) %>%
+ pull(Kids_First_Biospecimen_ID_methyl)
+
+mb_wnt_methyl_var <- apply(mb_methyl[,colnames(mb_methyl) %in% wnt_samples], 1, var, na.rm = TRUE)
+
+mb_wnt_var_probes <- names(sort(mb_wnt_methyl_var, decreasing = TRUE)[1:20000])
+set.seed(2024)
+
+mb_wnt_umap_results <- umap::umap(t(mb_methyl[mb_wnt_var_probes, colnames(mb_methyl) %in% wnt_samples]))
+mb_wnt_umap_plot_df <- data.frame(mb_wnt_umap_results$layout) %>%
+ tibble::rownames_to_column("Kids_First_Biospecimen_ID_methyl") %>%
+ left_join(hist_mb_methyl)
+## Joining with `by = join_by(Kids_First_Biospecimen_ID_methyl)`
+Plot UMAP with molecular subtype and age range
+mb_wnt_umap_plot_df %>%
+ ggplot(aes(x = X1,
+ y = X2,
+ color = dkfz_v12_methylation_subclass)) +
+ geom_point(alpha = 0.7, size = 3) +
+ labs(color = "methylation subtype") +
+ theme_bw() +
+ xlab("UMAP1") +
+ ylab("UMAP2")
+ggsave(file.path(plots_dir, "umap_mb_wnt.pdf"),
+ width = 5.5, height = 3.5)
+
+
+
+