Skip to content

Commit

Permalink
# This is a combination of 2 commits.
Browse files Browse the repository at this point in the history
# This is the 1st commit message:

remove checkpoint call from tp53 viz

remove checkpoint and convert to pdf

update copy burden analysis

add two additional notebook scripts for tp53 analysis

# This is the commit message #2:

remove old figure generation script
  • Loading branch information
gwaybio committed Oct 10, 2018
1 parent 9d42953 commit 8aaf696
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 54 deletions.
9 changes: 3 additions & 6 deletions scripts/copy_burden_figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,15 @@
# Output:
# Two figures summarizing copy burden across TCGA Pan Can samples

checkpoint::checkpoint("2017-06-01", checkpointLocation = ".")

library(ggplot2)

# Set File Names
base_file <- file.path("classifiers", "TP53")
burden_file <- file.path(base_file, "tables", "copy_burden_predictions.tsv")
snaptron_file <- file.path("scripts", "snaptron",
"junctions_with_mutations.csv")
frac_alt_plot <- file.path(base_file, "figures", "fraction_altered_plot.svg")
violin_plot <- file.path(base_file, "figures", "seg_altered_violin_plot.svg")
frac_alt_plot <- file.path(base_file, "figures", "fraction_altered_plot.pdf")
violin_plot <- file.path(base_file, "figures", "seg_altered_violin_plot.pdf")

# Load Files
copy_burden <- readr::read_tsv(burden_file)
Expand Down Expand Up @@ -84,7 +82,7 @@ plot_ready$TP53 <- factor(plot_ready$TP53, levels = plot_levels)
ggplot(plot_ready, aes(x = TP53, y = frac_altered)) +
ylab("CNV Burden (Fraction Altered)") + xlab("TP53 Status") +
labs(fill = "") + geom_violin(aes(fill = TP53), size = 0.3, alpha = 0.3,
adjust = 0.7, trim = TRUE) +
adjust = 0.7, trim = TRUE) +
geom_boxplot(aes(fill = TP53), size = 0.3, width = 0.1, outlier.size = 0.3) +
coord_flip() + geom_hline(yintercept = 0.5, linetype = "dashed",
color = "red") +
Expand All @@ -103,4 +101,3 @@ ggplot(plot_ready, aes(x = TP53, y = frac_altered)) +
guides(fill = guide_legend(reverse = TRUE, ncol = 1), color = FALSE)

ggsave(violin_plot, height = 2.25, width = 2.5)

29 changes: 0 additions & 29 deletions scripts/tp53_ddr_figures.sh

This file was deleted.

24 changes: 11 additions & 13 deletions scripts/viz/ddr_summary_figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
# Output:
# Several figures to summarize DDR findings

checkpoint::checkpoint("2017-06-01", checkpointLocation = ".")

library(dplyr)
library(pheatmap)
library(ggplot2)
Expand All @@ -27,8 +25,8 @@ heatmap_plot_file <- file.path(results_folder, "figures", "tp53_heatmap.pdf")
heat_file <- file.path(results_folder, "summary_counts.csv")
heat_df <- readr::read_csv(heat_file)

prop_matrix <- as.matrix(heat_df[, c('TP53_loss_proportion',
'TP53_proportion')])
prop_matrix <- as.matrix(heat_df[, c("TP53_loss_proportion",
"TP53_proportion")])
rownames(prop_matrix) <- heat_df$DISEASE
colnames(prop_matrix) <- c("Loss", "Mutation")

Expand Down Expand Up @@ -62,7 +60,7 @@ pheatmap(t(prop_matrix * 100), scale = "none", cluster_rows = FALSE,
width = 8, height = 2)

# 2) Coefficients contributing to the model
coef_plot_file <- file.path(results_folder, "figures", "ddr_coef_plot.svg")
coef_plot_file <- file.path(results_folder, "figures", "ddr_coef_plot.pdf")
coef_df <- results[["Coefficients"]]
coef_df <- coef_df[, -1]
coef_df <- coef_df[order(coef_df$weight, decreasing = FALSE), ]
Expand Down Expand Up @@ -122,7 +120,7 @@ p <- add_arrow_label(p = p, x = 6500, y = -0.03, label = "log10_mut",
ggsave(coef_plot_file, plot = p, height = 2.5, width = 2.25)

# 3) Plot distributions of predictions according to variant classification
var_plot_file <- file.path(results_folder, "figures", "variant_fill_map.svg")
var_plot_file <- file.path(results_folder, "figures", "variant_fill_map.pdf")
mut_df <- readr::read_tsv(file.path(results_folder, "tables",
"mutation_classification_scores.tsv"))

Expand Down Expand Up @@ -197,7 +195,7 @@ ggplot(final_df, aes(Weight, ..count.., fill = Class)) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 1)) +
scale_y_continuous(expand = c(0, 0)) + base_theme +
theme(legend.position = c(1.1, 0.65),
legend.background = element_rect(fill = alpha('white', 0)),
legend.background = element_rect(fill = alpha("white", 0)),
legend.text = element_text(size = 7),
plot.margin = unit(c(0.2, 1.5, 0, 0.1),"cm"),
axis.text.x = element_text(size = 9),
Expand All @@ -224,9 +222,9 @@ nuc_df <- mut_weight_df %>%

aa_df <- aa_df[order(aa_df$count, decreasing = TRUE),]
nuc_df <- nuc_df[order(nuc_df$count, decreasing = TRUE),]
write.table(aa_df, file = file.path(results_folder, 'tables',
'amino_acid_mutation_scores.tsv'),
sep = '\t', row.names = FALSE)
write.table(nuc_df, file = file.path(results_folder, 'tables',
'nucleotide_mutation_scores.tsv'),
sep = '\t', row.names = FALSE)
write.table(aa_df, file = file.path(results_folder, "tables",
"amino_acid_mutation_scores.tsv"),
sep = "\t", row.names = FALSE)
write.table(nuc_df, file = file.path(results_folder, "tables",
"nucleotide_mutation_scores.tsv"),
sep = "\t", row.names = FALSE)
34 changes: 28 additions & 6 deletions tp53_analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ alphas='0.1,0.13,0.15,0.18,0.2,0.3,0.4,0.6,0.7'
l1_mixing='0.1,0.125,0.15,0.2,0.25,0.3,0.35'
tp53_dir='classifiers/TP53'

# Pan Cancer TP53 classification
###############
# Step 1. Pan Cancer TP53 classification
###############
python scripts/pancancer_classifier.py \
--genes 'TP53' \
--diseases $tp53_diseases \
Expand All @@ -23,9 +25,13 @@ python scripts/pancancer_classifier.py \
--remove_hyper \
--alt_folder $tp53_dir \
--alphas $alphas \
--l1_ratios $l1_mixing
--l1_ratios $l1_mixing \
--keep_intermediate \
--shuffled

# Within Disease type TP53 classification
###############
# Step 2. Within Disease type TP53 classification
###############
python scripts/within_tissue_analysis.py \
--genes 'TP53' \
--diseases $tp53_diseases \
Expand All @@ -48,6 +54,9 @@ python scripts/map_mutation_class.py \
--scores $tp53_dir \
--genes 'TP53'

python scripts/copy_burden_merge.py \
--classifier_folder $tp53_dir

###############
# Step 4. Plot additional TP53 results
###############
Expand All @@ -57,9 +66,19 @@ Rscript --vanilla scripts/compare_within_models.R \
--within_dir $tp53_dir'/within_disease' \
--pancan_summary $tp53_dir

# Copy Burden Analysis Figures
python scripts/copy_burden_merge.py --classifier_folder $tp53_dir
Rscript --vanilla scripts/copy_burden_figures.R
# Mutation classification stratified by cancer type
jupyter nbconvert --to=html \
--FilesWriter.build_directory=scripts \
--ExecutePreprocessor.kernel_name=python3 \
--ExecutePreprocessor.timeout=100000 \
--execute scripts/tp53_phenocopy.ipynb

# Mutation classification stratified by phenocopying variant
jupyter nbconvert --to=html \
--FilesWriter.build_directory=scripts \
--ExecutePreprocessor.kernel_name=python3 \
--ExecutePreprocessor.timeout=100000 \
--execute scripts/tp53_ddr_supplementary_figures.ipynb

###############
# Step 5. SNAPTRON exon-exon junction analysis
Expand All @@ -68,3 +87,6 @@ cd scripts/snaptron
bash dna_damage_repair_tp53exon.sh
cd ../..

# Copy burden figures require the Snaptron results, so they run after Step 5
Rscript --vanilla scripts/copy_burden_figures.R

0 comments on commit 8aaf696

Please sign in to comment.