Skip to content

Commit

Permalink
various updates
Browse files Browse the repository at this point in the history
  • Loading branch information
aadamk committed Sep 17, 2024
1 parent 903cc8a commit 4de25fe
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 19 deletions.
9 changes: 0 additions & 9 deletions analyses/dge_pathway_analysis/01-dge_analysis_deseq.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,6 @@ expr_mat <- expr_mat %>%
dplyr::select(mm_clusters$sample_id)
stopifnot(identical(colnames(expr_mat), mm_clusters$sample_id))

# use DCGA to filter out low count, low variance features
expr_mat <- DGCA::filterGenes(
inputMat = expr_mat,
filterTypes = c("central", "dispersion"),
filterDispersionType = "cv",
filterDispersionPercentile = 0.2,
sequential = TRUE
)

# use a for-loop
clusters <- unique(mm_clusters$mm_cluster)
output_df <- data.frame()
Expand Down
2 changes: 1 addition & 1 deletion analyses/dge_pathway_analysis/run_analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ script_directory="$(perl -e 'use File::Basename;
cd "$script_directory" || exit

# Define directory and input files
data_dir="../../data"
data_dir="/sbgenomics/project-files/opc-v15"
gtf_file="${data_dir}/v15/gencode.v39.primary_assembly.annotation.gtf.gz"
count_file="${data_dir}/v15/gene-counts-rsem-expected_count-collapsed.rds"
cluster_file="../intNMF/results/intnmf_clusters.tsv"
Expand Down
15 changes: 10 additions & 5 deletions analyses/methylation_analysis/01-limma_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,15 @@ output_dir <- opt$output_dir
dir.create(output_dir, showWarnings = F, recursive = T)

# read m-values
methyl_m_values_full <- readRDS(opt$methyl_mat)
methyl_m_values_full <- readRDS(opt$methyl_mat) %>%
dplyr::mutate('Probe_ID' = make.names(Probe_ID)) %>%
unique(by = 'Probe_ID') %>%
tibble::column_to_rownames('Probe_ID')

# read annotation
methyl_annot_full <- data.table::fread(opt$methyl_annot)
methyl_annot_full <- data.table::fread(opt$methyl_annot) %>%
dplyr::mutate('Probe_ID' = make.names(Probe_ID)) %>%
unique(by = 'Probe_ID')

# create generalized function for gene feature analysis
run_analysis <- function(methyl_m_values_full,
Expand All @@ -33,7 +38,7 @@ run_analysis <- function(methyl_m_values_full,
prefix) {
# filter only to gene feature using probe annotation file
methyl_annot <- methyl_annot_full %>%
filter(Gene_Feature %in% gene_feature_filter) %>%
dplyr::filter(Gene_Feature %in% gene_feature_filter) %>%
unique()
methyl_m_values <- methyl_m_values_full %>%
filter(rownames(methyl_m_values_full) %in% methyl_annot$Probe_ID)
Expand Down Expand Up @@ -78,8 +83,8 @@ run_analysis <- function(methyl_m_values_full,
# differentially expressed probes
toptable_output <- limma::topTable(fit.reduced, coef = 2, n = Inf)
sigCpGs <- toptable_output %>%
filter(adj.P.Val < 0.05) %>%
mutate(cluster = clusters[i]) %>%
dplyr::filter(adj.P.Val < 0.05) %>%
dplyr::mutate(cluster = clusters[i]) %>%
rownames_to_column("probes")
print(head(sigCpGs))

Expand Down
10 changes: 6 additions & 4 deletions analyses/methylation_analysis/run_analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ script_directory="$(perl -e 'use File::Basename;
print dirname(abs_path(@ARGV[0]));' -- "$0")"
cd "$script_directory" || exit

echo "R_MAX_VSIZE=100Gb" > .Renviron

# Define directory and input files
data_subset="../data_preparation/data"
count_file="${data_subset}/gene-counts-rsem-expected_count-collapsed.rds"
methyl_m_file="${data_subset}/methyl-m-values.rds"
data_dir="/sbgenomics/project-files/opc-v15"
count_file="${data_dir}/v15/gene-counts-rsem-expected_count-collapsed.rds"
methyl_m_file="${data_dir}/v15/methyl-m-values.rds"
cluster_file="../intNMF/results/intnmf_clusters.tsv"
methyl_annot_file="../../data/v15/infinium.gencode.v39.probe.annotations.tsv.gz"
methyl_annot_file="${data_dir}/v15/infinium.gencode.v39.probe.annotations.tsv.gz"

# 1) limma analysis to identify differentially expressed CpG sites
Rscript --vanilla 01-limma_analysis.R \
Expand Down

0 comments on commit 4de25fe

Please sign in to comment.