Skip to content

Commit

Permalink
plots titles and clarification
Browse files Browse the repository at this point in the history
  • Loading branch information
ionox0 committed Aug 1, 2018
1 parent c24141a commit 2039d10
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 11 deletions.
13 changes: 6 additions & 7 deletions python_tools/workflow_tools/qc/plots_module.r
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ MY_THEME = theme(text = element_text(size=14),
plot.margin = unit(c(.1, .1, .1, 1), 'in'))

# Some title file columns will not be printed
DROP_COLS = c('Pool', 'Pool_input', 'Barcode_index', 'PatientName', 'MAccession', 'Extracted_DNA_Yield')
DROP_COLS = c('Pool', 'Pool_input', 'Barcode_index', 'PatientName', 'MAccession', 'Extracted_DNA_Yield', 'Barcode_index_1', 'Barcode_index_2')
# Levels and sort order for collapsing methods
LEVEL_C = c('TotalCoverage', 'All Unique', 'Simplex', 'Duplex')

Expand Down Expand Up @@ -189,7 +189,7 @@ plotGCwithCovAllSamples = function(data) {
}


#' Plot Coverage vs %GC content, separately for each sample
#' Plot Coverage vs %GC content, separately for each sample
#' (for each collapsing method)
#' @param data data.frame with the usual columns
plotGCwithCovEachSample = function(data, sort_order) {
Expand Down Expand Up @@ -236,7 +236,7 @@ plotInsertSizeDistribution = function(insertSizes) {

g = ggplot(insertSizes, aes(x=FragmentSize, y=total_frequency_fraction, colour=sample_and_peak)) +
stat_smooth(size=.5, n=200, span=0.1, se=FALSE, method='loess', level=.01) +
ggtitle('Insert Size Distribution') +
ggtitle('Insert Size Distribution (from Unfiltered Pool A reads)') +
xlab('Insert Size') +
ylab('Frequency (%)') +
labs(colour = "Sample, Peak Insert Size") +
Expand All @@ -257,7 +257,7 @@ plotCovDistPerIntervalLine = function(data) {

g = ggplot(data) +
geom_line(aes(x=coverage_scaled, colour=Sample), stat='density') +
ggtitle('Distribution of Coverages per Target Interval') +
ggtitle('Distribution of Coverages per Target Interval (from Total Reads, Pool A)') +
scale_y_continuous('Frequency', label=format_comma) +
scale_x_continuous('Coverage (median scaled)') +
coord_cartesian(xlim=c(0, 3)) +
Expand Down Expand Up @@ -295,8 +295,7 @@ print_title = function(title_df, coverage_df, inputs_yaml) {
fg_params=list(cex = .6),
padding=unit(c(5, 3), "mm")),
colhead = list(fg_params=list(cex = 0.5)),
rowhead = list(fg_params=list(cex = 0.5))
)
rowhead = list(fg_params=list(cex = 0.5)))

# Round to one decimal place
coverage_df$average_coverage = format(round(coverage_df$average_coverage, 1), nsmall = 1)
Expand Down Expand Up @@ -456,7 +455,7 @@ parse_sort_order = function(groups_file) {
}


# Extract actual sample names from full filenames
#' Extract actual sample names from full filenames
#' Ex: sample_names = c('test_patient_T', 'test_patient_N')
#' test_patient_T_001_aln_srt_MD_IR_FX_BR --> test_patient_T
cleanup_sample_names = function(data, sample_names) {
Expand Down
2 changes: 1 addition & 1 deletion python_tools/workflow_tools/qc/qc_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

def run_plots_module(tables_output_dir, plots_output_dir, title_file_path, inputs_yaml_path):
"""
Note: The following R script should be found in your Virtual
Note: The R script (plots_module.r) should be found on your virtual
environment's PATH (e.g. /somewhere/virtualenv/bin/plots_module.r)
after installing with `python setup.py install`
"""
Expand Down
6 changes: 3 additions & 3 deletions python_tools/workflow_tools/qc/tables_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def get_gc_table(curr_method, intervals_filename_suffix, path):

# todo - columns should be given constant labels:
newDf = pd.DataFrame({
'method': [curr_method.replace('Waltz', '')] * len(curr_table),
'method': [curr_method] * len(curr_table),
'Sample': [sample] * len(curr_table),
'interval_name': curr_table.ix[:, 3],
'coverage': curr_table.ix[:, 5],
Expand Down Expand Up @@ -280,7 +280,7 @@ def main(args):
# Std, Pool A and B
read_counts_table = get_read_counts_table(args.standard_waltz_pool_a, POOL_A_LABEL)
coverage_table = get_coverage_table(args.standard_waltz_pool_a, POOL_A_LABEL)
# GC bias comes from A Targets
# GC Bias & Coverage Distribution per Interval graphs come from Standard Bam, A Targets
gc_cov_int_table = get_gc_table(TOTAL_LABEL, WALTZ_INTERVALS_FILENAME_SUFFIX, args.standard_waltz_pool_a)

read_counts_table = pd.concat([get_read_counts_table(args.standard_waltz_pool_b, POOL_B_LABEL), read_counts_table])
Expand Down Expand Up @@ -337,7 +337,7 @@ def main(args):
gc_avg_table_all.to_csv(all_samples_coverage_filename, sep='\t', index=False)
coverage_per_interval_table.to_csv(coverage_per_interval_filename, sep='\t', index=False)

# also copy the fragment-sizes.txt file, which the plots module also uses
# Fragment Sizes graph comes from Unfiltered Bam, Pool A Targets
# todo: not clean - function-level import and ad-hoc file copy; move the import to module level
import shutil
frag_sizes_path = os.path.join(args.unfiltered_waltz_pool_a, 'fragment-sizes.txt')
Expand Down

0 comments on commit 2039d10

Please sign in to comment.