d3b-center · jharenza · Jul 10, 2024 · Jul 6, 2024 · Jul 6, 2024 · Jul 7, 2024
@@ -1513,7 +1513,7 @@
     $(this).detach().appendTo(div);
 
     // add a show code button right above
-    var showCodeText = $('<span>' + (showThis ? 'Hide' : 'Code') + '</span>');
+    var showCodeText = $('<span>' + (showThis ? 'Hide' : 'Show') + '</span>');
     var showCodeButton = $('<button type="button" class="btn btn-default btn-xs btn-secondary btn-sm code-folding-btn pull-right float-right"></button>');
     showCodeButton.append(showCodeText);
     showCodeButton
@@ -1539,7 +1539,7 @@
     //   * Change text
     //   * add a class for intermediate states styling
     div.on('hide.bs.collapse', function () {
-      showCodeText.text('Code');
+      showCodeText.text('Show');
       showCodeButton.addClass('btn-collapsing');
     });
     div.on('hidden.bs.collapse', function () {
@@ -1793,9 +1793,9 @@ <h3>Read in data</h3>
 <pre class="r"><code>cnvkit_file &lt;- file.path(&quot;..&quot;, &quot;..&quot;, &quot;data&quot;, &quot;cnv-cnvkit.seg.gz&quot;)
 cnvkit_df &lt;- readr::read_tsv(cnvkit_file)</code></pre>
 <!-- rnb-source-end -->
-<!-- rnb-message-begin eyJkYXRhIjoiUm93czogNTEwNjM5OCBDb2x1bW5zOiA3XG7ilIDilIAgQ29sdW1uIHNwZWNpZmljYXRpb24g4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSAXG5EZWxpbWl0ZXI6IFwiXFx0XCJcbmNociAoMik6IElELCBjaHJvbVxuZGJsICg1KTogbG9jLnN0YXJ0LCBsb2MuZW5kLCBudW0ubWFyaywgc2VnLm1lYW4sIGNvcHkubnVtXG5cbuKEuSBVc2UgYHNwZWMoKWAgdG8gcmV0cmlldmUgdGhlIGZ1bGwgY29sdW1uIHNwZWNpZmljYXRpb24gZm9yIHRoaXMgZGF0YS5cbuKEuSBTcGVjaWZ5IHRoZSBjb2x1bW4gdHlwZXMgb3Igc2V0IGBzaG93X2NvbF90eXBlcyA9IEZBTFNFYCB0byBxdWlldCB0aGlzIG1lc3NhZ2UuXG4ifQ== -->
+<!-- rnb-message-begin eyJkYXRhIjoiUm93czogNTEwNjM5OCBDb2x1bW5zOiA3XG7ilIDilIAgQ29sdW1uIHNwZWNpZmljYXRpb24g4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSA4pSAXG5EZWxpbWl0ZXI6IFwiXFx0XCJcbmNociAoMik6IElELCBjaHJvbVxuZGJsICg1KTogbG9jLnN0YXJ0LCBsb2MuZW5kLCBudW0ubWFyaywgc2VnLm1lYW4sIGNvcHkubnVtXG5cbuKEuSBVc2UgYHNwZWMoKWAgdG8gcmV0cmlldmUgdGhlIGZ1bGwgY29sdW1uIHNwZWNpZmljYXRpb24gZm9yIHRoaXMgZGF0YS5cbuKEuSBTcGVjaWZ5IHRoZSBjb2x1bW4gdHlwZXMgb3Igc2V0IGBzaG93X2NvbF90eXBlcyA9IEZBTFNFYCB0byBxdWlldCB0aGlzIG1lc3NhZ2UuXG4ifQ== -->
 <pre><code>Rows: 5106398 Columns: 7
-── Column specification ─────────────────────────────────────────────────────────────────────────
+── Column specification ───────────────────────────────────────────────────────────────────────────────
 Delimiter: &quot;\t&quot;
 chr (2): ID, chrom
 dbl (5): loc.start, loc.end, num.mark, seg.mean, copy.num
@@ -1811,12 +1811,12 @@ <h3>Read in data</h3>
 <pre class="r"><code>histologies_file &lt;- file.path(&quot;..&quot;, &quot;..&quot;, &quot;data&quot;, &quot;histologies-base.tsv&quot;)
 histologies_df &lt;- readr::read_tsv(histologies_file, guess_max = 100000)</code></pre>
 <!-- rnb-source-end -->
-<!-- rnb-message-begin eyJkYXRhIjoiUm93czogNDc1MDcgQ29sdW1uczogNTZcbuKUgOKUgCBDb2x1bW4gc3BlY2lmaWNhdGlvbiDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIBcbkRlbGltaXRlcjogXCJcXHRcIlxuY2hyICgzNyk6IEtpZHNfRmlyc3RfUGFydGljaXBhbnRfSUQsIEtpZHNfRmlyc3RfQmlvc3BlY2ltZW5fSUQsIHNhbXBsZV9pZCwgYS4uLlxuZGJsICgxNyk6IGNlbGxfbGluZV9wYXNzYWdlLCBPU19kYXlzLCBFRlNfZGF5cywgYWdlX2F0X2RpYWdub3Npc19kYXlzLCBhZ2VfYS4uLlxubGdsICAoMik6IG1vbGVjdWxhcl9zdWJ0eXBlLCBpbnRlZ3JhdGVkX2RpYWdub3Npc1xuXG7ihLkgVXNlIGBzcGVjKClgIHRvIHJldHJpZXZlIHRoZSBmdWxsIGNvbHVtbiBzcGVjaWZpY2F0aW9uIGZvciB0aGlzIGRhdGEuXG7ihLkgU3BlY2lmeSB0aGUgY29sdW1uIHR5cGVzIG9yIHNldCBgc2hvd19jb2xfdHlwZXMgPSBGQUxTRWAgdG8gcXVpZXQgdGhpcyBtZXNzYWdlLlxuIn0= -->
-<pre><code>Rows: 47507 Columns: 56
-── Column specification ─────────────────────────────────────────────────────────────────────────
+<!-- rnb-message-begin eyJkYXRhIjoiUm93czogNDc4OTUgQ29sdW1uczogNjRcbuKUgOKUgCBDb2x1bW4gc3BlY2lmaWNhdGlvbiDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIDilIBcbkRlbGltaXRlcjogXCJcXHRcIlxuY2hyICg0MSk6IEtpZHNfRmlyc3RfUGFydGljaXBhbnRfSUQsIEtpZHNfRmlyc3RfQmlvc3BlY2ltZW5fSUQsIHNhbXBsZV9pZCwgYS4uLlxuZGJsICgyMSk6IGNlbGxfbGluZV9wYXNzYWdlLCBPU19kYXlzLCBFRlNfZGF5cywgYWdlX2F0X2RpYWdub3Npc19kYXlzLCBhZ2VfYS4uLlxubGdsICAoMik6IG1vbGVjdWxhcl9zdWJ0eXBlLCBpbnRlZ3JhdGVkX2RpYWdub3Npc1xuXG7ihLkgVXNlIGBzcGVjKClgIHRvIHJldHJpZXZlIHRoZSBmdWxsIGNvbHVtbiBzcGVjaWZpY2F0aW9uIGZvciB0aGlzIGRhdGEuXG7ihLkgU3BlY2lmeSB0aGUgY29sdW1uIHR5cGVzIG9yIHNldCBgc2hvd19jb2xfdHlwZXMgPSBGQUxTRWAgdG8gcXVpZXQgdGhpcyBtZXNzYWdlLlxuIn0= -->
+<pre><code>Rows: 47895 Columns: 64
+── Column specification ───────────────────────────────────────────────────────────────────────────────
 Delimiter: &quot;\t&quot;
-chr (37): Kids_First_Participant_ID, Kids_First_Biospecimen_ID, sample_id, a...
-dbl (17): cell_line_passage, OS_days, EFS_days, age_at_diagnosis_days, age_a...
+chr (41): Kids_First_Participant_ID, Kids_First_Biospecimen_ID, sample_id, a...
+dbl (21): cell_line_passage, OS_days, EFS_days, age_at_diagnosis_days, age_a...
 lgl  (2): molecular_subtype, integrated_diagnosis
 
 ℹ Use `spec()` to retrieve the full column specification for this data.

@@ -28,15 +28,27 @@ option_list <- list(
   optparse::make_option(
     c("--cnvkit_auto"),
     type = "character",
-    default = "results/cnvkit_annotated_cn_wxs_plus_freec_tumor_autosomes.tsv.gz",
+    default = NULL,
     help = "annotated cnvkit cnv calls on autosomes for WXS samples"
   ),
   optparse::make_option(
     c("--cnvkit_x_and_y"),
     type = "character",
-    default = "results/cnvkit_annotated_cn_wxs_plus_freec_tumor_x_and_y.tsv.gz",
+    default = NULL,
     help = "annotated cnvkit cnv calls on x and y for WXS samples"
   ),
+  optparse::make_option(
+    c("--freec_auto"),
+    type = "character",
+    default = NULL,
+    help = "annotated controlfreec cnv calls on autosomes for WXS samples"
+  ),
+  optparse::make_option(
+    c("--freec_x_and_y"),
+    type = "character",
+    default = NULL,
+    help = "annotated controlfreec cnv calls on x and y for WXS samples"
+  ),
   optparse::make_option(
     c("--consensus_auto"),
     type = "character",
@@ -66,41 +78,73 @@ option_list <- list(
     type = "character",
     default = "results",
     help = "path to save the final merged files"
-  )
+  ),
+  optparse::make_option(c("--cnvkitWXS"), 
+              type = "logical", action = "store_true", default = TRUE,
+              help = "flag used to indicate using cnvkit (default) or freec for WXS")
 )
 
 # Read the arguments passed
 opt_parser <- optparse::OptionParser(option_list = option_list)
 opt <- optparse::parse_args(opt_parser)
 
 # Read in the files 
-cnvkit_auto <- data.table::fread(opt$cnvkit_auto)
-cnvkit_x_and_y <- data.table::fread(opt$cnvkit_x_and_y)
 consensus_auto <- data.table::fread(opt$consensus_auto)
 consensus_x_and_y <- data.table::fread(opt$consensus_x_and_y)
 cnv_tumor_auto <- data.table::fread(opt$cnv_tumor_auto)
 cnv_tumor_x_and_y <- data.table::fread(opt$cnv_tumor_x_and_y)
-
-# merge WGS (consensus), WXS (cnvkit), tumor only (freec) for autosomes and x_and_y respectively
-merged_auto <- rbind(cnvkit_auto, consensus_auto, cnv_tumor_auto)
-merged_x_and_y <- rbind(cnvkit_x_and_y, consensus_x_and_y, cnv_tumor_x_and_y)
-
-# write out the files
-readr::write_tsv(merged_auto, 
-          file.path(opt$outdir, "consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only_autosomes.tsv.gz"))
-
-readr::write_tsv(merged_x_and_y, 
-          file.path(opt$outdir, "consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only_x_and_y.tsv.gz"))
-
-# Merge autosomes with X and Y and output a combined file 
-# For the combined file, we do not need germline_sex_estimate from x_and_y
-merged_x_and_y <- merged_x_and_y %>% 
-  dplyr::select(-germline_sex_estimate)
-combined <- rbind(merged_auto, merged_x_and_y)
-
-readr::write_tsv(combined, 
-          file.path(opt$outdir, "consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only.tsv.gz"))
-
+cnvkitWXS <- opt$cnvkitWXS
+
+
+if (cnvkitWXS){
+  # read in cnvkit files
+  cnvkit_auto <- data.table::fread(opt$cnvkit_auto)
+  cnvkit_x_and_y <- data.table::fread(opt$cnvkit_x_and_y)
+
+  # merge WGS (consensus), WXS (cnvkit), tumor only (freec) for autosomes and x_and_y respectively
+  merged_auto <- rbind(cnvkit_auto, consensus_auto, cnv_tumor_auto)
+  merged_x_and_y <- rbind(cnvkit_x_and_y, consensus_x_and_y, cnv_tumor_x_and_y)
+  # write out the files
+  readr::write_tsv(merged_auto, 
+                   file.path(opt$outdir, "consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only_autosomes.tsv.gz"))
+
+  readr::write_tsv(merged_x_and_y, 
+                   file.path(opt$outdir, "consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only_x_and_y.tsv.gz"))
+
+  # Merge autosomes with X and Y and output a combined file 
+  # For the combined file, we do not need germline_sex_estimate from x_and_y
+  merged_x_and_y <- merged_x_and_y %>% 
+    dplyr::select(-germline_sex_estimate)
+  combined <- rbind(merged_auto, merged_x_and_y)
+
+  readr::write_tsv(combined, 
+                   file.path(opt$outdir, "consensus_wgs_plus_cnvkit_wxs_plus_freec_tumor_only.tsv.gz"))
+} else {
+  freec_auto <- data.table::fread(opt$freec_auto)
+  freec_x_and_y <- data.table::fread(opt$freec_x_and_y)
+
+  # merge WGS (consensus), WXS (cnvkit), tumor only (freec) for autosomes and x_and_y respectively
+  merged_auto <- rbind(freec_auto, consensus_auto, cnv_tumor_auto)
+  merged_x_and_y <- rbind(freec_x_and_y, consensus_x_and_y, cnv_tumor_x_and_y)
+
+  # write out the files
+  readr::write_tsv(merged_auto, 
+                   file.path(opt$outdir, "consensus_wgs_plus_freec_wxs_plus_freec_tumor_only_autosomes.tsv.gz"))
+
+  readr::write_tsv(merged_x_and_y, 
+                   file.path(opt$outdir, "consensus_wgs_plus_freec_wxs_plus_freec_tumor_only_x_and_y.tsv.gz"))
+
+  # Merge autosomes with X and Y and output a combined file 
+  # For the combined file, we do not need germline_sex_estimate from x_and_y
+  merged_x_and_y <- merged_x_and_y %>% 
+    dplyr::select(-germline_sex_estimate)
+  combined <- rbind(merged_auto, merged_x_and_y)
+
+  readr::write_tsv(combined, 
+                   file.path(opt$outdir, "consensus_wgs_plus_freec_wxs_plus_freec_tumor_only.tsv.gz"))
+
+}
+
 
 
 
@@ -34,51 +34,97 @@ else
   histologies_file=${data_dir}/histologies.tsv
 fi
 
+echo "add ploidy to consensus"
 # Prep the consensus SEG file data
 Rscript --vanilla -e "rmarkdown::render('02-add-ploidy-consensus.Rmd', clean = TRUE)"
 
-# Run annotation step for consensus file
+echo "running bedtools coverage for each sample bed"
+# Run snakemake script implementing `bedtools coverage` for each sample bed file in
+# `scratch/cytoband_status` -- these files are generated in
+# `02-add-ploidy-consensus.Rmd`
+# currently runs 10 jobs in parallel, which should be fine for most implementations
+snakemake -j 10 --snakefile run-bedtools.snakemake
+
+echo "Determine the dominant status for each chromosome arm and compare with GISTIC's arm status calls"
+Rscript --vanilla -e "rmarkdown::render('03-add-cytoband-status-consensus.Rmd', clean = TRUE)"
+
+echo "Run annotation step for consensus file"
 Rscript --vanilla 04-prepare-cn-file.R \
 --cnv_file ${results_dir}/consensus_seg_with_status.tsv \
 --gtf_file $gtf_file \
 --metadata $histologies_file \
 --filename_lead "consensus_seg_annotated_cn" \
 --seg
 
+echo "Define most focal units of recurrent CNVs"
+Rscript --vanilla -e "rmarkdown::render('05-define-most-focal-cn-units.Rmd', clean = TRUE)"
+
+echo "Define the recurrent calls"
+Rscript --vanilla -e "rmarkdown::render('06-find-recurrent-calls.Rmd', clean = TRUE)"
+
 
 # if we want to process the CNV data from the original callers
 # (e.g., CNVkit, ControlFreeC)
 if [ "$RUN_ORIGINAL" -gt "0" ]
 then
-
-# Prep the CNVkit data
-Rscript --vanilla -e "rmarkdown::render('01-add-ploidy-cnvkit.Rmd', clean = TRUE)"
-
-# Run annotation step for CNVkit WXS only
-Rscript --vanilla 04-prepare-cn-file.R \
---cnv_file ${results_dir}/cnvkit_with_status.tsv \
---gtf_file $gtf_file \
---metadata $histologies_file \
---filename_lead "cnvkit_annotated_cn" \
---seg \
---runWXSonly
-
-# Run annotation step for ControlFreeC tumor only
-Rscript --vanilla 04-prepare-cn-file.R \
---cnv_file ${data_dir}/cnv-controlfreec-tumor-only.tsv.gz \
---gtf_file $gtf_file \
---metadata $histologies_file \
---filename_lead "freec-tumor-only_annotated_cn" \
---controlfreec
-
-# Run merging for all annotated files 
-Rscript --vanilla 07-consensus-annotated-merge.R \
---cnvkit_auto ${results_dir}/cnvkit_annotated_cn_wxs_autosomes.tsv.gz \
---cnvkit_x_and_y ${results_dir}/cnvkit_annotated_cn_wxs_x_and_y.tsv.gz \
---consensus_auto ${results_dir}/consensus_seg_annotated_cn_autosomes.tsv.gz \
---consensus_x_and_y ${results_dir}/consensus_seg_annotated_cn_x_and_y.tsv.gz \
---cnv_tumor_auto ${results_dir}/freec-tumor-only_annotated_cn_autosomes.tsv.gz \
---cnv_tumor_x_and_y ${results_dir}/freec-tumor-only_annotated_cn_x_and_y.tsv.gz \
---outdir ${results_dir}
+
+  echo "prep CNVkit WXS data"
+  # Prep the CNVkit data
+  Rscript --vanilla -e "rmarkdown::render('01-add-ploidy-cnvkit.Rmd', clean = TRUE)"
+
+  echo "annotate CNVkit WXS CNVs"
+  # Run annotation step for CNVkit WXS only
+  Rscript --vanilla 04-prepare-cn-file.R \
+  --cnv_file ${results_dir}/cnvkit_with_status.tsv \
+  --gtf_file $gtf_file \
+  --metadata $histologies_file \
+  --filename_lead "cnvkit_annotated_cn" \
+  --seg \
+  --runWXSonly
+
+  echo "annotate FREEC WXS CNVs"
+  # Run annotation step for ControlFreeC
+  Rscript --vanilla 04-prepare-cn-file.R \
+  --cnv_file ${data_dir}/cnv-controlfreec.tsv.gz \
+  --gtf_file $gtf_file \
+  --metadata $histologies_file \
+  --filename_lead "controlfreec_annotated_cn" \
+  --controlfreec \
+  --runWXSonly
+
+  echo "annotate CNVkit tumor only CNVs"
+  # Run annotation step for ControlFreeC tumor only
+  Rscript --vanilla 04-prepare-cn-file.R \
+  --cnv_file ${data_dir}/cnv-controlfreec-tumor-only.tsv.gz \
+  --gtf_file $gtf_file \
+  --metadata $histologies_file \
+  --filename_lead "freec-tumor-only_annotated_cn" \
+  --controlfreec
+
+results_dir=./results
+
+  echo "merge annotated files with cnvkit WXS"
+  # Run merging for all annotated files 
+  Rscript --vanilla 07-consensus-annotated-merge.R \
+  --cnvkit_auto ${results_dir}/cnvkit_annotated_cn_wxs_autosomes.tsv.gz \
+  --cnvkit_x_and_y ${results_dir}/cnvkit_annotated_cn_wxs_x_and_y.tsv.gz \
+  --consensus_auto ${results_dir}/consensus_seg_annotated_cn_autosomes.tsv.gz \
+  --consensus_x_and_y ${results_dir}/consensus_seg_annotated_cn_x_and_y.tsv.gz \
+  --cnv_tumor_auto ${results_dir}/freec-tumor-only_annotated_cn_autosomes.tsv.gz \
+  --cnv_tumor_x_and_y ${results_dir}/freec-tumor-only_annotated_cn_x_and_y.tsv.gz \
+  --cnvkitWXS TRUE \
+  --outdir ${results_dir}
+
+  echo "merge annotated files with freec WXS"
+  # Run merging for all annotated files 
+  Rscript --vanilla 07-consensus-annotated-merge.R \
+  --freec_auto ${results_dir}/controlfreec_annotated_cn_wxs_autosomes.tsv.gz \
+  --freec_x_and_y ${results_dir}/controlfreec_annotated_cn_wxs_x_and_y.tsv.gz \
+  --consensus_auto ${results_dir}/consensus_seg_annotated_cn_autosomes.tsv.gz \
+  --consensus_x_and_y ${results_dir}/consensus_seg_annotated_cn_x_and_y.tsv.gz \
+  --cnv_tumor_auto ${results_dir}/freec-tumor-only_annotated_cn_autosomes.tsv.gz \
+  --cnv_tumor_x_and_y ${results_dir}/freec-tumor-only_annotated_cn_x_and_y.tsv.gz \
+  --cnvkitWXS FALSE \
+  --outdir ${results_dir}
 
 fi