Skip to content
This repository has been archived by the owner on Apr 21, 2022. It is now read-only.

Commit

Permalink
Merge pull request #9 from a-slide/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
a-slide authored Jan 14, 2020
2 parents fb739f9 + 9ef012a commit e2573ce
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 91 deletions.
82 changes: 41 additions & 41 deletions docs/CpG_Aggregate/API_usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-13T11:46:01.608058Z",
"start_time": "2020-01-13T11:46:01.448040Z"
"end_time": "2020-01-13T22:19:46.289932Z",
"start_time": "2020-01-13T22:19:46.122741Z"
},
"init_cell": true
},
Expand All @@ -48,11 +48,11 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-10T14:47:11.977598Z",
"start_time": "2020-01-10T14:47:11.964781Z"
"end_time": "2020-01-13T21:40:40.734965Z",
"start_time": "2020-01-13T21:40:40.725428Z"
}
},
"outputs": [
Expand All @@ -67,19 +67,19 @@
"\n",
"* **nanopolish_fn** (required) [list(str)]\n",
"\n",
"Path to a nanopolish call_methylation tsv output file or a list of files or a regex matching several files\n",
"Path to a nanopolish call_methylation tsv output file or a list of files or a regex matching several files (can be gzipped)\n",
"\n",
"* **ref_fasta_fn** (required) [str]\n",
"\n",
"Reference file used for alignment in Fasta format (ideally already indexed with samtools faidx)\n",
"\n",
"* **output_bed_fn** (default: \"\") [str]\n",
"\n",
"Path to write a summary result file in BED format (At least 1 output file is required)\n",
"Path to write a summary result file in BED format (At least 1 output file is required) (can be gzipped)\n",
"\n",
"* **output_tsv_fn** (default: \"\") [str]\n",
"\n",
"Path to write an more extensive result report in TSV format (At least 1 output file is required)\n",
"Path to write a more extensive result report in TSV format (At least 1 output file is required) (can be gzipped)\n",
"\n",
"* **min_depth** (default: 10) [int]\n",
"\n",
Expand Down Expand Up @@ -130,11 +130,11 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-10T14:47:20.172308Z",
"start_time": "2020-01-10T14:47:14.807058Z"
"end_time": "2020-01-13T21:44:17.737481Z",
"start_time": "2020-01-13T21:44:12.791652Z"
},
"scrolled": false
},
Expand All @@ -146,7 +146,7 @@
"## Checking options and input files ##\n",
"## Parsing methylation_calls file ##\n",
"\tStarting to parse file Nanopolish methylation call file\n",
"\t: 100%|██████████| 51.9M/51.9M [00:05<00:00, 10.2M bytes/s]\n",
"100%|██████████| 51.9M/51.9M [00:04<00:00, 11.3M bytes/s]\n",
"\tFiltering out low coverage sites\n",
"\tSorting each chromosome by coordinates\n",
"\tParsing summary\n",
Expand All @@ -158,7 +158,7 @@
"\t\tLow Count Sites: 218,114\n",
"\t\tValid Sites Found: 239\n",
"## Processing valid sites found and write to file ##\n",
"\t: 100%|██████████| 239/239 [00:00<00:00, 3.89k sites/s]\n",
"100%|██████████| 239/239 [00:00<00:00, 2.25k sites/s]\n",
"\tResults summary\n",
"\t\tTotal Sites Writen: 239\n",
"\t\tUnmethylated sites: 162\n",
Expand All @@ -181,15 +181,15 @@
"VIII 212612 212613 CACCCCGTTGG 1 -2.910 [-7.45,1.01,-2.76,-0.81,-3.06,-2.63,-3.66,-3.11,-0.21,-2.02,-6.81,-8.47,-1.18,-7.1] \n",
"\n",
"track name=sample_1_CpG itemRgb=On\n",
"VIII\t138415\t138416\t.\t-2.355\t.\t138415\t138416\t'52,168,194'\n",
"VIII\t138429\t138430\t.\t-4.525\t.\t138429\t138430\t'33,102,171'\n",
"VIII\t212351\t212352\t.\t-2.770\t.\t212351\t212352\t'52,168,194'\n",
"VIII\t212392\t212393\t.\t-2.510\t.\t212392\t212393\t'52,168,194'\n",
"VIII\t212457\t212461\t.\t-6.080\t.\t212457\t212461\t'28,45,131'\n",
"VIII\t212530\t212531\t.\t-1.270\t.\t212530\t212531\t'230,230,230'\n",
"VIII\t212581\t212582\t.\t0.075\t.\t212581\t212582\t'230,230,230'\n",
"VIII\t212596\t212600\t.\t-4.860\t.\t212596\t212600\t'33,102,171'\n",
"VIII\t212612\t212613\t.\t-2.910\t.\t212612\t212613\t'52,168,194'\n",
"VIII\t138415\t138416\t.\t-2.355\t.\t138415\t138416\t52,168,194\n",
"VIII\t138429\t138430\t.\t-4.525\t.\t138429\t138430\t33,102,171\n",
"VIII\t212351\t212352\t.\t-2.770\t.\t212351\t212352\t52,168,194\n",
"VIII\t212392\t212393\t.\t-2.510\t.\t212392\t212393\t52,168,194\n",
"VIII\t212457\t212461\t.\t-6.080\t.\t212457\t212461\t28,45,131\n",
"VIII\t212530\t212531\t.\t-1.270\t.\t212530\t212531\t230,230,230\n",
"VIII\t212581\t212582\t.\t0.075\t.\t212581\t212582\t230,230,230\n",
"VIII\t212596\t212600\t.\t-4.860\t.\t212596\t212600\t33,102,171\n",
"VIII\t212612\t212613\t.\t-2.910\t.\t212612\t212613\t52,168,194\n",
"\n"
]
}
Expand All @@ -199,11 +199,11 @@
" nanopolish_fn=\"./data/nanopolish_sample_1.tsv\",\n",
" ref_fasta_fn=\"./data/ref.fa\",\n",
" output_bed_fn=\"./results/CpG_Aggregate_sample_1.bed\",\n",
" output_tsv_fn=\"./results/CpG_Aggregate_sample_1.tsv\",\n",
" output_tsv_fn=\"./results/CpG_Aggregate_sample_1.tsv.gz\",\n",
" sample_id=\"sample_1\",\n",
" progress=True)\n",
"\n",
"head(\"./results/CpG_Aggregate_sample_1.tsv\")\n",
"head(\"./results/CpG_Aggregate_sample_1.tsv.gz\")\n",
"head(\"./results/CpG_Aggregate_sample_1.bed\")"
]
},
Expand All @@ -216,11 +216,11 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-10T14:30:26.782758Z",
"start_time": "2020-01-10T14:29:47.302717Z"
"end_time": "2020-01-13T21:45:04.324571Z",
"start_time": "2020-01-13T21:44:22.574687Z"
},
"scrolled": false
},
Expand All @@ -232,7 +232,7 @@
"## Checking options and input files ##\n",
"## Parsing methylation_calls file ##\n",
"\tStarting to parse file Nanopolish methylation call file\n",
"\t: 100%|██████████| 209M/209M [00:17<00:00, 12.3M bytes/s] \n",
"100%|██████████| 209M/209M [00:18<00:00, 11.6M bytes/s] \n",
"\tFiltering out low coverage sites\n",
"\tSorting each chromosome by coordinates\n",
"\tParsing summary\n",
Expand All @@ -244,7 +244,7 @@
"\t\tValid Sites Found: 228,163\n",
"\t\tLow Count Sites: 23,511\n",
"## Processing valid sites found and write to file ##\n",
"\t: 100%|██████████| 228k/228k [00:21<00:00, 10.6k sites/s] \n",
"100%|██████████| 228k/228k [00:22<00:00, 10.0k sites/s] \n",
"\tResults summary\n",
"\t\tTotal Sites Writen: 228,163\n",
"\t\tUnmethylated sites: 168,018\n",
Expand All @@ -268,15 +268,15 @@
"I 557 558 CTCAGCGGTCT 1 -2.300 [-5.5,-1.85,-4.84,-2.3,-4.34,-1.14,-1.11] \n",
"\n",
"track name=sample_all_CpG itemRgb=On\n",
"I\t144\t145\t.\t-2.200\t.\t144\t145\t'52,168,194'\n",
"I\t175\t176\t.\t-1.350\t.\t175\t176\t'230,230,230'\n",
"I\t216\t217\t.\t-2.160\t.\t216\t217\t'52,168,194'\n",
"I\t325\t326\t.\t-2.660\t.\t325\t326\t'52,168,194'\n",
"I\t339\t340\t.\t-1.210\t.\t339\t340\t'230,230,230'\n",
"I\t354\t355\t.\t-1.390\t.\t354\t355\t'230,230,230'\n",
"I\t422\t433\t.\t-10.520\t.\t422\t433\t'28,45,131'\n",
"I\t542\t543\t.\t-0.780\t.\t542\t543\t'230,230,230'\n",
"I\t557\t558\t.\t-2.300\t.\t557\t558\t'52,168,194'\n",
"I\t144\t145\t.\t-2.200\t.\t144\t145\t52,168,194\n",
"I\t175\t176\t.\t-1.350\t.\t175\t176\t230,230,230\n",
"I\t216\t217\t.\t-2.160\t.\t216\t217\t52,168,194\n",
"I\t325\t326\t.\t-2.660\t.\t325\t326\t52,168,194\n",
"I\t339\t340\t.\t-1.210\t.\t339\t340\t230,230,230\n",
"I\t354\t355\t.\t-1.390\t.\t354\t355\t230,230,230\n",
"I\t422\t433\t.\t-10.520\t.\t422\t433\t28,45,131\n",
"I\t542\t543\t.\t-0.780\t.\t542\t543\t230,230,230\n",
"I\t557\t558\t.\t-2.300\t.\t557\t558\t52,168,194\n",
"\n"
]
}
Expand Down Expand Up @@ -304,11 +304,11 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-13T11:35:24.702473Z",
"start_time": "2020-01-13T11:34:38.110631Z"
"end_time": "2020-01-13T21:45:52.944540Z",
"start_time": "2020-01-13T21:45:04.326854Z"
},
"scrolled": false
},
Expand Down
8 changes: 4 additions & 4 deletions docs/CpG_Aggregate/CLI_usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-13T11:35:35.274813Z",
"start_time": "2020-01-13T11:35:34.514458Z"
"end_time": "2020-01-13T21:40:06.731135Z",
"start_time": "2020-01-13T21:40:06.120502Z"
},
"init_cell": true
},
Expand All @@ -34,7 +34,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"(pycoMeth) (pycoMeth) "
"(pycoMeth) "
]
},
{
Expand Down
60 changes: 34 additions & 26 deletions docs/Meth_Comp/API_usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-13T12:21:50.257341Z",
"start_time": "2020-01-13T12:21:49.418409Z"
"end_time": "2020-01-14T23:14:08.575257Z",
"start_time": "2020-01-14T23:14:07.593146Z"
},
"init_cell": true
},
Expand All @@ -47,50 +47,58 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-08T14:35:43.526182Z",
"start_time": "2020-01-08T14:35:43.516559Z"
"end_time": "2020-01-14T23:14:11.834925Z",
"start_time": "2020-01-14T23:14:11.821895Z"
}
},
"outputs": [
{
"data": {
"text/markdown": [
"**Meth_Comp** (aggregate_fn_list, ref_fasta_fn, output_tsv_fn, output_bed_fn, max_missing, min_diff_llr, sample_id, verbose, quiet, progress, kwargs)\n",
"**Meth_Comp** (aggregate_fn_list, ref_fasta_fn, output_tsv_fn, output_bed_fn, max_missing, min_diff_llr, sample_id_list, pvalue_adj_method, pvalue_adj_alpha, verbose, quiet, progress, kwargs)\n",
"\n",
"Compare methylation values for each CpG position or interval between n samples and perform a statistical test to evaluate if the positions are significantly different. For 2 samples a Mann-Whitney test is performed, otherwise multiple samples are compared with a Kruskal-Wallis test. pValues are adjusted for multiple tests using the Benjamini & Hochberg procedure for controlling the false discovery rate.\n",
"\n",
"---\n",
"\n",
"* **aggregate_fn_list** (required) [list(str)]\n",
"\n",
"A list of output tsv files corresponding to several samples to compare generated by either CpG_Aggregate or Interval_Aggregate.\n",
"A list of output tsv files corresponding to several samples to compare generated by either CpG_Aggregate or Interval_Aggregate. (can be gzipped)\n",
"\n",
"* **ref_fasta_fn** (required) [str]\n",
"\n",
"Reference file used for alignment in Fasta format (ideally already indexed with samtools faidx)\n",
"\n",
"* **output_tsv_fn** (default: None) [str]\n",
"\n",
"Path to write an more extensive result report in TSV format (At least 1 output file is required)\n",
"Path to write a more extensive result report in TSV format (At least 1 output file is required) (can be gzipped)\n",
"\n",
"* **output_bed_fn** (default: None) [str]\n",
"\n",
"Path to write a summary result file in BED format (At least 1 output file is required)\n",
"Path to write a summary result file in BED format (At least 1 output file is required) (can be gzipped)\n",
"\n",
"* **max_missing** (default: 0) [int]\n",
"\n",
"Max number of missing samples to perform the test\n",
"\n",
"* **min_diff_llr** (default: 2) [int]\n",
"* **min_diff_llr** (default: 2) [float]\n",
"\n",
"Minimal llr boundary for negative and positive median llr. The test is only performed if at least one sample has a median llr above (methylated) and 1 sample has a median llr below (unmethylated)\n",
"\n",
"* **sample_id** (default: \"\") [str]\n",
"* **sample_id_list** (default: None) [list(str)]\n",
"\n",
"Sample ID to be used for the BED track header\n",
"List of sample IDs used to annotate results in the TSV file\n",
"\n",
"* **pvalue_adj_method** (default: fdr_bh) [str]\n",
"\n",
"Method to use for pValue multiple test adjustment\n",
"\n",
"* **pvalue_adj_alpha** (default: 0.01) [float]\n",
"\n",
"Alpha parameter (family-wise error rate) for pValue adjustment\n",
"\n",
"* **verbose** (default: False) [bool]\n",
"\n",
Expand Down Expand Up @@ -132,8 +140,8 @@
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-13T12:25:28.172843Z",
"start_time": "2020-01-13T12:25:17.642436Z"
"end_time": "2020-01-14T23:14:41.010037Z",
"start_time": "2020-01-14T23:14:27.296976Z"
},
"scrolled": false
},
Expand All @@ -146,10 +154,10 @@
"## Parsing files ##\n",
"\tReading input files header and checking consistancy between headers\n",
"\tStarting asynchronous file parsing\n",
"37.1M bytes [00:10, 3.64M bytes/s] \n",
"37.1M bytes [00:13, 2.80M bytes/s] \n",
"\tAdjust pvalues\n",
"\tWriting output file\n",
"100%|██████████| 4.78k/4.78k [00:00<00:00, 16.4k sites/s]\n",
"100%|██████████| 4.78k/4.78k [00:00<00:00, 12.1k sites/s]\n",
"\tResults summary\n",
"\t\tSites with insufficient samples: 121,015\n",
"\t\tValid sites: 4,779\n",
Expand All @@ -173,15 +181,15 @@
"I 18029 18030 3 0.14148585160496208 3.9111111111111114 0.3730650887538211 2 1 0 [\"S1\",\"S3\",\"S4\"] [-4.555,-4.93,1.455] [[-5.07,-4.04],[0.56,-4.93,-13.5],[0.44,3.49,0.14,2.47]] \n",
"\n",
"track name=meth_comp itemRgb=On\n",
"I\t542\t543\t.\t0.428\t.\t542\t543\t'230,230,230'\n",
"I\t3367\t3368\t.\t0.428\t.\t3367\t3368\t'230,230,230'\n",
"I\t3516\t3517\t.\t0.409\t.\t3516\t3517\t'230,230,230'\n",
"I\t3674\t3675\t.\t0.39\t.\t3674\t3675\t'230,230,230'\n",
"I\t15199\t15200\t.\t0.428\t.\t15199\t15200\t'230,230,230'\n",
"I\t15386\t15387\t.\t0.399\t.\t15386\t15387\t'230,230,230'\n",
"I\t15845\t15846\t.\t0.179\t.\t15845\t15846\t'230,230,230'\n",
"I\t17451\t17452\t.\t0.376\t.\t17451\t17452\t'230,230,230'\n",
"I\t18029\t18030\t.\t0.428\t.\t18029\t18030\t'230,230,230'\n",
"I\t542\t543\t.\t0.428\t.\t542\t543\t230,230,230\n",
"I\t3367\t3368\t.\t0.428\t.\t3367\t3368\t230,230,230\n",
"I\t3516\t3517\t.\t0.409\t.\t3516\t3517\t230,230,230\n",
"I\t3674\t3675\t.\t0.39\t.\t3674\t3675\t230,230,230\n",
"I\t15199\t15200\t.\t0.428\t.\t15199\t15200\t230,230,230\n",
"I\t15386\t15387\t.\t0.399\t.\t15386\t15387\t230,230,230\n",
"I\t15845\t15846\t.\t0.179\t.\t15845\t15846\t230,230,230\n",
"I\t17451\t17452\t.\t0.376\t.\t17451\t17452\t230,230,230\n",
"I\t18029\t18030\t.\t0.428\t.\t18029\t18030\t230,230,230\n",
"\n"
]
}
Expand Down
8 changes: 4 additions & 4 deletions docs/Meth_Comp/CLI_usage.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2020-01-13T12:30:09.809078Z",
"start_time": "2020-01-13T12:30:09.019609Z"
"end_time": "2020-01-14T23:13:49.653438Z",
"start_time": "2020-01-14T23:13:48.621951Z"
},
"init_cell": true
},
Expand All @@ -34,7 +34,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"(pycoMeth) (pycoMeth) "
"(pycoMeth) "
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion pycoMeth/CpG_Aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def _write_bed (self, coord, med_llr):
if med_llr <= min_llr:
break
# Write line
self.bed_fp.write ("{}\t{}\t{}\t.\t{:.3f}\t.\t{}\t{}\t'{}'\n".format(
self.bed_fp.write ("{}\t{}\t{}\t.\t{:.3f}\t.\t{}\t{}\t{}\n".format(
coord.chr_name, coord.start, coord.end, med_llr, coord.start, coord.end, color))

def _init_tsv (self):
Expand Down
2 changes: 1 addition & 1 deletion pycoMeth/Interval_Aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def _write_bed (self, coord, med_llr):
if med_llr <= min_llr:
break
# Write line
self.bed_fp.write ("{}\t{}\t{}\t.\t{:.3f}\t.\t{}\t{}\t'{}'\n".format(
self.bed_fp.write ("{}\t{}\t{}\t.\t{:.3f}\t.\t{}\t{}\t{}\n".format(
coord.chr_name, coord.start, coord.end, med_llr, coord.start, coord.end, color))

def _init_tsv (self):
Expand Down
Loading

0 comments on commit e2573ce

Please sign in to comment.