From 17483abf125a8e7334682229a06f1fbd71de4817 Mon Sep 17 00:00:00 2001
From: maxibor
Date: Wed, 22 Jan 2025 20:34:46 +0100
Subject: [PATCH 1/6] fix IndexError
---
index.html | 110 +++++++++++++++++++++++++++++
pydamage/__init__.py | 2 +-
pydamage/damage.py | 148 +++++++++++++++++++++-------------------
pydamage/main.py | 2 +
pydamage/parse_fasta.py | 7 ++
test.html | 107 +++++++++++++++++++++++++++++
6 files changed, 306 insertions(+), 70 deletions(-)
create mode 100644 index.html
create mode 100644 pydamage/parse_fasta.py
create mode 100644 test.html
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..27ebcc4
--- /dev/null
+++ b/index.html
@@ -0,0 +1,110 @@
+
+
+
+
+ ADRSM geometric
+
+
+
+
+
+
+
+
+
+
Plotting aDNA simulated damage with geometric distribution for ADRSM
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/pydamage/__init__.py b/pydamage/__init__.py
index ca279e7..940ddbd 100644
--- a/pydamage/__init__.py
+++ b/pydamage/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.80"
+__version__ = "0.90"
from pydamage.main import pydamage_analyze
import logging
diff --git a/pydamage/damage.py b/pydamage/damage.py
index 9a8248f..64b1f45 100644
--- a/pydamage/damage.py
+++ b/pydamage/damage.py
@@ -121,14 +121,24 @@ def compute_damage(self):
# All conserved C and G
no_mut_pos, no_mut_counts = sort_count_array_dict(np.array(self.no_mut, dtype="uint32"))
+
CT_damage_amount = np.zeros(self.wlen)
- CT_damage_amount[CT_pos] = CT_counts / C_counts[CT_pos]
+ C_pos_dict = {pos: i for i, pos in enumerate(C_pos)}
+ CT_pos_indices = [C_pos_dict[pos] for pos in CT_pos]
+ CT_damage_amount[CT_pos] = CT_counts / C_counts[CT_pos_indices]
GA_damage_amount = np.zeros(self.wlen)
- GA_damage_amount[GA_pos] = GA_counts / G_counts[GA_pos]
+ G_pos_dict = {pos: i for i, pos in enumerate(G_pos)}
+ GA_pos_indices = [G_pos_dict[pos] for pos in GA_pos]
+
+ GA_damage_amount[GA_pos] = GA_counts / G_counts[GA_pos_indices]
+
damage_amount = np.zeros(self.wlen)
- damage_amount[damage_bases_pos] = damage_bases_counts / C_G_bases_counts[damage_bases_pos]
+ CG_pos_dict = {pos: i for i, pos in enumerate(C_G_bases_pos)}
+ CG_pos_indices = [CG_pos_dict[pos] for pos in damage_bases_pos]
+ damage_amount[damage_bases_pos] = damage_bases_counts / C_G_bases_counts[CG_pos_indices]
+
_ = np.zeros(self.wlen, dtype="uint32")
_[damage_bases_pos] = damage_bases_counts
@@ -186,69 +196,69 @@ def test_damage(ref, bam, mode, wlen, g2a, subsample, show_al, process, verbose)
dict: Dictionary containing LR test results
"""
al_handle = pysam.AlignmentFile(bam, mode=mode, threads=process)
- try:
- if ref is None:
- logging.info("Computing alignment stats for entire reference")
- reflen, cov, nb_reads_aligned = get_ref_stats(bam)
- refname = "reference"
- else:
- reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref)
- refname = ref
-
- if subsample:
- cov = cov * subsample
- nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0)
-
- al = al_to_damage(
- reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample
- )
- al.get_damage(show_al=show_al)
- read_dict = al.read_dict
- if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None:
- read_dict[refname] = read_dict.pop(None)
- (
- mut_count,
- conserved_count,
- CT_damage,
- GA_damage,
- all_damage,
- ) = al.compute_damage()
-
- al_handle.close()
-
- model_A = models.damage_model()
- model_B = models.null_model()
- test_res = fit_models(
- ref=ref,
- model_A=model_A,
- model_B=model_B,
- damage=all_damage,
- mut_count=mut_count,
- conserved_count=conserved_count,
- verbose=verbose,
- )
- test_res["reference"] = refname
- test_res["nb_reads_aligned"] = nb_reads_aligned
- test_res["coverage"] = cov
- test_res["reflen"] = reflen
-
- CT_log = {}
- GA_log = {}
-
- for i in range(wlen):
- CT_log[f"CtoT-{i}"] = CT_damage[i]
- GA_log[f"GtoA-{i}"] = GA_damage[i]
- test_res.update(CT_log)
- test_res.update(GA_log)
- return (check_model_fit(test_res, wlen, verbose), read_dict)
-
- except (ValueError, RuntimeError) as e:
- if verbose:
- logging.warning(f"Model fitting for {ref} failed")
- logging.warning(f"Model fitting error: {e}")
- logging.warning(
- f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}"
- f" - reflen: {reflen}\n"
- )
- al_handle.close()
- return False
+ # try:
+ if ref is None:
+ logging.info("Computing alignment stats for entire reference")
+ reflen, cov, nb_reads_aligned = get_ref_stats(bam)
+ refname = "reference"
+ else:
+ reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref)
+ refname = ref
+
+ if subsample:
+ cov = cov * subsample
+ nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0)
+
+ al = al_to_damage(
+ reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample
+ )
+ al.get_damage(show_al=show_al)
+ read_dict = al.read_dict
+ if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None:
+ read_dict[refname] = read_dict.pop(None)
+ (
+ mut_count,
+ conserved_count,
+ CT_damage,
+ GA_damage,
+ all_damage,
+ ) = al.compute_damage()
+
+ al_handle.close()
+
+ model_A = models.damage_model()
+ model_B = models.null_model()
+ test_res = fit_models(
+ ref=ref,
+ model_A=model_A,
+ model_B=model_B,
+ damage=all_damage,
+ mut_count=mut_count,
+ conserved_count=conserved_count,
+ verbose=verbose,
+ )
+ test_res["reference"] = refname
+ test_res["nb_reads_aligned"] = nb_reads_aligned
+ test_res["coverage"] = cov
+ test_res["reflen"] = reflen
+
+ CT_log = {}
+ GA_log = {}
+
+ for i in range(wlen):
+ CT_log[f"CtoT-{i}"] = CT_damage[i]
+ GA_log[f"GtoA-{i}"] = GA_damage[i]
+ test_res.update(CT_log)
+ test_res.update(GA_log)
+ return (check_model_fit(test_res, wlen, verbose), read_dict)
+
+ # except (ValueError, RuntimeError) as e:
+ # if verbose:
+ # logging.warning(f"Model fitting for {ref} failed")
+ # logging.warning(f"Model fitting error: {e}")
+ # logging.warning(
+ # f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}"
+ # f" - reflen: {reflen}\n"
+ # )
+ # al_handle.close()
+ # return False, read_dict
diff --git a/pydamage/main.py b/pydamage/main.py
index 0f9e6e0..2d472c9 100644
--- a/pydamage/main.py
+++ b/pydamage/main.py
@@ -76,6 +76,8 @@ def pydamage_analyze(
# bam=bam,
# ref=ref,
# wlen=wlen,
+ # g2a=g2a,
+ # subsample=subsample,
# show_al=show_al,
# mode=mode,
# process=process,
diff --git a/pydamage/parse_fasta.py b/pydamage/parse_fasta.py
new file mode 100644
index 0000000..7e51f5d
--- /dev/null
+++ b/pydamage/parse_fasta.py
@@ -0,0 +1,7 @@
+import pysam
+def parse_fasta(fasta):
+ fa_dict = {}
+ with pysam.FastxFile(fasta) as fh:
+ for entry in fh:
+ fa_dict[entry.name] = entry.sequence
+ return fa_dict
\ No newline at end of file
diff --git a/test.html b/test.html
new file mode 100644
index 0000000..4632b7c
--- /dev/null
+++ b/test.html
@@ -0,0 +1,107 @@
+
+
+
+
+ ADRSM geometric
+
+
+
+
+
+
+
+
+
+
Plotting aDNA simulated damage with geometric distribution for ADRSM
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
From 42c0d65ff262e16d136445cf5182d501daa9973f Mon Sep 17 00:00:00 2001
From: maxibor
Date: Wed, 22 Jan 2025 20:37:50 +0100
Subject: [PATCH 2/6] rm unnecessary files
---
index.html | 110 ----------------------------------------
pydamage/parse_fasta.py | 7 ---
test.html | 107 --------------------------------------
3 files changed, 224 deletions(-)
delete mode 100644 index.html
delete mode 100644 pydamage/parse_fasta.py
delete mode 100644 test.html
diff --git a/index.html b/index.html
deleted file mode 100644
index 27ebcc4..0000000
--- a/index.html
+++ /dev/null
@@ -1,110 +0,0 @@
-
-
-
-
- ADRSM geometric
-
-
-
-
-
-
-
-
-
-
Plotting aDNA simulated damage with geometric distribution for ADRSM
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/pydamage/parse_fasta.py b/pydamage/parse_fasta.py
deleted file mode 100644
index 7e51f5d..0000000
--- a/pydamage/parse_fasta.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import pysam
-def parse_fasta(fasta):
- fa_dict = {}
- with pysam.FastxFile(fasta) as fh:
- for entry in fh:
- fa_dict[entry.name] = entry.sequence
- return fa_dict
\ No newline at end of file
diff --git a/test.html b/test.html
deleted file mode 100644
index 4632b7c..0000000
--- a/test.html
+++ /dev/null
@@ -1,107 +0,0 @@
-
-
-
-
- ADRSM geometric
-
-
-
-
-
-
-
-
-
-
Plotting aDNA simulated damage with geometric distribution for ADRSM
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
From 8f0de9e9532b577dad03cf4f273174d852f1e410 Mon Sep 17 00:00:00 2001
From: maxibor
Date: Wed, 22 Jan 2025 20:45:51 +0100
Subject: [PATCH 3/6] reinstate try/except block
---
pydamage/damage.py | 132 ++++++++++++++++++++++-----------------------
1 file changed, 66 insertions(+), 66 deletions(-)
diff --git a/pydamage/damage.py b/pydamage/damage.py
index 64b1f45..f526e2c 100644
--- a/pydamage/damage.py
+++ b/pydamage/damage.py
@@ -196,69 +196,69 @@ def test_damage(ref, bam, mode, wlen, g2a, subsample, show_al, process, verbose)
dict: Dictionary containing LR test results
"""
al_handle = pysam.AlignmentFile(bam, mode=mode, threads=process)
- # try:
- if ref is None:
- logging.info("Computing alignment stats for entire reference")
- reflen, cov, nb_reads_aligned = get_ref_stats(bam)
- refname = "reference"
- else:
- reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref)
- refname = ref
-
- if subsample:
- cov = cov * subsample
- nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0)
-
- al = al_to_damage(
- reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample
- )
- al.get_damage(show_al=show_al)
- read_dict = al.read_dict
- if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None:
- read_dict[refname] = read_dict.pop(None)
- (
- mut_count,
- conserved_count,
- CT_damage,
- GA_damage,
- all_damage,
- ) = al.compute_damage()
-
- al_handle.close()
-
- model_A = models.damage_model()
- model_B = models.null_model()
- test_res = fit_models(
- ref=ref,
- model_A=model_A,
- model_B=model_B,
- damage=all_damage,
- mut_count=mut_count,
- conserved_count=conserved_count,
- verbose=verbose,
- )
- test_res["reference"] = refname
- test_res["nb_reads_aligned"] = nb_reads_aligned
- test_res["coverage"] = cov
- test_res["reflen"] = reflen
-
- CT_log = {}
- GA_log = {}
-
- for i in range(wlen):
- CT_log[f"CtoT-{i}"] = CT_damage[i]
- GA_log[f"GtoA-{i}"] = GA_damage[i]
- test_res.update(CT_log)
- test_res.update(GA_log)
- return (check_model_fit(test_res, wlen, verbose), read_dict)
-
- # except (ValueError, RuntimeError) as e:
- # if verbose:
- # logging.warning(f"Model fitting for {ref} failed")
- # logging.warning(f"Model fitting error: {e}")
- # logging.warning(
- # f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}"
- # f" - reflen: {reflen}\n"
- # )
- # al_handle.close()
- # return False, read_dict
+ try:
+ if ref is None:
+ logging.info("Computing alignment stats for entire reference")
+ reflen, cov, nb_reads_aligned = get_ref_stats(bam)
+ refname = "reference"
+ else:
+ reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref)
+ refname = ref
+
+ if subsample:
+ cov = cov * subsample
+ nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0)
+
+ al = al_to_damage(
+ reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample
+ )
+ al.get_damage(show_al=show_al)
+ read_dict = al.read_dict
+ if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None:
+ read_dict[refname] = read_dict.pop(None)
+ (
+ mut_count,
+ conserved_count,
+ CT_damage,
+ GA_damage,
+ all_damage,
+ ) = al.compute_damage()
+
+ al_handle.close()
+
+ model_A = models.damage_model()
+ model_B = models.null_model()
+ test_res = fit_models(
+ ref=ref,
+ model_A=model_A,
+ model_B=model_B,
+ damage=all_damage,
+ mut_count=mut_count,
+ conserved_count=conserved_count,
+ verbose=verbose,
+ )
+ test_res["reference"] = refname
+ test_res["nb_reads_aligned"] = nb_reads_aligned
+ test_res["coverage"] = cov
+ test_res["reflen"] = reflen
+
+ CT_log = {}
+ GA_log = {}
+
+ for i in range(wlen):
+ CT_log[f"CtoT-{i}"] = CT_damage[i]
+ GA_log[f"GtoA-{i}"] = GA_damage[i]
+ test_res.update(CT_log)
+ test_res.update(GA_log)
+ return (check_model_fit(test_res, wlen, verbose), read_dict)
+
+ except (ValueError, RuntimeError) as e:
+ if verbose:
+ logging.warning(f"Model fitting for {ref} failed")
+ logging.warning(f"Model fitting error: {e}")
+ logging.warning(
+ f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}"
+ f" - reflen: {reflen}\n"
+ )
+ al_handle.close()
+ return False, read_dict
From 91aa1b82c6735671cd4e45dd6d80ebdf27bce596 Mon Sep 17 00:00:00 2001
From: maxibor
Date: Thu, 23 Jan 2025 14:45:41 +0100
Subject: [PATCH 4/6] ci: update conda channels
---
.github/workflows/pydamage_ci.yml | 2 +-
environment.yml | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/.github/workflows/pydamage_ci.yml b/.github/workflows/pydamage_ci.yml
index 2ac52dc..bf245c6 100644
--- a/.github/workflows/pydamage_ci.yml
+++ b/.github/workflows/pydamage_ci.yml
@@ -14,7 +14,7 @@ jobs:
with:
auto-update-conda: true
mamba-version: "*"
- channels: conda-forge,bioconda,defaults
+ channels: conda-forge,bioconda
channel-priority: true
environment-file: environment.yml
activate-environment: pydamage
diff --git a/environment.yml b/environment.yml
index 676c306..c92cf0c 100644
--- a/environment.yml
+++ b/environment.yml
@@ -2,7 +2,6 @@ name: pydamage
channels:
- conda-forge
- bioconda
- - defaults
dependencies:
- click
- numpy
From 5ce41243c61608b202c055b638149f113a19ab00 Mon Sep 17 00:00:00 2001
From: maxibor
Date: Thu, 23 Jan 2025 14:52:00 +0100
Subject: [PATCH 5/6] ci: replace by micromamba
---
.github/workflows/pydamage_ci.yml | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/.github/workflows/pydamage_ci.yml b/.github/workflows/pydamage_ci.yml
index bf245c6..853292e 100644
--- a/.github/workflows/pydamage_ci.yml
+++ b/.github/workflows/pydamage_ci.yml
@@ -10,26 +10,26 @@ jobs:
if: "!contains(github.event.head_commit.message, '[skip_ci]')"
steps:
- uses: actions/checkout@v2
- - uses: conda-incubator/setup-miniconda@v3
+ - uses: mamba-org/setup-micromamba@v2
with:
- auto-update-conda: true
- mamba-version: "*"
- channels: conda-forge,bioconda
- channel-priority: true
environment-file: environment.yml
- activate-environment: pydamage
+ environment-name: pydamage
+ init-shell: >-
+ bash
+ cache-environment: true
+ post-cleanup: 'all'
- name: Test with pytest
- shell: bash -l {0}
+ shell: bash -el {0}
run: |
pip install -e .
pip install pytest
pytest
- name: Check pydamage help message
- shell: bash -l {0}
+ shell: bash -el {0}
run: |
pydamage --help
- name: Check pydamage on test data
- shell: bash -l {0}
+ shell: bash -el {0}
run: |
pydamage analyze --verbose tests/data/aligned.bam
pydamage filter pydamage_results/pydamage_results.csv
From 4d5fc957057f26cc4a3a122366cff5170a896784 Mon Sep 17 00:00:00 2001
From: maxibor
Date: Thu, 23 Jan 2025 14:54:24 +0100
Subject: [PATCH 6/6] ci_build: update python
---
.github/workflows/publish_pypi.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml
index d29379a..2a2f68a 100644
--- a/.github/workflows/publish_pypi.yml
+++ b/.github/workflows/publish_pypi.yml
@@ -13,7 +13,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v3
with:
- python-version: "3.9"
+ python-version: "3.10"
- name: Build pydamage
run: |
pip install wheel