From 17483abf125a8e7334682229a06f1fbd71de4817 Mon Sep 17 00:00:00 2001 From: maxibor Date: Wed, 22 Jan 2025 20:34:46 +0100 Subject: [PATCH 1/6] fix IndexError --- index.html | 110 +++++++++++++++++++++++++++++ pydamage/__init__.py | 2 +- pydamage/damage.py | 148 +++++++++++++++++++++------------------- pydamage/main.py | 2 + pydamage/parse_fasta.py | 7 ++ test.html | 107 +++++++++++++++++++++++++++++ 6 files changed, 306 insertions(+), 70 deletions(-) create mode 100644 index.html create mode 100644 pydamage/parse_fasta.py create mode 100644 test.html diff --git a/index.html b/index.html new file mode 100644 index 0000000..27ebcc4 --- /dev/null +++ b/index.html @@ -0,0 +1,110 @@ + + + + + ADRSM geometric + + + + + + + + +

+

Plotting aDNA simulated damage with geometric distribution for ADRSM

+

+
+ + + + + + + + + + + + + +
+ +
+ + + + + + \ No newline at end of file diff --git a/pydamage/__init__.py b/pydamage/__init__.py index ca279e7..940ddbd 100644 --- a/pydamage/__init__.py +++ b/pydamage/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.80" +__version__ = "0.90" from pydamage.main import pydamage_analyze import logging diff --git a/pydamage/damage.py b/pydamage/damage.py index 9a8248f..64b1f45 100644 --- a/pydamage/damage.py +++ b/pydamage/damage.py @@ -121,14 +121,24 @@ def compute_damage(self): # All conserved C and G no_mut_pos, no_mut_counts = sort_count_array_dict(np.array(self.no_mut, dtype="uint32")) + CT_damage_amount = np.zeros(self.wlen) - CT_damage_amount[CT_pos] = CT_counts / C_counts[CT_pos] + C_pos_dict = {pos: i for i, pos in enumerate(C_pos)} + CT_pos_indices = [C_pos_dict[pos] for pos in CT_pos] + CT_damage_amount[CT_pos] = CT_counts / C_counts[CT_pos_indices] GA_damage_amount = np.zeros(self.wlen) - GA_damage_amount[GA_pos] = GA_counts / G_counts[GA_pos] + G_pos_dict = {pos: i for i, pos in enumerate(G_pos)} + GA_pos_indices = [G_pos_dict[pos] for pos in GA_pos] + + GA_damage_amount[GA_pos] = GA_counts / G_counts[GA_pos_indices] + damage_amount = np.zeros(self.wlen) - damage_amount[damage_bases_pos] = damage_bases_counts / C_G_bases_counts[damage_bases_pos] + CG_pos_dict = {pos: i for i, pos in enumerate(C_G_bases_pos)} + CG_pos_indices = [CG_pos_dict[pos] for pos in damage_bases_pos] + damage_amount[damage_bases_pos] = damage_bases_counts / C_G_bases_counts[CG_pos_indices] + _ = np.zeros(self.wlen, dtype="uint32") _[damage_bases_pos] = damage_bases_counts @@ -186,69 +196,69 @@ def test_damage(ref, bam, mode, wlen, g2a, subsample, show_al, process, verbose) dict: Dictionary containing LR test results """ al_handle = pysam.AlignmentFile(bam, mode=mode, threads=process) - try: - if ref is None: - logging.info("Computing alignment stats for entire reference") - reflen, cov, nb_reads_aligned = get_ref_stats(bam) - refname = "reference" - else: - reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref) - refname = ref - - if subsample: - cov = cov * subsample - nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0) - - al = al_to_damage( - reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample - ) - al.get_damage(show_al=show_al) - read_dict = al.read_dict - if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None: - read_dict[refname] = read_dict.pop(None) - ( - mut_count, - conserved_count, - CT_damage, - GA_damage, - all_damage, - ) = al.compute_damage() - - al_handle.close() - - model_A = models.damage_model() - model_B = models.null_model() - test_res = fit_models( - ref=ref, - model_A=model_A, - model_B=model_B, - damage=all_damage, - mut_count=mut_count, - conserved_count=conserved_count, - verbose=verbose, - ) - test_res["reference"] = refname - test_res["nb_reads_aligned"] = nb_reads_aligned - test_res["coverage"] = cov - test_res["reflen"] = reflen - - CT_log = {} - GA_log = {} - - for i in range(wlen): - CT_log[f"CtoT-{i}"] = CT_damage[i] - GA_log[f"GtoA-{i}"] = GA_damage[i] - test_res.update(CT_log) - test_res.update(GA_log) - return (check_model_fit(test_res, wlen, verbose), read_dict) - - except (ValueError, RuntimeError) as e: - if verbose: - logging.warning(f"Model fitting for {ref} failed") - logging.warning(f"Model fitting error: {e}") - logging.warning( - f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}" - f" - reflen: {reflen}\n" - ) - al_handle.close() - return False + # try: + if ref is None: + logging.info("Computing alignment stats for entire reference") + reflen, cov, nb_reads_aligned = get_ref_stats(bam) + refname = "reference" + else: + reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref) + refname = ref + + if subsample: + cov = cov * subsample + nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0) + + al = al_to_damage( + reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample + ) + al.get_damage(show_al=show_al) + read_dict = al.read_dict + if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None: + read_dict[refname] = read_dict.pop(None) + ( + mut_count, + conserved_count, + CT_damage, + GA_damage, + all_damage, + ) = al.compute_damage() + + al_handle.close() + + model_A = models.damage_model() + model_B = models.null_model() + test_res = fit_models( + ref=ref, + model_A=model_A, + model_B=model_B, + damage=all_damage, + mut_count=mut_count, + conserved_count=conserved_count, + verbose=verbose, + ) + test_res["reference"] = refname + test_res["nb_reads_aligned"] = nb_reads_aligned + test_res["coverage"] = cov + test_res["reflen"] = reflen + + CT_log = {} + GA_log = {} + + for i in range(wlen): + CT_log[f"CtoT-{i}"] = CT_damage[i] + GA_log[f"GtoA-{i}"] = GA_damage[i] + test_res.update(CT_log) + test_res.update(GA_log) + return (check_model_fit(test_res, wlen, verbose), read_dict) + + # except (ValueError, RuntimeError) as e: + # if verbose: + # logging.warning(f"Model fitting for {ref} failed") + # logging.warning(f"Model fitting error: {e}") + # logging.warning( + # f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}" + # f" - reflen: {reflen}\n" + # ) + # al_handle.close() + # return False, read_dict diff --git a/pydamage/main.py b/pydamage/main.py index 0f9e6e0..2d472c9 100644 --- a/pydamage/main.py +++ b/pydamage/main.py @@ -76,6 +76,8 @@ def pydamage_analyze( # bam=bam, # ref=ref, # wlen=wlen, + # g2a=g2a, + # subsample=subsample, # show_al=show_al, # mode=mode, # process=process, diff --git a/pydamage/parse_fasta.py b/pydamage/parse_fasta.py new file mode 100644 index 0000000..7e51f5d --- /dev/null +++ b/pydamage/parse_fasta.py @@ -0,0 +1,7 @@ +import pysam +def parse_fasta(fasta): + fa_dict = {} + with pysam.FastxFile(fasta) as fh: + for entry in fh: + fa_dict[entry.name] = entry.sequence + return fa_dict \ No newline at end of file diff --git a/test.html b/test.html new file mode 100644 index 0000000..4632b7c --- /dev/null +++ b/test.html @@ -0,0 +1,107 @@ + + + + + ADRSM geometric + + + + + + + + +

+

Plotting aDNA simulated damage with geometric distribution for ADRSM

+

+
+ + + + + + + + +
+ +
+ + + + + + \ No newline at end of file From 42c0d65ff262e16d136445cf5182d501daa9973f Mon Sep 17 00:00:00 2001 From: maxibor Date: Wed, 22 Jan 2025 20:37:50 +0100 Subject: [PATCH 2/6] rm unnecessary files --- index.html | 110 ---------------------------------------- pydamage/parse_fasta.py | 7 --- test.html | 107 -------------------------------------- 3 files changed, 224 deletions(-) delete mode 100644 index.html delete mode 100644 pydamage/parse_fasta.py delete mode 100644 test.html diff --git a/index.html b/index.html deleted file mode 100644 index 27ebcc4..0000000 --- a/index.html +++ /dev/null @@ -1,110 +0,0 @@ - - - - - ADRSM geometric - - - - - - - - -

-

Plotting aDNA simulated damage with geometric distribution for ADRSM

-

-
- - - - - - - - - - - - - -
- -
- - - - - - \ No newline at end of file diff --git a/pydamage/parse_fasta.py b/pydamage/parse_fasta.py deleted file mode 100644 index 7e51f5d..0000000 --- a/pydamage/parse_fasta.py +++ /dev/null @@ -1,7 +0,0 @@ -import pysam -def parse_fasta(fasta): - fa_dict = {} - with pysam.FastxFile(fasta) as fh: - for entry in fh: - fa_dict[entry.name] = entry.sequence - return fa_dict \ No newline at end of file diff --git a/test.html b/test.html deleted file mode 100644 index 4632b7c..0000000 --- a/test.html +++ /dev/null @@ -1,107 +0,0 @@ - - - - - ADRSM geometric - - - - - - - - -

-

Plotting aDNA simulated damage with geometric distribution for ADRSM

-

-
- - - - - - - - -
- -
- - - - - - \ No newline at end of file From 8f0de9e9532b577dad03cf4f273174d852f1e410 Mon Sep 17 00:00:00 2001 From: maxibor Date: Wed, 22 Jan 2025 20:45:51 +0100 Subject: [PATCH 3/6] reinstate try/except block --- pydamage/damage.py | 132 ++++++++++++++++++++++----------------------- 1 file changed, 66 insertions(+), 66 deletions(-) diff --git a/pydamage/damage.py b/pydamage/damage.py index 64b1f45..f526e2c 100644 --- a/pydamage/damage.py +++ b/pydamage/damage.py @@ -196,69 +196,69 @@ def test_damage(ref, bam, mode, wlen, g2a, subsample, show_al, process, verbose) dict: Dictionary containing LR test results """ al_handle = pysam.AlignmentFile(bam, mode=mode, threads=process) - # try: - if ref is None: - logging.info("Computing alignment stats for entire reference") - reflen, cov, nb_reads_aligned = get_ref_stats(bam) - refname = "reference" - else: - reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref) - refname = ref - - if subsample: - cov = cov * subsample - nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0) - - al = al_to_damage( - reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample - ) - al.get_damage(show_al=show_al) - read_dict = al.read_dict - if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None: - read_dict[refname] = read_dict.pop(None) - ( - mut_count, - conserved_count, - CT_damage, - GA_damage, - all_damage, - ) = al.compute_damage() - - al_handle.close() - - model_A = models.damage_model() - model_B = models.null_model() - test_res = fit_models( - ref=ref, - model_A=model_A, - model_B=model_B, - damage=all_damage, - mut_count=mut_count, - conserved_count=conserved_count, - verbose=verbose, - ) - test_res["reference"] = refname - test_res["nb_reads_aligned"] = nb_reads_aligned - test_res["coverage"] = cov - test_res["reflen"] = reflen - - CT_log = {} - GA_log = {} - - for i in range(wlen): - CT_log[f"CtoT-{i}"] = CT_damage[i] - GA_log[f"GtoA-{i}"] = GA_damage[i] - test_res.update(CT_log) - test_res.update(GA_log) - return (check_model_fit(test_res, wlen, verbose), read_dict) - - # except (ValueError, RuntimeError) as e: - # if verbose: - # logging.warning(f"Model fitting for {ref} failed") - # logging.warning(f"Model fitting error: {e}") - # logging.warning( - # f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}" - # f" - reflen: {reflen}\n" - # ) - # al_handle.close() - # return False, read_dict + try: + if ref is None: + logging.info("Computing alignment stats for entire reference") + reflen, cov, nb_reads_aligned = get_ref_stats(bam) + refname = "reference" + else: + reflen, cov, nb_reads_aligned = get_contig_stats(al_handle, ref) + refname = ref + + if subsample: + cov = cov * subsample + nb_reads_aligned = np.round(nb_reads_aligned * subsample, 0) + + al = al_to_damage( + reference=ref, al_handle=al_handle, wlen=wlen, g2a=g2a, subsample=subsample + ) + al.get_damage(show_al=show_al) + read_dict = al.read_dict + if len(read_dict.keys()) == 1 and list(read_dict.keys())[0] is None: + read_dict[refname] = read_dict.pop(None) + ( + mut_count, + conserved_count, + CT_damage, + GA_damage, + all_damage, + ) = al.compute_damage() + + al_handle.close() + + model_A = models.damage_model() + model_B = models.null_model() + test_res = fit_models( + ref=ref, + model_A=model_A, + model_B=model_B, + damage=all_damage, + mut_count=mut_count, + conserved_count=conserved_count, + verbose=verbose, + ) + test_res["reference"] = refname + test_res["nb_reads_aligned"] = nb_reads_aligned + test_res["coverage"] = cov + test_res["reflen"] = reflen + + CT_log = {} + GA_log = {} + + for i in range(wlen): + CT_log[f"CtoT-{i}"] = CT_damage[i] + GA_log[f"GtoA-{i}"] = GA_damage[i] + test_res.update(CT_log) + test_res.update(GA_log) + return (check_model_fit(test_res, wlen, verbose), read_dict) + + except (ValueError, RuntimeError) as e: + if verbose: + logging.warning(f"Model fitting for {ref} failed") + logging.warning(f"Model fitting error: {e}") + logging.warning( + f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov}" + f" - reflen: {reflen}\n" + ) + al_handle.close() + return False, read_dict From 91aa1b82c6735671cd4e45dd6d80ebdf27bce596 Mon Sep 17 00:00:00 2001 From: maxibor Date: Thu, 23 Jan 2025 14:45:41 +0100 Subject: [PATCH 4/6] ci: update conda channels --- .github/workflows/pydamage_ci.yml | 2 +- environment.yml | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/pydamage_ci.yml b/.github/workflows/pydamage_ci.yml index 2ac52dc..bf245c6 100644 --- a/.github/workflows/pydamage_ci.yml +++ b/.github/workflows/pydamage_ci.yml @@ -14,7 +14,7 @@ jobs: with: auto-update-conda: true mamba-version: "*" - channels: conda-forge,bioconda,defaults + channels: conda-forge,bioconda channel-priority: true environment-file: environment.yml activate-environment: pydamage diff --git a/environment.yml b/environment.yml index 676c306..c92cf0c 100644 --- a/environment.yml +++ b/environment.yml @@ -2,7 +2,6 @@ name: pydamage channels: - conda-forge - bioconda - - defaults dependencies: - click - numpy From 5ce41243c61608b202c055b638149f113a19ab00 Mon Sep 17 00:00:00 2001 From: maxibor Date: Thu, 23 Jan 2025 14:52:00 +0100 Subject: [PATCH 5/6] ci: replace by micromamba --- .github/workflows/pydamage_ci.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pydamage_ci.yml b/.github/workflows/pydamage_ci.yml index bf245c6..853292e 100644 --- a/.github/workflows/pydamage_ci.yml +++ b/.github/workflows/pydamage_ci.yml @@ -10,26 +10,26 @@ jobs: if: "!contains(github.event.head_commit.message, '[skip_ci]')" steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v3 + - uses: mamba-org/setup-micromamba@v2 with: - auto-update-conda: true - mamba-version: "*" - channels: conda-forge,bioconda - channel-priority: true environment-file: environment.yml - activate-environment: pydamage + environment-name: pydamage + init-shell: >- + bash + cache-environment: true + post-cleanup: 'all' - name: Test with pytest - shell: bash -l {0} + shell: bash -el {0} run: | pip install -e . pip install pytest pytest - name: Check pydamage help message - shell: bash -l {0} + shell: bash -el {0} run: | pydamage --help - name: Check pydamage on test data - shell: bash -l {0} + shell: bash -el {0} run: | pydamage analyze --verbose tests/data/aligned.bam pydamage filter pydamage_results/pydamage_results.csv From 4d5fc957057f26cc4a3a122366cff5170a896784 Mon Sep 17 00:00:00 2001 From: maxibor Date: Thu, 23 Jan 2025 14:54:24 +0100 Subject: [PATCH 6/6] ci_build: update python --- .github/workflows/publish_pypi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish_pypi.yml b/.github/workflows/publish_pypi.yml index d29379a..2a2f68a 100644 --- a/.github/workflows/publish_pypi.yml +++ b/.github/workflows/publish_pypi.yml @@ -13,7 +13,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v3 with: - python-version: "3.9" + python-version: "3.10" - name: Build pydamage run: | pip install wheel