Skip to content

Commit

Permalink
Fix KeyError when creating an index with ambiguous sequences
Browse files Browse the repository at this point in the history
This happened when there were multiple ways in which an ambiguous sequence
arose.

Closes #829
  • Loading branch information
marcelm committed Jan 28, 2025
1 parent 28cd0ee commit 3db8e18
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 9 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
Changelog
=========

development version
-------------------

* :issue:`829`: Fix crash (``KeyError``) when creating an index with ambiguous
sequences.

v5.0 (2024-12-13)
-----------------

Expand Down
17 changes: 8 additions & 9 deletions src/cutadapt/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndexDict", int]:
)
index: Dict[str, Tuple[SingleAdapter, int, int]] = dict()
lengths = set()
ambiguous = []
ambiguous = {}
for adapter in self._adapters:
sequence = adapter.sequence
k = int(adapter.max_error_rate * len(sequence))
Expand All @@ -1368,8 +1368,8 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndexDict", int]:
other_adapter, other_errors, other_matches = index[s]
if matches < other_matches:
continue
if other_matches == matches:
ambiguous.append((s, adapter, other_adapter, k, matches))
if other_matches == matches and s not in ambiguous:
ambiguous[s] = (adapter, other_adapter, k, matches)
index[s] = (adapter, errors, matches)
lengths.add(len(s))
else:
Expand All @@ -1381,10 +1381,8 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndexDict", int]:
other_adapter, other_errors, other_matches = index[s]
if matches < other_matches:
continue
if other_matches == matches:
ambiguous.append(
(s, adapter, other_adapter, k, matches)
)
if other_matches == matches and s not in ambiguous:
ambiguous[s] = (adapter, other_adapter, k, matches)
index[s] = (adapter, errors, matches)
lengths.add(n)

Expand All @@ -1394,7 +1392,8 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndexDict", int]:
"%d ambiguous sequences were found that cannot be assigned uniquely.",
len(ambiguous),
)
s, adapter, other_adapter, k, matches = ambiguous[0]
s = next(iter(ambiguous))
adapter, other_adapter, k, matches = ambiguous[s]
logger.warning(
"WARNING: For example, %r, when found in a read, would result in "
"%s matches for both %s %r and %s %r",
Expand All @@ -1408,7 +1407,7 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndexDict", int]:
logger.warning(
"WARNING: Reads with ambiguous sequence will *not* be trimmed."
)
for s, adapter, other_adapter, k, matches in ambiguous:
for s in ambiguous:
del index[s]

elapsed = time.time() - start_time
Expand Down
10 changes: 10 additions & 0 deletions tests/test_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,16 @@ def test_indexed_very_similar(caplog):
assert "ambiguous sequences" in caplog.text


@pytest.mark.parametrize("indels", [False, True])
def test_indexed_very_similar2(indels):
adapters = [
PrefixAdapter("AAA", max_errors=1, indels=indels),
PrefixAdapter("ATT", max_errors=1, indels=indels),
PrefixAdapter("TTA", max_errors=1, indels=indels),
]
IndexedPrefixAdapters(adapters)


def test_indexed_too_high_k():
with pytest.raises(ValueError) as e:
IndexedPrefixAdapters(
Expand Down

0 comments on commit 3db8e18

Please sign in to comment.