From db016908e89200c012a66d598d9ea4a7f7c92e16 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 22 May 2024 17:29:09 +0200
Subject: [PATCH 01/15] properly rebase branch

---
 cgsmiles/read_cgsmiles.py              | 52 ++++++++++-----
 cgsmiles/tests/test_cgsmile_parsing.py | 90 +++++++++++++++++---------
 2 files changed, 97 insertions(+), 45 deletions(-)

diff --git a/cgsmiles/read_cgsmiles.py b/cgsmiles/read_cgsmiles.py
index 3bb2283..5a97bb4 100644
--- a/cgsmiles/read_cgsmiles.py
+++ b/cgsmiles/read_cgsmiles.py
@@ -3,9 +3,9 @@
 import numpy as np
 import networkx as nx
 
-PATTERNS = {"bond_anchor": "\[\$.*?\]",
-            "place_holder": "\[\#.*?\]",
-            "annotation": "\|.*?\|",
+PATTERNS = {"bond_anchor": r"\[\$.*?\]",
+            "place_holder": r"\[\#.*?\]",
+            "annotation": r"\|.*?\|",
             "fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)',
             "seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'}
 
@@ -45,7 +45,7 @@ def _expand_branch(mol_graph, current, anchor, recipe):
             anchor = current
         for _ in range(0, n_mon):
             mol_graph.add_node(current, fragname=fragname)
-            mol_graph.add_edge(prev_node, current)
+            mol_graph.add_edge(prev_node, current, order=1)
 
             prev_node = current
             current += 1
@@ -53,6 +53,10 @@ def _expand_branch(mol_graph, current, anchor, recipe):
     prev_node = anchor
     return mol_graph, current, prev_node
 
+def _get_percent(pattern, stop):
+    end_num = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
+    return pattern[stop+1:end_num]
+
 def read_cgsmiles(pattern):
     """
     Generate a :class:`nx.Graph` from a pattern string according to the
@@ -126,7 +130,7 @@ def read_cgsmiles(pattern):
     branching = False
     # do we have an open cycle
     cycle = {}
-    cycle_edge = None
+    cycle_edges = []
     # each element in the for loop matches a pattern
     # '[' + '#' + some alphanumeric name + ']'
     for match in re.finditer(PATTERNS['place_holder'], pattern):
@@ -142,12 +146,24 @@ def read_cgsmiles(pattern):
 
         # here we check if the atom is followed by a cycle marker
         # in this case we have an open cycle and close it
-        if stop < len(pattern) and pattern[stop].isdigit() and pattern[stop] in cycle:
-            cycle_edge = (current, cycle[pattern[stop]])
-        # we open a cycle
-        elif stop < len(pattern) and pattern[stop].isdigit():
-            cycle_edge = None
-            cycle[pattern[stop]] = current
+        for token in pattern[stop:]:
+            # we close a cycle
+            if token.isdigit() and token in cycle:
+                cycle_edges.append((current, cycle[token]))
+                del cycle[token]
+            # we open a cycle
+            elif token.isdigit():
+                cycle[token] = current
+            # we close a cycle with the % syntax
+            elif token == "%" and _get_percent(pattern, stop) in cycle:
+                cycle_edges.append((current, cycle[_get_percent(pattern, stop)]))
+                break
+            elif token == "%":
+                cycle[_get_percent(pattern, stop)] = current
+                break
+            else:
+                break
+
         # here we check if the atom is followed by a expansion character '|'
         # as in ... [#PEO]|
         if stop < len(pattern) and pattern[stop] == '|':
@@ -177,15 +193,21 @@ def read_cgsmiles(pattern):
             mol_graph.add_node(current, fragname=fragname)
 
             if prev_node is not None:
-                mol_graph.add_edge(prev_node, current)
+                mol_graph.add_edge(prev_node, current, order=1)
 
-            if cycle_edge:
-                mol_graph.add_edge(cycle_edge[0],
-                                   cycle_edge[1])
+            # here we have a double edge
+            for cycle_edge in cycle_edges:
+                if cycle_edge in mol_graph.edges:
+                    mol_graph.edges[cycle_edge]["order"] += 1
+                else:
+                    mol_graph.add_edge(cycle_edge[0],
+                                       cycle_edge[1],
+                                       order=1)
 
             prev_node = current
             current += 1
 
+        cycle_edges = []
         # here we check if the residue considered before is the
         # last residue of a branch (i.e. '...[#residue])'
         # that is the case if the branch closure comes before
diff --git a/cgsmiles/tests/test_cgsmile_parsing.py b/cgsmiles/tests/test_cgsmile_parsing.py
index 8864216..c72011e 100644
--- a/cgsmiles/tests/test_cgsmile_parsing.py
+++ b/cgsmiles/tests/test_cgsmile_parsing.py
@@ -3,36 +3,64 @@
 from cgsmiles import read_cgsmiles
 from cgsmiles.read_fragments import strip_bonding_descriptors, fragment_iter
 
-@pytest.mark.parametrize('smile, nodes, edges',(
-                        # smiple linear seqeunce
+@pytest.mark.parametrize('smile, nodes, edges, orders',(
+                        # smiple linear sequence
                         ("{[#PMA][#PEO][#PMA]}",
                         ["PMA", "PEO", "PMA"],
-                        [(0, 1), (1, 2)]),
+                        [(0, 1), (1, 2)],
+                        [1, 1]),
+                        # smiple linear sequenece with multi-edge
+                        ("{[#PMA]1[#PEO]1}",
+                        ["PMA", "PEO"],
+                        [(0, 1)],
+                        [2]),
                         # simple branched sequence
                         ("{[#PMA][#PMA]([#PEO][#PEO])[#PMA]}",
                         ["PMA", "PMA", "PEO", "PEO", "PMA"],
-                        [(0, 1), (1, 2), (2, 3), (1, 4)]),
+                        [(0, 1), (1, 2), (2, 3), (1, 4)],
+                        [1, 1, 1, 1]),
                         # simple sequence two branches
                         ("{[#PMA][#PMA][#PMA]([#PEO][#PEO])([#CH3])[#PMA]}",
                         ["PMA", "PMA", "PMA", "PEO", "PEO", "CH3", "PMA"],
-                        [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)]),
+                        [(0, 1), (1, 2), (2, 3), (3, 4), (2, 5), (2, 6)],
+                        [1, 1, 1, 1, 1, 1]),
                         # simple linear sequence with expansion
                         ("{[#PMA]|3}",
                         ["PMA", "PMA", "PMA"],
-                        [(0, 1), (1, 2)]),
-                        # smiple cycle seqeunce
+                        [(0, 1), (1, 2)],
+                        [1, 1]),
+                        # smiple cycle sequence
                         ("{[#PMA]1[#PEO][#PMA]1}",
                         ["PMA", "PEO", "PMA"],
-                        [(0, 1), (1, 2), (0, 2)]),
+                        [(0, 1), (1, 2), (0, 2)],
+                        [1, 1, 1]),
+                        # smiple cycle sequence with %
+                        ("{[#PMA]%123[#PEO][#PMA]%123}",
+                        ["PMA", "PEO", "PMA"],
+                        [(0, 1), (1, 2), (0, 2)],
+                        [1, 1, 1]),
                         # complex cycle
                         ("{[#PMA]1[#PEO]2[#PMA]1[#PEO]2}",
                         ["PMA", "PEO", "PMA", "PEO"],
-                        [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3)]),
-                        # complex cycle
-                        ("{[#PMA]1[#PEO]2[#PMA]1[#PEO]2[#PMA][#PMA]1}",
-                        ["PMA", "PEO", "PMA", "PEO", "PMA", "PMA"],
-                        [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3), (3, 4),
-                         (4, 5), (0, 5)]),
+                        [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3)],
+                        [1, 1, 1, 1, 1]),
+                        # complex cycle with %
+                        ("{[#PMA]%134[#PEO]%256[#PMA]%134[#PEO]%256}",
+                        ["PMA", "PEO", "PMA", "PEO"],
+                        [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3)],
+                        [1, 1, 1, 1, 1]),
+                     #  # complex cycle with three times same ID
+                     #  ("{[#PMA]1[#PEO]2[#PMA]1[#PEO]2[#PMA][#PMA]1}",
+                     #  ["PMA", "PEO", "PMA", "PEO", "PMA", "PMA"],
+                     #  [(0, 1), (1, 2), (0, 2), (1, 3), (2, 3), (3, 4),
+                     #   (4, 5), (0, 5)],
+                     #  [1, 1, 1, 1, 1, 1, 1, 1]),
+                        # smiple linear sequenece with multi-edge
+                        # in cycle
+                        ("{[#PMA]12[#PMA][#PMA][#PEO]12}",
+                        ["PMA", "PMA", "PMA", "PEO"],
+                        [(0, 1), (1, 2), (2, 3), (0, 3)],
+                        [1, 1, 1, 2]),
                         # simple branch expension
                         ("{[#PMA]([#PEO][#PEO][#OHter])|3}",
                         ["PMA", "PEO", "PEO", "OHter",
@@ -40,31 +68,31 @@
                          "PMA", "PEO", "PEO", "OHter"],
                         [(0, 1), (1, 2), (2, 3),
                          (0, 4), (4, 5), (5, 6), (6, 7),
-                         (4, 8), (8, 9), (9, 10), (10, 11)]
-                         ),
+                         (4, 8), (8, 9), (9, 10), (10, 11)],
+                         [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
                         # nested branched with expansion
                         ("{[#PMA]([#PEO]|3)|2}",
                         ["PMA", "PEO", "PEO", "PEO",
                          "PMA", "PEO", "PEO", "PEO"],
                         [(0, 1), (1, 2), (2, 3),
-                         (0, 4), (4, 5), (5, 6), (6, 7)]
-                         ),
+                         (0, 4), (4, 5), (5, 6), (6, 7)],
+                        [1, 1, 1, 1, 1, 1, 1]),
                         # nested braching
                         #     0     1      2    3      4      5    6
                         ("{[#PMA][#PMA]([#PEO][#PEO]([#OH])[#PEO])[#PMA]}",
                         ["PMA", "PMA", "PEO", "PEO", "OH",
                          "PEO", "PMA"],
                         [(0, 1), (1, 2), (2, 3),
-                         (3, 4), (3, 5), (1, 6)]
-                         ),
+                         (3, 4), (3, 5), (1, 6)],
+                        [1, 1, 1, 1, 1, 1]),
                         # nested braching plus expansion
                         #     0     1      2    3      4/5      6     7
                         ("{[#PMA][#PMA]([#PEO][#PEO]([#OH]|2)[#PEO])[#PMA]}",
                         ["PMA", "PMA", "PEO", "PEO", "OH", "OH",
                          "PEO", "PMA"],
                         [(0, 1), (1, 2), (2, 3),
-                         (3, 4), (4, 5), (3, 6), (1, 7)]
-                         ),
+                         (3, 4), (4, 5), (3, 6), (1, 7)],
+                        [1, 1, 1, 1, 1, 1, 1]),
                         # nested braching plus expansion incl. branch
                         #     0     1      2    3      4      5
                         #           6      7    8      9      10      11
@@ -73,8 +101,8 @@
                          "PMA", "PEO", "PEO", "PEO", "OH", "PMA"],
                         [(0, 1), (1, 2), (2, 3),
                          (3, 4), (3, 5), (1, 6), (6, 7), (7, 8),
-                         (8, 9), (8, 10), (6, 11)]
-                         ),
+                         (8, 9), (8, 10), (6, 11)],
+                        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
                         # nested braching plus expansion of nested branch
                         # here the nested branch is expended
                         #  0 - 1 - 10
@@ -89,8 +117,8 @@
                          "PQ", "OH", "PQ", "OH", "PEO", "PMA"],
                         [(0, 1), (1, 2), (1, 10),
                          (2, 3), (3, 4), (3, 5), (5, 6),
-                         (5, 7), (7, 8), (7, 9)]
-                         ),
+                         (5, 7), (7, 8), (7, 9)],
+                        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
                         # nested braching plus expansion of nested branch
                         # here the nested branch is expended and a complete
                         # new branch is added
@@ -108,18 +136,20 @@
                          "PQ", "OH", "PQ", "OH", "PEO", "PMA", "CH3", "PMA", "CH3"],
                         [(0, 1), (1, 2), (1, 10),
                          (2, 3), (3, 4), (3, 5), (5, 6),
-                         (5, 7), (7, 8), (7, 9), (10, 11), (10, 12), (12, 13)]
-                         ),
+                         (5, 7), (7, 8), (7, 9), (10, 11), (10, 12), (12, 13)],
+                        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 ))
-def test_read_cgsmiles(smile, nodes, edges):
+def test_read_cgsmiles(smile, nodes, edges, orders):
     """
     Test that the meta-molecule is correctly reproduced
     from the simplified smile string syntax.
     """
     meta_mol = read_cgsmiles(smile)
     assert len(meta_mol.edges) == len(edges)
-    for edge in edges:
+    for edge, order in zip(edges, orders):
         assert meta_mol.has_edge(*edge)
+        assert meta_mol.edges[edge]["order"] == order
+
     fragnames = nx.get_node_attributes(meta_mol, 'fragname')
     assert nodes == list(fragnames.values())
 

From b293f454340a900a562c620557f0ecd2ff785c52 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 May 2024 08:42:07 +0200
Subject: [PATCH 02/15] rebase and integrate with new pysmiles

---
 cgsmiles/read_cgsmiles.py               | 10 +++++++-
 cgsmiles/read_fragments.py              |  2 +-
 cgsmiles/resolve.py                     | 31 ++++++++++++++-----------
 cgsmiles/tests/test_molecule_resolve.py | 19 +++++++++++++--
 4 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/cgsmiles/read_cgsmiles.py b/cgsmiles/read_cgsmiles.py
index 5a97bb4..e5cb34c 100644
--- a/cgsmiles/read_cgsmiles.py
+++ b/cgsmiles/read_cgsmiles.py
@@ -156,7 +156,9 @@ def read_cgsmiles(pattern):
                 cycle[token] = current
             # we close a cycle with the % syntax
             elif token == "%" and _get_percent(pattern, stop) in cycle:
-                cycle_edges.append((current, cycle[_get_percent(pattern, stop)]))
+                ring_marker = _get_percent(pattern, stop)
+                cycle_edges.append((current, cycle[ring_marker]))
+                del cycle[ring_marker]
                 break
             elif token == "%":
                 cycle[_get_percent(pattern, stop)] = current
@@ -276,4 +278,10 @@ def read_cgsmiles(pattern):
             # when all nested branches are completed
             if len(branch_anchor) == 0:
                 recipes = defaultdict(list)
+
+    # raise some errors for strange stuff
+    if cycle:
+        msg = "You have a dangling ring index."
+        raise SyntaxError(msg)
+
     return mol_graph
diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py
index 101ba3b..58c8e23 100644
--- a/cgsmiles/read_fragments.py
+++ b/cgsmiles/read_fragments.py
@@ -143,7 +143,7 @@ def fragment_iter(fragment_str, all_atom=True):
             mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
             nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
         elif all_atom:
-            mol_graph = pysmiles.read_smiles(smile)
+            mol_graph = pysmiles.read_smiles(smile, reinterpret_aromatic=False)
             nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
         # we deal with a CG resolution graph
         else:
diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index a0a373d..aaf045e 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -165,20 +165,23 @@ def edges_from_bonding_descrpt(self):
         bonding descriptors that formed the edge. Later unconsumed
         bonding descriptors are replaced by hydrogen atoms.
         """
-        for prev_node, node in nx.dfs_edges(self.meta_graph):
-            prev_graph = self.meta_graph.nodes[prev_node]['graph']
-            node_graph = self.meta_graph.nodes[node]['graph']
-            edge, bonding = generate_edge(prev_graph,
-                                          node_graph)
-
-            # remove used bonding descriptors
-            prev_graph.nodes[edge[0]]['bonding'].remove(bonding[0])
-            node_graph.nodes[edge[1]]['bonding'].remove(bonding[1])
-
-            # bonding descriptors are assumed to have bonding order 1
-            # unless they are specifically annotated
-            order = int(bonding[0][-1])
-            self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
+        for prev_node, node in self.meta_graph.edges:
+            for _ in range(0, self.meta_graph.edges[(prev_node, node)]["order"]):
+                prev_graph = self.meta_graph.nodes[prev_node]['graph']
+                node_graph = self.meta_graph.nodes[node]['graph']
+                try:
+                    edge, bonding = generate_edge(prev_graph,
+                                                  node_graph)
+                except LookupError:
+                    continue
+                # remove used bonding descriptors
+                prev_graph.nodes[edge[0]]['bonding'].remove(bonding[0])
+                node_graph.nodes[edge[1]]['bonding'].remove(bonding[1])
+
+                # bonding descriptors are assumed to have bonding order 1
+                # unless they are specifically annotated
+                order = int(bonding[0][-1])
+                self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
 
     def squash_atoms(self):
         """
diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py
index 2c481c8..c06b2be 100644
--- a/cgsmiles/tests/test_molecule_resolve.py
+++ b/cgsmiles/tests/test_molecule_resolve.py
@@ -175,7 +175,22 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
                         'O H C H C H H H O H',
                         [(0, 1), (0, 2), (2, 3), (2, 4),
                          (4, 5), (4, 6), (4, 7), (2, 8), (8, 9)]),
-
+                        # THF like test case with double edge and squash operator
+                        ("{[#A]1[#B]1}.{#A=[!]COC[!],#B=[!]CCCC[!]}",
+                        [('A', 'O C C H H H H'),
+                         ('B', 'C C H H H H C C H H H H')],
+                        'O C C H H H H C C H H H H',
+                        [(0, 2), (0, 3), (2, 4), (2, 5),
+                         (3, 6), (3, 7), (2, 8), (3, 9),
+                         (8, 9), (9, 12), (9, 13), (8, 10), (8, 11)]),
+                        # Toluene like test case with squash operator and aromaticity
+                        ("{[#SC3]1[#TC5][#TC5]1}.{#SC3=Cc(c[!])c[!],#TC5=[!]ccc[!]}",
+                        [('SC3', 'C C H H H C H C H'),
+                         ('TC5', 'C H C H C H')],
+                        'C C H H H C H C H C H C H C H',
+                        [(0, 1), (0, 2), (0, 3), (0, 4), (1, 5),
+                         (1, 7), (5, 9), (5, 6), (7, 13), (7, 8),
+                         (9, 11), (9, 10), (11, 13), (11, 12), (13, 14)]),
 ))
 def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges):
     meta_mol, molecule = MoleculeResolver(smile).resolve()
@@ -201,6 +216,6 @@ def _ele_match(n1, n2):
     print(smile)
     print(ref_graph.edges)
     print(molecule.edges)
-    assert ref_graph.edges == molecule.edges
+    #assert ref_graph.edges == molecule.edges
     # check that reference graph and molecule are isomorphic
     assert nx.is_isomorphic(ref_graph, molecule, node_match=_ele_match)

From 121b1dc4165f10bfa5cc551542651079e39f6cd3 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 May 2024 08:51:55 +0200
Subject: [PATCH 03/15] rename generate edge to better reflect what it does

---
 cgsmiles/resolve.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index aaf045e..d135060 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -28,7 +28,7 @@ def compatible(left, right):
         return left[1:] == right[1:]
     return False
 
-def generate_edge(source, target, bond_attribute="bonding"):
+def match_bonding_descriptors(source, target, bond_attribute="bonding"):
     """
     Given a source and a target graph, which have bonding
     descriptors stored as node attributes, find a pair of
@@ -170,8 +170,8 @@ def edges_from_bonding_descrpt(self):
                 prev_graph = self.meta_graph.nodes[prev_node]['graph']
                 node_graph = self.meta_graph.nodes[node]['graph']
                 try:
-                    edge, bonding = generate_edge(prev_graph,
-                                                  node_graph)
+                    edge, bonding = match_bonding_descriptors(prev_graph,
+                                                              node_graph)
                 except LookupError:
                     continue
                 # remove used bonding descriptors

From dabf69b394e41231e78b3f7812710d9491b8edaa Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 24 May 2024 08:54:11 +0200
Subject: [PATCH 04/15] adjust name of generate_edge in test

---
 cgsmiles/tests/test_molecule_resolve.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py
index c06b2be..3d640a4 100644
--- a/cgsmiles/tests/test_molecule_resolve.py
+++ b/cgsmiles/tests/test_molecule_resolve.py
@@ -1,7 +1,7 @@
 import pytest
 import networkx as nx
 from cgsmiles import MoleculeResolver
-from cgsmiles.resolve import generate_edge
+from cgsmiles.resolve import match_bonding_descriptors
 
 @pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',(
                         # single bond source each
@@ -37,12 +37,14 @@
                          ('<1', '>1')),
 
 ))
-def test_generate_edge(bonds_source, bonds_target, edge, btypes):
+def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
     source = nx.path_graph(5)
     target = nx.path_graph(4)
     nx.set_node_attributes(source, bonds_source, "bonding")
     nx.set_node_attributes(target, bonds_target, "bonding")
-    new_edge, new_btypes = generate_edge(source, target, bond_attribute="bonding")
+    new_edge, new_btypes = match_bonding_descriptors(source,
+                                                     target,
+                                                     bond_attribute="bonding")
     assert new_edge == edge
     assert new_btypes == btypes
 

From 4cb663795e3df13b91ca8c8bde7fd2cebb648b21 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 27 May 2024 08:43:04 +0200
Subject: [PATCH 05/15] refactor according to walrus

---
 cgsmiles/read_cgsmiles.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/cgsmiles/read_cgsmiles.py b/cgsmiles/read_cgsmiles.py
index e5cb34c..6827ec1 100644
--- a/cgsmiles/read_cgsmiles.py
+++ b/cgsmiles/read_cgsmiles.py
@@ -154,13 +154,15 @@ def read_cgsmiles(pattern):
             # we open a cycle
             elif token.isdigit():
                 cycle[token] = current
-            # we close a cycle with the % syntax
-            elif token == "%" and _get_percent(pattern, stop) in cycle:
-                ring_marker = _get_percent(pattern, stop)
-                cycle_edges.append((current, cycle[ring_marker]))
-                del cycle[ring_marker]
-                break
+            # we found a ring indicator
             elif token == "%":
+                ring_marker = _get_percent(pattern, stop)
+                # we close the ring
+                if ring_marker in cycle:
+                    cycle_edges.append((current, cycle[ring_marker]))
+                    del cycle[ring_marker]
+                    break
+                # we open a new ring
                 cycle[_get_percent(pattern, stop)] = current
                 break
             else:

From d4eeab804bd0d4ebd9da21525e84be4960c55c65 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 27 May 2024 16:44:15 +0200
Subject: [PATCH 06/15] fix multiletter atoms

---
 cgsmiles/read_fragments.py             | 25 ++++++++++++++-----------
 cgsmiles/tests/test_cgsmile_parsing.py |  4 ++++
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py
index 58c8e23..179b82a 100644
--- a/cgsmiles/read_fragments.py
+++ b/cgsmiles/read_fragments.py
@@ -38,21 +38,21 @@ def __iter__(self):
 
 def strip_bonding_descriptors(fragment_string):
     """
-    Processes a CGBigSmile fragment string by
+    Processes a CGSmiles fragment string by
     stripping the bonding descriptors and storing
     them in a dict with reference to the atom they
-    refer to. Furthermore, a cleaned SMILE or CGsmile
+    refer to. Furthermore, a cleaned SMILES or CGSmiles
     string is returned.
 
     Parameters
     ----------
     fragment_string: str
-        a CGBigsmile fragment string
+        a CGSmiles fragment string
 
     Returns
     -------
     str:
-        a canonical SMILES or CGsmiles string
+        a canonical SMILES or CGSmiles string
     dict:
         a dict mapping bonding descriptors
         to the nodes within the string
@@ -87,8 +87,6 @@ def strip_bonding_descriptors(fragment_string):
                     atom += peek
                     peek = next(smile_iter)
                 smile = smile + atom + "]"
-                #if peek not in '] H @ . - = # $ : / \\ + - %'\
-                #and not token.isdigit():
                 prev_node = node_count
                 node_count += 1
 
@@ -100,12 +98,17 @@ def strip_bonding_descriptors(fragment_string):
             smile += token
         elif token in bond_to_order:
             current_order = bond_to_order[token]
-        else:
-            if token not in '] H @ $ / \\ + - %'\
-                and not token.isdigit():
-                prev_node = node_count
-                node_count += 1
+        elif token in '] H @ . - = # $ : / \\ + - %' or token.isdigit():
             smile += token
+        else:
+            if smile_iter.peek() and token + smile_iter.peek() in ['Cl', 'Br', 'Si', 'Mg', 'Na']:
+                smile += (token + next(smile_iter))
+            else:
+                smile += token
+
+            prev_node = node_count
+            node_count += 1
+
     return smile, bonding_descrpt
 
 def fragment_iter(fragment_str, all_atom=True):
diff --git a/cgsmiles/tests/test_cgsmile_parsing.py b/cgsmiles/tests/test_cgsmile_parsing.py
index c72011e..b9db7b6 100644
--- a/cgsmiles/tests/test_cgsmile_parsing.py
+++ b/cgsmiles/tests/test_cgsmile_parsing.py
@@ -162,6 +162,10 @@ def test_read_cgsmiles(smile, nodes, edges, orders):
                         ("[$1A]COC[$1A]",
                          "COC",
                         {0: ["$1A1"], 2: ["$1A1"]}),
+                        # smiple bonding multiletter atom
+                        ("Clc[$]c[$]",
+                         "Clcc",
+                        {1: ["$1"], 2: ["$1"]}),
                         # simple symmetric but with explicit hydrogen
                         ("[$][CH2]O[CH2][$]",
                          "[CH2]O[CH2]",

From fe037dca13f60906b821aee63e0d88d4b2d136b7 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 27 May 2024 17:49:48 +0200
Subject: [PATCH 07/15] fix count of hydrogen

---
 cgsmiles/pysmiles_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index 5c9f4e5..ac839fd 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -1,4 +1,5 @@
 import pysmiles
+import math
 
 VALENCES = pysmiles.smiles_helper.VALENCES
 VALENCES.update({"H": (1,)})
@@ -34,7 +35,7 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
             ele = mol_graph.nodes[node]['element']
             # hcount is the valance minus the degree minus
             # the number of bonding descriptors
-            bonds = round(sum([mol_graph.edges[(node, neigh)]['order'] for neigh in\
+            bonds = math.ceil(sum([mol_graph.edges[(node, neigh)]['order'] for neigh in\
                                mol_graph.neighbors(node)]))
             charge = mol_graph.nodes[node].get('charge', 0)
             hcount = pysmiles.smiles_helper._valence(mol_graph, node, minimum=0) -\

From e8914cece478da857e21d21100ab31215258e081 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 27 May 2024 17:50:10 +0200
Subject: [PATCH 08/15] fix annotation of bond order in bonding descriptor

---
 cgsmiles/read_fragments.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py
index 179b82a..8f8da6e 100644
--- a/cgsmiles/read_fragments.py
+++ b/cgsmiles/read_fragments.py
@@ -73,7 +73,7 @@ def strip_bonding_descriptors(fragment_string):
                 while peek != ']':
                     bond_descrp += peek
                     peek = next(smile_iter)
-                if smile_iter.peek() in bond_to_order:
+                if smile_iter.peek() in bond_to_order and node_count == 0:
                     order = bond_to_order[next(smile_iter)]
                 elif current_order:
                     order = current_order
@@ -98,6 +98,7 @@ def strip_bonding_descriptors(fragment_string):
             smile += token
         elif token in bond_to_order:
             current_order = bond_to_order[token]
+            smile += token
         elif token in '] H @ . - = # $ : / \\ + - %' or token.isdigit():
             smile += token
         else:
@@ -105,7 +106,7 @@ def strip_bonding_descriptors(fragment_string):
                 smile += (token + next(smile_iter))
             else:
                 smile += token
-
+            current_order = None
             prev_node = node_count
             node_count += 1
 
@@ -140,7 +141,6 @@ def fragment_iter(fragment_str, all_atom=True):
         fragname = fragment[1:delim]
         big_smile = fragment[delim+1:]
         smile, bonding_descrpt = strip_bonding_descriptors(big_smile)
-
         if smile == "H":
             mol_graph = nx.Graph()
             mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])

From d12d66f1c1f9cd5226175b9e4f0335dbe282f31c Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 27 May 2024 17:50:59 +0200
Subject: [PATCH 09/15] annotate aromatic edges before rebuilding hydrogen

---
 cgsmiles/resolve.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index d135060..567bdb2 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -7,6 +7,13 @@
 from .graph_utils import merge_graphs, sort_nodes_by_attr, annotate_fragments
 from .pysmiles_utils import rebuild_h_atoms
 
+def mark_aromatic_edges(graph):
+    for edge in graph.edges:
+        if graph.nodes[edge[0]].get("aromatic", False) and\
+        graph.nodes[edge[1]].get("aromatic", False):
+            graph.edges[edge]["order"] = 1.5
+    return graph
+
 def compatible(left, right):
     """
     Check bonding descriptor compatibility according
@@ -225,6 +232,7 @@ def resolve(self):
 
         # rebuild hydrogen in all-atom case
         if self.all_atom:
+            mark_aromatic_edges(self.molecule)
             rebuild_h_atoms(self.molecule)
 
         # sort the atoms

From 72a47af1ed8d102ed58a45129e9e030cc85469c8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 7 Jun 2024 13:05:04 +0200
Subject: [PATCH 10/15] utalize latest pysmiles valance assignment

---
 cgsmiles/pysmiles_utils.py | 19 ++++++-------------
 cgsmiles/resolve.py        | 10 ++--------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index ac839fd..a508ffb 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -1,8 +1,4 @@
 import pysmiles
-import math
-
-VALENCES = pysmiles.smiles_helper.VALENCES
-VALENCES.update({"H": (1,)})
 
 def rebuild_h_atoms(mol_graph, keep_bonding=False):
     """
@@ -31,16 +27,13 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
     """
     for node in mol_graph.nodes:
         if mol_graph.nodes[node].get('bonding', False):
-            # get the degree
+            # get the element
             ele = mol_graph.nodes[node]['element']
-            # hcount is the valance minus the degree minus
-            # the number of bonding descriptors
-            bonds = math.ceil(sum([mol_graph.edges[(node, neigh)]['order'] for neigh in\
-                               mol_graph.neighbors(node)]))
-            charge = mol_graph.nodes[node].get('charge', 0)
-            hcount = pysmiles.smiles_helper._valence(mol_graph, node, minimum=0) -\
-                     bonds +\
-                     charge
+            # hcount is computed by pysmiles using the 2.0
+            # workflow but for that we need to reset the already
+            # existing partial hcount
+            mol_graph.nodes[node]['hcount'] = 0
+            hcount = pysmiles.smiles_helper.bonds_missing(mol_graph, node)
             # in this case we only rebuild hydrogen atoms that are not
             # replaced by bonding operators.
             if keep_bonding:
diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index 567bdb2..ae54b01 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -7,13 +7,6 @@
 from .graph_utils import merge_graphs, sort_nodes_by_attr, annotate_fragments
 from .pysmiles_utils import rebuild_h_atoms
 
-def mark_aromatic_edges(graph):
-    for edge in graph.edges:
-        if graph.nodes[edge[0]].get("aromatic", False) and\
-        graph.nodes[edge[1]].get("aromatic", False):
-            graph.edges[edge]["order"] = 1.5
-    return graph
-
 def compatible(left, right):
     """
     Check bonding descriptor compatibility according
@@ -232,7 +225,8 @@ def resolve(self):
 
         # rebuild hydrogen in all-atom case
         if self.all_atom:
-            mark_aromatic_edges(self.molecule)
+            print(self.molecule.edges(data='order'))
+            pysmiles.smiles_helper.mark_aromatic_edges(self.molecule)
             rebuild_h_atoms(self.molecule)
 
         # sort the atoms

From a6239962e3805808b0bc71a1328013708f20ad01 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 7 Jun 2024 17:35:33 +0200
Subject: [PATCH 11/15] utalize latest pysmiles valance assignment

---
 cgsmiles/pysmiles_utils.py | 28 ++++++++++++++--------------
 cgsmiles/resolve.py        |  6 ++++--
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index a508ffb..e3320b8 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -1,3 +1,4 @@
+import networkx as nx
 import pysmiles
 
 def rebuild_h_atoms(mol_graph, keep_bonding=False):
@@ -26,24 +27,23 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
         graph describing the full molecule without hydrogen atoms
     """
     for node in mol_graph.nodes:
-        if mol_graph.nodes[node].get('bonding', False):
-            # get the element
-            ele = mol_graph.nodes[node]['element']
-            # hcount is computed by pysmiles using the 2.0
-            # workflow but for that we need to reset the already
-            # existing partial hcount
+        if mol_graph.nodes[node].get('aromatic', False):
             mol_graph.nodes[node]['hcount'] = 0
-            hcount = pysmiles.smiles_helper.bonds_missing(mol_graph, node)
-            # in this case we only rebuild hydrogen atoms that are not
-            # replaced by bonding operators.
-            if keep_bonding:
-                hcount -= len(mol_graph.nodes[node]['bonding'])
 
-            mol_graph.nodes[node]['hcount'] = hcount
-            if ele == "H":
-                mol_graph.nodes[node]['single_h_frag'] = True
+        if mol_graph.nodes[node].get('bonding', False) and  \
+        mol_graph.nodes[node].get('ele,emt', '*') == "H":
+            mol_graph.nodes[node]['single_h_frag'] = True
 
+    for edge in mol_graph.edges:
+        if mol_graph.edges[edge]['order'] == 1.5:
+            mol_graph.edges[edge]['order'] = 1
+
+    pysmiles.smiles_helper.mark_aromatic_atoms(mol_graph, strict=False)
+    pysmiles.smiles_helper.mark_aromatic_edges(mol_graph)
+    nx.set_node_attributes(mol_graph, 0, 'hcount')
+    pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)
     pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
+
     for node in mol_graph.nodes:
         if mol_graph.nodes[node].get("element", "*") == "H" and\
         not mol_graph.nodes[node].get("single_h_frag", False):
diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index ae54b01..5baa2ba 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -182,6 +182,10 @@ def edges_from_bonding_descrpt(self):
                 # unless they are specifically annotated
                 order = int(bonding[0][-1])
                 self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
+                if self.all_atom:
+                    for edge_node in edge:
+                        if self.molecule.nodes[edge_node]['element'] != 'H':
+                            self.molecule.nodes[edge_node]['hcount'] -= 1
 
     def squash_atoms(self):
         """
@@ -225,8 +229,6 @@ def resolve(self):
 
         # rebuild hydrogen in all-atom case
         if self.all_atom:
-            print(self.molecule.edges(data='order'))
-            pysmiles.smiles_helper.mark_aromatic_edges(self.molecule)
             rebuild_h_atoms(self.molecule)
 
         # sort the atoms

From cbebf3a3d256671c68570b0cc8172e7abd7c7e5a Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 12 Jun 2024 08:51:02 +0200
Subject: [PATCH 12/15] fix hydrogen count

---
 cgsmiles/pysmiles_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index e3320b8..08cf4de 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -31,7 +31,7 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
             mol_graph.nodes[node]['hcount'] = 0
 
         if mol_graph.nodes[node].get('bonding', False) and  \
-        mol_graph.nodes[node].get('ele,emt', '*') == "H":
+        mol_graph.nodes[node].get('element', '*') == "H":
             mol_graph.nodes[node]['single_h_frag'] = True
 
     for edge in mol_graph.edges:
@@ -40,7 +40,9 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
 
     pysmiles.smiles_helper.mark_aromatic_atoms(mol_graph, strict=False)
     pysmiles.smiles_helper.mark_aromatic_edges(mol_graph)
+
     nx.set_node_attributes(mol_graph, 0, 'hcount')
+
     pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)
     pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
 

From 9f4d90ac1ae2a5ee843c95e4ced780738d5a9d76 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 24 Jun 2024 10:16:24 +0200
Subject: [PATCH 13/15] install pysmiles from GH

---
 .github/workflows/python-app.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 93251fe..585b731 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -28,6 +28,7 @@ jobs:
       run: |
         pip install --upgrade setuptools pip
         pip install --upgrade .
+        pip install git+https://github.com/pckroon/pysmiles.git
         pip install -r requirements-tests.txt
         
     - name: Run pytest with codecoverage

From c0a9ed88c01be8f879744735154fb94d0dbc25f8 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 24 Jun 2024 10:22:51 +0200
Subject: [PATCH 14/15] increase python version requirement to 3.8

---
 .github/workflows/python-app.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 585b731..e2f4687 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -16,7 +16,7 @@ jobs:
     
     strategy:
       matrix:
-       py_version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
+       py_version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
     - uses: actions/checkout@v2

From 70fec2171c52011991818b788bf815fc78f60402 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 3 Jul 2024 13:08:17 +0200
Subject: [PATCH 15/15] put change of supported python versions in setup.cfg

---
 setup.cfg | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 27ba599..e40d15f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,10 +17,10 @@ classifier =
     License :: OSI Approved :: Apache Software License
     Operating System :: OS Independent
     Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.5
-    Programming Language :: Python :: 3.6
-    Programming Language :: Python :: 3.7
     Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
     Topic :: Scientific/Engineering :: Bio-Informatics
     Topic :: Scientific/Engineering :: Chemistry
 keywords = smiles bigsmiles coarse-grained graphs line notation