From 299c03353d5316fee9a647068e1ac32103bd2da4 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Thu, 12 Sep 2024 15:24:38 +0200
Subject: [PATCH 1/7] implement isomerism bug fixes plus tests

---
 cgsmiles/graph_utils.py                 | 27 ++++++++-
 cgsmiles/pysmiles_utils.py              | 27 +++++++++
 cgsmiles/read_fragments.py              | 30 ++++++++--
 cgsmiles/resolve.py                     | 17 ++++--
 cgsmiles/tests/test_cgsmile_parsing.py  | 76 ++++++++++++++++++++----
 cgsmiles/tests/test_molecule_resolve.py | 79 ++++++++++++++++++++-----
 6 files changed, 217 insertions(+), 39 deletions(-)

diff --git a/cgsmiles/graph_utils.py b/cgsmiles/graph_utils.py
index d2bd0ab..2d83d81 100644
--- a/cgsmiles/graph_utils.py
+++ b/cgsmiles/graph_utils.py
@@ -44,6 +44,11 @@ def merge_graphs(source_graph, target_graph, max_node=None):
         correspondence[node] = idx
         new_atom = copy.copy(target_graph.nodes[node])
         new_atom['fragid'] = [(new_atom.get('fragid', 0) + fragment_offset)]
+        # make sure to propagate the ez isomers
+        if 'ez_isomer_atoms' in new_atom:
+            new_atom['ez_isomer_atoms'] = (new_atom['ez_isomer_atoms'][0]+offset+1,
+                                           new_atom['ez_isomer_atoms'][1]+offset+1)
+        # make sure to propagate the ez atoms
         source_graph.add_node(idx, **new_atom)
 
     for node1, node2 in target_graph.edges:
@@ -53,7 +58,9 @@ def merge_graphs(source_graph, target_graph, max_node=None):
 
     return correspondence
 
-def sort_nodes_by_attr(graph, sort_attr="fragid"):
+def sort_nodes_by_attr(graph,
+                       sort_attr="fragid",
+                       relative_attr=[('ez_isomer_atoms', True)]):
     """
     Sort nodes in graph by attribute and relable the graph in place.
 
@@ -63,6 +70,13 @@ def sort_nodes_by_attr(graph, sort_attr="fragid"):
         the graph to sort nodes of
     sort_attr: `abc.hashable`
         the attribute to use for sorting
+    relative_attr: tuple(str, bool)
+        a list of attributes that are sensetive
+        to the ordering of nodes (i.e. refer to
+        other nodes). The second element indicates
+        the depth. If False the value of attr are
+        node keys. If True we expect the value to
+        be an itertable with node keys.
 
     Returns
     -------
@@ -73,6 +87,17 @@ def sort_nodes_by_attr(graph, sort_attr="fragid"):
     sorted_ids = sorted(attr_values, key=lambda item: (attr_values[item], item))
     mapping = {old: new for new, old in enumerate(sorted_ids)}
     new_graph = nx.relabel_nodes(graph, mapping, copy=True)
+    for attr, is_list in relative_attr:
+        attr_dict = nx.get_node_attributes(new_graph, attr)
+        new_dict = {}
+        for key, values in attr_dict.items():
+            if is_list:
+                new_values = [mapping[value] for value in values]
+            else:
+               new_values = mapping[values]
+            new_dict[key] = new_values
+        if new_dict:
+            nx.set_node_attributes(new_graph, new_dict, attr)
     return new_graph
 
 def _keyfunc(graph, node_idx, attrs):
diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index 08cf4de..7f536cd 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -1,5 +1,8 @@
+import logging
 import networkx as nx
 import pysmiles
+from pysmiles.smiles_helper import _annotate_ez_isomers
+LOGGER = logging.getLogger(__name__)
 
 def rebuild_h_atoms(mol_graph, keep_bonding=False):
     """
@@ -52,3 +55,27 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
             ref_node = next(mol_graph.neighbors(node))
             mol_graph.nodes[node]["fragid"] = mol_graph.nodes[ref_node]["fragid"]
             mol_graph.nodes[node]["fragname"] = mol_graph.nodes[ref_node]["fragname"]
+
+def annotate_ez_isomers(molecule):
+    """
+    Small wrapper dealing with ez_isomer annotation.
+
+    Parameters
+    ----------
+    molecule: nx.Graph
+        The molecule of intrest, which must of ez_isomer_pairs
+        and ez_isomer_class set as node attributes
+    """
+    ez_isomer_atoms = nx.get_node_attributes(molecule, 'ez_isomer_atoms')
+    ez_isomer_class = nx.get_node_attributes(molecule, 'ez_isomer_class')
+    ez_isomer_atoms_list = [atoms + [_class] for atoms, _class in zip(ez_isomer_atoms.values(), ez_isomer_class.values())]
+    ez_isomer_pairs = list(zip(ez_isomer_atoms_list[:-1], ez_isomer_atoms_list[1:]))
+    if len(ez_isomer_atoms)%2 != 0:
+        msg = ("You have an uneven amount of atoms marked as CIS/TRANS isomers."
+               "We will drop the last atom from assigning the iosmers.")
+        LOGGER.warning(msg)
+    _annotate_ez_isomers(molecule, ez_isomer_pairs)
+    # clean up
+    for node in ez_isomer_atoms:
+        del  molecule.nodes[node]['ez_isomer_atoms']
+        del  molecule.nodes[node]['ez_isomer_class']
diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py
index c07e9c4..a72d22f 100644
--- a/cgsmiles/read_fragments.py
+++ b/cgsmiles/read_fragments.py
@@ -60,6 +60,8 @@ def strip_bonding_descriptors(fragment_string):
     bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
     smile_iter = PeekIter(fragment_string)
     bonding_descrpt = defaultdict(list)
+    ez_isomer_atoms = {}
+    rs_isomers = {}
     smile = ""
     node_count = 0
     prev_node = 0
@@ -84,7 +86,14 @@ def strip_bonding_descriptors(fragment_string):
             else:
                 atom = token
                 while peek != ']':
-                    atom += peek
+                    # deal with rs chirality
+                    if peek == '@':
+                        chiral_token = peek
+                        if smile_iter.peek() == '@':
+                            chiral_token = '@' + next(smile_iter)
+                        rs_isomers[node_count] = (chiral_token, [])
+                    else:
+                        atom += peek
                     peek = next(smile_iter)
                 smile = smile + atom + "]"
                 prev_node = node_count
@@ -99,8 +108,15 @@ def strip_bonding_descriptors(fragment_string):
         elif token in bond_to_order:
             current_order = bond_to_order[token]
             smile += token
-        elif token in '] H @ . - = # $ : / \\ + - %' or token.isdigit():
+        elif token in '] H . - = # $ : + - %' or token.isdigit():
             smile += token
+        # deal with ez isomers
+        elif token in '/ \\':
+            # we have a branch
+            if node_count == 0:
+                ez_isomer_atoms[node_count] = (node_count, 1, token)
+            else:
+                ez_isomer_atoms[node_count] = (node_count, prev_node, token)
         else:
             if smile_iter.peek() and token + smile_iter.peek() in ['Cl', 'Br', 'Si', 'Mg', 'Na']:
                 smile += (token + next(smile_iter))
@@ -110,7 +126,7 @@ def strip_bonding_descriptors(fragment_string):
             prev_node = node_count
             node_count += 1
 
-    return smile, bonding_descrpt
+    return smile, bonding_descrpt, rs_isomers, ez_isomer_atoms
 
 def fragment_iter(fragment_str, all_atom=True):
     """
@@ -140,7 +156,7 @@ def fragment_iter(fragment_str, all_atom=True):
         delim = fragment.find('=', 0)
         fragname = fragment[1:delim]
         big_smile = fragment[delim+1:]
-        smile, bonding_descrpt = strip_bonding_descriptors(big_smile)
+        smile, bonding_descrpt, rs_isomers, ez_isomers = strip_bonding_descriptors(big_smile)
         if smile == "H":
             mol_graph = nx.Graph()
             mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
@@ -148,6 +164,12 @@ def fragment_iter(fragment_str, all_atom=True):
         elif all_atom:
             mol_graph = pysmiles.read_smiles(smile, reinterpret_aromatic=False, strict=False)
             nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
+            nx.set_node_attributes(mol_graph, rs_isomers, 'rs_isomer')
+            # we need to split countable node keys and the associated value
+            ez_isomer_atoms = {idx: val[:-1] for idx, val in ez_isomers.items()}
+            ez_isomer_class = {idx: val[-1] for idx, val in ez_isomers.items()}
+            nx.set_node_attributes(mol_graph, ez_isomer_atoms, 'ez_isomer_atoms')
+            nx.set_node_attributes(mol_graph, ez_isomer_class, 'ez_isomer_class')
         # we deal with a CG resolution graph
         else:
             mol_graph = read_cgsmiles(smile)
diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index 1895da3..afdc2af 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -1,13 +1,14 @@
 import re
 import copy
 import networkx as nx
+from pysmiles.smiles_helper import _mark_chiral_atoms
 from .read_cgsmiles import read_cgsmiles
 from .read_fragments import read_fragments
 from .graph_utils import (merge_graphs,
                           sort_nodes_by_attr,
                           annotate_fragments,
                           set_atom_names_atomistic)
-from .pysmiles_utils import rebuild_h_atoms
+from .pysmiles_utils import rebuild_h_atoms, annotate_ez_isomers
 
 def compatible(left, right):
     """
@@ -330,15 +331,19 @@ def resolve(self):
         # sort the atoms
         self.molecule = sort_nodes_by_attr(self.molecule, sort_attr=("fragid"))
 
+        if all_atom:
+            # assign chirality
+            _mark_chiral_atoms(self.molecule)
+            # assign rs isomerism
+            annotate_ez_isomers(self.molecule)
+            # in all-atom MD there are common naming conventions
+            # that might be expected and hence we set them here
+            set_atom_names_atomistic(self.meta_graph, self.molecule)
+
         # and redo the meta molecule
         self.meta_graph = annotate_fragments(self.meta_graph,
                                              self.molecule)
 
-        # in all-atom MD there are common naming conventions
-        # that might be expected and hence we set them here
-        if all_atom:
-            set_atom_names_atomistic(self.meta_graph, self.molecule)
-
         # increment the resolution counter
         self.resolution_counter += 1
 
diff --git a/cgsmiles/tests/test_cgsmile_parsing.py b/cgsmiles/tests/test_cgsmile_parsing.py
index b9db7b6..9323656 100644
--- a/cgsmiles/tests/test_cgsmile_parsing.py
+++ b/cgsmiles/tests/test_cgsmile_parsing.py
@@ -153,48 +153,100 @@ def test_read_cgsmiles(smile, nodes, edges, orders):
     fragnames = nx.get_node_attributes(meta_mol, 'fragname')
     assert nodes == list(fragnames.values())
 
-@pytest.mark.parametrize('big_smile, smile, bonding',(
+@pytest.mark.parametrize('big_smile, smile, bonding, rs, ez',(
                         # smiple symmetric bonding
                         ("[$]COC[$]",
                          "COC",
-                        {0: ["$1"], 2: ["$1"]}),
+                        {0: ["$1"], 2: ["$1"]},
+                        None,
+                        None),
                         # smiple symmetric bonding with more than one name
                         ("[$1A]COC[$1A]",
                          "COC",
-                        {0: ["$1A1"], 2: ["$1A1"]}),
+                        {0: ["$1A1"], 2: ["$1A1"]},
+                        None,
+                        None),
                         # smiple bonding multiletter atom
                         ("Clc[$]c[$]",
                          "Clcc",
-                        {1: ["$1"], 2: ["$1"]}),
+                        {1: ["$1"], 2: ["$1"]},
+                        None,
+                        None),
                         # simple symmetric but with explicit hydrogen
                         ("[$][CH2]O[CH2][$]",
                          "[CH2]O[CH2]",
-                        {0: ["$1"], 2: ["$1"]}),
+                        {0: ["$1"], 2: ["$1"]},
+                        None,
+                        None),
                         # smiple symmetric bonding; multiple descript
                         ("[$]COC[$][$1]",
                          "COC",
-                        {0: ["$1"], 2: ["$1", "$11"]}),
+                        {0: ["$1"], 2: ["$1", "$11"]},
+                        None,
+                        None),
                         # named different bonding descriptors
                         ("[$1]CCCC[$2]",
                          "CCCC",
-                        {0: ["$11"], 3: ["$21"]}),
+                        {0: ["$11"], 3: ["$21"]},
+                        None,
+                        None),
                         # ring and bonding descriptors
                         ("[$1]CC[$2]C1CCCCC1",
                          "CCC1CCCCC1",
-                        {0: ["$11"], 1: ["$21"]}),
+                        {0: ["$11"], 1: ["$21"]},
+                        None,
+                        None),
                         # bonding descript. after branch
                         ("C(COC[$1])[$2]CCC[$3]",
                          "C(COC)CCC",
-                        {0: ["$21"], 3: ["$11"], 6: ["$31"]}),
+                        {0: ["$21"], 3: ["$11"], 6: ["$31"]},
+                        None,
+                        None),
                         # left rigth bonding desciptors
                         ("[>]COC[<]",
                         "COC",
-                        {0: [">1"], 2: ["<1"]})
+                        {0: [">1"], 2: ["<1"]},
+                        None,
+                        None),
+                        # simple chirality in residue
+                        ("[>]C[C@](F)(B)N[<]",
+                        "C[C](F)(B)N",
+                        {0: [">1"], 4: ["<1"]},
+                        {1: ('@', [])},
+                        None),
+                        # simple chirality inverse in residue
+                        ("[>]C[C@@](F)(B)N[<]",
+                        "C[C](F)(B)N",
+                        {0: [">1"], 4: ["<1"]},
+                        {1: ('@@', [])},
+                        None),
+                        # \ fragment split
+                        ("[>]CC(\F)=[<]",
+                        "CC(F)=",
+                        {0: [">1"], 1: ["<2"]},
+                        None,
+                        {2: (2, 1, '\\')}),
+                        # / fragment split
+                        ("[>]CC(/F)=[<]",
+                        "CC(F)=",
+                        {0: [">1"], 1: ["<2"]},
+                        None,
+                        {2: (2, 1, '/')}),
+                        # both in one fragment
+                        ("[>]CC(/F)=C(\F)C[<]",
+                        "CC(F)=C(F)C",
+                        {0: [">1"], 5: ["<1"]},
+                        None,
+                        {2: (2, 1, '/'), 4: (4, 3, '\\')}),
 ))
-def test_strip_bonding_descriptors(big_smile, smile, bonding):
-    new_smile, new_bonding = strip_bonding_descriptors(big_smile)
+def test_strip_bonding_descriptors(big_smile, smile, bonding, rs, ez):
+    new_smile, new_bonding, rs_isomers, ez_isomers = strip_bonding_descriptors(big_smile)
     assert new_smile == smile
     assert new_bonding == bonding
+    if rs:
+        assert rs == rs_isomers
+    if ez:
+        assert ez == ez_isomers
 
 @pytest.mark.parametrize('fragment_str, nodes, edges',(
                         # single fragment
diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py
index b424ee0..6ad3fb7 100644
--- a/cgsmiles/tests/test_molecule_resolve.py
+++ b/cgsmiles/tests/test_molecule_resolve.py
@@ -52,7 +52,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
     assert new_btypes == btypes
 
 
-@pytest.mark.parametrize('smile, ref_frags, elements, ref_edges',(
+@pytest.mark.parametrize('smile, ref_frags, elements, ref_edges, chiral, ez',(
                         # smiple linear seqeunce
                         ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]COC[$],#OHter=[$][O]}",
                         #           0 1             2 3 4 5 6 7 8
@@ -62,7 +62,8 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'O H C O C H H H H C O C H H H H O H',
                         [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
                          (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
-                         (11, 14), (11, 15), (11, 16), (16, 17)]),
+                         (11, 14), (11, 15), (11, 16), (16, 17)],
+                        {}, {}),
                         # smiple linear seqeunce with bond-order in link
                         ("{[#TC1][#TC4][#TC1]}.{#TC1=[$1]=CC=[$2],#TC4=[$1]=CC=[$2]}",
                         #         0 1 2 3 4 5            6 7 8 9
@@ -72,7 +73,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'C C H H H H C C H H C C H H H H',
                         [(0, 1), (0, 2), (1, 3), (1, 4), (1, 5), (0, 6), (6, 7),
                          (6, 8), (7, 9), (7, 11), (10, 11), (10, 12), (10, 13),
-                         (10, 14), (11, 15)]),
+                         (10, 14), (11, 15)], {}, {}),
                         # smiple linear seqeunce unconsumed bonding descrpt
                         ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]CO[>]C[$],#OHter=[$][O]}",
                         #           0 1             2 3 4 5 6 7 8
@@ -82,7 +83,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'O H C O C H H H H C O C H H H H O H',
                         [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
                          (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
-                         (11, 14), (11, 15), (11, 16), (16, 17)]),
+                         (11, 14), (11, 15), (11, 16), (16, 17)], {}, {}),
                         # smiple linear seqeunce with ionic bond
                         ("{[#OHter][#PEO]|2[#OHter]}.{#PEO=[$]COC[$],#OHter=[$][O-].[Na+]}",
                         #           0 1             2 3 4 5 6 7 8
@@ -92,7 +93,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'O Na C O C H H H H C O C H H H H O Na',
                         [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
                          (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
-                         (11, 14), (11, 15), (11, 16), (16, 17)]),
+                         (11, 14), (11, 15), (11, 16), (16, 17)], {}, {}),
                         # smiple linear seqeunce with ionic ending
                         ("{[#OH][#PEO]|2[#ON]}.{#PEO=[$]COC[$],#OH=[$]O,#ON=[$][O-]}",
                         #           0 1             2 3 4 5 6 7 8
@@ -102,7 +103,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'O H C O C H H H H C O C H H H H O',
                         [(0, 1), (0, 2), (2, 3), (3, 4), (2, 5), (2, 6), (4, 7),
                          (4, 8), (4, 9), (9, 10), (10, 11), (9, 12), (9, 13),
-                         (11, 14), (11, 15), (11, 16)]),
+                         (11, 14), (11, 15), (11, 16)], {}, {}),
                         # uncomsumed bonding IDs; note that this is not the same
                         # molecule as previous test case. Here one of the OH branches
                         # and replaces an CH2 group with CH-OH
@@ -114,7 +115,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'O H C O C H H H H C O C H H H H O H',
                         [(0, 1), (0, 2), (2, 3), (2, 5), (2, 11), (3, 4),
                          (4, 6), (4, 7), (4, 8), (9, 10), (9, 12), (9, 13),
-                         (10, 11), (11, 15), (11, 14), (9, 16), (16, 17)]),
+                         (10, 11), (11, 15), (11, 14), (9, 16), (16, 17)], {}, {}),
                         # simple branched sequence
                         ("{[#Hter][#PE]([#PEO][#Hter])[#PE]([#PEO][#Hter])[#Hter]}.{#Hter=[$]H,#PE=[$]CC[$][$],#PEO=[$]COC[$]}",
                         [('Hter', 'H'), ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'),
@@ -122,7 +123,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'H C C H H H C O C H H H H H C C H H H C O C H H H H H H',
                         [(0, 1), (1, 2), (1, 3), (1, 4), (2, 5), (2, 6), (2, 14), (6, 7), (6, 9), (6, 10), (7, 8),
                          (8, 11), (8, 12), (8, 13), (14, 15), (14, 16), (14, 17), (15, 18), (15, 19), (15, 27),
-                         (19, 20), (19, 22), (19, 23), (20, 21), (21, 24), (21, 25), (21, 26)]),
+                         (19, 20), (19, 22), (19, 23), (20, 21), (21, 24), (21, 25), (21, 26)], {}, {}),
                         # something with a ring
                         #            012 34567
                         #            890123456
@@ -135,7 +136,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                          (6, 14), (7, 8), (7, 15), (8, 16), (17, 18), (17, 25),
                          (17, 26), (18, 19), (18, 27), (18, 33), (19, 20), (19, 24),
                          (20, 21), (20, 28), (21, 22), (21, 29), (22, 23), (22, 30),
-                         (23, 24), (23, 31), (24, 32)]),
+                         (23, 24), (23, 31), (24, 32)], {}, {}),
                         # something more complicated branched
                         # here we have multiple bonding descriptors
 #                       # despite being the same residue we have 3 fragments after adding hydrgens
@@ -157,7 +158,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [('A', 'O H C H H'), ('B', 'C H H C H H H'),],
                         'O H C H H C H H H',
                         [(0, 1), (0, 2), (2, 3), (2, 4), (2, 5),
-                         (5, 6), (5, 7), (5, 8)]),
+                         (5, 6), (5, 7), (5, 8)], {}, {}),
                         # smiple squash operator; unconsumed operators
                         ("{[#A][#B]}.{#A=OC[!],#B=[$][!]CC}",
                         #       0 1 2 3 4           1 5 3 4 6 7 8
@@ -168,7 +169,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [('A', 'O H C H H'), ('B', 'C H H C H H H'),],
                         'O H C H H C H H H',
                         [(0, 1), (0, 2), (2, 3), (2, 4), (2, 5),
-                         (5, 6), (5, 7), (5, 8)]),
+                         (5, 6), (5, 7), (5, 8)], {}, {}),
                         # smiple squash operator; plus connect operator
                         ("{[#A][#B][#C]}.{#A=OC[!],#B=[$][!]CC,#C=[$]O}",
                         #       0 1 2 3 4           1 5 3 4 6 7 8
@@ -179,7 +180,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [('A', 'O H C H'), ('B', 'C H C H H H'), ('C', 'O H')],
                         'O H C H C H H H O H',
                         [(0, 1), (0, 2), (2, 3), (2, 4),
-                         (4, 5), (4, 6), (4, 7), (2, 8), (8, 9)]),
+                         (4, 5), (4, 6), (4, 7), (2, 8), (8, 9)], {}, {}),
                         # THF like test case with double edge and squash operator
                         ("{[#A]1[#B]1}.{#A=[!]COC[!],#B=[!]CCCC[!]}",
                         [('A', 'O C C H H H H'),
@@ -187,7 +188,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'O C C H H H H C C H H H H',
                         [(0, 2), (0, 3), (2, 4), (2, 5),
                          (3, 6), (3, 7), (2, 8), (3, 9),
-                         (8, 9), (9, 12), (9, 13), (8, 10), (8, 11)]),
+                         (8, 9), (9, 12), (9, 13), (8, 10), (8, 11)], {}, {}),
                         # Toluene like test case with squash operator and aromaticity
                         ("{[#SC3]1[#TC5][#TC5]1}.{#SC3=Cc(c[!])c[!],#TC5=[!]ccc[!]}",
                         [('SC3', 'C C H H H C H C H'),
@@ -195,19 +196,57 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         'C C H H H C H C H C H C H C H',
                         [(0, 1), (0, 2), (0, 3), (0, 4), (1, 5),
                          (1, 7), (5, 9), (5, 6), (7, 13), (7, 8),
-                         (9, 11), (9, 10), (11, 13), (11, 12), (13, 14)]),
+                         (9, 11), (9, 10), (11, 13), (11, 12), (13, 14)], {}, {}),
+                        # smiple chirality assigment between fragments
+                        ("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@H][>]C(=O)OC}",
+                        # 0 1 2 3
+                        [('A', 'O H'), ('B', 'C C C O O C H H H H H H'),
+                         ('C', 'O H')],
+                        'O H C H H C H C O O C H H H O H',
+                        [(0, 1), (0, 2), (2, 3), (2, 4),
+                         (2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
+                         (10, 13), (5, 14), (14, 15)],
+                        {3: (2, 4, 10, 14)}, {}),
+                        # smiple chirality assigment between fragments inv
+                        ("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@@H][>]C(=O)OC}",
+                        # 0 1 2 3
+                        [('A', 'O H'), ('B', 'C C C O O C H H H H H H'),
+                         ('C', 'O H')],
+                        'O H C H H C H C O O C H H H O H',
+                        [(0, 1), (0, 2), (2, 3), (2, 4),
+                         (2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
+                         (10, 13), (5, 14), (14, 15)],
+                        {3: (2, 4, 14, 10)}, {}),
+                        # smiple ez isomerism assigment between fragments inv
+                        ("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(\F)C}",
+                        [('A', 'C C F H H H'), ('B', 'C F C H H H')],
+                        'C C F H H H F C C H H H',
+                        [(0, 1), (1, 2), (0, 3), (0, 4),
+                         (0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)],
+                        {}, {2: (2, 1, 6, 7, 'trans'), 7: (7, 6, 1, 2, 'trans')}),
+                        # smiple ez isomerism assigment between fragments inv
+                        ("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(/F)C}",
+                        [('A', 'C C F H H H'), ('B', 'C F C H H H')],
+                        'C C F H H H F C C H H H',
+                        [(0, 1), (1, 2), (0, 3), (0, 4),
+                         (0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)],
+                        {}, {2: (2, 1, 6, 7, 'cis'), 7: (7, 6, 1, 2, 'cis')}),
 ))
-def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges):
+def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral, ez):
     meta_mol, molecule = MoleculeResolver.from_string(smile).resolve()
 
     # loop and compare fragments first
+    counter=0
     for node, ref in zip(meta_mol.nodes, ref_frags):
         assert meta_mol.nodes[node]['fragname'] ==  ref[0]
         block_graph = meta_mol.nodes[node]['graph']
         target_elements = nx.get_node_attributes(block_graph, 'element')
         sorted_elements =  [target_elements[key] for key in sorted(target_elements)]
+        print("-->", sorted_elements)
+        print("-->",ref[1].split())
         assert sorted_elements == ref[1].split()
-
+        print(counter)
+        counter += 1
     # make the full scale reference graph
     ref_graph = nx.Graph()
     ref_graph.add_edges_from(ref_edges)
@@ -222,6 +261,14 @@ def _ele_match(n1, n2):
     # check that reference graph and molecule are isomorphic
     assert nx.is_isomorphic(ref_graph, molecule, node_match=_ele_match)
 
+    # check chirality
+    if chiral:
+        chiral_assigned = nx.get_node_attributes(molecule, 'rs_isomer')
+        assert chiral == chiral_assigned
+    # check ez isomerism
+    if ez:
+        ez_assigned = nx.get_node_attributes(molecule, 'ez_isomer')
+        assert ez == ez_assigned
 
 @pytest.mark.parametrize('case, cgsmiles_str, ref_string',(
     # case 1: here only the meta-graph is described by the

From df38245921010aa23b6eb3cc6fbed50630f236fd Mon Sep 17 00:00:00 2001
From: "Dr. Fabian Grunewald" <32294573+fgrunewald@users.noreply.github.com>
Date: Thu, 12 Sep 2024 17:28:56 +0200
Subject: [PATCH 2/7] Apply suggestions from code review

Co-authored-by: Peter C Kroon <pckroon@users.noreply.github.com>
---
 cgsmiles/graph_utils.py                 | 8 +++++++-
 cgsmiles/pysmiles_utils.py              | 2 +-
 cgsmiles/tests/test_molecule_resolve.py | 4 ++--
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/cgsmiles/graph_utils.py b/cgsmiles/graph_utils.py
index 2d83d81..96bacdf 100644
--- a/cgsmiles/graph_utils.py
+++ b/cgsmiles/graph_utils.py
@@ -91,10 +91,16 @@ def sort_nodes_by_attr(graph,
         attr_dict = nx.get_node_attributes(new_graph, attr)
         new_dict = {}
         for key, values in attr_dict.items():
+            try:
+                iter(values)
+            except TypeError:
+                is_list = False
+            else:
+                is_list = not isinstance(values, str)
             if is_list:
                 new_values = [mapping[value] for value in values]
             else:
-               new_values = mapping[values]
+                new_values = mapping[values]
             new_dict[key] = new_values
         if new_dict:
             nx.set_node_attributes(new_graph, new_dict, attr)
diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index 7f536cd..bfcc06f 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -63,7 +63,7 @@ def annotate_ez_isomers(molecule):
     Parameters
     ----------
     molecule: nx.Graph
-        The molecule of intrest, which must of ez_isomer_pairs
+        The molecule of interest, which must of ez_isomer_pairs
         and ez_isomer_class set as node attributes
     """
     ez_isomer_atoms = nx.get_node_attributes(molecule, 'ez_isomer_atoms')
diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py
index 6ad3fb7..a0f146c 100644
--- a/cgsmiles/tests/test_molecule_resolve.py
+++ b/cgsmiles/tests/test_molecule_resolve.py
@@ -236,14 +236,14 @@ def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral
     meta_mol, molecule = MoleculeResolver.from_string(smile).resolve()
 
     # loop and compare fragments first
-    counter=0
+    counter = 0
     for node, ref in zip(meta_mol.nodes, ref_frags):
         assert meta_mol.nodes[node]['fragname'] ==  ref[0]
         block_graph = meta_mol.nodes[node]['graph']
         target_elements = nx.get_node_attributes(block_graph, 'element')
         sorted_elements =  [target_elements[key] for key in sorted(target_elements)]
         print("-->", sorted_elements)
-        print("-->",ref[1].split())
+        print("-->", ref[1].split())
         assert sorted_elements == ref[1].split()
         print(counter)
         counter += 1

From 8b1e8a221f81383a7f67a092a55b50161840910d Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 24 Sep 2024 13:52:43 +0200
Subject: [PATCH 3/7] draft

---
 cgsmiles/read_fragments.py | 35 +++++++++++++++++++++++++++++++----
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py
index 0d4e614..168bda3 100644
--- a/cgsmiles/read_fragments.py
+++ b/cgsmiles/read_fragments.py
@@ -35,6 +35,28 @@ def peek(self):
     def __iter__(self):
         return self
 
+def deal_with_chiral_rings(smile_iter, token, node_count, rings):
+    """
+    """
+    multi_ring = False
+    ring_token = token
+    partial_str = token
+    while True:
+        if multi_ring and token == '%':
+            rings[ring_token].append(node_count)
+        elif multi_ring and token.isdigit():
+            ring_token += token
+        elif token == '%':
+            ring_token += token
+            multi_ring = True
+        elif token.isdigit():
+            rings[token] = node_count
+        else:
+            break
+        token = next(smile_iter)
+        partial_str += token
+
+    return smile_iter, token, partial_str, rings
 
 def strip_bonding_descriptors(fragment_string):
     """
@@ -60,6 +82,7 @@ def strip_bonding_descriptors(fragment_string):
     bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
     smile_iter = PeekIter(fragment_string)
     bonding_descrpt = defaultdict(list)
+    rings = defaultdict(list)
     ez_isomer_atoms = {}
     rs_isomers = {}
     smile = ""
@@ -92,13 +115,11 @@ def strip_bonding_descriptors(fragment_string):
                         if smile_iter.peek() == '@':
                             chiral_token = '@' + next(smile_iter)
                         rs_isomers[node_count] = (chiral_token, [])
-                    else:
-                        atom += peek
                     peek = next(smile_iter)
+
                 smile = smile + atom + "]"
                 prev_node = node_count
                 node_count += 1
-
         elif token == '(':
             anchor = prev_node
             smile += token
@@ -108,7 +129,13 @@ def strip_bonding_descriptors(fragment_string):
         elif token in bond_to_order:
             current_order = bond_to_order[token]
             smile += token
-        elif token in '] H . - = # $ : + - %' or token.isdigit():
+        # for chirality assignment we need to collect rings
+        elif token == '%' or token.isdigit():
+            smile_iter, part_str, rings = deal_with_chiral_rings(smile_iter,
+                                                                 node_count,
+                                                                 rings)
+            smile += part_str
+        elif token in '] H . - = # $ : + -':
             smile += token
         # deal with ez isomers
         elif token in '/ \\':

From 775c14eb4ec778f1ab84293f01a0d2f060c60aab Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 1 Oct 2024 15:29:09 +0200
Subject: [PATCH 4/7] have rings taken into account when dealing with
 chirality; account correctly for implicit hydrogens

---
 cgsmiles/pysmiles_utils.py              | 60 ++++++++++++++++++++++++-
 cgsmiles/read_fragments.py              | 43 ++++++++++++++----
 cgsmiles/resolve.py                     |  9 ++--
 cgsmiles/tests/test_molecule_resolve.py | 20 ++++++---
 4 files changed, 114 insertions(+), 18 deletions(-)

diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index f7c2480..d2c18bf 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -1,7 +1,11 @@
 import logging
 import networkx as nx
 import pysmiles
-from pysmiles.smiles_helper import _annotate_ez_isomers
+from pysmiles.smiles_helper import (_annotate_ez_isomers,
+                                    _mark_chiral_atoms,
+                                    remove_explicit_hydrogens,
+                                    add_explicit_hydrogens)
+
 LOGGER = logging.getLogger(__name__)
 
 def compute_mass(input_molecule):
@@ -103,3 +107,57 @@ def annotate_ez_isomers(molecule):
     for node in ez_isomer_atoms:
         del  molecule.nodes[node]['ez_isomer_atoms']
         del  molecule.nodes[node]['ez_isomer_class']
+
+def mark_chiral_atoms(molecule):
+    """
+    For all nodes tagged as chiral, figure out the three
+    substituents and annotate the node with a tuple that
+    has the order in which to rotate. This essentially
+    corresponds to the definition of an improper dihedral
+    angle centered on the chiral atom.
+
+    Pysmiles treats explicit hydrogen atoms differently
+    from implicit ones. In cgsmiles at the time when this
+    function is called all hydrogen atoms have been added
+    explicitly. However, we need to correct the chirality
+    assigment for this.
+
+    Note that this means it is not possible to have explicit
+    hydrogen atoms attached to chiral centers within cgsmiles
+    to begin with. However, this is a rather edge case.
+    """
+    chiral_nodes = nx.get_node_attributes(molecule, 'rs_isomer')
+    for node, (direction, rings) in chiral_nodes.items():
+        # first the ring atoms in order
+        # that they were connected then the
+        # other neighboring atoms in order
+        bonded_neighbours = sorted(molecule[node])
+        neighbours = list(rings)
+        hstash = None
+        for neighbour in bonded_neighbours:
+            if neighbour not in neighbours:
+                # all hydrogen atoms are explicit in cgsmiles
+                # BUT in the input of chirality they are usual
+                # implicit. At this point we need to treat all
+                # hydrogen atoms as if they were implicit.
+                if molecule.nodes[neighbour]['element'] != 'H':
+                    neighbours.append(neighbour)
+                else:
+                    hstash = neighbour
+        if hstash:
+            neighbours.insert(1, hstash)
+
+        if len(neighbours) != 4:
+            # FIXME Tetrahedral Allene-like Systems such as `NC(Br)=[C@]=C(O)C`
+            msg = (f"Chiral node {node} has {len(neighbours)} neighbors, which "
+                    "is different than the four expected for tetrahedral "
+                    "chirality.")
+            raise ValueError(msg)
+        # the default is anti-clockwise sorting indicated by '@'
+        # in this case the nodes are sorted with increasing
+        # node index; however @@ means clockwise and the
+        # order of nodes is reversed (i.e. with decreasing index)
+        if direction == '@@':
+            neighbours = [neighbours[0],  neighbours[1], neighbours[3], neighbours[2]]
+
+        molecule.nodes[node]['rs_isomer'] = tuple(neighbours)
diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py
index 168bda3..e1c2558 100644
--- a/cgsmiles/read_fragments.py
+++ b/cgsmiles/read_fragments.py
@@ -35,12 +35,20 @@ def peek(self):
     def __iter__(self):
         return self
 
+def _find_bonded_ring_node(ring_nodes, node):
+    current = ring_nodes.index(node)
+    if current%2 == 0:
+        other = ring_nodes[current+1]
+    else:
+        other = ring_nodes[current-1]
+    return other
+
 def deal_with_chiral_rings(smile_iter, token, node_count, rings):
     """
     """
     multi_ring = False
     ring_token = token
-    partial_str = token
+    partial_str = ""
     while True:
         if multi_ring and token == '%':
             rings[ring_token].append(node_count)
@@ -49,12 +57,21 @@ def deal_with_chiral_rings(smile_iter, token, node_count, rings):
         elif token == '%':
             ring_token += token
             multi_ring = True
+        elif multi_ring:
+            rings[ring_token].append(node_count)
+            ring_token = ""
         elif token.isdigit():
-            rings[token] = node_count
-        else:
-            break
-        token = next(smile_iter)
+            rings[token].append(node_count)
+
         partial_str += token
+        token = smile_iter.peek()
+        if token and not token.isdigit() and not token == '%':
+            break
+
+        try:
+            token = next(smile_iter)
+        except StopIteration:
+            break
 
     return smile_iter, token, partial_str, rings
 
@@ -115,6 +132,8 @@ def strip_bonding_descriptors(fragment_string):
                         if smile_iter.peek() == '@':
                             chiral_token = '@' + next(smile_iter)
                         rs_isomers[node_count] = (chiral_token, [])
+                    else:
+                        atom += peek
                     peek = next(smile_iter)
 
                 smile = smile + atom + "]"
@@ -131,9 +150,10 @@ def strip_bonding_descriptors(fragment_string):
             smile += token
         # for chirality assignment we need to collect rings
         elif token == '%' or token.isdigit():
-            smile_iter, part_str, rings = deal_with_chiral_rings(smile_iter,
-                                                                 node_count,
-                                                                 rings)
+            smile_iter, token, part_str, rings = deal_with_chiral_rings(smile_iter,
+                                                                        token,
+                                                                        prev_node,
+                                                                        rings)
             smile += part_str
         elif token in '] H . - = # $ : + -':
             smile += token
@@ -153,6 +173,12 @@ def strip_bonding_descriptors(fragment_string):
             prev_node = node_count
             node_count += 1
 
+    # we need to annotate rings to the chiral isomers
+    for node in rs_isomers:
+        for ring_idx, ring_nodes in rings.items():
+            if node in ring_nodes:
+                bonded_node = _find_bonded_ring_node(ring_nodes, node)
+                rs_isomers[node][1].append(bonded_node)
     return smile, bonding_descrpt, rs_isomers, ez_isomer_atoms
 
 def fragment_iter(fragment_str, all_atom=True):
@@ -197,6 +223,7 @@ def fragment_iter(fragment_str, all_atom=True):
             ez_isomer_class = {idx: val[-1] for idx, val in ez_isomers.items()}
             nx.set_node_attributes(mol_graph, ez_isomer_atoms, 'ez_isomer_atoms')
             nx.set_node_attributes(mol_graph, ez_isomer_class, 'ez_isomer_class')
+            print("hcount", mol_graph.nodes(data='hcount'))
         # we deal with a CG resolution graph
         else:
             mol_graph = read_cgsmiles(smile)
diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index c33899d..2376c1f 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -1,14 +1,15 @@
 import re
 import copy
 import networkx as nx
-from pysmiles.smiles_helper import _mark_chiral_atoms
 from .read_cgsmiles import read_cgsmiles
 from .read_fragments import read_fragments
 from .graph_utils import (merge_graphs,
                           sort_nodes_by_attr,
                           annotate_fragments,
                           set_atom_names_atomistic)
-from .pysmiles_utils import rebuild_h_atoms, annotate_ez_isomers
+from .pysmiles_utils import (rebuild_h_atoms,
+                             annotate_ez_isomers,
+                             mark_chiral_atoms)
 
 def compatible(left, right, legacy=False):
     """
@@ -359,8 +360,8 @@ def resolve(self):
         self.molecule = sort_nodes_by_attr(self.molecule, sort_attr=("fragid"))
 
         if all_atom:
-            # assign chirality
-            _mark_chiral_atoms(self.molecule)
+            # assign chiral atoms
+            mark_chiral_atoms(self.molecule)
             # assign rs isomerism
             annotate_ez_isomers(self.molecule)
             # in all-atom MD there are common naming conventions
diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py
index e9b8f11..b907757 100644
--- a/cgsmiles/tests/test_molecule_resolve.py
+++ b/cgsmiles/tests/test_molecule_resolve.py
@@ -185,7 +185,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [(0, 1), (0, 2), (0, 3), (0, 4), (1, 5),
                          (1, 7), (5, 9), (5, 6), (7, 13), (7, 8),
                          (9, 11), (9, 10), (11, 13), (11, 12), (13, 14)], {}, {}),
-                        # smiple chirality assigment between fragments
+                        # simple chirality assigment between fragments
                         ("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@H][>]C(=O)OC}",
                         # 0 1 2 3
                         [('A', 'O H'), ('B', 'C C C O O C H H H H H H'),
@@ -194,8 +194,18 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [(0, 1), (0, 2), (2, 3), (2, 4),
                          (2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
                          (10, 13), (5, 14), (14, 15)],
-                        {3: (2, 4, 10, 14)}, {}),
-                        # smiple chirality assigment between fragments inv
+                        {3: (2, 10, 4, 14)}, {}),
+                        # simple chirality assigment with rings
+                        ("{[#GLC]}.{#GLC=C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O}",
+                        # 0 1 2 3
+                        [('GLC', 'C C C C C C O O O O O O H H H H H H H H H H H H')],
+                        'C C C C C C O O O O O O H H H H H H H H H H H H',
+                        [(0, 1), (0, 11), (0, 12), (0, 13), (1, 2), (1, 6), (1, 14), (2, 3), (2, 10),
+                         (2, 15), (3, 4), (3, 9), (3, 16), (4, 5), (4, 8), (4, 17), (5, 6), (5, 7), (5, 18),
+                         (7, 19), (8, 20), (9, 21), (10, 22), (11, 23)],
+                        {1: (6, 14, 2, 0), 2: (1, 15, 3, 10), 3: (2, 16, 9, 4),
+                         4: (3, 17, 5, 8), 5: (4, 18, 6, 7)}, {}),
+                        # simple chirality assigment between fragments inv
                         ("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@@H][>]C(=O)OC}",
                         # 0 1 2 3
                         [('A', 'O H'), ('B', 'C C C O O C H H H H H H'),
@@ -204,7 +214,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [(0, 1), (0, 2), (2, 3), (2, 4),
                          (2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
                          (10, 13), (5, 14), (14, 15)],
-                        {3: (2, 4, 14, 10)}, {}),
+                        {3: (2, 10, 14, 4)}, {}),
                         # smiple ez isomerism assigment between fragments inv
                         ("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(\F)C}",
                         [('A', 'C C F H H H'), ('B', 'C F C H H H')],
@@ -212,7 +222,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [(0, 1), (1, 2), (0, 3), (0, 4),
                          (0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)],
                         {}, {2: (2, 1, 6, 7, 'trans'), 7: (7, 6, 1, 2, 'trans')}),
-                        # smiple ez isomerism assigment between fragments inv
+                        # simple ez isomerism assigment between fragments inv
                         ("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(/F)C}",
                         [('A', 'C C F H H H'), ('B', 'C F C H H H')],
                         'C C F H H H F C C H H H',

From a7a1eda46595eaeb2183ddd2b618b95c0ee1e403 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 1 Oct 2024 17:30:44 +0200
Subject: [PATCH 5/7] fix dep

---
 cgsmiles/resolve.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
index 2376c1f..a61df3e 100644
--- a/cgsmiles/resolve.py
+++ b/cgsmiles/resolve.py
@@ -366,7 +366,7 @@ def resolve(self):
             annotate_ez_isomers(self.molecule)
             # in all-atom MD there are common naming conventions
             # that might be expected and hence we set them here
-            set_atom_names_atomistic(self.meta_graph, self.molecule)
+            set_atom_names_atomistic(self.molecule, self.meta_graph)
 
         # and redo the meta molecule
         self.meta_graph = annotate_fragments(self.meta_graph,

From e9a5232d25b3f59b4a8774970c790d6bc61c54ae Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Sat, 5 Oct 2024 12:55:03 +0200
Subject: [PATCH 6/7] address comments

---
 cgsmiles/pysmiles_utils.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
index d2c18bf..83051bd 100644
--- a/cgsmiles/pysmiles_utils.py
+++ b/cgsmiles/pysmiles_utils.py
@@ -133,7 +133,7 @@ def mark_chiral_atoms(molecule):
         # other neighboring atoms in order
         bonded_neighbours = sorted(molecule[node])
         neighbours = list(rings)
-        hstash = None
+        hstash = []
         for neighbour in bonded_neighbours:
             if neighbour not in neighbours:
                 # all hydrogen atoms are explicit in cgsmiles
@@ -143,9 +143,13 @@ def mark_chiral_atoms(molecule):
                 if molecule.nodes[neighbour]['element'] != 'H':
                     neighbours.append(neighbour)
                 else:
-                    hstash = neighbour
-        if hstash:
-            neighbours.insert(1, hstash)
+                    hstash.append(neighbour)
+        if hstash and len(hstash) == 1:
+            neighbours.insert(1, hstash[0])
+        elif len(hstash) > 1:
+            msg = (f"Chiral node {node} as more than 1 hydrogen neighbor."
+                    "Therefore it is not chiral.")
+            raise ValueError(msg)
 
         if len(neighbours) != 4:
             # FIXME Tetrahedral Allene-like Systems such as `NC(Br)=[C@]=C(O)C`

From 3d58e50ed309f0a632ef407e684108bfb7641ad0 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Sat, 5 Oct 2024 12:55:16 +0200
Subject: [PATCH 7/7] address comments

---
 cgsmiles/read_fragments.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py
index e1c2558..dafef22 100644
--- a/cgsmiles/read_fragments.py
+++ b/cgsmiles/read_fragments.py
@@ -43,8 +43,28 @@ def _find_bonded_ring_node(ring_nodes, node):
         other = ring_nodes[current-1]
     return other
 
-def deal_with_chiral_rings(smile_iter, token, node_count, rings):
+def collect_ring_number(smile_iter, token, node_count, rings):
     """
+    When a ring identifier is found, this function will add
+    the current node to the rings dict.
+
+    Parameters
+    ----------
+    smile_iter: :class:PeekIter
+    token: str
+    node_count: int
+    rings: dict[list]
+
+    Returns
+    -------
+    PeekIter
+        the advanced smiles_iter
+    str
+        the current token being processed
+    str
+        the ring id
+    dict[list]
+        the updated rings dict
     """
     multi_ring = False
     ring_token = token
@@ -150,10 +170,10 @@ def strip_bonding_descriptors(fragment_string):
             smile += token
         # for chirality assignment we need to collect rings
         elif token == '%' or token.isdigit():
-            smile_iter, token, part_str, rings = deal_with_chiral_rings(smile_iter,
-                                                                        token,
-                                                                        prev_node,
-                                                                        rings)
+            smile_iter, token, part_str, rings = collect_ring_number(smile_iter,
+                                                                     token,
+                                                                     prev_node,
+                                                                     rings)
             smile += part_str
         elif token in '] H . - = # $ : + -':
             smile += token