Merge branch 'master' into drawing

gruenewald-lab · Nov 14, 2024 · be4dc0b
2 parents 2f116b3 + 9e75907
commit be4dc0b
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 18 deletions.
diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
@@ -58,20 +58,22 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
         graph describing the full molecule without hydrogen atoms
     """
     for node in mol_graph.nodes:
-        if mol_graph.nodes[node].get('aromatic', False):
-            mol_graph.nodes[node]['hcount'] = 0
 
         if mol_graph.nodes[node].get('bonding', False) and  \
-        mol_graph.nodes[node].get('element', '*') == "H":
+            mol_graph.nodes[node].get('element', '*') == "H":
             mol_graph.nodes[node]['single_h_frag'] = True
 
-    for edge in mol_graph.edges:
-        if mol_graph.edges[edge]['order'] == 1.5:
-            mol_graph.edges[edge]['order'] = 1
-
-    pysmiles.smiles_helper.mark_aromatic_atoms(mol_graph, strict=False)
-    pysmiles.smiles_helper.mark_aromatic_edges(mol_graph)
-
+    try:
+        pysmiles.smiles_helper.correct_aromatic_rings(mol_graph, strict=True)
+    except SyntaxError as pysmiles_err:
+        print(pysmiles_err)
+        msg = ("Likely you are writing an aromatic molecule that does not "
+               "show delocalization-induced molecular equivalency and thus "
+               "is not considered aromatic. For example, 4-methyl imidazole "
+               "is often written as [nH]1cc(nc1)C, but should be written as "
+               "[NH]1C=C(N=C1)C. A corresponding CGSmiles string would be "
+               "{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}")
+        raise SyntaxError(msg)
     nx.set_node_attributes(mol_graph, 0, 'hcount')
 
     pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)
@@ -97,7 +99,7 @@ def annotate_ez_isomers(molecule):
     ez_isomer_atoms = nx.get_node_attributes(molecule, 'ez_isomer_atoms')
     ez_isomer_class = nx.get_node_attributes(molecule, 'ez_isomer_class')
     ez_isomer_atoms_list = [atoms + [_class] for atoms, _class in zip(ez_isomer_atoms.values(), ez_isomer_class.values())]
-    ez_isomer_pairs = list(zip(ez_isomer_atoms_list[:-1], ez_isomer_atoms_list[1:]))
+    ez_isomer_pairs = list(zip(ez_isomer_atoms_list[::2], ez_isomer_atoms_list[1::2]))
     if len(ez_isomer_atoms)%2 != 0:
         msg = ("You have an uneven amount of atoms marked as CIS/TRANS isomers."
                "We will drop the last atom from assigning the iosmers.")

diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
@@ -308,12 +308,20 @@ def edges_from_bonding_descrpt(self, all_atom=False):
                 # bonding descriptors are assumed to have bonding order 1
                 # unless they are specifically annotated
                 order = int(bonding[0][-1])
+                if self.molecule.nodes[edge[0]].get('aromatic', False) and\
+                   self.molecule.nodes[edge[1]].get('aromatic', False):
+                    order = 1.5
                 self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
                 if all_atom:
                     for edge_node in edge:
-                        if self.molecule.nodes[edge_node]['element'] != 'H':
-                            self.molecule.nodes[edge_node]['hcount'] -= 1
-
+                        if self.molecule.nodes[edge_node]['element'] == 'H':
+                            continue
+                        hcount = self.molecule.nodes[edge_node]['hcount']
+                        if self.molecule.nodes[edge_node].get('aromatic', 'False'):
+                            hcount = max(0, hcount - 1.5)
+                        else:
+                            hcount = max(0, hcount - 1)
+                        self.molecule.nodes[edge_node]['hcount'] = hcount
     def squash_atoms(self):
         """
         Applies the squash operator by removing the duplicate node

diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py
@@ -222,6 +222,16 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
                         [(0, 1), (1, 2), (0, 3), (0, 4),
                          (0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)],
                         {}, {2: (2, 1, 6, 7, 'trans'), 7: (7, 6, 1, 2, 'trans')}),
+                        # have more than one e/z pair
+                        ("{[#A][#B][#B][#C]}.{#A=CC(/F)=[>],#B=[<]=C(\F)C=[>],#C=[<]=C(\F)C}",
+                        [('A', 'C C F H H H'), ('B', 'C F C H'),
+                         ('B', 'C F C H'), ('C', 'C F C H H H')],
+                        'C C F H H H C F C H C F C H C F C H H H',
+                        [(0, 1), (0, 3), (0, 4), (0, 5), (1, 2), (1, 6), (6, 7), (6, 8),
+                         (8, 10), (8, 9), (10, 11), (10, 12), (12, 14), (12, 13),
+                         (14, 15), (14, 16), (16, 17), (16, 18), (16, 19)],
+                        {}, {2: (2, 1, 6, 7, 'trans'), 7: (7, 6, 1, 2, 'trans'),
+                             11: (11, 10, 14, 15, 'cis'), 15: (15, 14, 10, 11, 'cis')}),
                         # simple ez isomerism assigment between fragments inv
                         ("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(/F)C}",
                         [('A', 'C C F H H H'), ('B', 'C F C H H H')],
@@ -250,11 +260,7 @@ def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral
         block_graph = meta_mol.nodes[node]['graph']
         target_elements = nx.get_node_attributes(block_graph, 'element')
         sorted_elements =  [target_elements[key] for key in sorted(target_elements)]
-        print(target_elements)
-        print("-->", sorted_elements)
-        print("-->", ref[1].split())
         assert sorted_elements == ref[1].split()
-        print(counter)
         counter += 1
     # make the full scale reference graph
     ref_graph = nx.Graph()