From c84650c8821a34493b1f150f3803a37e945b3f1c Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 6 Nov 2024 13:48:06 +0100 Subject: [PATCH 1/4] updated hydrogen accounting and aromaticity --- cgsmiles/pysmiles_utils.py | 11 ++--------- cgsmiles/resolve.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py index 83051bd..414be23 100644 --- a/cgsmiles/pysmiles_utils.py +++ b/cgsmiles/pysmiles_utils.py @@ -58,19 +58,12 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False): graph describing the full molecule without hydrogen atoms """ for node in mol_graph.nodes: - if mol_graph.nodes[node].get('aromatic', False): - mol_graph.nodes[node]['hcount'] = 0 if mol_graph.nodes[node].get('bonding', False) and \ - mol_graph.nodes[node].get('element', '*') == "H": + mol_graph.nodes[node].get('element', '*') == "H": mol_graph.nodes[node]['single_h_frag'] = True - for edge in mol_graph.edges: - if mol_graph.edges[edge]['order'] == 1.5: - mol_graph.edges[edge]['order'] = 1 - - pysmiles.smiles_helper.mark_aromatic_atoms(mol_graph, strict=False) - pysmiles.smiles_helper.mark_aromatic_edges(mol_graph) + pysmiles.smiles_helper.correct_aromatic_rings(mol_graph, strict=True) nx.set_node_attributes(mol_graph, 0, 'hcount') diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index fc50cda..d079e05 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -305,12 +305,20 @@ def edges_from_bonding_descrpt(self, all_atom=False): # bonding descriptors are assumed to have bonding order 1 # unless they are specifically annotated order = int(bonding[0][-1]) + if self.molecule.nodes[edge[0]].get('aromatic', False) and\ + self.molecule.nodes[edge[1]].get('aromatic', False): + order = 1.5 self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order) if all_atom: for edge_node in edge: - if self.molecule.nodes[edge_node]['element'] != 'H': - self.molecule.nodes[edge_node]['hcount'] -= 1 - + 
if self.molecule.nodes[edge_node]['element'] == 'H': + continue + hcount = self.molecule.nodes[edge_node]['hcount'] + if self.molecule.nodes[edge_node].get('aromatic', False): + hcount = max(0, hcount - 1.5) + else: + hcount = max(0, hcount - 1) + self.molecule.nodes[edge_node]['hcount'] = hcount def squash_atoms(self): """ Applies the squash operator by removing the duplicate node From c896964a52578ab82bf40a79f714f3663ad3b17e Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 6 Nov 2024 14:39:53 +0100 Subject: [PATCH 2/4] add more verbose error --- cgsmiles/pysmiles_utils.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py index 414be23..36fdbf0 100644 --- a/cgsmiles/pysmiles_utils.py +++ b/cgsmiles/pysmiles_utils.py @@ -63,8 +63,17 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False): mol_graph.nodes[node].get('element', '*') == "H": mol_graph.nodes[node]['single_h_frag'] = True - pysmiles.smiles_helper.correct_aromatic_rings(mol_graph, strict=True) - + try: + pysmiles.smiles_helper.correct_aromatic_rings(mol_graph, strict=True) + except SyntaxError as pysmiles_err: + print(pysmiles_err) + msg = ("Likely you are writing an aromatic molecule that does not " + "show delocalization-induced molecular equivalency and thus " + "is not considered aromatic. For example, 4-methyl imidazole " + "is often written as [nH]1cc(nc1)C, but should be written as " + "[NH]1C=C(N=C1)C. 
A corresponding CGSmiles string would be " + "{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}") + raise SyntaxError(msg) nx.set_node_attributes(mol_graph, 0, 'hcount') pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False) From 5e7174c88b7f25763cc3d690cc0ee7325bbdfdb2 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 14 Nov 2024 10:12:17 +0100 Subject: [PATCH 3/4] bug fix cis trans --- cgsmiles/pysmiles_utils.py | 2 +- cgsmiles/tests/test_molecule_resolve.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py index 83051bd..ef0ede2 100644 --- a/cgsmiles/pysmiles_utils.py +++ b/cgsmiles/pysmiles_utils.py @@ -97,7 +97,7 @@ def annotate_ez_isomers(molecule): ez_isomer_atoms = nx.get_node_attributes(molecule, 'ez_isomer_atoms') ez_isomer_class = nx.get_node_attributes(molecule, 'ez_isomer_class') ez_isomer_atoms_list = [atoms + [_class] for atoms, _class in zip(ez_isomer_atoms.values(), ez_isomer_class.values())] - ez_isomer_pairs = list(zip(ez_isomer_atoms_list[:-1], ez_isomer_atoms_list[1:])) + ez_isomer_pairs = list(zip(ez_isomer_atoms_list[::2], ez_isomer_atoms_list[1::2])) if len(ez_isomer_atoms)%2 != 0: msg = ("You have an uneven amount of atoms marked as CIS/TRANS isomers." 
"We will drop the last atom from assigning the iosmers.") diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py index 23818b0..4646d97 100644 --- a/cgsmiles/tests/test_molecule_resolve.py +++ b/cgsmiles/tests/test_molecule_resolve.py @@ -222,6 +222,16 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes): [(0, 1), (1, 2), (0, 3), (0, 4), (0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)], {}, {2: (2, 1, 6, 7, 'trans'), 7: (7, 6, 1, 2, 'trans')}), + # have more than one e/z pair + ("{[#A][#B][#B][#C]}.{#A=CC(/F)=[>],#B=[<]=C(\F)C=[>],#C=[<]=C(\F)C}", + [('A', 'C C F H H H'), ('B', 'C F C H'), + ('B', 'C F C H'), ('C', 'C F C H H H')], + 'C C F H H H C F C H C F C H C F C H H H', + [(0, 1), (0, 3), (0, 4), (0, 5), (1, 2), (1, 6), (6, 7), (6, 8), + (8, 10), (8, 9), (10, 11), (10, 12), (12, 14), (12, 13), + (14, 15), (14, 16), (16, 17), (16, 18), (16, 19)], + {}, {2: (2, 1, 6, 7, 'trans'), 7: (7, 6, 1, 2, 'trans'), + 11: (11, 10, 14, 15, 'cis'), 15: (15, 14, 10, 11, 'cis')}), # simple ez isomerism assigment between fragments inv ("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(/F)C}", [('A', 'C C F H H H'), ('B', 'C F C H H H')], From cbd18d6de4253f0840692f2c2d65e01e7c7b86c4 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Thu, 14 Nov 2024 10:16:36 +0100 Subject: [PATCH 4/4] remove prints --- cgsmiles/tests/test_molecule_resolve.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py index 4646d97..096c57d 100644 --- a/cgsmiles/tests/test_molecule_resolve.py +++ b/cgsmiles/tests/test_molecule_resolve.py @@ -260,11 +260,7 @@ def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral block_graph = meta_mol.nodes[node]['graph'] target_elements = nx.get_node_attributes(block_graph, 'element') sorted_elements = [target_elements[key] for key in sorted(target_elements)] - print(target_elements) - 
print("-->", sorted_elements) - print("-->", ref[1].split()) assert sorted_elements == ref[1].split() - print(counter) counter += 1 # make the full scale reference graph ref_graph = nx.Graph()