Skip to content

Commit

Permalink
Merge pull request #29 from gruenewald-lab/update_pysmiles
Browse files (browse the repository at this point in the history)
updated hydrogen accounting and aromaticity
  • Loading branch information
fgrunewald authored Nov 14, 2024
2 parents cbd18d6 + c896964 commit 9e75907
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 13 deletions.
22 changes: 12 additions & 10 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,20 +58,22 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
graph describing the full molecule without hydrogen atoms
"""
for node in mol_graph.nodes:
if mol_graph.nodes[node].get('aromatic', False):
mol_graph.nodes[node]['hcount'] = 0

if mol_graph.nodes[node].get('bonding', False) and \
mol_graph.nodes[node].get('element', '*') == "H":
mol_graph.nodes[node].get('element', '*') == "H":
mol_graph.nodes[node]['single_h_frag'] = True

for edge in mol_graph.edges:
if mol_graph.edges[edge]['order'] == 1.5:
mol_graph.edges[edge]['order'] = 1

pysmiles.smiles_helper.mark_aromatic_atoms(mol_graph, strict=False)
pysmiles.smiles_helper.mark_aromatic_edges(mol_graph)

try:
pysmiles.smiles_helper.correct_aromatic_rings(mol_graph, strict=True)
except SyntaxError as pysmiles_err:
print(pysmiles_err)
msg = ("Likely you are writing an aromatic molecule that does not "
"show delocalization-induced molecular equivalency and thus "
"is not considered aromatic. For example, 4-methyl imidazole "
"is often written as [nH]1cc(nc1)C, but should be written as "
"[NH]1C=C(N=C1)C. A corresponding CGSmiles string would be "
"{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}")
raise SyntaxError(msg)
nx.set_node_attributes(mol_graph, 0, 'hcount')

pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)
Expand Down
14 changes: 11 additions & 3 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,12 +305,20 @@ def edges_from_bonding_descrpt(self, all_atom=False):
# bonding descriptors are assumed to have bonding order 1
# unless they are specifically annotated
order = int(bonding[0][-1])
if self.molecule.nodes[edge[0]].get('aromatic', False) and\
self.molecule.nodes[edge[1]].get('aromatic', False):
order = 1.5
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
if all_atom:
for edge_node in edge:
if self.molecule.nodes[edge_node]['element'] != 'H':
self.molecule.nodes[edge_node]['hcount'] -= 1

if self.molecule.nodes[edge_node]['element'] == 'H':
continue
hcount = self.molecule.nodes[edge_node]['hcount']
if self.molecule.nodes[edge_node].get('aromatic', 'False'):
hcount = max(0, hcount - 1.5)
else:
hcount = max(0, hcount - 1)
self.molecule.nodes[edge_node]['hcount'] = hcount
def squash_atoms(self):
"""
Applies the squash operator by removing the duplicate node
Expand Down

0 comments on commit 9e75907

Please sign in to comment.