Skip to content

Commit

Permalink
Merge pull request #33 from gruenewald-lab/maint
Browse files Browse the repository at this point in the history
Maint
  • Loading branch information
fgrunewald authored Nov 21, 2024
2 parents 9e75907 + 6a11252 commit f53efae
Show file tree
Hide file tree
Showing 6 changed files with 85 additions and 10 deletions.
4 changes: 3 additions & 1 deletion cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ def set_atom_names_atomistic(molecule, meta_graph=None):
assert len(fragids) == 1
fraglist[fragids[0]].append(node)

for fragnodes in fraglist.values():
for meta_node, fragnodes in fraglist.items():
for idx, node in enumerate(fragnodes):
atomname = molecule.nodes[node]['element'] + str(idx)
molecule.nodes[node]['atomname'] = atomname
if meta_graph:
meta_graph.nodes[meta_node]['graph'].nodes[node]['atomname'] = atomname
12 changes: 12 additions & 0 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,21 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
raise SyntaxError(msg)
nx.set_node_attributes(mol_graph, 0, 'hcount')

# first we need to figure out the correct hcounts on each node
# this also corrects for simple aromatic problems like in thiophene
pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)

# optionally we adjust the hcount by the number of bonding operators
if keep_bonding:
bonding_nodes = nx.get_node_attributes(mol_graph, 'bonding')
for node, bond_ops in bonding_nodes.items():
mol_graph.nodes[node]['hcount'] -= sum([int(bond[-1]) for bond in bond_ops])

# now we add the hydrogen atoms
pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)

# if we are having single hydrogen fragments we need to
# make sure the fragid and fragname are kept
for node in mol_graph.nodes:
if mol_graph.nodes[node].get("element", "*") == "H" and\
not mol_graph.nodes[node].get("single_h_frag", False):
Expand Down
7 changes: 4 additions & 3 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def strip_bonding_descriptors(fragment_string):
node_count = 0
prev_node = 0
current_order = None
anchor = []
for token in smile_iter:
if token == '[':
peek = next(smile_iter)
Expand Down Expand Up @@ -157,15 +158,15 @@ def strip_bonding_descriptors(fragment_string):
else:
atom += peek
peek = next(smile_iter)

smile = smile + atom + "]"
prev_node = node_count
node_count += 1
current_order = None
elif token == '(':
anchor = prev_node
anchor.append(prev_node)
smile += token
elif token == ')':
prev_node = anchor
prev_node = anchor.pop()
smile += token
elif token in bond_to_order:
current_order = bond_to_order[token]
Expand Down
10 changes: 4 additions & 6 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,18 +382,16 @@ def resolve(self):
mark_chiral_atoms(self.molecule)
# assign E/Z isomerism
annotate_ez_isomers(self.molecule)
# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
set_atom_names_atomistic(self.molecule, self.meta_graph)

# and redo the meta molecule
self.meta_graph = annotate_fragments(self.meta_graph,
self.molecule)

# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
if all_atom:
set_atom_names_atomistic(self.molecule, self.meta_graph)
# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
set_atom_names_atomistic(self.molecule,
self.meta_graph)

# increment the resolution counter
self.resolution_counter += 1
Expand Down
30 changes: 30 additions & 0 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,36 @@ def test_read_cgsmiles(smile, nodes, charges, edges, orders):
{0: ["$1"], 2: ["$1"]},
None,
None),
# simple symmetric bonding after branch
("[$]CC(CC)[$]",
"CC(CC)",
{0: ["$1"], 1: ["$1"]},
None,
None),
# simple symmetric bonding after ring
("[$]CC1[$]CCC1",
"CC1CCC1",
{0: ["$1"], 1: ["$1"]},
None,
None),
# clear order symbol
("[CH][$a]=[CH][$c]",
"[CH]=[CH]",
{0: ["$a1"], 1: ["$c1"]},
None,
None),
# multiple bonding operators on one node, both with bond order two
("CC=[$a]=[$b]CC",
"CCCC",
{1: ["$a2", "$b2"]},
None,
None),
# multiple bonding operators on one node with different bond orders
("CC[$a]=[$b]CC",
"CCCC",
{1: ["$a1", "$b2"]},
None,
None),
# simple symmetric bonding with more than one name
("[$1A]COC[$1A]",
"COC",
Expand Down
32 changes: 32 additions & 0 deletions cgsmiles/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import re
import pytest
import cgsmiles

# Error text expected for the failing fragment in the parametrization of
# test_rebuild_hatoms; it is passed through re.escape and used as the
# `match` pattern of pytest.raises around rebuild_h_atoms.
err_msg_rebuild_h = ("Likely you are writing an aromatic molecule that does not "
"show delocalization-induced molecular equivalency and thus "
"is not considered aromatic. For example, 4-methyl imidazole "
"is often written as [nH]1cc(nc1)C, but should be written as "
"[NH]1C=C(N=C1)C. A corresponding CGSmiles string would be "
"{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}")

@pytest.mark.parametrize('frag_str, hatoms_ref, error_type, err_msg', (
    # two bonding operators on an aliphatic chain
    ('{#A=[$]CCC[$]}', 6, None, None),
    # no bonding operators at all
    ('{#A=CCC}', 8, None, None),
    # a single bonding operator
    ('{#A=C[!]CC}', 7, None, None),
    # bonding operators carrying an explicit '=' bond symbol
    ('{#A=[$]=CCC=[$]}', 4, None, None),
    # lowercase (aromatic) atoms that are accepted
    ('{#A=[$]cccc}', 5, None, None),
    # lowercase (aromatic) atoms that are expected to raise
    ('{#A=[$]ccc}', 0, SyntaxError, err_msg_rebuild_h),
))
def test_rebuild_hatoms(frag_str, hatoms_ref, error_type, err_msg):
    """
    Rebuild the hydrogen atoms of fragment 'A' with `keep_bonding=True`
    and compare the number of 'H' nodes against `hatoms_ref`. If an
    `error_type` is given, only check that this error is raised with
    a message matching `err_msg`.
    """
    frag_graph = cgsmiles.read_fragments(frag_str)['A']
    rebuild = cgsmiles.pysmiles_utils.rebuild_h_atoms

    if error_type:
        # the message is escaped because it contains regex meta characters
        with pytest.raises(error_type, match=re.escape(err_msg)):
            rebuild(frag_graph, keep_bonding=True)
    else:
        rebuild(frag_graph, keep_bonding=True)
        # count every node whose element attribute is hydrogen
        hatoms = sum(1 for _, element in frag_graph.nodes(data='element')
                     if element == 'H')
        assert hatoms == hatoms_ref

0 comments on commit f53efae

Please sign in to comment.