diff --git a/cgsmiles/graph_utils.py b/cgsmiles/graph_utils.py index 64c384f..ab750fd 100644 --- a/cgsmiles/graph_utils.py +++ b/cgsmiles/graph_utils.py @@ -175,7 +175,9 @@ def set_atom_names_atomistic(molecule, meta_graph=None): assert len(fragids) == 1 fraglist[fragids[0]].append(node) - for fragnodes in fraglist.values(): + for meta_node, fragnodes in fraglist.items(): for idx, node in enumerate(fragnodes): atomname = molecule.nodes[node]['element'] + str(idx) molecule.nodes[node]['atomname'] = atomname + if meta_graph: + meta_graph.nodes[meta_node]['graph'].nodes[node]['atomname'] = atomname diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py index e8d6213..952a353 100644 --- a/cgsmiles/pysmiles_utils.py +++ b/cgsmiles/pysmiles_utils.py @@ -76,9 +76,21 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False): raise SyntaxError(msg) nx.set_node_attributes(mol_graph, 0, 'hcount') + # first we need to figure out the correct hcounts on each node + # this also corrects for simple aromatic problems like in thiophene pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False) + + # optionally we adjust the hcount by the number of bonding operators + if keep_bonding: + bonding_nodes = nx.get_node_attributes(mol_graph, 'bonding') + for node, bond_ops in bonding_nodes.items(): + mol_graph.nodes[node]['hcount'] -= sum([int(bond[-1]) for bond in bond_ops]) + + # now we add the hydrogen atoms pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph) + # if we are having single hydrogen fragments we need to + # make sure the fragid and fragname are kept for node in mol_graph.nodes: if mol_graph.nodes[node].get("element", "*") == "H" and\ not mol_graph.nodes[node].get("single_h_frag", False): diff --git a/cgsmiles/read_fragments.py b/cgsmiles/read_fragments.py index f065317..7fb49a1 100644 --- a/cgsmiles/read_fragments.py +++ b/cgsmiles/read_fragments.py @@ -126,6 +126,7 @@ def strip_bonding_descriptors(fragment_string): node_count = 0 prev_node = 0 
current_order = None + anchor = [] for token in smile_iter: if token == '[': peek = next(smile_iter) @@ -157,15 +158,15 @@ def strip_bonding_descriptors(fragment_string): else: atom += peek peek = next(smile_iter) - smile = smile + atom + "]" prev_node = node_count node_count += 1 + current_order = None elif token == '(': - anchor = prev_node + anchor.append(prev_node) smile += token elif token == ')': - prev_node = anchor + prev_node = anchor.pop() smile += token elif token in bond_to_order: current_order = bond_to_order[token] diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index d079e05..dae2eb5 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -382,18 +382,16 @@ def resolve(self): mark_chiral_atoms(self.molecule) # assign rs isomerism annotate_ez_isomers(self.molecule) - # in all-atom MD there are common naming conventions - # that might be expected and hence we set them here - set_atom_names_atomistic(self.molecule, self.meta_graph) # and redo the meta molecule self.meta_graph = annotate_fragments(self.meta_graph, self.molecule) - # in all-atom MD there are common naming conventions - # that might be expected and hence we set them here if all_atom: - set_atom_names_atomistic(self.molecule, self.meta_graph) + # in all-atom MD there are common naming conventions + # that might be expected and hence we set them here + set_atom_names_atomistic(self.molecule, + self.meta_graph) # increment the resolution counter self.resolution_counter += 1 diff --git a/cgsmiles/tests/test_cgsmile_parsing.py b/cgsmiles/tests/test_cgsmile_parsing.py index 812817f..9a59a7b 100644 --- a/cgsmiles/tests/test_cgsmile_parsing.py +++ b/cgsmiles/tests/test_cgsmile_parsing.py @@ -269,6 +269,36 @@ def test_read_cgsmiles(smile, nodes, charges, edges, orders): {0: ["$1"], 2: ["$1"]}, None, None), + # simple symmetric bonding after branch + ("[$]CC(CC)[$]", + "CC(CC)", + {0: ["$1"], 1: ["$1"]}, + None, + None), + # simple symmetric bonding after ring + ("[$]CC1[$]CCC1", 
"CC1CCC1", + {0: ["$1"], 1: ["$1"]}, + None, + None), + # clear order symbol + ("[CH][$a]=[CH][$c]", + "[CH]=[CH]", + {0: ["$a1"], 1: ["$c1"]}, + None, + None), + # multiple non-one bonding l + ("CC=[$a]=[$b]CC", + "CCCC", + {1: ["$a2", "$b2"]}, + None, + None), + # multiple non-one bonding l + ("CC[$a]=[$b]CC", + "CCCC", + {1: ["$a1", "$b2"]}, + None, + None), # smiple symmetric bonding with more than one name ("[$1A]COC[$1A]", "COC", diff --git a/cgsmiles/tests/test_utils.py b/cgsmiles/tests/test_utils.py new file mode 100644 index 0000000..fa0c730 --- /dev/null +++ b/cgsmiles/tests/test_utils.py @@ -0,0 +1,32 @@ +import re +import pytest +import cgsmiles + +err_msg_rebuild_h = ("Likely you are writing an aromatic molecule that does not " + "show delocalization-induced molecular equivalency and thus " + "is not considered aromatic. For example, 4-methyl imidazole " + "is often written as [nH]1cc(nc1)C, but should be written as " + "[NH]1C=C(N=C1)C. A corresponding CGSmiles string would be " + "{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}") + +@pytest.mark.parametrize('frag_str, hatoms_ref, error_type, err_msg', ( + ('{#A=[$]CCC[$]}', 6, None, None), + ('{#A=CCC}', 8, None, None), + ('{#A=C[!]CC}', 7, None, None), + ('{#A=[$]=CCC=[$]}', 4, None, None), + ('{#A=[$]cccc}',5, None, None), + ('{#A=[$]ccc}', 0, SyntaxError, err_msg_rebuild_h), +)) +def test_rebuild_hatoms(frag_str, hatoms_ref, error_type, err_msg): + frag_dict = cgsmiles.read_fragments(frag_str) + frag_graph = frag_dict['A'] + if error_type: + with pytest.raises(error_type, match=re.escape(err_msg)): + cgsmiles.pysmiles_utils.rebuild_h_atoms(frag_graph, keep_bonding=True) + else: + cgsmiles.pysmiles_utils.rebuild_h_atoms(frag_graph, keep_bonding=True) + hatoms = 0 + for node, ele in frag_graph.nodes(data='element'): + if ele == 'H': + hatoms += 1 + assert hatoms == hatoms_ref