Skip to content

Commit

Permalink
implement isomerism bug fixes plus tests
Browse files Browse the repository at this point in the history
  • Loading branch information
fgrunewald committed Sep 12, 2024
1 parent 6b824bb commit 299c033
Show file tree
Hide file tree
Showing 6 changed files with 217 additions and 39 deletions.
27 changes: 26 additions & 1 deletion cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def merge_graphs(source_graph, target_graph, max_node=None):
correspondence[node] = idx
new_atom = copy.copy(target_graph.nodes[node])
new_atom['fragid'] = [(new_atom.get('fragid', 0) + fragment_offset)]
# make sure to propagate the ez isomers
if 'ez_isomer_atoms' in new_atom:
new_atom['ez_isomer_atoms'] = (new_atom['ez_isomer_atoms'][0]+offset+1,
new_atom['ez_isomer_atoms'][1]+offset+1)
# make sure to propagate the ez atoms
source_graph.add_node(idx, **new_atom)

for node1, node2 in target_graph.edges:
Expand All @@ -53,7 +58,9 @@ def merge_graphs(source_graph, target_graph, max_node=None):

return correspondence

def sort_nodes_by_attr(graph, sort_attr="fragid"):
def sort_nodes_by_attr(graph,
sort_attr="fragid",
relative_attr=[('ez_isomer_atoms', True)]):
"""
Sort nodes in graph by attribute and relable the graph in place.
Expand All @@ -63,6 +70,13 @@ def sort_nodes_by_attr(graph, sort_attr="fragid"):
the graph to sort nodes of
sort_attr: `abc.hashable`
the attribute to use for sorting
relative_attr: tuple(str, bool)
a list of attributes that are sensetive
to the ordering of nodes (i.e. refer to
other nodes). The second element indicates
the depth. If False the value of attr are
node keys. If True we expect the value to
be an itertable with node keys.
Returns
-------
Expand All @@ -73,6 +87,17 @@ def sort_nodes_by_attr(graph, sort_attr="fragid"):
sorted_ids = sorted(attr_values, key=lambda item: (attr_values[item], item))
mapping = {old: new for new, old in enumerate(sorted_ids)}
new_graph = nx.relabel_nodes(graph, mapping, copy=True)
for attr, is_list in relative_attr:
attr_dict = nx.get_node_attributes(new_graph, attr)
new_dict = {}
for key, values in attr_dict.items():
if is_list:
new_values = [mapping[value] for value in values]
else:
new_values = mapping[values]
new_dict[key] = new_values
if new_dict:
nx.set_node_attributes(new_graph, new_dict, attr)
return new_graph

def _keyfunc(graph, node_idx, attrs):
Expand Down
27 changes: 27 additions & 0 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import logging
import networkx as nx
import pysmiles
from pysmiles.smiles_helper import _annotate_ez_isomers
LOGGER = logging.getLogger(__name__)

def rebuild_h_atoms(mol_graph, keep_bonding=False):
"""
Expand Down Expand Up @@ -52,3 +55,27 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
ref_node = next(mol_graph.neighbors(node))
mol_graph.nodes[node]["fragid"] = mol_graph.nodes[ref_node]["fragid"]
mol_graph.nodes[node]["fragname"] = mol_graph.nodes[ref_node]["fragname"]

def annotate_ez_isomers(molecule):
"""
Small wrapper dealing with ez_isomer annotation.
Parameters
----------
molecule: nx.Graph
The molecule of intrest, which must of ez_isomer_pairs
and ez_isomer_class set as node attributes
"""
ez_isomer_atoms = nx.get_node_attributes(molecule, 'ez_isomer_atoms')
ez_isomer_class = nx.get_node_attributes(molecule, 'ez_isomer_class')
ez_isomer_atoms_list = [atoms + [_class] for atoms, _class in zip(ez_isomer_atoms.values(), ez_isomer_class.values())]
ez_isomer_pairs = list(zip(ez_isomer_atoms_list[:-1], ez_isomer_atoms_list[1:]))
if len(ez_isomer_atoms)%2 != 0:
msg = ("You have an uneven amount of atoms marked as CIS/TRANS isomers."
"We will drop the last atom from assigning the iosmers.")
LOGGER.warning(msg)
_annotate_ez_isomers(molecule, ez_isomer_pairs)
# clean up
for node in ez_isomer_atoms:
del molecule.nodes[node]['ez_isomer_atoms']
del molecule.nodes[node]['ez_isomer_class']
30 changes: 26 additions & 4 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def strip_bonding_descriptors(fragment_string):
bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
smile_iter = PeekIter(fragment_string)
bonding_descrpt = defaultdict(list)
ez_isomer_atoms = {}
rs_isomers = {}
smile = ""
node_count = 0
prev_node = 0
Expand All @@ -84,7 +86,14 @@ def strip_bonding_descriptors(fragment_string):
else:
atom = token
while peek != ']':
atom += peek
# deal with rs chirality
if peek == '@':
chiral_token = peek
if smile_iter.peek() == '@':
chiral_token = '@' + next(smile_iter)
rs_isomers[node_count] = (chiral_token, [])
else:
atom += peek
peek = next(smile_iter)
smile = smile + atom + "]"
prev_node = node_count
Expand All @@ -99,8 +108,15 @@ def strip_bonding_descriptors(fragment_string):
elif token in bond_to_order:
current_order = bond_to_order[token]
smile += token
elif token in '] H @ . - = # $ : / \\ + - %' or token.isdigit():
elif token in '] H . - = # $ : + - %' or token.isdigit():
smile += token
# deal with ez isomers
elif token in '/ \\':
# we have a branch
if node_count == 0:
ez_isomer_atoms[node_count] = (node_count, 1, token)
else:
ez_isomer_atoms[node_count] = (node_count, prev_node, token)
else:
if smile_iter.peek() and token + smile_iter.peek() in ['Cl', 'Br', 'Si', 'Mg', 'Na']:
smile += (token + next(smile_iter))
Expand All @@ -110,7 +126,7 @@ def strip_bonding_descriptors(fragment_string):
prev_node = node_count
node_count += 1

return smile, bonding_descrpt
return smile, bonding_descrpt, rs_isomers, ez_isomer_atoms

def fragment_iter(fragment_str, all_atom=True):
"""
Expand Down Expand Up @@ -140,14 +156,20 @@ def fragment_iter(fragment_str, all_atom=True):
delim = fragment.find('=', 0)
fragname = fragment[1:delim]
big_smile = fragment[delim+1:]
smile, bonding_descrpt = strip_bonding_descriptors(big_smile)
smile, bonding_descrpt, rs_isomers, ez_isomers = strip_bonding_descriptors(big_smile)
if smile == "H":
mol_graph = nx.Graph()
mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
elif all_atom:
mol_graph = pysmiles.read_smiles(smile, reinterpret_aromatic=False, strict=False)
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
nx.set_node_attributes(mol_graph, rs_isomers, 'rs_isomer')
# we need to split countable node keys and the associated value
ez_isomer_atoms = {idx: val[:-1] for idx, val in ez_isomers.items()}
ez_isomer_class = {idx: val[-1] for idx, val in ez_isomers.items()}
nx.set_node_attributes(mol_graph, ez_isomer_atoms, 'ez_isomer_atoms')
nx.set_node_attributes(mol_graph, ez_isomer_class, 'ez_isomer_class')
# we deal with a CG resolution graph
else:
mol_graph = read_cgsmiles(smile)
Expand Down
17 changes: 11 additions & 6 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import re
import copy
import networkx as nx
from pysmiles.smiles_helper import _mark_chiral_atoms
from .read_cgsmiles import read_cgsmiles
from .read_fragments import read_fragments
from .graph_utils import (merge_graphs,
sort_nodes_by_attr,
annotate_fragments,
set_atom_names_atomistic)
from .pysmiles_utils import rebuild_h_atoms
from .pysmiles_utils import rebuild_h_atoms, annotate_ez_isomers

def compatible(left, right):
"""
Expand Down Expand Up @@ -330,15 +331,19 @@ def resolve(self):
# sort the atoms
self.molecule = sort_nodes_by_attr(self.molecule, sort_attr=("fragid"))

if all_atom:
# assign chirality
_mark_chiral_atoms(self.molecule)
# assign rs isomerism
annotate_ez_isomers(self.molecule)
# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
set_atom_names_atomistic(self.meta_graph, self.molecule)

# and redo the meta molecule
self.meta_graph = annotate_fragments(self.meta_graph,
self.molecule)

# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
if all_atom:
set_atom_names_atomistic(self.meta_graph, self.molecule)

# increment the resolution counter
self.resolution_counter += 1

Expand Down
76 changes: 64 additions & 12 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,48 +153,100 @@ def test_read_cgsmiles(smile, nodes, edges, orders):
fragnames = nx.get_node_attributes(meta_mol, 'fragname')
assert nodes == list(fragnames.values())

@pytest.mark.parametrize('big_smile, smile, bonding',(
@pytest.mark.parametrize('big_smile, smile, bonding, rs, ez',(
# smiple symmetric bonding
("[$]COC[$]",
"COC",
{0: ["$1"], 2: ["$1"]}),
{0: ["$1"], 2: ["$1"]},
None,
None),
# smiple symmetric bonding with more than one name
("[$1A]COC[$1A]",
"COC",
{0: ["$1A1"], 2: ["$1A1"]}),
{0: ["$1A1"], 2: ["$1A1"]},
None,
None),
# smiple bonding multiletter atom
("Clc[$]c[$]",
"Clcc",
{1: ["$1"], 2: ["$1"]}),
{1: ["$1"], 2: ["$1"]},
None,
None),
# simple symmetric but with explicit hydrogen
("[$][CH2]O[CH2][$]",
"[CH2]O[CH2]",
{0: ["$1"], 2: ["$1"]}),
{0: ["$1"], 2: ["$1"]},
None,
None),
# smiple symmetric bonding; multiple descript
("[$]COC[$][$1]",
"COC",
{0: ["$1"], 2: ["$1", "$11"]}),
{0: ["$1"], 2: ["$1", "$11"]},
None,
None),
# named different bonding descriptors
("[$1]CCCC[$2]",
"CCCC",
{0: ["$11"], 3: ["$21"]}),
{0: ["$11"], 3: ["$21"]},
None,
None),
# ring and bonding descriptors
("[$1]CC[$2]C1CCCCC1",
"CCC1CCCCC1",
{0: ["$11"], 1: ["$21"]}),
{0: ["$11"], 1: ["$21"]},
None,
None),
# bonding descript. after branch
("C(COC[$1])[$2]CCC[$3]",
"C(COC)CCC",
{0: ["$21"], 3: ["$11"], 6: ["$31"]}),
{0: ["$21"], 3: ["$11"], 6: ["$31"]},
None,
None),
# left rigth bonding desciptors
("[>]COC[<]",
"COC",
{0: [">1"], 2: ["<1"]})
{0: [">1"], 2: ["<1"]},
None,
None),
# simple chirality in residue
("[>]C[C@](F)(B)N[<]",
"C[C](F)(B)N",
{0: [">1"], 4: ["<1"]},
{1: ('@', [])},
None),
# simple chirality inverse in residue
("[>]C[C@@](F)(B)N[<]",
"C[C](F)(B)N",
{0: [">1"], 4: ["<1"]},
{1: ('@@', [])},
None),
# \ fragment split
("[>]CC(\F)=[<]",
"CC(F)=",
{0: [">1"], 1: ["<2"]},
None,
{2: (2, 1, '\\')}),
# / fragment split
("[>]CC(/F)=[<]",
"CC(F)=",
{0: [">1"], 1: ["<2"]},
None,
{2: (2, 1, '/')}),
# both in one fragment
("[>]CC(/F)=C(\F)C[<]",
"CC(F)=C(F)C",
{0: [">1"], 5: ["<1"]},
None,
{2: (2, 1, '/'), 4: (4, 3, '\\')}),
))
def test_strip_bonding_descriptors(big_smile, smile, bonding):
new_smile, new_bonding = strip_bonding_descriptors(big_smile)
def test_strip_bonding_descriptors(big_smile, smile, bonding, rs, ez):
new_smile, new_bonding, rs_isomers, ez_isomers = strip_bonding_descriptors(big_smile)
assert new_smile == smile
assert new_bonding == bonding
if rs:
assert rs == rs_isomers
if ez:
assert ez == ez_isomers

@pytest.mark.parametrize('fragment_str, nodes, edges',(
# single fragment
Expand Down
Loading

0 comments on commit 299c033

Please sign in to comment.