Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement isomerism bug fixes plus tests #20

Merged
merged 8 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def merge_graphs(source_graph, target_graph, max_node=None):
correspondence[node] = idx
new_atom = copy.copy(target_graph.nodes[node])
new_atom['fragid'] = [(new_atom.get('fragid', 0) + fragment_offset)]
# make sure to propagate the ez isomers
if 'ez_isomer_atoms' in new_atom:
new_atom['ez_isomer_atoms'] = (new_atom['ez_isomer_atoms'][0]+offset+1,
new_atom['ez_isomer_atoms'][1]+offset+1)
# make sure to propagate the ez atoms
source_graph.add_node(idx, **new_atom)

for node1, node2 in target_graph.edges:
Expand All @@ -53,7 +58,9 @@ def merge_graphs(source_graph, target_graph, max_node=None):

return correspondence

def sort_nodes_by_attr(graph, sort_attr="fragid"):
def sort_nodes_by_attr(graph,
sort_attr="fragid",
relative_attr=[('ez_isomer_atoms', True)]):
"""
Sort nodes in graph by attribute and relable the graph in place.

Expand All @@ -63,6 +70,13 @@ def sort_nodes_by_attr(graph, sort_attr="fragid"):
the graph to sort nodes of
sort_attr: `abc.hashable`
the attribute to use for sorting
relative_attr: tuple(str, bool)
a list of attributes that are sensetive
to the ordering of nodes (i.e. refer to
other nodes). The second element indicates
the depth. If False the value of attr are
node keys. If True we expect the value to
be an itertable with node keys.
Comment on lines +73 to +79
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of faffing around with this bool, you could see if you can do iter(thingy), and check that it's not a string.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I condiered that but then wasn't worth it ^^

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The downside of doing it the "clever" way is that there will be cases it gets it wrong (deeper nesting, for example). The downside of using the bool in the argument is that it complicates the api


Returns
-------
Expand All @@ -73,6 +87,23 @@ def sort_nodes_by_attr(graph, sort_attr="fragid"):
sorted_ids = sorted(attr_values, key=lambda item: (attr_values[item], item))
mapping = {old: new for new, old in enumerate(sorted_ids)}
new_graph = nx.relabel_nodes(graph, mapping, copy=True)
for attr, is_list in relative_attr:
attr_dict = nx.get_node_attributes(new_graph, attr)
new_dict = {}
for key, values in attr_dict.items():
try:
iter(values)
except TypeError:
is_list = False
else:
is_list = not isinstance(values, str)
if is_list:
new_values = [mapping[value] for value in values]
else:
new_values = mapping[values]
new_dict[key] = new_values
if new_dict:
nx.set_node_attributes(new_graph, new_dict, attr)
return new_graph

def _keyfunc(graph, node_idx, attrs):
Expand Down
27 changes: 27 additions & 0 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import logging
import networkx as nx
import pysmiles
from pysmiles.smiles_helper import _annotate_ez_isomers
LOGGER = logging.getLogger(__name__)

def rebuild_h_atoms(mol_graph, keep_bonding=False):
"""
Expand Down Expand Up @@ -52,3 +55,27 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
ref_node = next(mol_graph.neighbors(node))
mol_graph.nodes[node]["fragid"] = mol_graph.nodes[ref_node]["fragid"]
mol_graph.nodes[node]["fragname"] = mol_graph.nodes[ref_node]["fragname"]

def annotate_ez_isomers(molecule):
"""
Small wrapper dealing with ez_isomer annotation.

Parameters
----------
molecule: nx.Graph
The molecule of interest, which must of ez_isomer_pairs
and ez_isomer_class set as node attributes
"""
ez_isomer_atoms = nx.get_node_attributes(molecule, 'ez_isomer_atoms')
ez_isomer_class = nx.get_node_attributes(molecule, 'ez_isomer_class')
ez_isomer_atoms_list = [atoms + [_class] for atoms, _class in zip(ez_isomer_atoms.values(), ez_isomer_class.values())]
ez_isomer_pairs = list(zip(ez_isomer_atoms_list[:-1], ez_isomer_atoms_list[1:]))
if len(ez_isomer_atoms)%2 != 0:
msg = ("You have an uneven amount of atoms marked as CIS/TRANS isomers."
"We will drop the last atom from assigning the iosmers.")
LOGGER.warning(msg)
_annotate_ez_isomers(molecule, ez_isomer_pairs)
# clean up
for node in ez_isomer_atoms:
del molecule.nodes[node]['ez_isomer_atoms']
del molecule.nodes[node]['ez_isomer_class']
30 changes: 26 additions & 4 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def strip_bonding_descriptors(fragment_string):
bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
smile_iter = PeekIter(fragment_string)
bonding_descrpt = defaultdict(list)
ez_isomer_atoms = {}
rs_isomers = {}
smile = ""
node_count = 0
prev_node = 0
Expand All @@ -84,7 +86,14 @@ def strip_bonding_descriptors(fragment_string):
else:
atom = token
while peek != ']':
atom += peek
# deal with rs chirality
if peek == '@':
chiral_token = peek
if smile_iter.peek() == '@':
chiral_token = '@' + next(smile_iter)
rs_isomers[node_count] = (chiral_token, [])
else:
atom += peek
peek = next(smile_iter)
smile = smile + atom + "]"
prev_node = node_count
Expand All @@ -99,8 +108,15 @@ def strip_bonding_descriptors(fragment_string):
elif token in bond_to_order:
current_order = bond_to_order[token]
smile += token
elif token in '] H @ . - = # $ : / \\ + - %' or token.isdigit():
elif token in '] H . - = # $ : + - %' or token.isdigit():
smile += token
# deal with ez isomers
elif token in '/ \\':
# we have a branch
if node_count == 0:
ez_isomer_atoms[node_count] = (node_count, 1, token)
else:
ez_isomer_atoms[node_count] = (node_count, prev_node, token)
else:
if smile_iter.peek() and token + smile_iter.peek() in ['Cl', 'Br', 'Si', 'Mg', 'Na']:
smile += (token + next(smile_iter))
Expand All @@ -110,7 +126,7 @@ def strip_bonding_descriptors(fragment_string):
prev_node = node_count
node_count += 1

return smile, bonding_descrpt
return smile, bonding_descrpt, rs_isomers, ez_isomer_atoms

def fragment_iter(fragment_str, all_atom=True):
"""
Expand Down Expand Up @@ -140,14 +156,20 @@ def fragment_iter(fragment_str, all_atom=True):
delim = fragment.find('=', 0)
fragname = fragment[1:delim]
big_smile = fragment[delim+1:]
smile, bonding_descrpt = strip_bonding_descriptors(big_smile)
smile, bonding_descrpt, rs_isomers, ez_isomers = strip_bonding_descriptors(big_smile)
if smile == "H":
mol_graph = nx.Graph()
mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
elif all_atom:
mol_graph = pysmiles.read_smiles(smile, reinterpret_aromatic=False, strict=False)
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
nx.set_node_attributes(mol_graph, rs_isomers, 'rs_isomer')
# we need to split countable node keys and the associated value
ez_isomer_atoms = {idx: val[:-1] for idx, val in ez_isomers.items()}
ez_isomer_class = {idx: val[-1] for idx, val in ez_isomers.items()}
nx.set_node_attributes(mol_graph, ez_isomer_atoms, 'ez_isomer_atoms')
nx.set_node_attributes(mol_graph, ez_isomer_class, 'ez_isomer_class')
# we deal with a CG resolution graph
else:
mol_graph = read_cgsmiles(smile)
Expand Down
17 changes: 11 additions & 6 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import re
import copy
import networkx as nx
from pysmiles.smiles_helper import _mark_chiral_atoms
from .read_cgsmiles import read_cgsmiles
from .read_fragments import read_fragments
from .graph_utils import (merge_graphs,
sort_nodes_by_attr,
annotate_fragments,
set_atom_names_atomistic)
from .pysmiles_utils import rebuild_h_atoms
from .pysmiles_utils import rebuild_h_atoms, annotate_ez_isomers

def compatible(left, right):
"""
Expand Down Expand Up @@ -330,15 +331,19 @@ def resolve(self):
# sort the atoms
self.molecule = sort_nodes_by_attr(self.molecule, sort_attr=("fragid"))

if all_atom:
# assign chirality
_mark_chiral_atoms(self.molecule)
# assign rs isomerism
annotate_ez_isomers(self.molecule)
# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
set_atom_names_atomistic(self.meta_graph, self.molecule)

# and redo the meta molecule
self.meta_graph = annotate_fragments(self.meta_graph,
self.molecule)

# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
if all_atom:
set_atom_names_atomistic(self.meta_graph, self.molecule)

# increment the resolution counter
self.resolution_counter += 1

Expand Down
76 changes: 64 additions & 12 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,48 +153,100 @@ def test_read_cgsmiles(smile, nodes, edges, orders):
fragnames = nx.get_node_attributes(meta_mol, 'fragname')
assert nodes == list(fragnames.values())

@pytest.mark.parametrize('big_smile, smile, bonding',(
@pytest.mark.parametrize('big_smile, smile, bonding, rs, ez',(
# smiple symmetric bonding
("[$]COC[$]",
"COC",
{0: ["$1"], 2: ["$1"]}),
{0: ["$1"], 2: ["$1"]},
None,
None),
# smiple symmetric bonding with more than one name
("[$1A]COC[$1A]",
"COC",
{0: ["$1A1"], 2: ["$1A1"]}),
{0: ["$1A1"], 2: ["$1A1"]},
None,
None),
# smiple bonding multiletter atom
("Clc[$]c[$]",
"Clcc",
{1: ["$1"], 2: ["$1"]}),
{1: ["$1"], 2: ["$1"]},
None,
None),
# simple symmetric but with explicit hydrogen
("[$][CH2]O[CH2][$]",
"[CH2]O[CH2]",
{0: ["$1"], 2: ["$1"]}),
{0: ["$1"], 2: ["$1"]},
None,
None),
# smiple symmetric bonding; multiple descript
("[$]COC[$][$1]",
"COC",
{0: ["$1"], 2: ["$1", "$11"]}),
{0: ["$1"], 2: ["$1", "$11"]},
None,
None),
# named different bonding descriptors
("[$1]CCCC[$2]",
"CCCC",
{0: ["$11"], 3: ["$21"]}),
{0: ["$11"], 3: ["$21"]},
None,
None),
# ring and bonding descriptors
("[$1]CC[$2]C1CCCCC1",
"CCC1CCCCC1",
{0: ["$11"], 1: ["$21"]}),
{0: ["$11"], 1: ["$21"]},
None,
None),
# bonding descript. after branch
("C(COC[$1])[$2]CCC[$3]",
"C(COC)CCC",
{0: ["$21"], 3: ["$11"], 6: ["$31"]}),
{0: ["$21"], 3: ["$11"], 6: ["$31"]},
None,
None),
# left rigth bonding desciptors
("[>]COC[<]",
"COC",
{0: [">1"], 2: ["<1"]})
{0: [">1"], 2: ["<1"]},
None,
None),
# simple chirality in residue
("[>]C[C@](F)(B)N[<]",
"C[C](F)(B)N",
{0: [">1"], 4: ["<1"]},
{1: ('@', [])},
None),
# simple chirality inverse in residue
("[>]C[C@@](F)(B)N[<]",
"C[C](F)(B)N",
{0: [">1"], 4: ["<1"]},
{1: ('@@', [])},
None),
# \ fragment split
("[>]CC(\F)=[<]",
"CC(F)=",
{0: [">1"], 1: ["<2"]},
None,
{2: (2, 1, '\\')}),
# / fragment split
("[>]CC(/F)=[<]",
"CC(F)=",
{0: [">1"], 1: ["<2"]},
None,
{2: (2, 1, '/')}),
# both in one fragment
("[>]CC(/F)=C(\F)C[<]",
"CC(F)=C(F)C",
{0: [">1"], 5: ["<1"]},
None,
{2: (2, 1, '/'), 4: (4, 3, '\\')}),
))
def test_strip_bonding_descriptors(big_smile, smile, bonding):
new_smile, new_bonding = strip_bonding_descriptors(big_smile)
def test_strip_bonding_descriptors(big_smile, smile, bonding, rs, ez):
new_smile, new_bonding, rs_isomers, ez_isomers = strip_bonding_descriptors(big_smile)
assert new_smile == smile
assert new_bonding == bonding
if rs:
assert rs == rs_isomers
if ez:
assert ez == ez_isomers

@pytest.mark.parametrize('fragment_str, nodes, edges',(
# single fragment
Expand Down
Loading
Loading