Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement isomerism bug fixes plus tests #20

Merged
merged 8 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import logging
import networkx as nx
import pysmiles
from pysmiles.smiles_helper import _annotate_ez_isomers
from pysmiles.smiles_helper import (_annotate_ez_isomers,
_mark_chiral_atoms,
remove_explicit_hydrogens,
add_explicit_hydrogens)

LOGGER = logging.getLogger(__name__)

def compute_mass(input_molecule):
Expand Down Expand Up @@ -103,3 +107,57 @@ def annotate_ez_isomers(molecule):
for node in ez_isomer_atoms:
del molecule.nodes[node]['ez_isomer_atoms']
del molecule.nodes[node]['ez_isomer_class']

def mark_chiral_atoms(molecule):
"""
For all nodes tagged as chiral, figure out the three
substituents and annotate the node with a tuple that
has the order in which to rotate. This essentially
corresponds to the definition of an improper dihedral
angle centered on the chiral atom.

Pysmiles treats explicit hydrogen atoms differently
from implicit ones. In cgsmiles at the time when this
function is called all hydrogen atoms have been added
explicitly. However, we need to correct the chirality
assigment for this.

Note that this means it is not possible to have explicit
hydrogen atoms attached to chiral centers within cgsmiles
to begin with. However, this is a rather edge case.
"""
chiral_nodes = nx.get_node_attributes(molecule, 'rs_isomer')
for node, (direction, rings) in chiral_nodes.items():
# first the ring atoms in order
# that they were connected then the
# other neighboring atoms in order
bonded_neighbours = sorted(molecule[node])
neighbours = list(rings)
hstash = None
for neighbour in bonded_neighbours:
if neighbour not in neighbours:
# all hydrogen atoms are explicit in cgsmiles
# BUT in the input of chirality they are usual
# implicit. At this point we need to treat all
# hydrogen atoms as if they were implicit.
if molecule.nodes[neighbour]['element'] != 'H':
neighbours.append(neighbour)
else:
hstash = neighbour
if hstash:
neighbours.insert(1, hstash)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

if len(neighbours) != 4:
# FIXME Tetrahedral Allene-like Systems such as `NC(Br)=[C@]=C(O)C`
msg = (f"Chiral node {node} has {len(neighbours)} neighbors, which "
"is different than the four expected for tetrahedral "
"chirality.")
raise ValueError(msg)
# the default is anti-clockwise sorting indicated by '@'
# in this case the nodes are sorted with increasing
# node index; however @@ means clockwise and the
# order of nodes is reversed (i.e. with decreasing index)
if direction == '@@':
neighbours = [neighbours[0], neighbours[1], neighbours[3], neighbours[2]]

molecule.nodes[node]['rs_isomer'] = tuple(neighbours)
58 changes: 56 additions & 2 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,45 @@ def peek(self):
def __iter__(self):
return self

def _find_bonded_ring_node(ring_nodes, node):
current = ring_nodes.index(node)
if current%2 == 0:
other = ring_nodes[current+1]
else:
other = ring_nodes[current-1]
return other

def deal_with_chiral_rings(smile_iter, token, node_count, rings):
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
"""
"""
multi_ring = False
ring_token = token
partial_str = ""
while True:
if multi_ring and token == '%':
rings[ring_token].append(node_count)
elif multi_ring and token.isdigit():
ring_token += token
elif token == '%':
ring_token += token
multi_ring = True
elif multi_ring:
rings[ring_token].append(node_count)
ring_token = ""
elif token.isdigit():
rings[token].append(node_count)

partial_str += token
token = smile_iter.peek()
if token and not token.isdigit() and not token == '%':
break

try:
token = next(smile_iter)
except StopIteration:
break

return smile_iter, token, partial_str, rings

def strip_bonding_descriptors(fragment_string):
"""
Expand All @@ -60,6 +99,7 @@ def strip_bonding_descriptors(fragment_string):
bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
smile_iter = PeekIter(fragment_string)
bonding_descrpt = defaultdict(list)
rings = defaultdict(list)
ez_isomer_atoms = {}
rs_isomers = {}
smile = ""
Expand Down Expand Up @@ -95,10 +135,10 @@ def strip_bonding_descriptors(fragment_string):
else:
atom += peek
peek = next(smile_iter)

smile = smile + atom + "]"
prev_node = node_count
node_count += 1

elif token == '(':
anchor = prev_node
smile += token
Expand All @@ -108,7 +148,14 @@ def strip_bonding_descriptors(fragment_string):
elif token in bond_to_order:
current_order = bond_to_order[token]
smile += token
elif token in '] H . - = # $ : + - %' or token.isdigit():
# for chirality assignment we need to collect rings
elif token == '%' or token.isdigit():
smile_iter, token, part_str, rings = deal_with_chiral_rings(smile_iter,
token,
prev_node,
rings)
smile += part_str
elif token in '] H . - = # $ : + -':
smile += token
# deal with ez isomers
elif token in '/ \\':
Expand All @@ -126,6 +173,12 @@ def strip_bonding_descriptors(fragment_string):
prev_node = node_count
node_count += 1

# we need to annotate rings to the chiral isomers
for node in rs_isomers:
for ring_idx, ring_nodes in rings.items():
if node in ring_nodes:
bonded_node = _find_bonded_ring_node(ring_nodes, node)
rs_isomers[node][1].append(bonded_node)
return smile, bonding_descrpt, rs_isomers, ez_isomer_atoms

def fragment_iter(fragment_str, all_atom=True):
Expand Down Expand Up @@ -170,6 +223,7 @@ def fragment_iter(fragment_str, all_atom=True):
ez_isomer_class = {idx: val[-1] for idx, val in ez_isomers.items()}
nx.set_node_attributes(mol_graph, ez_isomer_atoms, 'ez_isomer_atoms')
nx.set_node_attributes(mol_graph, ez_isomer_class, 'ez_isomer_class')
print("hcount", mol_graph.nodes(data='hcount'))
# we deal with a CG resolution graph
else:
mol_graph = read_cgsmiles(smile)
Expand Down
9 changes: 5 additions & 4 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import re
import copy
import networkx as nx
from pysmiles.smiles_helper import _mark_chiral_atoms
from .read_cgsmiles import read_cgsmiles
from .read_fragments import read_fragments
from .graph_utils import (merge_graphs,
sort_nodes_by_attr,
annotate_fragments,
set_atom_names_atomistic)
from .pysmiles_utils import rebuild_h_atoms, annotate_ez_isomers
from .pysmiles_utils import (rebuild_h_atoms,
annotate_ez_isomers,
mark_chiral_atoms)

def compatible(left, right, legacy=False):
"""
Expand Down Expand Up @@ -359,8 +360,8 @@ def resolve(self):
self.molecule = sort_nodes_by_attr(self.molecule, sort_attr=("fragid"))

if all_atom:
# assign chirality
_mark_chiral_atoms(self.molecule)
# assign chiral atoms
mark_chiral_atoms(self.molecule)
# assign rs isomerism
annotate_ez_isomers(self.molecule)
# in all-atom MD there are common naming conventions
Expand Down
20 changes: 15 additions & 5 deletions cgsmiles/tests/test_molecule_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (0, 2), (0, 3), (0, 4), (1, 5),
(1, 7), (5, 9), (5, 6), (7, 13), (7, 8),
(9, 11), (9, 10), (11, 13), (11, 12), (13, 14)], {}, {}),
# smiple chirality assigment between fragments
# simple chirality assigment between fragments
("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@H][>]C(=O)OC}",
# 0 1 2 3
[('A', 'O H'), ('B', 'C C C O O C H H H H H H'),
Expand All @@ -194,8 +194,18 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (0, 2), (2, 3), (2, 4),
(2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
(10, 13), (5, 14), (14, 15)],
{3: (2, 4, 10, 14)}, {}),
# smiple chirality assigment between fragments inv
{3: (2, 10, 4, 14)}, {}),
# simple chirality assigment with rings
("{[#GLC]}.{#GLC=C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O}",
# 0 1 2 3
[('GLC', 'C C C C C C O O O O O O H H H H H H H H H H H H')],
'C C C C C C O O O O O O H H H H H H H H H H H H',
[(0, 1), (0, 11), (0, 12), (0, 13), (1, 2), (1, 6), (1, 14), (2, 3), (2, 10),
(2, 15), (3, 4), (3, 9), (3, 16), (4, 5), (4, 8), (4, 17), (5, 6), (5, 7), (5, 18),
(7, 19), (8, 20), (9, 21), (10, 22), (11, 23)],
{1: (6, 14, 2, 0), 2: (1, 15, 3, 10), 3: (2, 16, 9, 4),
4: (3, 17, 5, 8), 5: (4, 18, 6, 7)}, {}),
# simple chirality assigment between fragments inv
("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@@H][>]C(=O)OC}",
# 0 1 2 3
[('A', 'O H'), ('B', 'C C C O O C H H H H H H'),
Expand All @@ -204,15 +214,15 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (0, 2), (2, 3), (2, 4),
(2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
(10, 13), (5, 14), (14, 15)],
{3: (2, 4, 14, 10)}, {}),
{3: (2, 10, 14, 4)}, {}),
# smiple ez isomerism assigment between fragments inv
("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(\F)C}",
[('A', 'C C F H H H'), ('B', 'C F C H H H')],
'C C F H H H F C C H H H',
[(0, 1), (1, 2), (0, 3), (0, 4),
(0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)],
{}, {2: (2, 1, 6, 7, 'trans'), 7: (7, 6, 1, 2, 'trans')}),
# smiple ez isomerism assigment between fragments inv
# simple ez isomerism assigment between fragments inv
("{[#A][#B]}.{#A=CC(/F)=[$],#B=[$]=C(/F)C}",
[('A', 'C C F H H H'), ('B', 'C F C H H H')],
'C C F H H H F C C H H H',
Expand Down
Loading