Skip to content

Commit

Permalink
implement bond order syntax for CG
Browse files Browse the repository at this point in the history
  • Loading branch information
fgrunewald committed Oct 14, 2024
1 parent 3465bea commit 9e91794
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 57 deletions.
7 changes: 5 additions & 2 deletions cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,11 @@ def set_atom_names_atomistic(molecule, meta_graph=None):
fraglist = defaultdict(list)
if meta_graph:
for meta_node in meta_graph.nodes:
fraggraph = meta_graph.nodes[meta_node]['graph']
fraglist[meta_node] += list(fraggraph.nodes)
# to catch virtual side nodes that do not have a representation
# in the atomsitic structure
fraggraph = meta_graph.nodes[meta_node].get('graph', None)
if fraggraph:
fraglist[meta_node] += list(fraggraph.nodes)
else:
node_to_fragid = nx.get_node_attributes(molecule, 'fragid')
for node, fragids in node_to_fragid.items():
Expand Down
83 changes: 56 additions & 27 deletions cgsmiles/read_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,6 @@ def _expand_branch(mol_graph, current, anchor, recipe):
prev_node = anchor
return mol_graph, current, prev_node

def _get_percent(pattern, stop):
end_num = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
return pattern[stop+1:end_num]

def read_cgsmiles(pattern):
"""
Generate a :class:`nx.Graph` from a pattern string according to the
Expand Down Expand Up @@ -133,6 +129,10 @@ def read_cgsmiles(pattern):
cycle_edges = []
# each element in the for loop matches a pattern
# '[' + '#' + some alphanumeric name + ']'
symbol_to_order = {".": 0, "=": 2, "-": 1, "#": 3, "$": 4}
default_bond_order = 1
bond_order = None
prev_bond_order = None
for match in re.finditer(PATTERNS['place_holder'], pattern):
start, stop = match.span()
# we start a new branch when the residue is preceded by '('
Expand All @@ -146,28 +146,54 @@ def read_cgsmiles(pattern):

# here we check if the atom is followed by a cycle marker
# in this case we have an open cycle and close it
for token in pattern[stop:]:
# we close a cycle
if token.isdigit() and token in cycle:
cycle_edges.append((current, cycle[token]))
del cycle[token]
# we open a cycle
elif token.isdigit():
cycle[token] = current
# we found a ring indicator
elif token == "%":
ring_marker = _get_percent(pattern, stop)
# we close the ring
ring_marker = ""
multi_ring = False
ring_bond_order = default_bond_order
for rdx, token in enumerate(pattern[stop:]):
if multi_ring and not token.isdigit():
if ring_marker in cycle:
cycle_edges.append((current, cycle[ring_marker]))
cycle_edges.append((current,
cycle[ring_marker][0],
cycle[ring_marker][1]))
del cycle[ring_marker]
break
# we open a new ring
cycle[_get_percent(pattern, stop)] = current
break
else:
cycle[ring_marker] = [current, ring_bond_order]
multi_ring = False
ring_marker = ""
ring_bond_order = default_bond_order

# we open a new multi ring
if token == "%":
multi_ring = True
ring_marker = '%'
# we open a ring or close
elif token.isdigit():
ring_marker += token
if not multi_ring:
# we have a single digit marker and it is in
# cycle so we close it
if ring_marker in cycle:
cycle_edges.append((current,
cycle[ring_marker][0],
cycle[ring_marker][1]))
del cycle[ring_marker]
# the marker is not in cycle so we update cycles
else:
cycle[ring_marker] = [current, ring_bond_order]
ring_marker = ""
ring_bond_order = default_bond_order
# we found bond_order
elif token in symbol_to_order:
ring_bond_order = symbol_to_order[token]
else:
break

# check if there is a bond-order following the node
if stop < len(pattern) and pattern[stop+rdx-1] in '- + . = # $':
bond_order = symbol_to_order[pattern[stop+rdx-1]]
else:
bond_order = default_bond_order

# here we check if the atom is followed by a expansion character '|'
# as in ... [#PEO]|
if stop < len(pattern) and pattern[stop] == '|':
Expand Down Expand Up @@ -197,16 +223,19 @@ def read_cgsmiles(pattern):
mol_graph.add_node(current, fragname=fragname)

if prev_node is not None:
mol_graph.add_edge(prev_node, current, order=1)
mol_graph.add_edge(prev_node, current, order=prev_bond_order)

prev_bond_order = bond_order

# here we have a double edge
for cycle_edge in cycle_edges:
if cycle_edge in mol_graph.edges:
mol_graph.edges[cycle_edge]["order"] += 1
else:
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=1)
msg=("You define two edges between the same node."
"Use bond order symbols instead.")
raise SyntaxError(msg)
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=cycle_edge[2])

prev_node = current
current += 1
Expand Down
11 changes: 10 additions & 1 deletion cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,12 @@ def resolve_disconnected_molecule(self, fragment_dict):
a dict of fragment graphs
"""
for meta_node in self.meta_graph.nodes:
neighbors = self.meta_graph.neighbors(meta_node)
edge_orders = [self.meta_graph.edges[(meta_node, ndx)]['order'] for ndx in neighbors]
# we are dealing with a virtual node that has no projection to the
# next resolution level
if edge_orders and all([order == 0 for order in edge_orders]):
continue
fragname = self.meta_graph.nodes[meta_node]['fragname']
fragment = fragment_dict[fragname]
correspondence = merge_graphs(self.molecule, fragment)
Expand Down Expand Up @@ -278,7 +284,10 @@ def edges_from_bonding_descrpt(self, all_atom=False):
if the high resolution level graph has all-atom resolution
default: False
"""
for prev_node, node in self.meta_graph.edges:
import random
edges = list(self.meta_graph.edges)
#random.shuffle(edges)
for prev_node, node in edges:
for _ in range(0, self.meta_graph.edges[(prev_node, node)]["order"]):
prev_graph = self.meta_graph.nodes[prev_node]['graph']
node_graph = self.meta_graph.nodes[node]['graph']
Expand Down
2 changes: 1 addition & 1 deletion cgsmiles/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class MoleculeSampler:
descriptor is used to highlight the fact that the probabilistic behaviour is driven
by the bonding descriptors.
The sampler features a two level connectivity determination. First using the
The sampler features a two level connectivity determination. First using the
`polymer_reactivities` an open bonding descriptor from the growing polymer
molecule is selected according to the probabilities provided. Subsequently,
a fragment is randomly chosen that has a matching complementary bonding
Expand Down
34 changes: 32 additions & 2 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
[(0, 1), (1, 2)],
[1, 1]),
# smiple linear sequenece with multi-edge
("{[#PMA]1[#PEO]1}",
("{[#PMA]=[#PEO]}",
["PMA", "PEO"],
[(0, 1)],
[2]),
Expand All @@ -34,6 +34,36 @@
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 1]),
# smiple cycle sequence bond order to next
("{[#PMA]1=[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 1]),
# smiple cycle sequence bond order in cycle
("{[#PMA]=1[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 2]),
# smiple cycle sequence two bond orders
("{[#PMA].1=[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 0]),
# smiple cycle sequence with % bond order
("{[#PMA]=%123[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 2]),
# smiple cycle sequence with % bond order next
("{[#PMA]%123=[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 1]),
# smiple cycle sequence with % two bond orders
("{[#PMA]=%123.[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[0, 1, 2]),
# smiple cycle sequence with %
("{[#PMA]%123[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
Expand All @@ -57,7 +87,7 @@
# [1, 1, 1, 1, 1, 1, 1, 1]),
# smiple linear sequenece with multi-edge
# in cycle
("{[#PMA]12[#PMA][#PMA][#PEO]12}",
("{[#PMA]=1[#PMA][#PMA][#PEO]1}",
["PMA", "PMA", "PMA", "PEO"],
[(0, 1), (1, 2), (2, 3), (0, 3)],
[1, 1, 1, 2]),
Expand Down
33 changes: 22 additions & 11 deletions cgsmiles/tests/test_molecule_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (0, 2), (2, 3), (2, 4),
(4, 5), (4, 6), (4, 7), (2, 8), (8, 9)], {}, {}),
# THF like test case with double edge and squash operator
("{[#A]1[#B]1}.{#A=[!]COC[!],#B=[!]CCCC[!]}",
("{[#A]=[#B]}.{#A=[!]COC[!],#B=[!]CCCC[!]}",
[('A', 'O C C H H H H'),
('B', 'C C H H H H C C H H H H')],
'O C C H H H H C C H H H H',
Expand All @@ -185,6 +185,16 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (0, 2), (0, 3), (0, 4), (1, 5),
(1, 7), (5, 9), (5, 6), (7, 13), (7, 8),
(9, 11), (9, 10), (11, 13), (11, 12), (13, 14)], {}, {}),
# simple chirality assigment with rings
("{[#GLC]}.{#GLC=C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O}",
# 0 1 2 3
[('GLC', 'C C C C C C O O O O O O H H H H H H H H H H H H')],
'C C C C C C O O O O O O H H H H H H H H H H H H',
[(0, 1), (0, 11), (0, 12), (0, 13), (1, 2), (1, 6), (1, 14), (2, 3), (2, 10),
(2, 15), (3, 4), (3, 9), (3, 16), (4, 5), (4, 8), (4, 17), (5, 6), (5, 7), (5, 18),
(7, 19), (8, 20), (9, 21), (10, 22), (11, 23)],
{1: (6, 14, 2, 0), 2: (1, 15, 3, 10), 3: (2, 16, 9, 4),
4: (3, 17, 5, 8), 5: (4, 18, 6, 7)}, {}),
# simple chirality assigment between fragments
("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@H][>]C(=O)OC}",
# 0 1 2 3
Expand All @@ -195,16 +205,6 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
(2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
(10, 13), (5, 14), (14, 15)],
{3: (2, 10, 4, 14)}, {}),
# simple chirality assigment with rings
("{[#GLC]}.{#GLC=C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O}",
# 0 1 2 3
[('GLC', 'C C C C C C O O O O O O H H H H H H H H H H H H')],
'C C C C C C O O O O O O H H H H H H H H H H H H',
[(0, 1), (0, 11), (0, 12), (0, 13), (1, 2), (1, 6), (1, 14), (2, 3), (2, 10),
(2, 15), (3, 4), (3, 9), (3, 16), (4, 5), (4, 8), (4, 17), (5, 6), (5, 7), (5, 18),
(7, 19), (8, 20), (9, 21), (10, 22), (11, 23)],
{1: (6, 14, 2, 0), 2: (1, 15, 3, 10), 3: (2, 16, 9, 4),
4: (3, 17, 5, 8), 5: (4, 18, 6, 7)}, {}),
# simple chirality assigment between fragments inv
("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@@H][>]C(=O)OC}",
# 0 1 2 3
Expand All @@ -229,6 +229,16 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (1, 2), (0, 3), (0, 4),
(0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)],
{}, {2: (2, 1, 6, 7, 'cis'), 7: (7, 6, 1, 2, 'cis')}),
# test skip virtual nodes
("{[#SP4]1.2[#SP4].3[#SP1r]1.[#TC4]23}.{#SP4=OC[$]C[$]O,#SP1r=[$]OC[$]CO}",
[('SP4', 'O C C O H H H H'), ('SP4', 'O C C O H H H H'),
('SP1r', 'O C C O H H H H')],
'O C C O H H H H O C C O H H H H O C C O H H H H',
[(0, 1), (0, 4), (1, 2), (1, 9), (1, 5), (2, 3), (2, 16), (2, 6),
(3, 7), (8, 9), (8, 12), (9, 10), (9, 13), (10, 11), (10, 17),
(10, 14), (11, 15), (16, 17), (17, 18), (17, 20), (18, 19),
(18, 21), (18, 22), (19, 23)],
{},{}),
))
def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral, ez):
meta_mol, molecule = MoleculeResolver.from_string(smile).resolve()
Expand All @@ -240,6 +250,7 @@ def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral
block_graph = meta_mol.nodes[node]['graph']
target_elements = nx.get_node_attributes(block_graph, 'element')
sorted_elements = [target_elements[key] for key in sorted(target_elements)]
print(target_elements)
print("-->", sorted_elements)
print("-->", ref[1].split())
assert sorted_elements == ref[1].split()
Expand Down
4 changes: 2 additions & 2 deletions cgsmiles/tests/test_write_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def test_write_fragments(input_string):
# branched nested
"{[#PE][#PMA]([#PEO][#PEO]([#OMA][#OMA]1[#OMA][#OMA]1))[#PE]}",
# special cycle
"{[#PE]1[#PMA]1}",
"{[#PE]=[#PMA]}",
# special triple cycle
"{[#A]12[#B]12}",
"{[#A]#[#B]}",
))
def test_write_mol_graphs(input_string):
mol_graph = read_cgsmiles(input_string)
Expand Down
12 changes: 1 addition & 11 deletions cgsmiles/write_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,6 @@ def write_graph(molecule, smiles_format=False, default_element='*'):
edges.add(frozenset((n_idx, n_jdx)))
total_edges = set(map(frozenset, molecule.edges))
ring_edges = list(total_edges - edges)
# in cgsmiles graphs only bonds of order 1 and 2
# exists; order 2 means we have a ring at the
# higher resolution. These orders are therefore
# represented as rings and that requires to
# add them to the ring list
if not smiles_format:
for edge in molecule.edges:
if molecule.edges[edge]['order'] != 1:
for n in range(1, molecule.edges[edge]['order']):
ring_edges.append(frozenset(edge))

atom_to_ring_idx = defaultdict(list)
ring_idx_to_bond = {}
Expand Down Expand Up @@ -123,7 +113,7 @@ def write_graph(molecule, smiles_format=False, default_element='*'):
previous = predecessors[current]
assert len(previous) == 1
previous = previous[0]
if smiles_format and _write_edge_symbol(molecule, previous, current):
if _write_edge_symbol(molecule, previous, current):
order = molecule.edges[previous, current].get('order', 1)
smiles += order_to_symbol[order]

Expand Down

0 comments on commit 9e91794

Please sign in to comment.