Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement bond order syntax for CG #24

Merged
merged 7 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,11 @@ def set_atom_names_atomistic(molecule, meta_graph=None):
fraglist = defaultdict(list)
if meta_graph:
for meta_node in meta_graph.nodes:
fraggraph = meta_graph.nodes[meta_node]['graph']
fraglist[meta_node] += list(fraggraph.nodes)
# to catch virtual side nodes that do not have a representation
# in the atomsitic structure
fraggraph = meta_graph.nodes[meta_node].get('graph', None)
if fraggraph:
fraglist[meta_node] += list(fraggraph.nodes)
else:
node_to_fragid = nx.get_node_attributes(molecule, 'fragid')
for node, fragids in node_to_fragid.items():
Expand Down
83 changes: 56 additions & 27 deletions cgsmiles/read_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,6 @@ def _expand_branch(mol_graph, current, anchor, recipe):
prev_node = anchor
return mol_graph, current, prev_node

def _get_percent(pattern, stop):
end_num = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
return pattern[stop+1:end_num]

def read_cgsmiles(pattern):
"""
Generate a :class:`nx.Graph` from a pattern string according to the
Expand Down Expand Up @@ -133,6 +129,10 @@ def read_cgsmiles(pattern):
cycle_edges = []
# each element in the for loop matches a pattern
# '[' + '#' + some alphanumeric name + ']'
symbol_to_order = {".": 0, "=": 2, "-": 1, "#": 3, "$": 4}
default_bond_order = 1
bond_order = None
prev_bond_order = None
for match in re.finditer(PATTERNS['place_holder'], pattern):
start, stop = match.span()
# we start a new branch when the residue is preceded by '('
Expand All @@ -146,28 +146,54 @@ def read_cgsmiles(pattern):

# here we check if the atom is followed by a cycle marker
# in this case we have an open cycle and close it
for token in pattern[stop:]:
# we close a cycle
if token.isdigit() and token in cycle:
cycle_edges.append((current, cycle[token]))
del cycle[token]
# we open a cycle
elif token.isdigit():
cycle[token] = current
# we found a ring indicator
elif token == "%":
ring_marker = _get_percent(pattern, stop)
# we close the ring
ring_marker = ""
multi_ring = False
ring_bond_order = default_bond_order
for rdx, token in enumerate(pattern[stop:]):
if multi_ring and not token.isdigit():
if ring_marker in cycle:
cycle_edges.append((current, cycle[ring_marker]))
cycle_edges.append((current,
cycle[ring_marker][0],
cycle[ring_marker][1]))
del cycle[ring_marker]
break
# we open a new ring
cycle[_get_percent(pattern, stop)] = current
break
else:
cycle[ring_marker] = [current, ring_bond_order]
multi_ring = False
ring_marker = ""
ring_bond_order = default_bond_order

# we open a new multi ring
if token == "%":
multi_ring = True
ring_marker = '%'
# we open a ring or close
elif token.isdigit():
ring_marker += token
if not multi_ring:
# we have a single digit marker and it is in
# cycle so we close it
if ring_marker in cycle:
cycle_edges.append((current,
cycle[ring_marker][0],
cycle[ring_marker][1]))
del cycle[ring_marker]
# the marker is not in cycle so we update cycles
else:
cycle[ring_marker] = [current, ring_bond_order]
ring_marker = ""
ring_bond_order = default_bond_order
# we found bond_order
elif token in symbol_to_order:
ring_bond_order = symbol_to_order[token]
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
else:
break

# check if there is a bond-order following the node
if stop < len(pattern) and pattern[stop+rdx-1] in '- + . = # $':
bond_order = symbol_to_order[pattern[stop+rdx-1]]
else:
bond_order = default_bond_order

# here we check if the atom is followed by a expansion character '|'
# as in ... [#PEO]|
if stop < len(pattern) and pattern[stop] == '|':
Expand Down Expand Up @@ -197,16 +223,19 @@ def read_cgsmiles(pattern):
mol_graph.add_node(current, fragname=fragname)

if prev_node is not None:
mol_graph.add_edge(prev_node, current, order=1)
mol_graph.add_edge(prev_node, current, order=prev_bond_order)

prev_bond_order = bond_order

# here we have a double edge
for cycle_edge in cycle_edges:
if cycle_edge in mol_graph.edges:
mol_graph.edges[cycle_edge]["order"] += 1
else:
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=1)
msg=("You define two edges between the same node."
"Use bond order symbols instead.")
raise SyntaxError(msg)
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=cycle_edge[2])

prev_node = current
current += 1
Expand Down
11 changes: 10 additions & 1 deletion cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,12 @@ def resolve_disconnected_molecule(self, fragment_dict):
a dict of fragment graphs
"""
for meta_node in self.meta_graph.nodes:
neighbors = self.meta_graph.neighbors(meta_node)
edge_orders = [self.meta_graph.edges[(meta_node, ndx)]['order'] for ndx in neighbors]
# we are dealing with a virtual node that has no projection to the
# next resolution level
if edge_orders and all([order == 0 for order in edge_orders]):
continue
fragname = self.meta_graph.nodes[meta_node]['fragname']
fragment = fragment_dict[fragname]
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
correspondence = merge_graphs(self.molecule, fragment)
Expand Down Expand Up @@ -278,7 +284,10 @@ def edges_from_bonding_descrpt(self, all_atom=False):
if the high resolution level graph has all-atom resolution
default: False
"""
for prev_node, node in self.meta_graph.edges:
import random
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
edges = list(self.meta_graph.edges)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
#random.shuffle(edges)
for prev_node, node in edges:
for _ in range(0, self.meta_graph.edges[(prev_node, node)]["order"]):
prev_graph = self.meta_graph.nodes[prev_node]['graph']
node_graph = self.meta_graph.nodes[node]['graph']
Expand Down
2 changes: 1 addition & 1 deletion cgsmiles/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class MoleculeSampler:
descriptor is used to highlight the fact that the probabilistic behaviour is driven
by the bonding descriptors.
The sampler features a two level connectivity determination. First using the
The sampler features a two level connectivity determination. First using the
`polymer_reactivities` an open bonding descriptor from the growing polymer
molecule is selected according to the probabilities provided. Subsequently,
a fragment is randomly chosen that has a matching complementary bonding
Expand Down
34 changes: 32 additions & 2 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
[(0, 1), (1, 2)],
[1, 1]),
# smiple linear sequenece with multi-edge
("{[#PMA]1[#PEO]1}",
("{[#PMA]=[#PEO]}",
["PMA", "PEO"],
[(0, 1)],
[2]),
Expand All @@ -34,6 +34,36 @@
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 1]),
# smiple cycle sequence bond order to next
("{[#PMA]1=[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 1]),
# smiple cycle sequence bond order in cycle
("{[#PMA]=1[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 2]),
# smiple cycle sequence two bond orders
("{[#PMA].1=[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 0]),
# smiple cycle sequence with % bond order
("{[#PMA]=%123[#PEO][#PMA]%123}",
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 2]),
# smiple cycle sequence with % bond order next
("{[#PMA]%123=[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 1]),
# smiple cycle sequence with % two bond orders
("{[#PMA]=%123.[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[0, 1, 2]),
# smiple cycle sequence with %
("{[#PMA]%123[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
Expand All @@ -57,7 +87,7 @@
# [1, 1, 1, 1, 1, 1, 1, 1]),
# smiple linear sequenece with multi-edge
# in cycle
("{[#PMA]12[#PMA][#PMA][#PEO]12}",
("{[#PMA]=1[#PMA][#PMA][#PEO]1}",
["PMA", "PMA", "PMA", "PEO"],
[(0, 1), (1, 2), (2, 3), (0, 3)],
[1, 1, 1, 2]),
Expand Down
33 changes: 22 additions & 11 deletions cgsmiles/tests/test_molecule_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (0, 2), (2, 3), (2, 4),
(4, 5), (4, 6), (4, 7), (2, 8), (8, 9)], {}, {}),
# THF like test case with double edge and squash operator
("{[#A]1[#B]1}.{#A=[!]COC[!],#B=[!]CCCC[!]}",
("{[#A]=[#B]}.{#A=[!]COC[!],#B=[!]CCCC[!]}",
[('A', 'O C C H H H H'),
('B', 'C C H H H H C C H H H H')],
'O C C H H H H C C H H H H',
Expand All @@ -185,6 +185,16 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (0, 2), (0, 3), (0, 4), (1, 5),
(1, 7), (5, 9), (5, 6), (7, 13), (7, 8),
(9, 11), (9, 10), (11, 13), (11, 12), (13, 14)], {}, {}),
# simple chirality assigment with rings
("{[#GLC]}.{#GLC=C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O}",
# 0 1 2 3
[('GLC', 'C C C C C C O O O O O O H H H H H H H H H H H H')],
'C C C C C C O O O O O O H H H H H H H H H H H H',
[(0, 1), (0, 11), (0, 12), (0, 13), (1, 2), (1, 6), (1, 14), (2, 3), (2, 10),
(2, 15), (3, 4), (3, 9), (3, 16), (4, 5), (4, 8), (4, 17), (5, 6), (5, 7), (5, 18),
(7, 19), (8, 20), (9, 21), (10, 22), (11, 23)],
{1: (6, 14, 2, 0), 2: (1, 15, 3, 10), 3: (2, 16, 9, 4),
4: (3, 17, 5, 8), 5: (4, 18, 6, 7)}, {}),
# simple chirality assigment between fragments
("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@H][>]C(=O)OC}",
# 0 1 2 3
Expand All @@ -195,16 +205,6 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
(2, 5), (5, 6), (5, 7), (7, 8), (7, 9), (9, 10), (10, 11), (10, 12),
(10, 13), (5, 14), (14, 15)],
{3: (2, 10, 4, 14)}, {}),
# simple chirality assigment with rings
("{[#GLC]}.{#GLC=C([C@@H]1[C@H]([C@@H]([C@H]([C@H](O1)O)O)O)O)O}",
# 0 1 2 3
[('GLC', 'C C C C C C O O O O O O H H H H H H H H H H H H')],
'C C C C C C O O O O O O H H H H H H H H H H H H',
[(0, 1), (0, 11), (0, 12), (0, 13), (1, 2), (1, 6), (1, 14), (2, 3), (2, 10),
(2, 15), (3, 4), (3, 9), (3, 16), (4, 5), (4, 8), (4, 17), (5, 6), (5, 7), (5, 18),
(7, 19), (8, 20), (9, 21), (10, 22), (11, 23)],
{1: (6, 14, 2, 0), 2: (1, 15, 3, 10), 3: (2, 16, 9, 4),
4: (3, 17, 5, 8), 5: (4, 18, 6, 7)}, {}),
# simple chirality assigment between fragments inv
("{[#A][#B][#C]}.{#A=O[>],#C=O[<],#B=[<]C[C@@H][>]C(=O)OC}",
# 0 1 2 3
Expand All @@ -229,6 +229,16 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes):
[(0, 1), (1, 2), (0, 3), (0, 4),
(0, 5), (1, 7), (7, 6), (7, 8), (8, 9), (8, 10), (8, 11)],
{}, {2: (2, 1, 6, 7, 'cis'), 7: (7, 6, 1, 2, 'cis')}),
# test skip virtual nodes
("{[#SP4]1.2[#SP4].3[#SP1r]1.[#TC4]23}.{#SP4=OC[$]C[$]O,#SP1r=[$]OC[$]CO}",
[('SP4', 'O C C O H H H H'), ('SP4', 'O C C O H H H H'),
('SP1r', 'O C C O H H H H')],
'O C C O H H H H O C C O H H H H O C C O H H H H',
[(0, 1), (0, 4), (1, 2), (1, 9), (1, 5), (2, 3), (2, 16), (2, 6),
(3, 7), (8, 9), (8, 12), (9, 10), (9, 13), (10, 11), (10, 17),
(10, 14), (11, 15), (16, 17), (17, 18), (17, 20), (18, 19),
(18, 21), (18, 22), (19, 23)],
{},{}),
))
def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral, ez):
meta_mol, molecule = MoleculeResolver.from_string(smile).resolve()
Expand All @@ -240,6 +250,7 @@ def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges, chiral
block_graph = meta_mol.nodes[node]['graph']
target_elements = nx.get_node_attributes(block_graph, 'element')
sorted_elements = [target_elements[key] for key in sorted(target_elements)]
print(target_elements)
print("-->", sorted_elements)
print("-->", ref[1].split())
assert sorted_elements == ref[1].split()
Expand Down
4 changes: 2 additions & 2 deletions cgsmiles/tests/test_write_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ def test_write_fragments(input_string):
# branched nested
"{[#PE][#PMA]([#PEO][#PEO]([#OMA][#OMA]1[#OMA][#OMA]1))[#PE]}",
# special cycle
"{[#PE]1[#PMA]1}",
"{[#PE]=[#PMA]}",
# special triple cycle
"{[#A]12[#B]12}",
"{[#A]#[#B]}",
))
def test_write_mol_graphs(input_string):
mol_graph = read_cgsmiles(input_string)
Expand Down
12 changes: 1 addition & 11 deletions cgsmiles/write_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,16 +86,6 @@ def write_graph(molecule, smiles_format=False, default_element='*'):
edges.add(frozenset((n_idx, n_jdx)))
total_edges = set(map(frozenset, molecule.edges))
ring_edges = list(total_edges - edges)
# in cgsmiles graphs only bonds of order 1 and 2
# exists; order 2 means we have a ring at the
# higher resolution. These orders are therefore
# represented as rings and that requires to
# add them to the ring list
if not smiles_format:
for edge in molecule.edges:
if molecule.edges[edge]['order'] != 1:
for n in range(1, molecule.edges[edge]['order']):
ring_edges.append(frozenset(edge))

atom_to_ring_idx = defaultdict(list)
ring_idx_to_bond = {}
Expand Down Expand Up @@ -123,7 +113,7 @@ def write_graph(molecule, smiles_format=False, default_element='*'):
previous = predecessors[current]
assert len(previous) == 1
previous = previous[0]
if smiles_format and _write_edge_symbol(molecule, previous, current):
if _write_edge_symbol(molecule, previous, current):
order = molecule.edges[previous, current].get('order', 1)
smiles += order_to_symbol[order]

Expand Down
Loading