Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implement bond order syntax for CG #24

Merged
merged 7 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,11 @@ def set_atom_names_atomistic(molecule, meta_graph=None):
fraglist = defaultdict(list)
if meta_graph:
for meta_node in meta_graph.nodes:
fraggraph = meta_graph.nodes[meta_node]['graph']
fraglist[meta_node] += list(fraggraph.nodes)
# to catch virtual side nodes that do not have a representation
# in the atomsitic structure
fraggraph = meta_graph.nodes[meta_node].get('graph', None)
if fraggraph:
fraglist[meta_node] += list(fraggraph.nodes)
else:
node_to_fragid = nx.get_node_attributes(molecule, 'fragid')
for node, fragids in node_to_fragid.items():
Expand Down
115 changes: 81 additions & 34 deletions cgsmiles/read_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,29 @@ def _expand_branch(mol_graph, current, anchor, recipe):
anchor: abc.hashable
anchor to which to connect current node

recipie: list[(str, int)]
recipe: list[(str, int, int)]
list storing tuples of node names and
the number of times the node has to be added
and their bond order

Returns
-------
nx.Graph
"""
prev_node = anchor
for bdx, (fragname, n_mon) in enumerate(recipe):
for bdx, (fragname, n_mon, order) in enumerate(recipe):
if bdx == 0:
anchor = current
for _ in range(0, n_mon):
mol_graph.add_node(current, fragname=fragname)
mol_graph.add_edge(prev_node, current, order=1)
mol_graph.add_edge(prev_node, current, order=order)

prev_node = current
current += 1

prev_node = anchor
return mol_graph, current, prev_node

def _get_percent(pattern, stop):
end_num = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
return pattern[stop+1:end_num]

def read_cgsmiles(pattern):
"""
Generate a :class:`nx.Graph` from a pattern string according to the
Expand Down Expand Up @@ -133,6 +130,10 @@ def read_cgsmiles(pattern):
cycle_edges = []
# each element in the for loop matches a pattern
# '[' + '#' + some alphanumeric name + ']'
symbol_to_order = {".": 0, "=": 2, "-": 1, "#": 3, "$": 4}
default_bond_order = 1
bond_order = None
prev_bond_order = None
for match in re.finditer(PATTERNS['place_holder'], pattern):
start, stop = match.span()
# we start a new branch when the residue is preceded by '('
Expand All @@ -142,32 +143,61 @@ def read_cgsmiles(pattern):
branch_anchor.append(prev_node)
# the recipe for making the branch includes the anchor;
# which is hence the first residue in the list
recipes[branch_anchor[-1]] = [(mol_graph.nodes[prev_node]['fragname'], 1)]
# at this point the bond order is still 1 unless we have an expansion
recipes[branch_anchor[-1]] = [(mol_graph.nodes[prev_node]['fragname'], 1, 1)]

# here we check if the atom is followed by a cycle marker
# in this case we have an open cycle and close it
for token in pattern[stop:]:
# we close a cycle
if token.isdigit() and token in cycle:
cycle_edges.append((current, cycle[token]))
del cycle[token]
# we open a cycle
elif token.isdigit():
cycle[token] = current
# we found a ring indicator
elif token == "%":
ring_marker = _get_percent(pattern, stop)
# we close the ring
ring_marker = ""
multi_ring = False
ring_bond_order = default_bond_order
for rdx, token in enumerate(pattern[stop:]):
if multi_ring and not token.isdigit():
ring_marker = int(ring_marker[1:])
if ring_marker in cycle:
cycle_edges.append((current, cycle[ring_marker]))
cycle_edges.append((current,
cycle[ring_marker][0],
cycle[ring_marker][1]))
del cycle[ring_marker]
break
# we open a new ring
cycle[_get_percent(pattern, stop)] = current
break
else:
cycle[ring_marker] = [current, ring_bond_order]
multi_ring = False
ring_marker = ""
ring_bond_order = default_bond_order

# we open a new multi ring
if token == "%":
multi_ring = True
ring_marker = '%'
# we open a ring or close
elif token.isdigit():
ring_marker += token
if not multi_ring:
ring_marker = int(ring_marker)
# we have a single digit marker and it is in
# cycle so we close it
if ring_marker in cycle:
cycle_edges.append((current,
cycle[ring_marker][0],
cycle[ring_marker][1]))
del cycle[ring_marker]
# the marker is not in cycle so we update cycles
else:
cycle[ring_marker] = [current, ring_bond_order]
ring_marker = ""
ring_bond_order = default_bond_order
# we found bond_order
elif token in symbol_to_order:
ring_bond_order = symbol_to_order[token]
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
else:
break

# check if there is a bond-order following the node
if stop < len(pattern) and pattern[stop+rdx-1] in '- + . = # $':
bond_order = symbol_to_order[pattern[stop+rdx-1]]
else:
bond_order = default_bond_order

# here we check if the atom is followed by a expansion character '|'
# as in ... [#PEO]|
if stop < len(pattern) and pattern[stop] == '|':
Expand All @@ -189,24 +219,27 @@ def read_cgsmiles(pattern):
# the recipe dict together with the anchor residue
# and expansion number
if branching:
recipes[branch_anchor[-1]].append((fragname, n_mon))
recipes[branch_anchor[-1]].append((fragname, n_mon, prev_bond_order))

# new we add new residue as often as required
connection = []
for _ in range(0, n_mon):
mol_graph.add_node(current, fragname=fragname)

if prev_node is not None:
mol_graph.add_edge(prev_node, current, order=1)
mol_graph.add_edge(prev_node, current, order=prev_bond_order)

prev_bond_order = bond_order

# here we have a double edge
for cycle_edge in cycle_edges:
if cycle_edge in mol_graph.edges:
mol_graph.edges[cycle_edge]["order"] += 1
else:
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=1)
msg=("You define two edges between the same node."
"Use bond order symbols instead.")
raise SyntaxError(msg)
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=cycle_edge[2])

prev_node = current
current += 1
Expand Down Expand Up @@ -236,12 +269,19 @@ def read_cgsmiles(pattern):
eon_a = _find_next_character(pattern, [')'], stop)
# Then we check if the expansion character
# is next.
if eon_a+1 < len(pattern) and pattern[eon_a+1] == "|":
if (eon_a+1 < len(pattern) and pattern[eon_a+1] == "|") or\
(eon_a+2 < len(pattern) and pattern[eon_a+2] == "|"):
if pattern[eon_a+2] == "|":
anchor_order = symbol_to_order[pattern[eon_a+1]]
recipe = recipes[prev_node][0]
recipes[prev_node][0] = (recipe[0], recipe[1], anchor_order)
eon_a += 1
# If there is one we find the beginning
# of the next branch, residue or end of the string
# As before all characters inbetween are a number that
# is how often the branch is expanded.
eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1)
next_characters = ['[', ')', '(', '}'] + list(symbol_to_order.keys())
eon_b = _find_next_character(pattern, next_characters, eon_a+1)
# the outermost loop goes over how often a the branch has to be
# added to the existing sequence
for idx in range(0,int(pattern[eon_a+2:eon_b])-1):
Expand Down Expand Up @@ -276,6 +316,13 @@ def read_cgsmiles(pattern):
prev_anchor = ref_anchor
# all branches added; then go back to the base anchor
prev_node = base_anchor
#================================================
# bond orders for after branches #
#================================================
if pattern[eon_b] in symbol_to_order:
prev_bond_order = symbol_to_order[pattern[eon_b]]
elif eon_a+1 < len(pattern) and pattern[eon_a+1] in symbol_to_order:
prev_bond_order = symbol_to_order[pattern[eon_a+1]]
# if all branches are done we need to reset the lists
# when all nested branches are completed
if len(branch_anchor) == 0:
Expand Down
10 changes: 10 additions & 0 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import copy
import numpy as np
import networkx as nx
from .read_cgsmiles import read_cgsmiles
from .read_fragments import read_fragments
Expand Down Expand Up @@ -239,6 +240,15 @@ def resolve_disconnected_molecule(self, fragment_dict):
"""
for meta_node in self.meta_graph.nodes:
fragname = self.meta_graph.nodes[meta_node]['fragname']

# we have a virtual node and skip it
if fragname not in fragment_dict:
neighbors = self.meta_graph.neighbors(meta_node)
orders = [self.meta_graph.edges[(meta_node, neigh)]["order"] for neigh in neighbors]
if not all(np.array(orders) == 0):
raise SyntaxError(f"Found node #{fragname} but no corresponding fragment.")
continue

fragment = fragment_dict[fragname]
correspondence = merge_graphs(self.molecule, fragment)

Expand Down
2 changes: 1 addition & 1 deletion cgsmiles/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class MoleculeSampler:
descriptor is used to highlight the fact that the probabilistic behaviour is driven
by the bonding descriptors.

The sampler features a two level connectivity determination. First using the
The sampler features a two level connectivity determination. First using the
`polymer_reactivities` an open bonding descriptor from the growing polymer
molecule is selected according to the probabilities provided. Subsequently,
a fragment is randomly chosen that has a matching complementary bonding
Expand Down
75 changes: 73 additions & 2 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
[(0, 1), (1, 2)],
[1, 1]),
# smiple linear sequenece with multi-edge
("{[#PMA]1[#PEO]1}",
("{[#PMA]=[#PEO]}",
["PMA", "PEO"],
[(0, 1)],
[2]),
Expand All @@ -34,6 +34,41 @@
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 1]),
# smiple cycle sequence bond order to next
("{[#PMA]1=[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 1]),
# smiple cycle sequence bond order in cycle
("{[#PMA]=1[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 2]),
# smiple cycle sequence two bond orders
("{[#PMA].1=[#PEO][#PMA]1}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 0]),
# smiple cycle sequence with % bond order
("{[#PMA]=%123[#PEO][#PMA]%123}",
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 2]),
# smiple cycle mixed % and digit marker
("{[#PMA]=1[#PEO][#PMA]%01}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[1, 1, 2]),
# smiple cycle sequence with % bond order next
("{[#PMA]%123=[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[2, 1, 1]),
# smiple cycle sequence with % two bond orders
("{[#PMA]=%123.[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
[(0, 1), (1, 2), (0, 2)],
[0, 1, 2]),
# smiple cycle sequence with %
("{[#PMA]%123[#PEO][#PMA]%123}",
["PMA", "PEO", "PMA"],
Expand All @@ -57,7 +92,7 @@
# [1, 1, 1, 1, 1, 1, 1, 1]),
# smiple linear sequenece with multi-edge
# in cycle
("{[#PMA]12[#PMA][#PMA][#PEO]12}",
("{[#PMA]=1[#PMA][#PMA][#PEO]1}",
["PMA", "PMA", "PMA", "PEO"],
[(0, 1), (1, 2), (2, 3), (0, 3)],
[1, 1, 1, 2]),
Expand All @@ -70,6 +105,42 @@
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11)],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
# simple branch expension with bond orders
("{[#PMA]([#PEO][#PEO]=[#OHter])|3}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11)],
[1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2]),
# simple branch expension with bond orders
("{[#PMA].([#PEO][#PEO]=[#OHter])|3}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11)],
[0, 1, 2, 1, 0, 1, 2, 1, 0, 1, 2]),
# simple branch expension with bond orders
("{[#PMA]([#PEO][#PEO]=[#OHter])|3.[#E]}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter", "E"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11), (8, 12)],
[1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0]),
# not so simple branch expension with bond orders
("{[#PMA]([#PEO][#PEO]=[#OHter])$|3.[#E]}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter", "E"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11), (8, 12)],
[1, 1, 2, 4, 1, 1, 2, 4, 1, 1, 2, 0]),
# nested branched with expansion
("{[#PMA]([#PEO]|3)|2}",
["PMA", "PEO", "PEO", "PEO",
Expand Down
Loading
Loading