Skip to content

Commit

Permalink
add bond orders in combination with branches, expansion operators, an…
Browse files Browse the repository at this point in the history
…d branch expansion
  • Loading branch information
fgrunewald committed Oct 15, 2024
1 parent 42cae7f commit c271bcc
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 7 deletions.
29 changes: 22 additions & 7 deletions cgsmiles/read_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,22 @@ def _expand_branch(mol_graph, current, anchor, recipe):
anchor: abc.hashable
anchor to which to connect current node
recipe: list[(str, int)]
recipe: list[(str, int, int)]
list storing tuples of node names and
the number of times the node has to be added
and their bond order
Returns
-------
nx.Graph
"""
prev_node = anchor
for bdx, (fragname, n_mon) in enumerate(recipe):
for bdx, (fragname, n_mon, order) in enumerate(recipe):
if bdx == 0:
anchor = current
for _ in range(0, n_mon):
mol_graph.add_node(current, fragname=fragname)
mol_graph.add_edge(prev_node, current, order=1)
mol_graph.add_edge(prev_node, current, order=order)

prev_node = current
current += 1
Expand Down Expand Up @@ -142,7 +143,8 @@ def read_cgsmiles(pattern):
branch_anchor.append(prev_node)
# the recipe for making the branch includes the anchor;
# which is hence the first residue in the list
recipes[branch_anchor[-1]] = [(mol_graph.nodes[prev_node]['fragname'], 1)]
# at this point the bond order is still 1 unless we have an expansion
recipes[branch_anchor[-1]] = [(mol_graph.nodes[prev_node]['fragname'], 1, 1)]

# here we check if the atom is followed by a cycle marker
# in this case we have an open cycle and close it
Expand Down Expand Up @@ -217,7 +219,7 @@ def read_cgsmiles(pattern):
# the recipe dict together with the anchor residue
# and expansion number
if branching:
recipes[branch_anchor[-1]].append((fragname, n_mon))
recipes[branch_anchor[-1]].append((fragname, n_mon, prev_bond_order))

# now we add the new residue as often as required
connection = []
Expand Down Expand Up @@ -267,12 +269,18 @@ def read_cgsmiles(pattern):
eon_a = _find_next_character(pattern, [')'], stop)
# Then we check if the expansion character
# is next.
if eon_a+1 < len(pattern) and pattern[eon_a+1] == "|":
if eon_a+1 < len(pattern) and (pattern[eon_a+1] == "|" or pattern[eon_a+2] == "|"):
if pattern[eon_a+2] == "|":
anchor_order = symbol_to_order[pattern[eon_a+1]]
recipe = recipes[prev_node][0]
recipes[prev_node][0] = (recipe[0], recipe[1], anchor_order)
eon_a += 1
# If there is one we find the beginning
# of the next branch, residue or end of the string
# As before, all characters in between form the number that
# says how often the branch is expanded.
eon_b = _find_next_character(pattern, ['[', ')', '(', '}'], eon_a+1)
next_characters = ['[', ')', '(', '}'] + list(symbol_to_order.keys())
eon_b = _find_next_character(pattern, next_characters, eon_a+1)
# the outermost loop goes over how often the branch has to be
# added to the existing sequence
for idx in range(0,int(pattern[eon_a+2:eon_b])-1):
Expand Down Expand Up @@ -307,6 +315,13 @@ def read_cgsmiles(pattern):
prev_anchor = ref_anchor
# all branches added; then go back to the base anchor
prev_node = base_anchor
#================================================
# bond orders for after branches #
#================================================
if pattern[eon_b] in symbol_to_order:
prev_bond_order = symbol_to_order[pattern[eon_b]]
elif pattern[eon_a+1] in symbol_to_order:
prev_bond_order = symbol_to_order[pattern[eon_a+1]]
# if all branches are done we need to reset the lists
# when all nested branches are completed
if len(branch_anchor) == 0:
Expand Down
36 changes: 36 additions & 0 deletions cgsmiles/tests/test_cgsmile_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,42 @@
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11)],
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
# simple branch expansion with bond orders
("{[#PMA]([#PEO][#PEO]=[#OHter])|3}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11)],
[1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2]),
# simple branch expansion with bond orders
("{[#PMA].([#PEO][#PEO]=[#OHter])|3}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11)],
[0, 1, 2, 1, 0, 1, 2, 1, 0, 1, 2]),
# simple branch expansion with bond orders
("{[#PMA]([#PEO][#PEO]=[#OHter])|3.[#E]}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter", "E"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11), (8, 12)],
[1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0]),
# not so simple branch expansion with bond orders
("{[#PMA]([#PEO][#PEO]=[#OHter])$|3.[#E]}",
["PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter",
"PMA", "PEO", "PEO", "OHter", "E"],
[(0, 1), (1, 2), (2, 3),
(0, 4), (4, 5), (5, 6), (6, 7),
(4, 8), (8, 9), (9, 10), (10, 11), (8, 12)],
[1, 1, 2, 4, 1, 1, 2, 4, 1, 1, 2, 0]),
# nested branches with expansion
("{[#PMA]([#PEO]|3)|2}",
["PMA", "PEO", "PEO", "PEO",
Expand Down

0 comments on commit c271bcc

Please sign in to comment.