Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rings new #12

Merged
merged 15 commits into from
Jul 8, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 18 additions & 22 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import networkx as nx
import pysmiles

VALENCES = pysmiles.smiles_helper.VALENCES
VALENCES.update({"H": (1,)})

def rebuild_h_atoms(mol_graph, keep_bonding=False):
"""
Helper function which add hydrogen atoms to the molecule graph.
Expand All @@ -29,27 +27,25 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
graph describing the full molecule without hydrogen atoms
"""
for node in mol_graph.nodes:
if mol_graph.nodes[node].get('bonding', False):
# get the degree
ele = mol_graph.nodes[node]['element']
# hcount is the valance minus the degree minus
# the number of bonding descriptors
bonds = round(sum([mol_graph.edges[(node, neigh)]['order'] for neigh in\
mol_graph.neighbors(node)]))
charge = mol_graph.nodes[node].get('charge', 0)
hcount = pysmiles.smiles_helper._valence(mol_graph, node, minimum=0) -\
bonds +\
charge
# in this case we only rebuild hydrogen atoms that are not
# replaced by bonding operators.
if keep_bonding:
hcount -= len(mol_graph.nodes[node]['bonding'])
Comment on lines -45 to -46
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems rather important, and it disappeared?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because this is actually rather tricky thing. How do you deal with aromatic bonding connectors?


mol_graph.nodes[node]['hcount'] = hcount
if ele == "H":
mol_graph.nodes[node]['single_h_frag'] = True
if mol_graph.nodes[node].get('aromatic', False):
mol_graph.nodes[node]['hcount'] = 0
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

if mol_graph.nodes[node].get('bonding', False) and \
mol_graph.nodes[node].get('element', '*') == "H":
mol_graph.nodes[node]['single_h_frag'] = True

for edge in mol_graph.edges:
if mol_graph.edges[edge]['order'] == 1.5:
mol_graph.edges[edge]['order'] = 1

pysmiles.smiles_helper.mark_aromatic_atoms(mol_graph, strict=False)
pysmiles.smiles_helper.mark_aromatic_edges(mol_graph)

nx.set_node_attributes(mol_graph, 0, 'hcount')

pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)
pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)

for node in mol_graph.nodes:
if mol_graph.nodes[node].get("element", "*") == "H" and\
not mol_graph.nodes[node].get("single_h_frag", False):
Expand Down
62 changes: 47 additions & 15 deletions cgsmiles/read_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import numpy as np
import networkx as nx

PATTERNS = {"bond_anchor": "\[\$.*?\]",
"place_holder": "\[\#.*?\]",
"annotation": "\|.*?\|",
PATTERNS = {"bond_anchor": r"\[\$.*?\]",
"place_holder": r"\[\#.*?\]",
"annotation": r"\|.*?\|",
"fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)',
"seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'}

Expand Down Expand Up @@ -45,14 +45,18 @@ def _expand_branch(mol_graph, current, anchor, recipe):
anchor = current
for _ in range(0, n_mon):
mol_graph.add_node(current, fragname=fragname)
mol_graph.add_edge(prev_node, current)
mol_graph.add_edge(prev_node, current, order=1)

prev_node = current
current += 1

prev_node = anchor
return mol_graph, current, prev_node

def _get_percent(pattern, stop):
end_num = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
return pattern[stop+1:end_num]

def read_cgsmiles(pattern):
"""
Generate a :class:`nx.Graph` from a pattern string according to the
Expand Down Expand Up @@ -126,7 +130,7 @@ def read_cgsmiles(pattern):
branching = False
# do we have an open cycle
cycle = {}
cycle_edge = None
cycle_edges = []
# each element in the for loop matches a pattern
# '[' + '#' + some alphanumeric name + ']'
for match in re.finditer(PATTERNS['place_holder'], pattern):
Expand All @@ -142,12 +146,28 @@ def read_cgsmiles(pattern):

# here we check if the atom is followed by a cycle marker
# in this case we have an open cycle and close it
if stop < len(pattern) and pattern[stop].isdigit() and pattern[stop] in cycle:
cycle_edge = (current, cycle[pattern[stop]])
# we open a cycle
elif stop < len(pattern) and pattern[stop].isdigit():
cycle_edge = None
cycle[pattern[stop]] = current
for token in pattern[stop:]:
# we close a cycle
if token.isdigit() and token in cycle:
cycle_edges.append((current, cycle[token]))
del cycle[token]
# we open a cycle
elif token.isdigit():
cycle[token] = current
# we found a ring indicator
elif token == "%":
ring_marker = _get_percent(pattern, stop)
# we close the ring
if ring_marker in cycle:
cycle_edges.append((current, cycle[ring_marker]))
del cycle[ring_marker]
break
# we open a new ring
cycle[_get_percent(pattern, stop)] = current
break
else:
break

# here we check if the atom is followed by a expansion character '|'
# as in ... [#PEO]|
if stop < len(pattern) and pattern[stop] == '|':
Expand Down Expand Up @@ -177,15 +197,21 @@ def read_cgsmiles(pattern):
mol_graph.add_node(current, fragname=fragname)

if prev_node is not None:
mol_graph.add_edge(prev_node, current)
mol_graph.add_edge(prev_node, current, order=1)

if cycle_edge:
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1])
# here we have a double edge
for cycle_edge in cycle_edges:
if cycle_edge in mol_graph.edges:
mol_graph.edges[cycle_edge]["order"] += 1
else:
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=1)

prev_node = current
current += 1

cycle_edges = []
# here we check if the residue considered before is the
# last residue of a branch (i.e. '...[#residue])'
# that is the case if the branch closure comes before
Expand Down Expand Up @@ -254,4 +280,10 @@ def read_cgsmiles(pattern):
# when all nested branches are completed
if len(branch_anchor) == 0:
recipes = defaultdict(list)

# raise some errors for strange stuff
if cycle:
msg = "You have a dangling ring index."
raise SyntaxError(msg)

return mol_graph
31 changes: 17 additions & 14 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,21 @@ def __iter__(self):

def strip_bonding_descriptors(fragment_string):
"""
Processes a CGBigSmile fragment string by
Processes a CGSmiles fragment string by
stripping the bonding descriptors and storing
them in a dict with reference to the atom they
refer to. Furthermore, a cleaned SMILE or CGsmile
refer to. Furthermore, a cleaned SMILES or CGSmiles
string is returned.

Parameters
----------
fragment_string: str
a CGBigsmile fragment string
a CGSmiles fragment string

Returns
-------
str:
a canonical SMILES or CGsmiles string
a canonical SMILES or CGSmiles string
dict:
a dict mapping bonding descriptors
to the nodes within the string
Expand All @@ -73,7 +73,7 @@ def strip_bonding_descriptors(fragment_string):
while peek != ']':
bond_descrp += peek
peek = next(smile_iter)
if smile_iter.peek() in bond_to_order:
if smile_iter.peek() in bond_to_order and node_count == 0:
order = bond_to_order[next(smile_iter)]
elif current_order:
order = current_order
Expand All @@ -87,8 +87,6 @@ def strip_bonding_descriptors(fragment_string):
atom += peek
peek = next(smile_iter)
smile = smile + atom + "]"
#if peek not in '] H @ . - = # $ : / \\ + - %'\
#and not token.isdigit():
prev_node = node_count
node_count += 1

Expand All @@ -100,12 +98,18 @@ def strip_bonding_descriptors(fragment_string):
smile += token
elif token in bond_to_order:
current_order = bond_to_order[token]
else:
if token not in '] H @ $ / \\ + - %'\
and not token.isdigit():
prev_node = node_count
node_count += 1
smile += token
elif token in '] H @ . - = # $ : / \\ + - %' or token.isdigit():
smile += token
else:
if smile_iter.peek() and token + smile_iter.peek() in ['Cl', 'Br', 'Si', 'Mg', 'Na']:
smile += (token + next(smile_iter))
else:
smile += token
current_order = None
prev_node = node_count
node_count += 1

return smile, bonding_descrpt

def fragment_iter(fragment_str, all_atom=True):
Expand Down Expand Up @@ -137,13 +141,12 @@ def fragment_iter(fragment_str, all_atom=True):
fragname = fragment[1:delim]
big_smile = fragment[delim+1:]
smile, bonding_descrpt = strip_bonding_descriptors(big_smile)

if smile == "H":
mol_graph = nx.Graph()
mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
elif all_atom:
mol_graph = pysmiles.read_smiles(smile)
mol_graph = pysmiles.read_smiles(smile, reinterpret_aromatic=False)
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
# we deal with a CG resolution graph
else:
Expand Down
37 changes: 22 additions & 15 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def compatible(left, right):
return left[1:] == right[1:]
return False

def generate_edge(source, target, bond_attribute="bonding"):
def match_bonding_descriptors(source, target, bond_attribute="bonding"):
"""
Given a source and a target graph, which have bonding
descriptors stored as node attributes, find a pair of
Expand Down Expand Up @@ -165,20 +165,27 @@ def edges_from_bonding_descrpt(self):
bonding descriptors that formed the edge. Later unconsumed
bonding descriptors are replaced by hydrogen atoms.
"""
for prev_node, node in nx.dfs_edges(self.meta_graph):
prev_graph = self.meta_graph.nodes[prev_node]['graph']
node_graph = self.meta_graph.nodes[node]['graph']
edge, bonding = generate_edge(prev_graph,
node_graph)

# remove used bonding descriptors
prev_graph.nodes[edge[0]]['bonding'].remove(bonding[0])
node_graph.nodes[edge[1]]['bonding'].remove(bonding[1])

# bonding descriptors are assumed to have bonding order 1
# unless they are specifically annotated
order = int(bonding[0][-1])
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
for prev_node, node in self.meta_graph.edges:
for _ in range(0, self.meta_graph.edges[(prev_node, node)]["order"]):
prev_graph = self.meta_graph.nodes[prev_node]['graph']
node_graph = self.meta_graph.nodes[node]['graph']
try:
edge, bonding = match_bonding_descriptors(prev_graph,
node_graph)
except LookupError:
continue
# remove used bonding descriptors
prev_graph.nodes[edge[0]]['bonding'].remove(bonding[0])
node_graph.nodes[edge[1]]['bonding'].remove(bonding[1])

# bonding descriptors are assumed to have bonding order 1
# unless they are specifically annotated
order = int(bonding[0][-1])
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
if self.all_atom:
for edge_node in edge:
if self.molecule.nodes[edge_node]['element'] != 'H':
self.molecule.nodes[edge_node]['hcount'] -= 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.molecule.nodes[edge_node]['hcount'] -= 1
self.molecule.nodes[edge_node]['hcount'] -= order

?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this works is the short answer; having the order there does not presumably due to aromatic bond orders


def squash_atoms(self):
"""
Expand Down
Loading
Loading