Skip to content

Commit

Permalink
Merge pull request #12 from gruenewald-lab/rings_new
Browse files Browse the repository at this point in the history
Rings new
  • Loading branch information
fgrunewald authored Jul 8, 2024
2 parents 56752fe + 70fec21 commit ca10d4f
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 105 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:

strategy:
matrix:
py_version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
py_version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v2
Expand All @@ -28,6 +28,7 @@ jobs:
run: |
pip install --upgrade setuptools pip
pip install --upgrade .
pip install git+https://github.com/pckroon/pysmiles.git
pip install -r requirements-tests.txt
- name: Run pytest with codecoverage
Expand Down
40 changes: 18 additions & 22 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import networkx as nx
import pysmiles

VALENCES = pysmiles.smiles_helper.VALENCES
VALENCES.update({"H": (1,)})

def rebuild_h_atoms(mol_graph, keep_bonding=False):
"""
Helper function which add hydrogen atoms to the molecule graph.
Expand All @@ -29,27 +27,25 @@ def rebuild_h_atoms(mol_graph, keep_bonding=False):
graph describing the full molecule without hydrogen atoms
"""
for node in mol_graph.nodes:
if mol_graph.nodes[node].get('bonding', False):
# get the degree
ele = mol_graph.nodes[node]['element']
# hcount is the valance minus the degree minus
# the number of bonding descriptors
bonds = round(sum([mol_graph.edges[(node, neigh)]['order'] for neigh in\
mol_graph.neighbors(node)]))
charge = mol_graph.nodes[node].get('charge', 0)
hcount = pysmiles.smiles_helper._valence(mol_graph, node, minimum=0) -\
bonds +\
charge
# in this case we only rebuild hydrogen atoms that are not
# replaced by bonding operators.
if keep_bonding:
hcount -= len(mol_graph.nodes[node]['bonding'])

mol_graph.nodes[node]['hcount'] = hcount
if ele == "H":
mol_graph.nodes[node]['single_h_frag'] = True
if mol_graph.nodes[node].get('aromatic', False):
mol_graph.nodes[node]['hcount'] = 0

if mol_graph.nodes[node].get('bonding', False) and \
mol_graph.nodes[node].get('element', '*') == "H":
mol_graph.nodes[node]['single_h_frag'] = True

for edge in mol_graph.edges:
if mol_graph.edges[edge]['order'] == 1.5:
mol_graph.edges[edge]['order'] = 1

pysmiles.smiles_helper.mark_aromatic_atoms(mol_graph, strict=False)
pysmiles.smiles_helper.mark_aromatic_edges(mol_graph)

nx.set_node_attributes(mol_graph, 0, 'hcount')

pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)
pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)

for node in mol_graph.nodes:
if mol_graph.nodes[node].get("element", "*") == "H" and\
not mol_graph.nodes[node].get("single_h_frag", False):
Expand Down
62 changes: 47 additions & 15 deletions cgsmiles/read_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import numpy as np
import networkx as nx

PATTERNS = {"bond_anchor": "\[\$.*?\]",
"place_holder": "\[\#.*?\]",
"annotation": "\|.*?\|",
PATTERNS = {"bond_anchor": r"\[\$.*?\]",
"place_holder": r"\[\#.*?\]",
"annotation": r"\|.*?\|",
"fragment": r'#(\w+)=((?:\[.*?\]|[^,\[\]]+)*)',
"seq_pattern": r'\{([^}]*)\}(?:\.\{([^}]*)\})?'}

Expand Down Expand Up @@ -45,14 +45,18 @@ def _expand_branch(mol_graph, current, anchor, recipe):
anchor = current
for _ in range(0, n_mon):
mol_graph.add_node(current, fragname=fragname)
mol_graph.add_edge(prev_node, current)
mol_graph.add_edge(prev_node, current, order=1)

prev_node = current
current += 1

prev_node = anchor
return mol_graph, current, prev_node

def _get_percent(pattern, stop):
end_num = _find_next_character(pattern, ['[', ')', '(', '}'], stop)
return pattern[stop+1:end_num]

def read_cgsmiles(pattern):
"""
Generate a :class:`nx.Graph` from a pattern string according to the
Expand Down Expand Up @@ -126,7 +130,7 @@ def read_cgsmiles(pattern):
branching = False
# do we have an open cycle
cycle = {}
cycle_edge = None
cycle_edges = []
# each element in the for loop matches a pattern
# '[' + '#' + some alphanumeric name + ']'
for match in re.finditer(PATTERNS['place_holder'], pattern):
Expand All @@ -142,12 +146,28 @@ def read_cgsmiles(pattern):

# here we check if the atom is followed by a cycle marker
# in this case we have an open cycle and close it
if stop < len(pattern) and pattern[stop].isdigit() and pattern[stop] in cycle:
cycle_edge = (current, cycle[pattern[stop]])
# we open a cycle
elif stop < len(pattern) and pattern[stop].isdigit():
cycle_edge = None
cycle[pattern[stop]] = current
for token in pattern[stop:]:
# we close a cycle
if token.isdigit() and token in cycle:
cycle_edges.append((current, cycle[token]))
del cycle[token]
# we open a cycle
elif token.isdigit():
cycle[token] = current
# we found a ring indicator
elif token == "%":
ring_marker = _get_percent(pattern, stop)
# we close the ring
if ring_marker in cycle:
cycle_edges.append((current, cycle[ring_marker]))
del cycle[ring_marker]
break
# we open a new ring
cycle[_get_percent(pattern, stop)] = current
break
else:
break

# here we check if the atom is followed by a expansion character '|'
# as in ... [#PEO]|
if stop < len(pattern) and pattern[stop] == '|':
Expand Down Expand Up @@ -177,15 +197,21 @@ def read_cgsmiles(pattern):
mol_graph.add_node(current, fragname=fragname)

if prev_node is not None:
mol_graph.add_edge(prev_node, current)
mol_graph.add_edge(prev_node, current, order=1)

if cycle_edge:
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1])
# here we have a double edge
for cycle_edge in cycle_edges:
if cycle_edge in mol_graph.edges:
mol_graph.edges[cycle_edge]["order"] += 1
else:
mol_graph.add_edge(cycle_edge[0],
cycle_edge[1],
order=1)

prev_node = current
current += 1

cycle_edges = []
# here we check if the residue considered before is the
# last residue of a branch (i.e. '...[#residue])'
# that is the case if the branch closure comes before
Expand Down Expand Up @@ -254,4 +280,10 @@ def read_cgsmiles(pattern):
# when all nested branches are completed
if len(branch_anchor) == 0:
recipes = defaultdict(list)

# raise some errors for strange stuff
if cycle:
msg = "You have a dangling ring index."
raise SyntaxError(msg)

return mol_graph
31 changes: 17 additions & 14 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,21 @@ def __iter__(self):

def strip_bonding_descriptors(fragment_string):
"""
Processes a CGBigSmile fragment string by
Processes a CGSmiles fragment string by
stripping the bonding descriptors and storing
them in a dict with reference to the atom they
refer to. Furthermore, a cleaned SMILE or CGsmile
refer to. Furthermore, a cleaned SMILES or CGSmiles
string is returned.
Parameters
----------
fragment_string: str
a CGBigsmile fragment string
a CGSmiles fragment string
Returns
-------
str:
a canonical SMILES or CGsmiles string
a canonical SMILES or CGSmiles string
dict:
a dict mapping bonding descriptors
to the nodes within the string
Expand All @@ -73,7 +73,7 @@ def strip_bonding_descriptors(fragment_string):
while peek != ']':
bond_descrp += peek
peek = next(smile_iter)
if smile_iter.peek() in bond_to_order:
if smile_iter.peek() in bond_to_order and node_count == 0:
order = bond_to_order[next(smile_iter)]
elif current_order:
order = current_order
Expand All @@ -87,8 +87,6 @@ def strip_bonding_descriptors(fragment_string):
atom += peek
peek = next(smile_iter)
smile = smile + atom + "]"
#if peek not in '] H @ . - = # $ : / \\ + - %'\
#and not token.isdigit():
prev_node = node_count
node_count += 1

Expand All @@ -100,12 +98,18 @@ def strip_bonding_descriptors(fragment_string):
smile += token
elif token in bond_to_order:
current_order = bond_to_order[token]
else:
if token not in '] H @ $ / \\ + - %'\
and not token.isdigit():
prev_node = node_count
node_count += 1
smile += token
elif token in '] H @ . - = # $ : / \\ + - %' or token.isdigit():
smile += token
else:
if smile_iter.peek() and token + smile_iter.peek() in ['Cl', 'Br', 'Si', 'Mg', 'Na']:
smile += (token + next(smile_iter))
else:
smile += token
current_order = None
prev_node = node_count
node_count += 1

return smile, bonding_descrpt

def fragment_iter(fragment_str, all_atom=True):
Expand Down Expand Up @@ -137,13 +141,12 @@ def fragment_iter(fragment_str, all_atom=True):
fragname = fragment[1:delim]
big_smile = fragment[delim+1:]
smile, bonding_descrpt = strip_bonding_descriptors(big_smile)

if smile == "H":
mol_graph = nx.Graph()
mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
elif all_atom:
mol_graph = pysmiles.read_smiles(smile)
mol_graph = pysmiles.read_smiles(smile, reinterpret_aromatic=False)
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
# we deal with a CG resolution graph
else:
Expand Down
37 changes: 22 additions & 15 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def compatible(left, right):
return left[1:] == right[1:]
return False

def generate_edge(source, target, bond_attribute="bonding"):
def match_bonding_descriptors(source, target, bond_attribute="bonding"):
"""
Given a source and a target graph, which have bonding
descriptors stored as node attributes, find a pair of
Expand Down Expand Up @@ -165,20 +165,27 @@ def edges_from_bonding_descrpt(self):
bonding descriptors that formed the edge. Later unconsumed
bonding descriptors are replaced by hydrogen atoms.
"""
for prev_node, node in nx.dfs_edges(self.meta_graph):
prev_graph = self.meta_graph.nodes[prev_node]['graph']
node_graph = self.meta_graph.nodes[node]['graph']
edge, bonding = generate_edge(prev_graph,
node_graph)

# remove used bonding descriptors
prev_graph.nodes[edge[0]]['bonding'].remove(bonding[0])
node_graph.nodes[edge[1]]['bonding'].remove(bonding[1])

# bonding descriptors are assumed to have bonding order 1
# unless they are specifically annotated
order = int(bonding[0][-1])
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
for prev_node, node in self.meta_graph.edges:
for _ in range(0, self.meta_graph.edges[(prev_node, node)]["order"]):
prev_graph = self.meta_graph.nodes[prev_node]['graph']
node_graph = self.meta_graph.nodes[node]['graph']
try:
edge, bonding = match_bonding_descriptors(prev_graph,
node_graph)
except LookupError:
continue
# remove used bonding descriptors
prev_graph.nodes[edge[0]]['bonding'].remove(bonding[0])
node_graph.nodes[edge[1]]['bonding'].remove(bonding[1])

# bonding descriptors are assumed to have bonding order 1
# unless they are specifically annotated
order = int(bonding[0][-1])
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
if self.all_atom:
for edge_node in edge:
if self.molecule.nodes[edge_node]['element'] != 'H':
self.molecule.nodes[edge_node]['hcount'] -= 1

def squash_atoms(self):
"""
Expand Down
Loading

0 comments on commit ca10d4f

Please sign in to comment.