Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1] Squash opr #1

Merged
merged 20 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 20 additions & 21 deletions cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ def merge_graphs(source_graph, target_graph, max_node=None):
# We assume that the last id is always the largest.
last_node_idx = max_node
offset = last_node_idx
fragment_offset = source_graph.nodes[last_node_idx].get('fragid', 0)
fragment_offset = max(source_graph.nodes[last_node_idx].get('fragid', [0])) + 1

correspondence = {}
for idx, node in enumerate(target_graph.nodes(), start=offset + 1):
correspondence[node] = idx
new_atom = copy.copy(target_graph.nodes[node])
new_atom['fragid'] = (new_atom.get('fragid', 0) + fragment_offset)
new_atom['fragid'] = [(new_atom.get('fragid', 0) + fragment_offset)]
source_graph.add_node(idx, **new_atom)

for node1, node2 in target_graph.edges:
Expand All @@ -53,31 +53,27 @@ def merge_graphs(source_graph, target_graph, max_node=None):

return correspondence

def sort_nodes(graph, sortby_attrs=("fragid", "atomid"), target_attr=None):
def sort_nodes_by_attr(graph, sort_attr="fragid"):
"""
Sort nodes in graph by multiple attributes.
Sort nodes in graph by attribute and relable the graph in place.

Parameters
----------
graph: :class:`nx.Graph`
the graph to sort nodes of
sortby_attrs: tuple(str)
the attributes and in which order to sort
target_attr: `abc.hashable`
if not None indices are assigned to this
attribute starting at 1
sort_attr: `abc.hashable`
the attribute to use for sorting

Returns
-------
nx.Graph
graph with nodes sorted in correct order
"""
node_order = sorted(graph, key=partial(_keyfunc, graph, attrs=sortby_attrs))
for new_idx, node_key in enumerate(node_order, 1):
# graph._node.move_to_end(node_key)
if target_attr is not None:
graph.nodes[node_key][target_attr] = new_idx
return graph
attr_values = nx.get_node_attributes(graph, sort_attr)
sorted_ids = sorted(attr_values, key=lambda item: (attr_values[item], item))
mapping = {old: new for new, old in enumerate(sorted_ids)}
new_graph = nx.relabel_nodes(graph, mapping, copy=True)
return new_graph

def _keyfunc(graph, node_idx, attrs):
"""
Expand All @@ -95,23 +91,26 @@ def annotate_fragments(meta_graph, molecule):
"""
node_to_fragids = nx.get_node_attributes(molecule, 'fragid')

fragids = defaultdict(list)
for fragid, node in node_to_fragids.items():
fragids[fragid].append(node)
fragid_to_node = defaultdict(list)
for node, fragids in node_to_fragids.items():
for fragid in fragids:
fragid_to_node[fragid].append(node)

for meta_node in meta_graph.nodes:
# adding node to the fragment graph
graph_frag = nx.Graph()
for node in fragids[meta_node]:
for node in fragid_to_node[meta_node]:
attrs = molecule.nodes[node]
graph_frag.add_node(node, **attrs)

# adding the edges
# this is slow but OK; we always assume that the fragment
# is much much smaller than the fullblown graph
combinations = itertools.combinations(fragids[meta_node], r=2)
combinations = itertools.combinations(fragid_to_node[meta_node], r=2)
for a, b in combinations:
if molecule.has_edge(a, b):
graph.add_edge(a, b)
graph_frag.add_edge(a, b)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

meta_graph.nodes[meta_node]['graph'] = graph_frag

return meta_graph
58 changes: 58 additions & 0 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import pysmiles

VALENCES = pysmiles.smiles_helper.VALENCES
VALENCES.update({"H": (1,)})

def rebuild_h_atoms(mol_graph, keep_bonding=False):
"""
Helper function which add hydrogen atoms to the molecule graph.

First the hcount attribute produced by pysmiles is updated, because
fragments have no bonds at time of reading so pysmiles does not
know the connectivity. Hence the hcount is redone based on the
actual connectivity of the final molecule.

This function also makes sure to tag single hydrogen fragments,
in order to not merge them with adjecent fragments. Otherwise,
explicit single hydrogen residues would not be possible.

The molecule graph is updated in place with the hydrogen atoms
that are missing.

Using the keep_bonding argument the hydrogen count is reduced
by the number of bonding descriptors. In this way hydrogen
atoms can also be added to fragments only.

Parameters
----------
mol_graph: :class:`nx.Graph`
graph describing the full molecule without hydrogen atoms
"""
for node in mol_graph.nodes:
if mol_graph.nodes[node].get('bonding', False):
# get the degree
ele = mol_graph.nodes[node]['element']
# hcount is the valance minus the degree minus
# the number of bonding descriptors
bonds = round(sum([mol_graph.edges[(node, neigh)]['order'] for neigh in\
mol_graph.neighbors(node)]))
charge = mol_graph.nodes[node].get('charge', 0)
hcount = pysmiles.smiles_helper._valence(mol_graph, node, minimum=0) -\
bonds +\
charge
# in this case we only rebuild hydrogen atoms that are not
# replaced by bonding operators.
if keep_bonding:
hcount -= len(mol_graph.nodes[node]['bonding'])

mol_graph.nodes[node]['hcount'] = hcount
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
if ele == "H":
mol_graph.nodes[node]['single_h_frag'] = True
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
for node in mol_graph.nodes:
if mol_graph.nodes[node].get("element", "*") == "H" and\
not mol_graph.nodes[node].get("single_h_frag", False):
ref_node = next(mol_graph.neighbors(node))
mol_graph.nodes[node]["fragid"] = mol_graph.nodes[ref_node]["fragid"]
mol_graph.nodes[node]["fragname"] = mol_graph.nodes[ref_node]["fragname"]
83 changes: 44 additions & 39 deletions cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,35 @@
import pysmiles
from .read_cgsmiles import read_cgsmiles

class PeekIter(object):
"""
Custom iter that allows looking ahead, without
advancing the actual iter.
"""
def __init__(self, collection):
self.collection = collection
self.index = 0

def __next__(self):
try:
result = self.collection[self.index]
self.index += 1
except IndexError:
raise StopIteration
return result

def peek(self):
try:
result = self.collection[self.index]
except IndexError:
return ""
return result

def __iter__(self):
self.index = 0
return self
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved


def strip_bonding_descriptors(fragment_string):
"""
Processes a CGBigSmile fragment string by
Expand All @@ -27,21 +56,30 @@ def strip_bonding_descriptors(fragment_string):
a dict mapping bonding descriptors
to the nodes within the string
"""
smile_iter = iter(fragment_string)
bond_to_order = {'-': 1, '=': 2, '#': 3, '$': 4, ':': 1.5, '.': 0}
smile_iter = PeekIter(fragment_string)
bonding_descrpt = defaultdict(list)
smile = ""
node_count = 0
prev_node = 0
current_order = None
for token in smile_iter:
if token == '[':
peek = next(smile_iter)
if peek in ['$', '>', '<']:
if peek in ['$', '>', '<', '!']:
bond_descrp = peek
peek = next(smile_iter)
while peek != ']':
bond_descrp += peek
peek = next(smile_iter)
bonding_descrpt[prev_node].append(bond_descrp)
if smile_iter.peek() in bond_to_order:
order = bond_to_order[next(smile_iter)]
elif current_order:
order = current_order
current_order = None
else:
order = 1
bonding_descrpt[prev_node].append(bond_descrp + str(order))
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
else:
atom = token
while peek != ']':
Expand All @@ -59,47 +97,16 @@ def strip_bonding_descriptors(fragment_string):
elif token == ')':
prev_node = anchor
smile += token
elif token in bond_to_order:
current_order = bond_to_order[token]
else:
if token not in '] H @ . - = # $ : / \\ + - %'\
if token not in '] H @ $ / \\ + - %'\
and not token.isdigit():
prev_node = node_count
node_count += 1
smile += token
return smile, bonding_descrpt

def _rebuild_h_atoms(mol_graph):
# special hack around to fix
# pysmiles bug for a single
# atom molecule; we assume that the
# hcount is just wrong and set it to
# the valance number minus bonds minus
# bonding connectors
if len(mol_graph.nodes) == 1:
ele = mol_graph.nodes[0]['element']
# for N and P we assume the regular valency
hcount = pysmiles.smiles_helper.VALENCES[ele][0]
if mol_graph.nodes[0].get('bonding', False):
hcount -= 1
mol_graph.nodes[0]['hcount'] = hcount
else:
for node in mol_graph.nodes:
if mol_graph.nodes[node].get('bonding', False):
# get the degree
ele = mol_graph.nodes[node]['element']
# hcount is the valance minus the degree minus
# the number of bonding descriptors
bonds = round(sum([mol_graph.edges[(node, neigh)]['order'] for neigh in\
mol_graph.neighbors(node)]))
charge = mol_graph.nodes[node].get('charge', 0)
hcount = pysmiles.smiles_helper.VALENCES[ele][0] -\
bonds -\
len(mol_graph.nodes[node]['bonding']) +\
charge
mol_graph.nodes[node]['hcount'] = hcount

pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
return mol_graph

def fragment_iter(fragment_str, all_atom=True):
"""
Iterates over fragments defined in a CGBigSmile string.
Expand Down Expand Up @@ -137,8 +144,6 @@ def fragment_iter(fragment_str, all_atom=True):
elif all_atom:
mol_graph = pysmiles.read_smiles(smile)
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
# we need to rebuild hydrogen atoms now
_rebuild_h_atoms(mol_graph)
# we deal with a CG resolution graph
else:
mol_graph = read_cgsmiles(smile)
Expand Down
Loading
Loading