Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1] Squash opr #1

Merged
merged 20 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ def merge_graphs(source_graph, target_graph, max_node=None):
# We assume that the last id is always the largest.
last_node_idx = max_node
offset = last_node_idx
fragment_offset = source_graph.nodes[last_node_idx].get('fragid', 0)
fragment_offset = max(source_graph.nodes[last_node_idx].get('fragid', [0]))

correspondence = {}
for idx, node in enumerate(target_graph.nodes(), start=offset + 1):
correspondence[node] = idx
new_atom = copy.copy(target_graph.nodes[node])
new_atom['fragid'] = (new_atom.get('fragid', 0) + fragment_offset)
new_atom['fragid'] = [(new_atom.get('fragid', 0) + fragment_offset)]
source_graph.add_node(idx, **new_atom)

for node1, node2 in target_graph.edges:
Expand Down Expand Up @@ -95,23 +95,24 @@ def annotate_fragments(meta_graph, molecule):
"""
node_to_fragids = nx.get_node_attributes(molecule, 'fragid')

fragids = defaultdict(list)
for fragid, node in node_to_fragids.items():
fragids[fragid].append(node)
fragid_to_node = defaultdict(list)
for node, fragids in node_to_fragids.items():
for fragid in fragids:
fragid_to_node[fragid].append(node)

for meta_node in meta_graph.nodes:
# adding node to the fragment graph
graph_frag = nx.Graph()
for node in fragids[meta_node]:
for node in fragid_to_node[meta_node]:
attrs = molecule.nodes[node]
graph_frag.add_node(node, **attrs)

# adding the edges
# this is slow but OK; we always assume that the fragment
# is much, much smaller than the full-blown graph
combinations = itertools.combinations(fragids[meta_node], r=2)
combinations = itertools.combinations(fragid_to_node[meta_node], r=2)
for a, b in combinations:
if molecule.has_edge(a, b):
graph.add_edge(a, b)
graph_frag.add_edge(a, b)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

return meta_graph
2 changes: 1 addition & 1 deletion cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def strip_bonding_descriptors(fragment_string):
for token in smile_iter:
if token == '[':
peek = next(smile_iter)
if peek in ['$', '>', '<']:
if peek in ['$', '>', '<', '!']:
bond_descrp = peek
peek = next(smile_iter)
while peek != ']':
Expand Down
52 changes: 49 additions & 3 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def resolve_disconnected_molecule(self):
new_node = correspondence[node]
attrs = self.molecule.nodes[new_node]
graph_frag.add_node(correspondence[node], **attrs)
nx.set_node_attributes(graph_frag, meta_node, 'fragid')
nx.set_node_attributes(graph_frag, [meta_node], 'fragid')

for a, b in fragment.edges:
new_a = correspondence[a]
Expand All @@ -151,7 +151,6 @@ def resolve_disconnected_molecule(self):

self.meta_graph.nodes[meta_node]['graph'] = graph_frag


def edges_from_bonding_descrpt(self):
"""
Make edges according to the bonding descriptors stored
Expand Down Expand Up @@ -185,6 +184,51 @@ def edges_from_bonding_descrpt(self):
order = 1
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)

def squash_atoms(self):
    """
    Apply the squash operator to ``self.molecule``.

    Every edge whose first bonding descriptor starts with ``!`` joins
    two atoms that have to be merged into one. For such an edge the
    first node is kept and the second node is removed:

    * hydrogen neighbours of the removed node are removed with it;
    * all its other neighbours are bonded to the kept node;
    * the ``fragid`` of each removed node is appended to the ``fragid``
      of its counterpart (the kept node, or one of its hydrogens).

    If at least one pair of atoms was squashed, ``self.meta_graph`` is
    re-annotated with ``annotate_fragments`` so that the fragment
    graphs reflect the modified molecule.
    """
    squashed = False
    bondings = nx.get_edge_attributes(self.molecule, 'bonding')
    for edge, bonding in bondings.items():
        # only edges created by a squash operator are handled here
        if not bonding[0].startswith('!'):
            continue

        kept, dropped = edge[0], edge[1]

        # find all hydrogens to remove and which atoms to reconnect
        nodes_to_remove = [dropped]
        new_edge_nodes = []
        for neighbor in self.molecule.neighbors(dropped):
            if self.molecule.nodes[neighbor].get('element', 'Nan') == 'H':
                nodes_to_remove.append(neighbor)
            elif neighbor != kept:
                new_edge_nodes.append(neighbor)

        # detach the dropped atom from the kept atom and its hydrogens
        self.molecule.remove_edge(*edge)
        for hnode in nodes_to_remove[1:]:
            self.molecule.remove_edge(dropped, hnode)

        # the kept atom takes over the remaining bonds
        for neighbor in new_edge_nodes:
            self.molecule.add_edge(kept, neighbor)

        # find the reference atoms: the kept atom and its hydrogens
        nodes_to_keep = [kept]
        for neighbor in self.molecule.neighbors(kept):
            if self.molecule.nodes[neighbor].get('element', 'Nan') == 'H':
                nodes_to_keep.append(neighbor)

        # remove the squashed node and hydrogen atoms
        # NOTE(review): zip stops at the shorter list, so hydrogens of
        # the dropped atom without a counterpart on the kept atom are
        # not removed here -- confirm this is intended.
        for ref_node, node in zip(nodes_to_keep, nodes_to_remove):
            other_fragid = self.molecule.nodes[node]['fragid']
            self.molecule.remove_node(node)
            ref_attrs = self.molecule.nodes[ref_node]
            # Build a new list instead of extending in place: the
            # fragid list object may be shared with other nodes, and
            # an in-place ``+=`` would change their fragid as well.
            ref_attrs['fragid'] = list(ref_attrs['fragid']) + list(other_fragid)
        squashed = True

    if squashed:
        self.meta_graph = annotate_fragments(self.meta_graph,
                                             self.molecule)

def replace_unconsumed_bonding_descrpt(self):
"""
We allow multiple bonding descriptors per atom, which
Expand All @@ -203,12 +247,13 @@ def replace_unconsumed_bonding_descrpt(self):
attrs = {attr: graph.nodes[node][attr] for attr in ['fragname', 'fragid']}
attrs['element'] = 'H'
for _ in range(0, hcount):
new_node = len(self.molecule.nodes) + 1
new_node = len(self.molecule.nodes) #+ 1
graph.add_edge(node, new_node)
attrs['atomname'] = "H" + str(len(graph.nodes)-1)
graph.nodes[new_node].update(attrs)
self.molecule.add_edge(node, new_node, order=1)
self.molecule.nodes[new_node].update(attrs)

# now we want to sort the atoms
sort_nodes(self.molecule)
# and redo the meta molecule
Expand All @@ -227,4 +272,5 @@ def resolve(self):
if self.all_atom:
self.replace_unconsumed_bonding_descrpt()

self.squash_atoms()
return self.meta_graph, self.molecule
27 changes: 22 additions & 5 deletions cgsmiles/tests/test_molecule_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
[('OHter', 'O H'), ('PEO', 'C O C H H H H'),
('PEO', 'C O C H H H H'), ('OHter', 'O H')],
[(0, 1), (0, 2), (2, 3), (2, 5), (2, 10), (3, 4),
(4, 6), (4, 7), (4, 17), (8, 9), (8, 11), (8, 14),
(8, 18), (9, 10), (10, 12), (10, 13), (14, 15)]),
(4, 6), (4, 7), (4, 16), (8, 9), (8, 11), (8, 14),
(8, 17), (9, 10), (10, 12), (10, 13), (14, 15)]),
# simple branched sequence
("{[#Hter][#PE]([#PEO][#Hter])[#PE]([#PEO][#Hter])[#Hter]}.{#Hter=[$]H,#PE=[$]CC[$][$],#PEO=[$]COC[$]}",
[('Hter', 'H'), ('PE', 'C C H H H'), ('PEO', 'C O C H H H H'), ('Hter', 'H'),
Expand Down Expand Up @@ -107,10 +107,26 @@ def test_generate_edge(bonds_source, bonds_target, edge, btypes):
# ('PE', 'C C H H H H H'), ('PE', 'C C H H H H'), ('PE', 'C C H H H'),
# ('PE', 'C C H H H H'), ('PE', 'C C H H H H H'), ('PE', 'C C H H H H H')]
# [,
# (

# (
# simple squash operator; no unconsumed operators
("{[#A][#B]}.{#A=OC[!],#B=[!]CC}",
# 0 1 2 3 4 1 5 3 4 6 7 8
# 0 1 2 3 4 5 6 7 8 9 10 11
[('A', 'O C H H H'), ('B', 'C C H H H H H'),],
[(0, 1), (0, 2), (1, 3), (1, 4), (1, 6), (6, 9), (6, 10),
(6, 11),]),
# simple squash operator; unconsumed operators
("{[#A][#B]}.{#A=OC[!],#B=[$][!]CC}",
# 0 1 2 3 4 1 5 3 4 6 7 8
# 0 1 2 3 4 5 6 7 11 8 9 10
# note that the unconsumed $ triggers the rebuild of a hydrogen,
# which, however, is appended to the end of the molecule,
# giving it index 11
[('A', 'O C H H H'), ('B', 'C C H H H H H'),],
[(0, 1), (0, 2), (1, 3), (1, 4), (1, 6), (6, 8), (6, 9),
(6, 10),]),
))
def test_def_big_smile_parser(smile, ref_nodes, ref_edges):
def test_resolve_molecule(smile, ref_nodes, ref_edges):
meta_mol, molecule = MoleculeResolver(smile).resolve()
# nx.draw_networkx(meta_mol.molecule, with_labels=True, labels=nx.get_node_attributes(meta_mol.molecule, 'element'))
# plt.show()
Expand All @@ -120,4 +136,5 @@ def test_def_big_smile_parser(smile, ref_nodes, ref_edges):
elements = nx.get_node_attributes(block_graph, 'element') #.values())
sorted_elements = [elements[key] for key in sorted(elements)]
assert sorted_elements == ref[1].split()
print(molecule.edges)
assert sorted(molecule.edges) == sorted(ref_edges)
Loading