Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[2] Aromatic fragments #5

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def merge_graphs(source_graph, target_graph, max_node=None):
for node1, node2 in target_graph.edges:
if correspondence[node1] != correspondence[node2]:
attrs = target_graph.edges[(node1, node2)]
print(attrs)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
print(attrs)

source_graph.add_edge(correspondence[node1], correspondence[node2], **attrs)

return correspondence
Expand Down
43 changes: 43 additions & 0 deletions cgsmiles/pysmiles_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,49 @@
VALENCES = pysmiles.smiles_helper.VALENCES
VALENCES.update({"H": (1,)})

def _smiles_node_iter(smiles_str):
"""
Iterate over all nodes in SMILES string and return
the index of the node.
"""
organic_subset = 'B C N O P S F Cl Br I * b c n o s p'.split()
batom = False
for idx, node in enumerate(smiles_str):
if node == '[':
batom = True
start = idx

if node == ']' and batom:
stop = idx+1
batom = False
yield start, stop

if node in organic_subset and not batom:
yield idx, idx + 1
Comment on lines +11 to +24
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This won't work for multi-letter organic atoms, such as Br!

Copy link
Collaborator Author

@fgrunewald fgrunewald May 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed; hence PR #4 🤣


def strip_aromatic_nodes(smiles_str):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like the name of the function very much, since it deals with the smiles string, and not with graph nodes

"""
Find all aromatic nodes and change them to lower
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Find all aromatic nodes and change them to lower
Find all aromatic nodes and change them to upper

case but keep a mapping of changed nodes.
"""
aromatic_shorthand = 'b c n o s p'.split()
aromatic_atoms = {}
nodes_iter = _smiles_node_iter(smiles_str)
cleaned_str = ""
prev_stop = 0
for idx, (start, stop) in enumerate(nodes_iter):
if smiles_str[start] in aromatic_shorthand:
aromatic_atoms[idx] = True
cleaned_str += smiles_str[prev_stop:start] + smiles_str[start:stop].upper()
else:
aromatic_atoms[idx] = False
cleaned_str += smiles_str[prev_stop:stop]
prev_stop = stop

cleaned_str += smiles_str[prev_stop:]
return aromatic_atoms, cleaned_str


def rebuild_h_atoms(mol_graph, keep_bonding=False):
"""
Helper function which add hydrogen atoms to the molecule graph.
Expand Down
21 changes: 20 additions & 1 deletion cgsmiles/read_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
import networkx as nx
import pysmiles
from .read_cgsmiles import read_cgsmiles
from .pysmiles_utils import strip_aromatic_nodes

def mark_aromatic_edges(graph):
for edge in graph.edges:
if graph.nodes[edge[0]].get("aromatic", False) and\
graph.nodes[edge[1]].get("aromatic", False):
graph.edges[edge]["order"] = 1.5
return graph

def strip_bonding_descriptors(fragment_string):
"""
Expand Down Expand Up @@ -102,7 +110,18 @@ def fragment_iter(fragment_str, all_atom=True):
mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0])
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
elif all_atom:
mol_graph = pysmiles.read_smiles(smile)
try:
mol_graph = pysmiles.read_smiles(smile)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
mol_graph = pysmiles.read_smiles(smile)
mol_graph = pysmiles.read_smiles(smile, reinterpret_aromatic=False)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, not. The error is raised already at the parsing stage of the cgsmiles string. You categorically do not allow non-cyclic aromatics. We could modify pysmiles though to allow this if 'reinterpret_aromatic' is set to False

# we have non-ring aromitic fragments that need to be handled
# a bit hacky
except ValueError:
arom_nodes, smile = strip_aromatic_nodes(smile)
mol_graph = pysmiles.read_smiles(smile)
# overwrite the aromaticity assignment
nx.set_node_attributes(mol_graph, arom_nodes, "aromatic")
# set the bond order for the aromatic edges
mol_graph = mark_aromatic_edges(mol_graph)

nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
# we deal with a CG resolution graph
else:
Expand Down
11 changes: 8 additions & 3 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import networkx as nx
import pysmiles
from .read_cgsmiles import read_cgsmiles
from .read_fragments import read_fragments
from .read_fragments import read_fragments, mark_aromatic_edges
from .graph_utils import merge_graphs, sort_nodes_by_attr, annotate_fragments
from .pysmiles_utils import rebuild_h_atoms

Expand Down Expand Up @@ -165,7 +165,8 @@ def edges_from_bonding_descrpt(self):
bonding descriptors that formed the edge. Later unconsumed
bonding descriptors are replaced by hydrogen atoms.
"""
for prev_node, node in nx.dfs_edges(self.meta_graph):
for prev_node, node in self.meta_graph.edges:
print(prev_node, node)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
print(prev_node, node)

prev_graph = self.meta_graph.nodes[prev_node]['graph']
node_graph = self.meta_graph.nodes[node]['graph']
edge, bonding = generate_edge(prev_graph,
Expand All @@ -177,9 +178,12 @@ def edges_from_bonding_descrpt(self):

# bonding descriptors are assumed to have bonding order 1
# unless they are specifically annotated
order = re.findall("\d+\.\d+", bonding[0])
order = re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", bonding[0])
print(order)
Comment on lines +181 to +182
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
order = re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", bonding[0])
print(order)
order = re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", bonding[0])

Cool, this got more complicated. At least needs a comment, but we should probably finish the discussion on #1 about this first.

if not order:
order = 1
else:
order = float(order[0])
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)

def squash_atoms(self):
Expand Down Expand Up @@ -225,6 +229,7 @@ def resolve(self):

# rebuild hydrogen in all-atom case
if self.all_atom:
mark_aromatic_edges(self.molecule)
rebuild_h_atoms(self.molecule)

# sort the atoms
Expand Down
Loading