-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[2] Aromatic fragments #5
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -3,6 +3,49 @@ | |||||
VALENCES = pysmiles.smiles_helper.VALENCES | ||||||
VALENCES.update({"H": (1,)}) | ||||||
|
||||||
def _smiles_node_iter(smiles_str): | ||||||
""" | ||||||
Iterate over all nodes in SMILES string and return | ||||||
the index of the node. | ||||||
""" | ||||||
organic_subset = 'B C N O P S F Cl Br I * b c n o s p'.split() | ||||||
batom = False | ||||||
for idx, node in enumerate(smiles_str): | ||||||
if node == '[': | ||||||
batom = True | ||||||
start = idx | ||||||
|
||||||
if node == ']' and batom: | ||||||
stop = idx+1 | ||||||
batom = False | ||||||
yield start, stop | ||||||
|
||||||
if node in organic_subset and not batom: | ||||||
yield idx, idx + 1 | ||||||
Comment on lines
+11
to
+24
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This won't work for multi-letter organic atoms, such as Br! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. indeed; hence PR #4 🤣 |
||||||
|
||||||
def strip_aromatic_nodes(smiles_str): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't like the name of the function very much, since it deals with the smiles string, and not with graph nodes |
||||||
""" | ||||||
Find all aromatic nodes and change them to lower | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
case but keep a mapping of changed nodes. | ||||||
""" | ||||||
aromatic_shorthand = 'b c n o s p'.split() | ||||||
aromatic_atoms = {} | ||||||
nodes_iter = _smiles_node_iter(smiles_str) | ||||||
cleaned_str = "" | ||||||
prev_stop = 0 | ||||||
for idx, (start, stop) in enumerate(nodes_iter): | ||||||
if smiles_str[start] in aromatic_shorthand: | ||||||
aromatic_atoms[idx] = True | ||||||
cleaned_str += smiles_str[prev_stop:start] + smiles_str[start:stop].upper() | ||||||
else: | ||||||
aromatic_atoms[idx] = False | ||||||
cleaned_str += smiles_str[prev_stop:stop] | ||||||
prev_stop = stop | ||||||
|
||||||
cleaned_str += smiles_str[prev_stop:] | ||||||
return aromatic_atoms, cleaned_str | ||||||
|
||||||
|
||||||
def rebuild_h_atoms(mol_graph, keep_bonding=False): | ||||||
""" | ||||||
Helper function which add hydrogen atoms to the molecule graph. | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -5,6 +5,14 @@ | |||||
import networkx as nx | ||||||
import pysmiles | ||||||
from .read_cgsmiles import read_cgsmiles | ||||||
from .pysmiles_utils import strip_aromatic_nodes | ||||||
|
||||||
def mark_aromatic_edges(graph): | ||||||
for edge in graph.edges: | ||||||
if graph.nodes[edge[0]].get("aromatic", False) and\ | ||||||
graph.nodes[edge[1]].get("aromatic", False): | ||||||
graph.edges[edge]["order"] = 1.5 | ||||||
return graph | ||||||
|
||||||
def strip_bonding_descriptors(fragment_string): | ||||||
""" | ||||||
|
@@ -102,7 +110,18 @@ def fragment_iter(fragment_str, all_atom=True): | |||||
mol_graph.add_node(0, element="H", bonding=bonding_descrpt[0]) | ||||||
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') | ||||||
elif all_atom: | ||||||
mol_graph = pysmiles.read_smiles(smile) | ||||||
try: | ||||||
mol_graph = pysmiles.read_smiles(smile) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately, not. The error is raised already at the parsing stage of the cgsmiles string. You categorically do not allow non-cyclic aromatics. We could modify pysmiles though to allow this if 'reinterpret_aromatic' is set to False |
||||||
# we have non-ring aromitic fragments that need to be handled | ||||||
# a bit hacky | ||||||
except ValueError: | ||||||
arom_nodes, smile = strip_aromatic_nodes(smile) | ||||||
mol_graph = pysmiles.read_smiles(smile) | ||||||
# overwrite the aromaticity assignment | ||||||
nx.set_node_attributes(mol_graph, arom_nodes, "aromatic") | ||||||
# set the bond order for the aromatic edges | ||||||
mol_graph = mark_aromatic_edges(mol_graph) | ||||||
|
||||||
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') | ||||||
# we deal with a CG resolution graph | ||||||
else: | ||||||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -3,7 +3,7 @@ | |||||||
import networkx as nx | ||||||||
import pysmiles | ||||||||
from .read_cgsmiles import read_cgsmiles | ||||||||
from .read_fragments import read_fragments | ||||||||
from .read_fragments import read_fragments, mark_aromatic_edges | ||||||||
from .graph_utils import merge_graphs, sort_nodes_by_attr, annotate_fragments | ||||||||
from .pysmiles_utils import rebuild_h_atoms | ||||||||
|
||||||||
|
@@ -165,7 +165,8 @@ def edges_from_bonding_descrpt(self): | |||||||
bonding descriptors that formed the edge. Later unconsumed | ||||||||
bonding descriptors are replaced by hydrogen atoms. | ||||||||
""" | ||||||||
for prev_node, node in nx.dfs_edges(self.meta_graph): | ||||||||
for prev_node, node in self.meta_graph.edges: | ||||||||
print(prev_node, node) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
prev_graph = self.meta_graph.nodes[prev_node]['graph'] | ||||||||
node_graph = self.meta_graph.nodes[node]['graph'] | ||||||||
edge, bonding = generate_edge(prev_graph, | ||||||||
|
@@ -177,9 +178,12 @@ def edges_from_bonding_descrpt(self): | |||||||
|
||||||||
# bonding descriptors are assumed to have bonding order 1 | ||||||||
# unless they are specifically annotated | ||||||||
order = re.findall("\d+\.\d+", bonding[0]) | ||||||||
order = re.findall("[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", bonding[0]) | ||||||||
print(order) | ||||||||
Comment on lines
+181
to
+182
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Cool, this got more complicated. At least needs a comment, but we should probably finish the discussion on #1 about this first. |
||||||||
if not order: | ||||||||
order = 1 | ||||||||
else: | ||||||||
order = float(order[0]) | ||||||||
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order) | ||||||||
|
||||||||
def squash_atoms(self): | ||||||||
|
@@ -225,6 +229,7 @@ def resolve(self): | |||||||
|
||||||||
# rebuild hydrogen in all-atom case | ||||||||
if self.all_atom: | ||||||||
mark_aromatic_edges(self.molecule) | ||||||||
rebuild_h_atoms(self.molecule) | ||||||||
|
||||||||
# sort the atoms | ||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.