Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolution layering #13

Merged
merged 16 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions cgsmiles/graph_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,17 @@ def annotate_fragments(meta_graph, molecule):
meta_graph.nodes[meta_node]['graph'] = graph_frag

return meta_graph


def set_atom_names_atomistic(meta_graph, molecule):
"""
Set atomnames according to commonly used convention
in molecular dynamics (MD) forcefields. This convention
is defined as element plus counter for atom in residue.
"""
for meta_node in meta_graph.nodes:
fraggraph = meta_graph.nodes[meta_node]['graph']
for idx, node in enumerate(fraggraph.nodes):
atomname = fraggraph.nodes[node]['element'] + str(idx)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
fraggraph.nodes[node]['atomname'] = atomname
molecule.nodes[node]['atomname'] = atomname
2 changes: 1 addition & 1 deletion cgsmiles/read_cgsmiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def _find_next_character(string, chars, start):
for idx, token in enumerate(string[start:]):
if token in chars:
return idx+start
return np.inf
return len(string)

def _expand_branch(mol_graph, current, anchor, recipe):
"""
Expand Down
202 changes: 144 additions & 58 deletions cgsmiles/resolve.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import re
import copy
import networkx as nx
import pysmiles
from .read_cgsmiles import read_cgsmiles
from .read_fragments import read_fragments
from .graph_utils import merge_graphs, sort_nodes_by_attr, annotate_fragments
from .graph_utils import (merge_graphs,
sort_nodes_by_attr,
annotate_fragments,
set_atom_names_atomistic)
from .pysmiles_utils import rebuild_h_atoms

def compatible(left, right):
Expand Down Expand Up @@ -69,62 +71,89 @@ def match_bonding_descriptors(source, target, bond_attribute="bonding"):

class MoleculeResolver:
"""
Resolve the molecule described by a CGBigSmile
string.
Resolve the molecule(s) described by a
CGSmiles string. Each execution of the
resolve function will return the next
resolved molecule and the graph of the
previous resolution.

Use the resolve_iter method to loop over
all possible molecule resolutions enconded
in the CGSmiles string.
"""

def __init__(self,
pattern,
meta_graph=None,
fragment_dict={},
all_atom=True):

# let's start by setting some attributes
# this is the fragment string
self.fragment_str = None
# this is the cgsmiles string
self.cgsmiles_str = None
fragment_dicts=[],
last_all_atom=True):

"""
Parameters
----------
pattern: str
the cgsmiles string to resolve
meta_graph: `:class:nx.Graph`
a potential higher resolution graph
fragment_dicts: list[dict[str, nx.Graph]]
a dict of fragment graphs per resolution
last_all_atom: bool
if the last resolution is at the all
atom level. If True the code will use
pysmiles to parse the fragments and
return the all-atom molecule.
Default: True
"""
# this is the dict with the fragments
# either provided and/or extracted
# from the fragment string
self.fragment_dict = fragment_dict
# this is the lower resolution graph
self.meta_graph = meta_graph
# this is the higher resolution graph
self.molecule = nx.Graph()
# this attribute stores if the fragments
# follow open_smiles syntax or cg_smiles
# syntax
self.all_atom = all_atom
self.fragment_dicts = fragment_dicts
# this is the current meta_graph
self.meta_graph = nx.Graph()

# here we figure out what we are dealing with
# here we figure out how many resolutions
# we are dealing with
elements = re.findall(r"\{[^\}]+\}", pattern)
# case 1)
# a meta_graph is provided which means we only
# have fragments to deal with
if meta_graph:
if len(elements) > 1:
msg = ("When providing a meta_graph, the pattern can only"
"contain fragment.")
raise IOError(msg)
else:
self.fragment_string = elements[0]

# case 2) we have a meta graph only described
# case 1) we have a meta graph only described
# and the fragment come from elsewhere
elif len(elements) == 1 and self.fragment_dict:
self.cgsmiles_string = elements[0]

# case 3) a string containing both fragments and
# the meta sequence is provided
if len(elements) == 1 and self.fragment_dicts:
self.molecule = read_cgsmiles(elements[0])
self.fragment_strs = None
# the number of resolutions available
self.resolutions = len(self.fragment_dicts)
# turn the framgent strings into an iterator
self.fragment_strs = None
self.fragment_dicts = iter(self.fragment_dicts)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
else:
if len(elements) < 2:
msg = ("When providing a meta_graph, the pattern can only"
"contain fragment.")
raise IOError(msg)
# case 2)
# a meta_graph is provided which means we only
# have fragments to deal with
if meta_graph:
self.fragment_strs = elements
self.molecule = meta_graph
# case 3) a string containing both fragments and
# the meta sequence is provided
else:
self.cgsmiles_string = elements[0]
self.fragment_string = elements[1]
self.molecule = read_cgsmiles(elements[0])
self.fragment_strs = elements[1:]

# the number of resolutions available
self.resolutions = len(self.fragment_strs)
# turn the framgent strings into an iterator
self.fragment_strs = iter(self.fragment_strs)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

# at this stage there are no atomnames for the nodes
new_names = nx.get_node_attributes(self.molecule, "fragname")
nx.set_node_attributes(self.meta_graph, new_names, "atomname")

# if the last resolution is all_atom
self.last_all_atom = last_all_atom

# what is the current resolution
self.resolution_counter = 0

# the next resolution fragments
self.fragment_dict = {}
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

def resolve_disconnected_molecule(self):
"""
Expand Down Expand Up @@ -156,14 +185,23 @@ def resolve_disconnected_molecule(self):

self.meta_graph.nodes[meta_node]['graph'] = graph_frag

def edges_from_bonding_descrpt(self):
def edges_from_bonding_descrpt(self, all_atom=False):
"""
Make edges according to the bonding descriptors stored
Makes edges according to the bonding descriptors stored
in the node attributes of meta_molecule residue graph.

If a bonding descriptor is consumed it is removed from the list,
however, the meta_molecule edge gets an attribute with the
bonding descriptors that formed the edge. Later unconsumed
bonding descriptors are replaced by hydrogen atoms.
however, the meta_graph edge gets an attribute with the
bonding descriptors that formed the edge.

Later unconsumed descriptors are discarded and the valence
filled in using hydrogen atoms in case of an atomistic molecule.

Parameters
----------
all_atom: bool
if the high resolution level graph has all-atom resolution
default: False
"""
for prev_node, node in self.meta_graph.edges:
for _ in range(0, self.meta_graph.edges[(prev_node, node)]["order"]):
Expand All @@ -182,7 +220,7 @@ def edges_from_bonding_descrpt(self):
# unless they are specifically annotated
order = int(bonding[0][-1])
self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order)
if self.all_atom:
if all_atom:
for edge_node in edge:
if self.molecule.nodes[edge_node]['element'] != 'H':
self.molecule.nodes[edge_node]['hcount'] -= 1
Expand Down Expand Up @@ -210,25 +248,52 @@ def squash_atoms(self):
self.molecule.nodes[node_to_keep]['fragid'] += self.molecule.nodes[node_to_keep]['contraction'][node_to_remove]['fragid']

def resolve(self):
"""
Resolve a CGSmiles string once and return the next resolution.
"""
# increment the resolution counter
self.resolution_counter += 1

# check if this is an all-atom level resolution
if self.resolution_counter == self.resolutions and self.last_all_atom:
all_atom = True
else:
all_atom = False

# get the next set of fragments
if self.fragment_strs:
fragment_str = next(self.fragment_strs)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

# empty the fragment dict just as a precaution
self.fragment_dict = {}

# read the fragment str and populate dict
self.fragment_dict.update(read_fragments(fragment_str,
all_atom=all_atom))
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
else:
self.fragment_dict = next(self.fragment_dicts)
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved

# set the previous molecule as meta_graph
self.meta_graph = self.molecule

if self.cgsmiles_string is not None:
self.meta_graph = read_cgsmiles(self.cgsmiles_string)
# now we have to switch the node names and the fragment names
new_fragnames = nx.get_node_attributes(self.meta_graph, "atomname")
nx.set_node_attributes(self.meta_graph, new_fragnames, "fragname")

if self.fragment_string is not None:
self.fragment_dict.update(read_fragments(self.fragment_string,
all_atom=self.all_atom))
# create an empty molecule graph
self.molecule = nx.Graph()

# add disconnected fragments to graph
self.resolve_disconnected_molecule()

# connect valid bonding descriptors
self.edges_from_bonding_descrpt()
self.edges_from_bonding_descrpt(all_atom=all_atom)

# contract atoms with squash descriptors
self.squash_atoms()

# rebuild hydrogen in all-atom case
if self.all_atom:
if all_atom:
rebuild_h_atoms(self.molecule)

# sort the atoms
Expand All @@ -238,4 +303,25 @@ def resolve(self):
self.meta_graph = annotate_fragments(self.meta_graph,
self.molecule)

# in all-atom MD there are common naming conventions
# that might be expected and hence we set them here
if all_atom:
set_atom_names_atomistic(self.meta_graph, self.molecule)

return self.meta_graph, self.molecule

def resolve_iter(self):
"""
Iterator returning all resolutions in oder.
"""
for _ in range(self.resolutions):
meta_graph, molecule = self.resolve()
yield meta_graph, molecule

def resolve_all(self):
"""
Resolve all layers and return final molecule
as well as the last layer above that.
"""
*_, (meta_graph, graph) = resolver.resolve_iter()
return meta_graph, graph
41 changes: 41 additions & 0 deletions cgsmiles/tests/test_layering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import textwrap
import networkx as nx
import pytest
import cgsmiles
from cgsmiles.resolve import MoleculeResolver

@pytest.mark.parametrize('cgsmiles_str, ref_strings',(
# simple linear
("""{[#A0][#B0]}.{#A0=[#A1a][#A1b][>],
#B0=[<][#B1a][#B1b]}.
{#A1a=[<][#A2a]([#A2b][#A2c)[#A2d][>],
#A1b=[<][#A2c][#A2d][>],
#B1a=[<][#B2a][#B2b][>],
#B1b=[<][#B2c][>]([#B2d]1[#B2e][#B2f]1)}""",
["{[#A1a][#A1b][#B1a][#B1b]}",
"{[#A2a]([#A2b][#A2c])[#A2d][#A2c][#A2d][#B2a][#B2b][#B2c]([#B2d]1[#B2e][#B2f]1)}"]
),
# linear with squash
("""{[#A0][#B0]}.{#A0=[!][#A1a][#A1b][>],
#B0=[!][#A1a][#B1b]}.
{#A1a=[<][#A2a]([#A2b][#A2c)[#A2c][!],
#A1b=[!][#A2c][#A2d][>],
#B1b=[<][#B2c][>]([#B2d]1[#B2e][#B2f]1)}""",
["{[#A1a][#A1b][#B1b]}",
"{[#A2a]([#A2b][#A2c)[#A2c][#A2d][#B2c]([#B2d]1[#B2e][#B2f]1)}"],),
# cycle layering
("""{[#A0]1[#A0][#A0]1}.{#A0=[>][#A1a][#A1b][<]}.
{#A1a=[>][#A2a][#A2b][#A2c][<],#A1b=[<][#A2e][>]([#C][#D])}""",
["{[#A1a]1[#A1b][#A1a][#A1b][#A1a][#A1b]1}",
"{[#A2a]1[#A2b][#A2c][#A2e]([#C][#D])[#A2e]([#C][#D])[#A2a][#A2b][#A2c][#A2e]([#C][#D])[#A2a][#A2b][#A2c][#A2e]1([#C][#D])"]
)))
def test_layering_of_resolutions(cgsmiles_str, ref_strings):

def _node_match(n1, n2):
return n1["fragname"] == n2["fragname"]

cgsmiles_str = cgsmiles_str.strip().replace('\n','').replace(' ','')
fgrunewald marked this conversation as resolved.
Show resolved Hide resolved
resolver = MoleculeResolver(cgsmiles_str, last_all_atom=False)
for (low_graph, high_graph), ref_str in zip(resolver.resolve_iter(), ref_strings):
ref_graph = cgsmiles.read_cgsmiles(ref_str)
nx.is_isomorphic(ref_graph, high_graph, node_match=_node_match)
Loading