From 5ebfad8d2a61ba19dd7762f63248914f7542cf9c Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 24 Jun 2024 13:33:20 +0200 Subject: [PATCH 01/16] small bug fix; when the multiplication operator is used in fragments it is possible that it is the last character --- cgsmiles/read_cgsmiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cgsmiles/read_cgsmiles.py b/cgsmiles/read_cgsmiles.py index 6827ec1..f56180c 100644 --- a/cgsmiles/read_cgsmiles.py +++ b/cgsmiles/read_cgsmiles.py @@ -13,7 +13,7 @@ def _find_next_character(string, chars, start): for idx, token in enumerate(string[start:]): if token in chars: return idx+start - return np.inf + return len(string) def _expand_branch(mol_graph, current, anchor, recipe): """ From bfea1da9da98cfb31053a648cb3674270053f921 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 24 Jun 2024 13:33:41 +0200 Subject: [PATCH 02/16] implement layered resolutions --- cgsmiles/resolve.py | 152 ++++++++++++++++++++++++++++++-------------- 1 file changed, 105 insertions(+), 47 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 5baa2ba..9782865 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -1,7 +1,6 @@ import re import copy import networkx as nx -import pysmiles from .read_cgsmiles import read_cgsmiles from .read_fragments import read_fragments from .graph_utils import merge_graphs, sort_nodes_by_attr, annotate_fragments @@ -69,62 +68,80 @@ def match_bonding_descriptors(source, target, bond_attribute="bonding"): class MoleculeResolver: """ - Resolve the molecule described by a CGBigSmile - string. + Resolve the molecule(s) described by a + CGSmiles string. Each execution of the + resolve function will return the next + resolved molecule and the graph of the + previous resolution. + + Use the resolve_iter method to loop over + all possible molecule resolutions enconded + in the CGSmiles string. """ - def __init__(self, pattern, meta_graph=None, fragment_dict={}, - all_atom=True): + last_all_atom=True): - # let's start by setting some attributes - # this is the fragment string - self.fragment_str = None - # this is the cgsmiles string - self.cgsmiles_str = None + """ + Parameters + ---------- + pattern: str + the cgsmiles string to resolve + meta_graph: `:class:nx.Graph` + a potential higher resolution graph + fragment_dict: dict[`:class:nx.Garph`] + a dict of fragment graphs + last_all_atom: bool + if the last resolution is at the all + atom level. Default: True + """ # this is the dict with the fragments # either provided and/or extracted # from the fragment string self.fragment_dict = fragment_dict - # this is the lower resolution graph - self.meta_graph = meta_graph - # this is the higher resolution graph + # this is the current resolution graph self.molecule = nx.Graph() - # this attribute stores if the fragments - # follow open_smiles syntax or cg_smiles - # syntax - self.all_atom = all_atom + # this is the current meta_graph + self.meta_graph = nx.Graph() + # these are possible fragment strings + self.fragment_strs = [] - # here we figure out what we are dealing with + # here we figure out how many resolutions + # we are dealing with elements = re.findall(r"\{[^\}]+\}", pattern) + # case 1) # a meta_graph is provided which means we only # have fragments to deal with if meta_graph: - if len(elements) > 1: - msg = ("When providing a meta_graph, the pattern can only" - "contain fragment.") - raise IOError(msg) - else: - self.fragment_string = elements[0] - + self.fragment_strs = elements # case 2) we have a meta graph only described # and the fragment come from elsewhere elif len(elements) == 1 and self.fragment_dict: - self.cgsmiles_string = elements[0] - + self.molecule = read_cgsmiles(elements[0]) # case 3) a string containing both fragments and # the meta sequence is provided else: - if len(elements) < 2: - msg = ("When providing a meta_graph, the pattern can only" - "contain fragment.") - raise IOError(msg) - else: - self.cgsmiles_string = elements[0] - self.fragment_string = elements[1] + self.molecule = read_cgsmiles(elements[0]) + self.fragment_strs = elements[1:] + + # at this stage there are no atomnames for the nodes + new_names = nx.get_node_attributes(self.molecule, "fragname") + nx.set_node_attributes(self.meta_graph, new_names, "atomname") + + # the number of resolutions available + self.resolutions = len(self.fragment_strs) + + # turn the framgent strings into an iterator + self.fragment_strs = iter(self.fragment_strs) + + # if the last resolution is all_atom + self.last_all_atom = last_all_atom + + # what is the current resolution + self.resolution_counter = 0 def resolve_disconnected_molecule(self): """ @@ -156,14 +173,23 @@ def resolve_disconnected_molecule(self): self.meta_graph.nodes[meta_node]['graph'] = graph_frag - def edges_from_bonding_descrpt(self): + def edges_from_bonding_descrpt(self, all_atom=False): """ - Make edges according to the bonding descriptors stored + Makes edges according to the bonding descriptors stored in the node attributes of meta_molecule residue graph. + If a bonding descriptor is consumed it is removed from the list, - however, the meta_molecule edge gets an attribute with the - bonding descriptors that formed the edge. Later unconsumed - bonding descriptors are replaced by hydrogen atoms. + however, the meta_graph edge gets an attribute with the + bonding descriptors that formed the edge. + + Later unconsumed descriptors are discarded and the valance + filled in using hydrogen atoms in case of an atomistic molecule. + + Parameters + ---------- + all_atom: bool + if the high resolution level graph has all-atom resolution + default: False """ for prev_node, node in self.meta_graph.edges: for _ in range(0, self.meta_graph.edges[(prev_node, node)]["order"]): @@ -182,7 +208,7 @@ def edges_from_bonding_descrpt(self): # unless they are specifically annotated order = int(bonding[0][-1]) self.molecule.add_edge(edge[0], edge[1], bonding=bonding, order=order) - if self.all_atom: + if all_atom: for edge_node in edge: if self.molecule.nodes[edge_node]['element'] != 'H': self.molecule.nodes[edge_node]['hcount'] -= 1 @@ -210,25 +236,49 @@ def squash_atoms(self): self.molecule.nodes[node_to_keep]['fragid'] += self.molecule.nodes[node_to_keep]['contraction'][node_to_remove]['fragid'] def resolve(self): + """ + Resolve a CGSmiles string once and return the next resolution. + """ + # get the next set of fragments + fragment_str = next(self.fragment_strs) + + # increment the resolution counter + self.resolution_counter += 1 + + # check if this is an all-atom level resolution + if self.resolution_counter == self.resolutions and self.last_all_atom: + all_atom = True + else: + all_atom = False + + # empty the fragment dict just as a precaution + self.fragment_dict = {} - if self.cgsmiles_string is not None: - self.meta_graph = read_cgsmiles(self.cgsmiles_string) + # read the fragment str and populate dict + self.fragment_dict.update(read_fragments(fragment_str, + all_atom=all_atom)) - if self.fragment_string is not None: - self.fragment_dict.update(read_fragments(self.fragment_string, - all_atom=self.all_atom)) + # set the previous molecule as meta_graph + self.meta_graph = self.molecule + + # now we have to switch the node names and the fragment names + new_fragnames = nx.get_node_attributes(self.meta_graph, "atomname") + nx.set_node_attributes(self.meta_graph, new_fragnames, "fragname") + + # create an empty molecule graph + self.molecule = nx.Graph() # add disconnected fragments to graph self.resolve_disconnected_molecule() # connect valid bonding descriptors - self.edges_from_bonding_descrpt() + self.edges_from_bonding_descrpt(all_atom=all_atom) # contract atoms with squash descriptors self.squash_atoms() # rebuild hydrogen in all-atom case - if self.all_atom: + if all_atom: rebuild_h_atoms(self.molecule) # sort the atoms @@ -239,3 +289,11 @@ def resolve(self): self.molecule) return self.meta_graph, self.molecule + + def resolve_iter(self): + """ + Iterator returning all resolutions in oder. + """ + for _ in range(self.resolutions): + meta_graph, molecule = self.resolve() + yield meta_graph, molecule From 43b08beeac091feffa6d7c06cd09b121ad74cdc5 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 11:36:34 +0200 Subject: [PATCH 03/16] add test for layering --- cgsmiles/tests/test_layering.py | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 cgsmiles/tests/test_layering.py diff --git a/cgsmiles/tests/test_layering.py b/cgsmiles/tests/test_layering.py new file mode 100644 index 0000000..2b14483 --- /dev/null +++ b/cgsmiles/tests/test_layering.py @@ -0,0 +1,42 @@ +import textwrap +import networkx as nx +import pytest +import cgsmiles +from cgsmiles.resolve import MoleculeResolver + +@pytest.mark.parametrize('cgsmiles_str, ref_strings',( + # simple linear + ("""{[#A0][#B0]}.{#A0=[#A1a][#A1b][>], + #B0=[<][#B1a][#B1b]}. + {#A1a=[<][#A2a]([#A2b][#A2c)[#A2d][>], + #A1b=[<][#A2c][#A2d][>], + #B1a=[<][#B2a][#B2b][>], + #B1b=[<][#B2c][>]([#B2d]1[#B2e][#B2f]1)}""", + ["{[#A1a][#A1b][#B1a][#B1b]}", + "{[#A2a]([#A2b][#A2c])[#A2d][#A2c][#A2d][#B2a][#B2b][#B2c]([#B2d]1[#B2e][#B2f]1)}"] + ), + # linear with squash + ("""{[#A0][#B0]}.{#A0=[!][#A1a][#A1b][>], + #B0=[!][#A1a][#B1b]}. + {#A1a=[<][#A2a]([#A2b][#A2c)[#A2c][!], + #A1b=[!][#A2c][#A2d][>], + #B1b=[<][#B2c][>]([#B2d]1[#B2e][#B2f]1)}""", + ["{[#A1a][#A1b][#B1b]}", + "{[#A2a]([#A2b][#A2c)[#A2c][#A2d][#B2c]([#B2d]1[#B2e][#B2f]1)}"],), + # cycle layering + ("""{[#A0]1[#A0][#A0]1}.{#A0=[>][#A1a][#A1b][<]}. + {#A1a=[>][#A2a][#A2b][#A2c][<],#A1b=[<][#A2e][>]([#C][#D])}""", + ["{[#A1a]1[#A1b][#A1a][#A1b][#A1a][#A1b]1}", + "{[#A2a]1[#A2b][#A2c][#A2e]([#C][#D])[#A2e]([#C][#D])[#A2a][#A2b][#A2c][#A2e]([#C][#D])[#A2a][#A2b][#A2c][#A2e]1([#C][#D])"] +))) +def test_layering_of_resolutions(cgsmiles_str, ref_strings): + + def _node_match(n1, n2): + return n1["fragname"] == n2["fragname"] + + cgsmiles_str = cgsmiles_str.strip().replace('\n','').replace(' ','') + resolver = MoleculeResolver(cgsmiles_str, last_all_atom=False) + for (low_graph, high_graph), ref_str in zip(resolver.resolve_iter(), ref_strings): + print("here") + ref_graph = cgsmiles.read_cgsmiles(ref_str) + nx.is_isomorphic(ref_graph, high_graph, node_match=_node_match) From ed0f7c8e6a649c1e8ce689ebee0958643b26395e Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 11:37:04 +0200 Subject: [PATCH 04/16] set atomnames according to all-atom MD convetion if resolution is AA --- cgsmiles/graph_utils.py | 14 ++++++++++++++ cgsmiles/resolve.py | 10 +++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/cgsmiles/graph_utils.py b/cgsmiles/graph_utils.py index 1bbd88c..4fb28d8 100644 --- a/cgsmiles/graph_utils.py +++ b/cgsmiles/graph_utils.py @@ -114,3 +114,17 @@ def annotate_fragments(meta_graph, molecule): meta_graph.nodes[meta_node]['graph'] = graph_frag return meta_graph + + +def set_atom_names_atomsitic(meta_graph, molecule): + """ + Set atomnames according to commonly used convention + in molecular dynamics (MD) forcefields. This convention + is defined as element plus counter for atom in residue. + """ + for meta_node in meta_graph.nodes: + fraggraph = meta_graph.nodes[meta_node]['graph'] + for idx, node in enumerate(fraggraph.nodes): + atomname = fraggraph.nodes[node]['element'] + str(idx) + fraggraph.nodes[node]['atomname'] = atomname + molecule.nodes[node]['atomname'] = atomname diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 9782865..c07f5a4 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -3,7 +3,10 @@ import networkx as nx from .read_cgsmiles import read_cgsmiles from .read_fragments import read_fragments -from .graph_utils import merge_graphs, sort_nodes_by_attr, annotate_fragments +from .graph_utils import (merge_graphs, + sort_nodes_by_attr, + annotate_fragments, + set_atom_names_atomsitic) from .pysmiles_utils import rebuild_h_atoms def compatible(left, right): @@ -288,6 +291,11 @@ def resolve(self): self.meta_graph = annotate_fragments(self.meta_graph, self.molecule) + # in all-atom MD there are common naming conventions + # that might be expected and hence we set them here + if all_atom: + set_atom_names_atomsitic(self.meta_graph, self.molecule) + return self.meta_graph, self.molecule def resolve_iter(self): From 0253d1d5f845338b41ddcc54295dc7c2b9f464eb Mon Sep 17 00:00:00 2001 From: "Dr. Fabian Grunewald" <32294573+fgrunewald@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:54:04 +0200 Subject: [PATCH 05/16] Apply suggestions from code review add edits Co-authored-by: Peter C Kroon --- cgsmiles/graph_utils.py | 2 +- cgsmiles/resolve.py | 4 ++-- cgsmiles/tests/test_layering.py | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cgsmiles/graph_utils.py b/cgsmiles/graph_utils.py index 4fb28d8..d2bd0ab 100644 --- a/cgsmiles/graph_utils.py +++ b/cgsmiles/graph_utils.py @@ -116,7 +116,7 @@ def annotate_fragments(meta_graph, molecule): return meta_graph -def set_atom_names_atomsitic(meta_graph, molecule): +def set_atom_names_atomistic(meta_graph, molecule): """ Set atomnames according to commonly used convention in molecular dynamics (MD) forcefields. This convention diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index c07f5a4..553a67d 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -94,7 +94,7 @@ def __init__(self, the cgsmiles string to resolve meta_graph: `:class:nx.Graph` a potential higher resolution graph - fragment_dict: dict[`:class:nx.Garph`] + fragment_dict: dict[str, nx.Graph] a dict of fragment graphs last_all_atom: bool if the last resolution is at the all @@ -185,7 +185,7 @@ def edges_from_bonding_descrpt(self, all_atom=False): however, the meta_graph edge gets an attribute with the bonding descriptors that formed the edge. - Later unconsumed descriptors are discarded and the valance + Later unconsumed descriptors are discarded and the valence filled in using hydrogen atoms in case of an atomistic molecule. Parameters diff --git a/cgsmiles/tests/test_layering.py b/cgsmiles/tests/test_layering.py index 2b14483..96aec5a 100644 --- a/cgsmiles/tests/test_layering.py +++ b/cgsmiles/tests/test_layering.py @@ -37,6 +37,5 @@ def _node_match(n1, n2): cgsmiles_str = cgsmiles_str.strip().replace('\n','').replace(' ','') resolver = MoleculeResolver(cgsmiles_str, last_all_atom=False) for (low_graph, high_graph), ref_str in zip(resolver.resolve_iter(), ref_strings): - print("here") ref_graph = cgsmiles.read_cgsmiles(ref_str) nx.is_isomorphic(ref_graph, high_graph, node_match=_node_match) From d05526abbaf8a2a422292e13b07eb6c43468c954 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 11:59:50 +0200 Subject: [PATCH 06/16] add a resolve all --- cgsmiles/resolve.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 553a67d..3438a20 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -6,7 +6,7 @@ from .graph_utils import (merge_graphs, sort_nodes_by_attr, annotate_fragments, - set_atom_names_atomsitic) + set_atom_names_atomistic) from .pysmiles_utils import rebuild_h_atoms def compatible(left, right): @@ -305,3 +305,11 @@ def resolve_iter(self): for _ in range(self.resolutions): meta_graph, molecule = self.resolve() yield meta_graph, molecule + + def resolve_all(self): + """ + Resolve all layers and return final molecule + as well as the last layer above that. + """ + *_, (meta_graph, graph) = resolver.resolve_iter() + return meta_graph, graph From 8f7ac853d85cc3fb3d54b89ab72aa58b604de444 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 12:03:46 +0200 Subject: [PATCH 07/16] fix typo set_atom_names_atomistic --- cgsmiles/resolve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 3438a20..4e28bf6 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -294,7 +294,7 @@ def resolve(self): # in all-atom MD there are common naming conventions # that might be expected and hence we set them here if all_atom: - set_atom_names_atomsitic(self.meta_graph, self.molecule) + set_atom_names_atomistic(self.meta_graph, self.molecule) return self.meta_graph, self.molecule From 71027b5a6e895700a370d71ddde0409a47bac89c Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 12:46:55 +0200 Subject: [PATCH 08/16] fix handling of fragments --- cgsmiles/resolve.py | 80 ++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 4e28bf6..063ac2b 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -84,7 +84,7 @@ class MoleculeResolver: def __init__(self, pattern, meta_graph=None, - fragment_dict={}, + fragment_dicts=[], last_all_atom=True): """ @@ -94,58 +94,67 @@ def __init__(self, the cgsmiles string to resolve meta_graph: `:class:nx.Graph` a potential higher resolution graph - fragment_dict: dict[str, nx.Graph] - a dict of fragment graphs + fragment_dicts: list[dict[str, nx.Graph]] + a dict of fragment graphs per resolution last_all_atom: bool if the last resolution is at the all - atom level. Default: True + atom level. If True the code will use + pysmiles to parse the fragments and + return the all-atom molecule. + Default: True """ # this is the dict with the fragments # either provided and/or extracted # from the fragment string - self.fragment_dict = fragment_dict - # this is the current resolution graph - self.molecule = nx.Graph() + self.fragment_dicts = fragment_dicts # this is the current meta_graph self.meta_graph = nx.Graph() - # these are possible fragment strings - self.fragment_strs = [] # here we figure out how many resolutions # we are dealing with elements = re.findall(r"\{[^\}]+\}", pattern) - # case 1) - # a meta_graph is provided which means we only - # have fragments to deal with - if meta_graph: - self.fragment_strs = elements - # case 2) we have a meta graph only described + # case 1) we have a meta graph only described # and the fragment come from elsewhere - elif len(elements) == 1 and self.fragment_dict: + if len(elements) == 1 and self.fragment_dicts: self.molecule = read_cgsmiles(elements[0]) - # case 3) a string containing both fragments and - # the meta sequence is provided + self.fragment_strs = None + # the number of resolutions available + self.resolutions = len(self.fragment_dicts) + # turn the framgent strings into an iterator + self.fragment_strs = None + self.fragment_dicts = iter(self.fragment_dicts) else: - self.molecule = read_cgsmiles(elements[0]) - self.fragment_strs = elements[1:] + # case 2) + # a meta_graph is provided which means we only + # have fragments to deal with + if meta_graph: + self.fragment_strs = elements + self.molecule = meta_graph + # case 3) a string containing both fragments and + # the meta sequence is provided + else: + self.molecule = read_cgsmiles(elements[0]) + self.fragment_strs = elements[1:] + + # the number of resolutions available + self.resolutions = len(self.fragment_strs) + # turn the framgent strings into an iterator + self.fragment_strs = iter(self.fragment_strs) # at this stage there are no atomnames for the nodes new_names = nx.get_node_attributes(self.molecule, "fragname") nx.set_node_attributes(self.meta_graph, new_names, "atomname") - # the number of resolutions available - self.resolutions = len(self.fragment_strs) - - # turn the framgent strings into an iterator - self.fragment_strs = iter(self.fragment_strs) - # if the last resolution is all_atom self.last_all_atom = last_all_atom # what is the current resolution self.resolution_counter = 0 + # the next resolution fragments + self.fragment_dict = {} + def resolve_disconnected_molecule(self): """ Given a connected graph of nodes with associated fragment graphs @@ -242,9 +251,6 @@ def resolve(self): """ Resolve a CGSmiles string once and return the next resolution. """ - # get the next set of fragments - fragment_str = next(self.fragment_strs) - # increment the resolution counter self.resolution_counter += 1 @@ -254,12 +260,18 @@ def resolve(self): else: all_atom = False - # empty the fragment dict just as a precaution - self.fragment_dict = {} + # get the next set of fragments + if self.fragment_strs: + fragment_str = next(self.fragment_strs) + + # empty the fragment dict just as a precaution + self.fragment_dict = {} - # read the fragment str and populate dict - self.fragment_dict.update(read_fragments(fragment_str, - all_atom=all_atom)) + # read the fragment str and populate dict + self.fragment_dict.update(read_fragments(fragment_str, + all_atom=all_atom)) + else: + self.fragment_dict = next(self.fragment_dicts) # set the previous molecule as meta_graph self.meta_graph = self.molecule From 0fe7b9132645bb78e64f02922357d11f3b41d66a Mon Sep 17 00:00:00 2001 From: "Dr. Fabian Grunewald" <32294573+fgrunewald@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:15:57 +0200 Subject: [PATCH 09/16] Apply suggestions from code review Co-authored-by: Peter C Kroon --- cgsmiles/resolve.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 063ac2b..c186ce8 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -139,8 +139,6 @@ def __init__(self, # the number of resolutions available self.resolutions = len(self.fragment_strs) - # turn the framgent strings into an iterator - self.fragment_strs = iter(self.fragment_strs) # at this stage there are no atomnames for the nodes new_names = nx.get_node_attributes(self.molecule, "fragname") @@ -262,16 +260,12 @@ def resolve(self): # get the next set of fragments if self.fragment_strs: - fragment_str = next(self.fragment_strs) - - # empty the fragment dict just as a precaution - self.fragment_dict = {} + fragment_str = self.fragment_strs[self.resolution_counter - 1] # -1 because the counter has already been incremented #FIXME # read the fragment str and populate dict - self.fragment_dict.update(read_fragments(fragment_str, - all_atom=all_atom)) + self.fragment_dict = read_fragments(fragment_str, all_atom=all_atom)) else: - self.fragment_dict = next(self.fragment_dicts) + self.fragment_dict = self.fragment_dicts[self.resolution_counter - 1] # FIXME # set the previous molecule as meta_graph self.meta_graph = self.molecule From 7a2a7385cc40792e091080aab47a96c1debc947e Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 14:24:14 +0200 Subject: [PATCH 10/16] refactor and include comments --- cgsmiles/resolve.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index c186ce8..b5ab36c 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -249,23 +249,19 @@ def resolve(self): """ Resolve a CGSmiles string once and return the next resolution. """ - # increment the resolution counter - self.resolution_counter += 1 - # check if this is an all-atom level resolution - if self.resolution_counter == self.resolutions and self.last_all_atom: + if self.resolution_counter == self.resolutions -1 and self.last_all_atom: all_atom = True else: all_atom = False # get the next set of fragments if self.fragment_strs: - fragment_str = self.fragment_strs[self.resolution_counter - 1] # -1 because the counter has already been incremented #FIXME - + fragment_str = self.fragment_strs[self.resolution_counter] # read the fragment str and populate dict - self.fragment_dict = read_fragments(fragment_str, all_atom=all_atom)) + self.fragment_dict = read_fragments(fragment_str, all_atom=all_atom) else: - self.fragment_dict = self.fragment_dicts[self.resolution_counter - 1] # FIXME + self.fragment_dict = self.fragment_dicts[self.resolution_counter ] # set the previous molecule as meta_graph self.meta_graph = self.molecule @@ -302,6 +298,9 @@ def resolve(self): if all_atom: set_atom_names_atomistic(self.meta_graph, self.molecule) + # increment the resolution counter + self.resolution_counter += 1 + return self.meta_graph, self.molecule def resolve_iter(self): From 81eb1d4ee34179c65d46852872240ab871500024 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 14:54:29 +0200 Subject: [PATCH 11/16] add tests for resolve and fix bugs in workflow --- cgsmiles/resolve.py | 2 +- cgsmiles/tests/test_molecule_resolve.py | 35 ++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index b5ab36c..950885e 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -123,7 +123,7 @@ def __init__(self, self.resolutions = len(self.fragment_dicts) # turn the framgent strings into an iterator self.fragment_strs = None - self.fragment_dicts = iter(self.fragment_dicts) + self.fragment_dicts = self.fragment_dicts else: # case 2) # a meta_graph is provided which means we only diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py index 3d640a4..4dcaf82 100644 --- a/cgsmiles/tests/test_molecule_resolve.py +++ b/cgsmiles/tests/test_molecule_resolve.py @@ -1,7 +1,10 @@ +import re import pytest import networkx as nx from cgsmiles import MoleculeResolver from cgsmiles.resolve import match_bonding_descriptors +from cgsmiles.read_cgsmiles import read_cgsmiles +from cgsmiles.read_fragments import read_fragments @pytest.mark.parametrize('bonds_source, bonds_target, edge, btypes',( # single bond source each @@ -215,9 +218,35 @@ def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges): def _ele_match(n1, n2): return n1["element"] == n2["element"] - print(smile) - print(ref_graph.edges) - print(molecule.edges) #assert ref_graph.edges == molecule.edges # check that reference graph and molecule are isomorphic assert nx.is_isomorphic(ref_graph, molecule, node_match=_ele_match) + + +@pytest.mark.parametrize('case, cgsmiles_str, ref_string',( + # case 1: here only the meta-graph is described by the + # cgsmiles string the fragments are provided via a dict + (1, "{[#A][#B]}.{#A=[#A1][#A2][>],#B=[<][#B1][#B2]}", + "{[#A1][#A2][#B1][#B2]}"), + # case 2: opposite case of 1; here only the fragments are + # described by the input string + (2, "{[#A][#B]}.{#A=[#A1][#A2][>],#B=[<][#B1][#B2]}", + "{[#A1][#A2][#B1][#B2]}"),)) +def test_resolve_cases(case, cgsmiles_str, ref_string): + elements = re.findall(r"\{[^\}]+\}", cgsmiles_str) + if case == 1: + fragment_dict = read_fragments(elements[-1], all_atom=False) + meta_mol, molecule = MoleculeResolver(fragment_dicts=[fragment_dict], + pattern=elements[0], + last_all_atom=False).resolve() + elif case == 2: + meta_input = read_cgsmiles(elements[0]) + meta_mol, molecule = MoleculeResolver(meta_graph=meta_input, + pattern=elements[1], + last_all_atom=False).resolve() + ref_graph = read_cgsmiles(ref_string) + + def _atomname_match(n1, n2): + return n1["fragname"] == n2["atomname"] + assert nx.is_isomorphic(ref_graph, molecule, node_match=_atomname_match) + From 7d3203a69d59657ac420a2f363017c7e8ea35498 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 8 Jul 2024 16:57:13 +0200 Subject: [PATCH 12/16] read all fragments during init --- cgsmiles/resolve.py | 60 +++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 950885e..e47dc4b 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -109,51 +109,49 @@ def __init__(self, self.fragment_dicts = fragment_dicts # this is the current meta_graph self.meta_graph = nx.Graph() - + # if the last resolution is all_atom + self.last_all_atom = last_all_atom + # what is the current resolution level + self.resolution_counter = 0 # here we figure out how many resolutions # we are dealing with elements = re.findall(r"\{[^\}]+\}", pattern) # case 1) we have a meta graph only described # and the fragment come from elsewhere - if len(elements) == 1 and self.fragment_dicts: + if self.fragment_dicts: + msg = ("You cannot provide a fragment dict and fragments defined as CGSmiles string." + "Instead first read all fragments using cgsmiles.read_fragments then provide " + "all fragments together to the MoleculeResolver") + if len(elements) != 1: raise IOError(msg) self.molecule = read_cgsmiles(elements[0]) - self.fragment_strs = None - # the number of resolutions available - self.resolutions = len(self.fragment_dicts) - # turn the framgent strings into an iterator - self.fragment_strs = None - self.fragment_dicts = self.fragment_dicts else: # case 2) # a meta_graph is provided which means we only # have fragments to deal with if meta_graph: - self.fragment_strs = elements + fragment_strs = elements self.molecule = meta_graph # case 3) a string containing both fragments and # the meta sequence is provided else: self.molecule = read_cgsmiles(elements[0]) - self.fragment_strs = elements[1:] + fragment_strs = elements[1:] - # the number of resolutions available - self.resolutions = len(self.fragment_strs) + # now we populate the fragment dicts + self.fragment_dicts = [] + for idx, fragment_str in enumerate(fragment_strs): + all_atom = (idx == len(fragment_strs) - 1 and self.last_all_atom) + f_dict = read_fragments(fragment_str, all_atom=all_atom) + self.fragment_dicts.append(f_dict) + + self.resolutions = len(self.fragment_dicts) # at this stage there are no atomnames for the nodes new_names = nx.get_node_attributes(self.molecule, "fragname") nx.set_node_attributes(self.meta_graph, new_names, "atomname") - # if the last resolution is all_atom - self.last_all_atom = last_all_atom - - # what is the current resolution - self.resolution_counter = 0 - - # the next resolution fragments - self.fragment_dict = {} - - def resolve_disconnected_molecule(self): + def resolve_disconnected_molecule(self, fragment_dict): """ Given a connected graph of nodes with associated fragment graphs generate a disconnected graph of the fragments and annotate @@ -162,7 +160,7 @@ def resolve_disconnected_molecule(self): """ for meta_node in self.meta_graph.nodes: fragname = self.meta_graph.nodes[meta_node]['fragname'] - fragment = self.fragment_dict[fragname] + fragment = fragment_dict[fragname] correspondence = merge_graphs(self.molecule, fragment) graph_frag = nx.Graph() @@ -250,18 +248,10 @@ def resolve(self): Resolve a CGSmiles string once and return the next resolution. """ # check if this is an all-atom level resolution - if self.resolution_counter == self.resolutions -1 and self.last_all_atom: - all_atom = True - else: - all_atom = False + all_atom = (self.resolution_counter == self.resolutions - 1 and self.last_all_atom) # get the next set of fragments - if self.fragment_strs: - fragment_str = self.fragment_strs[self.resolution_counter] - # read the fragment str and populate dict - self.fragment_dict = read_fragments(fragment_str, all_atom=all_atom) - else: - self.fragment_dict = self.fragment_dicts[self.resolution_counter ] + fragment_dict = self.fragment_dicts[self.resolution_counter] # set the previous molecule as meta_graph self.meta_graph = self.molecule @@ -274,7 +264,7 @@ def resolve(self): self.molecule = nx.Graph() # add disconnected fragments to graph - self.resolve_disconnected_molecule() + self.resolve_disconnected_molecule(fragment_dict) # connect valid bonding descriptors self.edges_from_bonding_descrpt(all_atom=all_atom) @@ -307,7 +297,7 @@ def resolve_iter(self): """ Iterator returning all resolutions in oder. """ - for _ in range(self.resolutions): + for r in range(self.resolutions): meta_graph, molecule = self.resolve() yield meta_graph, molecule From a4e71b33cba3ddcc6bba7e017ab27cc001d70639 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 9 Jul 2024 12:48:40 +0200 Subject: [PATCH 13/16] refactor new API using constructors --- cgsmiles/resolve.py | 274 ++++++++++++++++++------ cgsmiles/tests/test_layering.py | 2 +- cgsmiles/tests/test_molecule_resolve.py | 14 +- 3 files changed, 215 insertions(+), 75 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index e47dc4b..3078fda 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -71,92 +71,144 @@ def match_bonding_descriptors(source, target, bond_attribute="bonding"): class MoleculeResolver: """ - Resolve the molecule(s) described by a - CGSmiles string. Each execution of the - resolve function will return the next - resolved molecule and the graph of the - previous resolution. - - Use the resolve_iter method to loop over - all possible molecule resolutions enconded - in the CGSmiles string. + Resolve the molecule(s) described by a CGSmiles string and return a nx.Graph + of the molecule. + + First, this class has to be initiated using one of three class construction + methods. When trying to read a CGSmiles string always use the first method. + The other constructors can be used in case fragments or the lowest + resolution molecule are defined by graphs that come from elsewhere. + + `self.from_string`: use when fragments and lowest resolution are + described in one CGSmiles string. + `self.from_graph`: use when fragments are described by CGSmiles + strings but the lowest resolution is given + as nx.Graph + `self.from_fragment_dicts`: use when fragments are given as nx.Graphs + and the lowest resolution is provided as + CGSmiles string + + Once the `MoleculeResolver` is initiated you can call the `resolve_iter` to + loop over the different levels of resolution. The resolve iter will always + return the previous lower resolution graph as well as the current higher + resolution graph. For example, if the CGSmiles string describes a monomer + sequence of a regular polymer, the lower resolution graph will be the graph + of this monomer sequence and the higher resolution graph the full molecule. + + Basic Examples + -------------- + Blocky-copolymer of PE and PEO with the first resolution being the sequence + of blocks, followed by the monomer graph, and then the full molecule. + + >>> cgsmiles_str = "{[#B1][#B2][#B1]}.{#B1=[#PEO]|4,#B2=[#PE]|2}.{#PEO=[>]COC[<],#PE=[>]CC[<]}" + >>> resolver = MoleculeResolver.from_string(cgsmiles_str) + >>> for low_res, high_res in resolver.resolve_iter(): + print(low_res.nodes(data='fragname')) + print(high_res.nodes(data='atomname)) + + To only access the final resolution level you can simply call `resolve all`: + >>> monomer_graph, full_molecule = resolver.resolve_all() + + Advanced API Examples + --------------------- + Alternatively, one could have gotten the block level graph from somewhere + else defined as `nx.Graph` in that case: + >>> # the string only defines the fragments + >>> cgsmiles_str = "{#B1=[#PEO]|4,#B2=[#PE]|2}.{#PEO=[>]COC[<],#PE=[>]CC[<]}" + >>> block_graph = nx.Graph() + >>> block_graph.add_edges_from([(0, 1), (1, 2), (2, 3)]) + >>> nx.set_node_attributes(block_graph, {0: "B1", 1: "B2", 2: "B1"}, 'fragname') + >>> resolver = MoleculeResolver.from_graph(cgsmiles_str, block_graph) + + Finally, there is the option of having the fragments from elsewhere for + example a library. Then only the graph defined as CGSmiles string. In this + case the `from_fragment_dicts` method can be used. Please note that the + fragment graphs need to have the following attributes as a graph returned + by the `cgsmiles.read_fragments` function. + >>> fragment_dicts = [] + >>> for frag_string in ["{#B1=[#PEO]|4,#B2=[#PE]|2}", "{#PEO=[>]COC[<],#PE=[>]CC[<]}"]: + >>> frag_dict = read_fragments(frag_string) + >>> fragment_dicts.append(frag_dict) + >>> cgsmiles_str = "{[#B1][#B2][#B1]}" + >>> resolver = MoleculeResolver.from_fragment_dicts(cgsmiles_str, fragment_dicts) + + Subclassing + ----------- + More advanced workflows can easily be implemented by subclassing the MoleculeResolver + and adding new constructors that peform more complex preparation instructions for + example. """ def __init__(self, - pattern, - meta_graph=None, - fragment_dicts=[], + molecule_graph, + fragment_dicts, last_all_atom=True): """ Parameters ---------- - pattern: str - the cgsmiles string to resolve - meta_graph: `:class:nx.Graph` - a potential higher resolution graph + molecule_graph: `:class:nx.Graph` + a lower resolution molecule graph to be resolved to higher + resolutions molecule graphs. Each node must have the fragname + with a dict entry in the next fragment_dicts list. fragment_dicts: list[dict[str, nx.Graph]] - a dict of fragment graphs per resolution + a dict of fragment graphs per resolution. Each graph must have the + same attributes as returned by the `cgsmiles.read_fragments` + function. last_all_atom: bool - if the last resolution is at the all - atom level. If True the code will use - pysmiles to parse the fragments and - return the all-atom molecule. - Default: True + if the last resolution is at the all atom level. If True the code + will use pysmiles to parse the fragments and return the all-atom + molecule. Default: True """ - # this is the dict with the fragments - # either provided and/or extracted - # from the fragment string - self.fragment_dicts = fragment_dicts - # this is the current meta_graph self.meta_graph = nx.Graph() - # if the last resolution is all_atom + self.fragment_dicts = fragment_dicts + self.molecule = molecule_graph self.last_all_atom = last_all_atom - # what is the current resolution level self.resolution_counter = 0 - # here we figure out how many resolutions - # we are dealing with - elements = re.findall(r"\{[^\}]+\}", pattern) - - # case 1) we have a meta graph only described - # and the fragment come from elsewhere - if self.fragment_dicts: - msg = ("You cannot provide a fragment dict and fragments defined as CGSmiles string." - "Instead first read all fragments using cgsmiles.read_fragments then provide " - "all fragments together to the MoleculeResolver") - if len(elements) != 1: raise IOError(msg) - self.molecule = read_cgsmiles(elements[0]) - else: - # case 2) - # a meta_graph is provided which means we only - # have fragments to deal with - if meta_graph: - fragment_strs = elements - self.molecule = meta_graph - # case 3) a string containing both fragments and - # the meta sequence is provided - else: - self.molecule = read_cgsmiles(elements[0]) - fragment_strs = elements[1:] - - # now we populate the fragment dicts - self.fragment_dicts = [] - for idx, fragment_str in enumerate(fragment_strs): - all_atom = (idx == len(fragment_strs) - 1 and self.last_all_atom) - f_dict = read_fragments(fragment_str, all_atom=all_atom) - self.fragment_dicts.append(f_dict) - self.resolutions = len(self.fragment_dicts) - - # at this stage there are no atomnames for the nodes new_names = nx.get_node_attributes(self.molecule, "fragname") nx.set_node_attributes(self.meta_graph, new_names, "atomname") + @staticmethod + def read_fragment_strings(fragment_strings, last_all_atom=True): + """ + Read a list of CGSmiles fragment_strings and return a list + of dicts with the fragment graphs. If `last_all_atom` is + True then pysmiles is used to read the last fragment string + provided in the list. + + Parameters + ---------- + fragment_strings: list[str] + list of CGSmiles fragment strings + last_all_atom: bool + if the last string in the list is an all atom string + and should be read using pysmiles. + + Returns + ------- + list[dict[str, nx.Graph]] + a list of the fragment dicts composed of the fragment + name and a nx.Graph describing the fragment + """ + fragment_dicts = [] + for idx, fragment_str in enumerate(fragment_strings): + all_atom = (idx == len(fragment_strings) - 1 and last_all_atom) + print(idx == len(fragment_strings) - 1, last_all_atom) + f_dict = read_fragments(fragment_str, all_atom=all_atom) + fragment_dicts.append(f_dict) + return fragment_dicts + def resolve_disconnected_molecule(self, fragment_dict): """ Given a connected graph of nodes with associated fragment graphs generate a disconnected graph of the fragments and annotate each fragment graph to the node in the higher resolution graph. + + Parameters + ---------- + fragment_dict: dict[str, nx.Graph] + a dict of fragment graphs """ for meta_node in self.meta_graph.nodes: fragname = self.meta_graph.nodes[meta_node]['fragname'] @@ -297,14 +349,102 @@ def resolve_iter(self): """ Iterator returning all resolutions in oder. """ - for r in range(self.resolutions): + for _ in range(self.resolutions): meta_graph, molecule = self.resolve() yield meta_graph, molecule def resolve_all(self): """ - Resolve all layers and return final molecule - as well as the last layer above that. + Resolve all layers and return final moleculs as well as the previous + resolution graph. """ - *_, (meta_graph, graph) = resolver.resolve_iter() + *_, (meta_graph, graph) = self.resolve_iter() return meta_graph, graph + + @classmethod + def from_string(cls, cgsmiles_str, last_all_atom=True): + """ + Initiate a MoleculeResolver instance from a cgsmiles string. + + Parameters + ---------- + cgsmiles_str: str + last_all_atom: bool + if the last resolution is all-atom and is read using pysmiles + + Returns + ------- + :class:`MoleculeResolver` + """ + # here we figure out how many resolutions we are dealing with + elements = re.findall(r"\{[^\}]+\}", cgsmiles_str) + # the first one describes our lowest resolution + molecule = read_cgsmiles(elements[0]) + # the rest are fragment lists + fragment_dicts = cls.read_fragment_strings(elements[1:], + last_all_atom=last_all_atom) + resolver_obj = cls(molecule_graph=molecule, + fragment_dicts=fragment_dicts, + last_all_atom=last_all_atom) + return resolver_obj + + @classmethod + def from_graph(cls, cgsmiles_str, meta_graph, last_all_atom=True): + """ + Initiate a MoleculeResolver instance from a cgsmiles string. + + Parameters + ---------- + cgsmiles_str: str + meta_graph: nx.Graph + a graph describing the lowest resolution. All nodes must have the + fragname attribute set. + last_all_atom: bool + if the last resolution is all-atom and is read using pysmiles + + Returns + ------- + :class:`MoleculeResolver` + """ + # here we figure out how many resolutions we are dealing with + elements = re.findall(r"\{[^\}]+\}", cgsmiles_str) + # all elements are are fragment lists + fragment_dicts = cls.read_fragment_strings(elements, + last_all_atom=last_all_atom) + if len(nx.get_node_attributes(meta_graph, 'fragname')) != len(meta_graph.nodes): + msg = "All nodes must have the fragname attribute set." + raise IOError(msg) + + resolver_obj = cls(molecule_graph=meta_graph, + fragment_dicts=fragment_dicts, + last_all_atom=last_all_atom) + + return resolver_obj + + @classmethod + def from_fragment_dicts(cls, cgsmiles_str, fragment_dicts, last_all_atom=True): + """ + Initiate a MoleculeResolver instance from a cgsmiles string. + + Parameters + ---------- + cgsmiles_str: str + fragment_dicts: list[dict[str, nx.Graph]] + a dict of fragment graphs per resolution. Each graph must have the + same attributes as returned by the `cgsmiles.read_fragments` + function. + last_all_atom: bool + if the last resolution is all-atom and is read using pysmiles + + Returns + ------- + :class:`MoleculeResolver` + """ + # here we figure out how many resolutions we are dealing with + elements = re.findall(r"\{[^\}]+\}", cgsmiles_str) + # the first one describes our lowest resolution + molecule = read_cgsmiles(elements[0]) + resolver_obj = cls(molecule_graph=molecule, + fragment_dicts=fragment_dicts, + last_all_atom=last_all_atom) + return resolver_obj diff --git a/cgsmiles/tests/test_layering.py b/cgsmiles/tests/test_layering.py index 96aec5a..a6ce5d9 100644 --- a/cgsmiles/tests/test_layering.py +++ b/cgsmiles/tests/test_layering.py @@ -35,7 +35,7 @@ def _node_match(n1, n2): return n1["fragname"] == n2["fragname"] cgsmiles_str = cgsmiles_str.strip().replace('\n','').replace(' ','') - resolver = MoleculeResolver(cgsmiles_str, last_all_atom=False) + resolver = MoleculeResolver.from_string(cgsmiles_str, last_all_atom=False) for (low_graph, high_graph), ref_str in zip(resolver.resolve_iter(), ref_strings): ref_graph = cgsmiles.read_cgsmiles(ref_str) nx.is_isomorphic(ref_graph, high_graph, node_match=_node_match) diff --git a/cgsmiles/tests/test_molecule_resolve.py b/cgsmiles/tests/test_molecule_resolve.py index 4dcaf82..b424ee0 100644 --- a/cgsmiles/tests/test_molecule_resolve.py +++ b/cgsmiles/tests/test_molecule_resolve.py @@ -198,7 +198,7 @@ def test_match_bonding_descriptors(bonds_source, bonds_target, edge, btypes): (9, 11), (9, 10), (11, 13), (11, 12), (13, 14)]), )) def test_all_atom_resolve_molecule(smile, ref_frags, elements, ref_edges): - meta_mol, molecule = MoleculeResolver(smile).resolve() + meta_mol, molecule = MoleculeResolver.from_string(smile).resolve() # loop and compare fragments first for node, ref in zip(meta_mol.nodes, ref_frags): @@ -236,14 +236,14 @@ def test_resolve_cases(case, cgsmiles_str, ref_string): elements = re.findall(r"\{[^\}]+\}", cgsmiles_str) if case == 1: fragment_dict = read_fragments(elements[-1], all_atom=False) - meta_mol, molecule = MoleculeResolver(fragment_dicts=[fragment_dict], - pattern=elements[0], - last_all_atom=False).resolve() + meta_mol, molecule = MoleculeResolver.from_fragment_dicts(fragment_dicts=[fragment_dict], + cgsmiles_str=elements[0], + last_all_atom=False).resolve() elif case == 2: meta_input = read_cgsmiles(elements[0]) - meta_mol, molecule = MoleculeResolver(meta_graph=meta_input, - pattern=elements[1], - last_all_atom=False).resolve() + meta_mol, molecule = MoleculeResolver.from_graph(meta_graph=meta_input, + cgsmiles_str=elements[1], + last_all_atom=False).resolve() ref_graph = read_cgsmiles(ref_string) def _atomname_match(n1, n2): From ddf0aa834c10e46d4050636df799a73a276f29ce Mon Sep 17 00:00:00 2001 From: "Dr. Fabian Grunewald" <32294573+fgrunewald@users.noreply.github.com> Date: Tue, 9 Jul 2024 13:26:01 +0200 Subject: [PATCH 14/16] Apply suggestions from code review Co-authored-by: Peter C Kroon --- cgsmiles/resolve.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 3078fda..383faaa 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -193,7 +193,6 @@ def read_fragment_strings(fragment_strings, last_all_atom=True): fragment_dicts = [] for idx, fragment_str in enumerate(fragment_strings): all_atom = (idx == len(fragment_strings) - 1 and last_all_atom) - print(idx == len(fragment_strings) - 1, last_all_atom) f_dict = read_fragments(fragment_str, all_atom=all_atom) fragment_dicts.append(f_dict) return fragment_dicts @@ -411,7 +410,7 @@ def from_graph(cls, cgsmiles_str, meta_graph, last_all_atom=True): # all elements are are fragment lists fragment_dicts = cls.read_fragment_strings(elements, last_all_atom=last_all_atom) - if len(nx.get_node_attributes(meta_graph, 'fragname')) != len(meta_graph.nodes): + if all('fragname' in meta_graph.nodes[n] for n in meta_graph): msg = "All nodes must have the fragname attribute set." raise IOError(msg) From 9c8e40d5dd9ca0660436865447da639c36540653 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 9 Jul 2024 13:28:39 +0200 Subject: [PATCH 15/16] fix typo --- cgsmiles/resolve.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index 383faaa..f0c3abc 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -410,7 +410,7 @@ def from_graph(cls, cgsmiles_str, meta_graph, last_all_atom=True): # all elements are are fragment lists fragment_dicts = cls.read_fragment_strings(elements, last_all_atom=last_all_atom) - if all('fragname' in meta_graph.nodes[n] for n in meta_graph): + if not all('fragname' in meta_graph.nodes[n] for n in meta_graph.nodes): msg = "All nodes must have the fragname attribute set." raise IOError(msg) From 18685d67fa42748f36aa513e048c7c48a87ff537 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 9 Jul 2024 13:31:53 +0200 Subject: [PATCH 16/16] adjust doc strings and add error message --- cgsmiles/resolve.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py index f0c3abc..1895da3 100644 --- a/cgsmiles/resolve.py +++ b/cgsmiles/resolve.py @@ -390,7 +390,8 @@ def from_string(cls, cgsmiles_str, last_all_atom=True): @classmethod def from_graph(cls, cgsmiles_str, meta_graph, last_all_atom=True): """ - Initiate a MoleculeResolver instance from a cgsmiles string. + Initiate a MoleculeResolver instance from a cgsmiles string + and a `meta_graph` that describes the lowest resolution. Parameters ---------- @@ -423,7 +424,8 @@ def from_graph(cls, cgsmiles_str, meta_graph, last_all_atom=True): @classmethod def from_fragment_dicts(cls, cgsmiles_str, fragment_dicts, last_all_atom=True): """ - Initiate a MoleculeResolver instance from a cgsmiles string. + Initiate a MoleculeResolver instance from a cgsmiles string, describing + one molecule and fragment_dicts containing fragments for each resolution. Parameters ---------- @@ -441,6 +443,10 @@ def from_fragment_dicts(cls, cgsmiles_str, fragment_dicts, last_all_atom=True): """ # here we figure out how many resolutions we are dealing with elements = re.findall(r"\{[^\}]+\}", cgsmiles_str) + if len(elements) > 1: + msg = ("Your cgsmiles string can only describe one " + "resolution of a molecule when using this function.") + raise IOError(msg) # the first one describes our lowest resolution molecule = read_cgsmiles(elements[0]) resolver_obj = cls(molecule_graph=molecule,