gruenewald-lab · fgrunewald · Sep 18, 2024 · Jul 9, 2024 · Jul 9, 2024 · Jul 10, 2024
diff --git a/cgsmiles/cgsmiles_utils.py b/cgsmiles/cgsmiles_utils.py
@@ -1,28 +1,61 @@
 from collections import defaultdict
 import networkx as nx
 
-def find_complementary_bonding_descriptor(bonding_descriptor):
+def find_complementary_bonding_descriptor(bonding_descriptor, ellegible_descriptors=None):
     """
-    Given a bonding desciptor find the complementary match.
+    Given a bonding descriptor find the complementary match.
     In the case of '$' prefixed descriptors this is just
     the same and '>' or '<' get flipped to the other
     symbol.
+
+    Parameters
+    ----------
+    bonding_descriptor: str
+    ellegible_descriptors: list[str]
+        allowed descriptors to match; if None no membership check is done
+
+    Returns
+    -------
+    list[str]
     """
+    compl = []
+    if bonding_descriptor[0] == '$' and ellegible_descriptors:
+        for descriptor in ellegible_descriptors:
+            if descriptor[0] == '$' and descriptor[-1] == bonding_descriptor[-1]:
+                compl.append(descriptor)
+        return compl
+
     if bonding_descriptor[0] == '<':
         compl = '>' + bonding_descriptor[1:]
     elif bonding_descriptor[0] == '>':
         compl = '<' + bonding_descriptor[1:]
     else:
         compl = bonding_descriptor
-    return compl
+    # IOError is raised only when a candidate list was given and lacks compl
+    if ellegible_descriptors is not None and compl not in ellegible_descriptors:
+        msg = ("Bonding descriptor {compl} was not found in list of potential "
+               "matching descriptors.")
+        raise IOError(msg.format(compl=compl))
+
+    return [compl]
 
 def find_open_bonds(molecule, target_nodes=None):
     """
-    Collect all nodes which have an open bonding descriptor and store
-    them as keys with a list of nodes as values.
+    Collect all nodes which have an open bonding descriptor
+    and store them as keys with a list of nodes as values.
+
+    Parameters
+    ----------
+    molecule: nx.Graph
+    target_nodes: list[abc.hashable]
+        a list of node keys matching molecule
+
+    Return
+    ------
+    dict
     """
     if target_nodes is None:
-        target_nodes = list(molecule.nodes)
+        target_nodes = molecule
 
     open_bonds_by_descriptor = defaultdict(list)
     open_bonds = nx.get_node_attributes(molecule, 'bonding')

diff --git a/cgsmiles/pysmiles_utils.py b/cgsmiles/pysmiles_utils.py
@@ -16,7 +16,6 @@ def compute_mass(input_molecule):
         the atomic mass
     """
     molecule = input_molecule.copy()
-    print(molecule.nodes(data=True))
     # we need to add the hydrogen atoms
     # for computing the mass
     rebuild_h_atoms(molecule)

diff --git a/cgsmiles/resolve.py b/cgsmiles/resolve.py
@@ -9,7 +9,7 @@
                           set_atom_names_atomistic)
 from .pysmiles_utils import rebuild_h_atoms
 
-def compatible(left, right):
+def compatible(left, right, legacy=False):
     """
     Check bonding descriptor compatibility according
     to the BigSmiles syntax conventions.
@@ -23,14 +23,23 @@ def compatible(left, right):
     -------
     bool
     """
-    if left == right and left[0] not in '> <':
-        return True
-    l, r = left[0], right[0]
-    if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
-        return left[1:] == right[1:]
-    return False
-
-def match_bonding_descriptors(source, target, bond_attribute="bonding"):
+    if legacy:
+        if left == right and left[0] not in '> <':
+            return True
+        l, r = left[0], right[0]
+        if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
+            return left[1:] == right[1:]
+        return False
+    else:
+        if left[0] == right[0] == '$' or left[0] == right[0] == '!':
+            return True
+
+        l, r = left[0], right[0]
+        if (l, r) == ('<', '>') or (l, r) == ('>', '<'):
+            return True
+        return False
+
+def match_bonding_descriptors(source, target, bond_attribute="bonding", legacy=False):
     """
     Given a source and a target graph, which have bonding
     descriptors stored as node attributes, find a pair of
@@ -65,7 +74,7 @@ def match_bonding_descriptors(source, target, bond_attribute="bonding"):
             bond_targets = target_nodes[target_node]
             for bond_source in bond_sources:
                 for bond_target in bond_targets:
-                    if compatible(bond_source, bond_target):
+                    if compatible(bond_source, bond_target, legacy=legacy):
                         return ((source_node, target_node), (bond_source, bond_target))
     raise LookupError
 
@@ -141,7 +150,8 @@ class MoleculeResolver:
     def __init__(self,
                  molecule_graph,
                  fragment_dicts,
-                 last_all_atom=True):
+                 last_all_atom=True,
+                 legacy=False):
 
         """
         Parameters
@@ -167,6 +177,7 @@ def __init__(self,
         self.resolutions = len(self.fragment_dicts)
         new_names = nx.get_node_attributes(self.molecule, "fragname")
         nx.set_node_attributes(self.meta_graph, new_names, "atomname")
+        self.legacy = legacy
 
     @staticmethod
     def read_fragment_strings(fragment_strings, last_all_atom=True):
@@ -256,7 +267,8 @@ def edges_from_bonding_descrpt(self, all_atom=False):
                 node_graph = self.meta_graph.nodes[node]['graph']
                 try:
                     edge, bonding = match_bonding_descriptors(prev_graph,
-                                                              node_graph)
+                                                              node_graph,
+                                                              legacy=self.legacy)
                 except LookupError:
                     continue
                 # remove used bonding descriptors
@@ -361,7 +373,7 @@ def resolve_all(self):
         return meta_graph, graph
 
     @classmethod
-    def from_string(cls, cgsmiles_str, last_all_atom=True):
+    def from_string(cls, cgsmiles_str, last_all_atom=True, legacy=False):
         """
         Initiate a MoleculeResolver instance from a cgsmiles string.
 
@@ -384,7 +396,8 @@ def from_string(cls, cgsmiles_str, last_all_atom=True):
                                                    last_all_atom=last_all_atom)
         resolver_obj = cls(molecule_graph=molecule,
                            fragment_dicts=fragment_dicts,
-                           last_all_atom=last_all_atom)
+                           last_all_atom=last_all_atom,
+                           legacy=legacy)
         return resolver_obj
 
     @classmethod