keep working later

ipqa-research · Oct 7, 2024 · d64e2d4 · d64e2d4
1 parent 2b7c56c
commit d64e2d4
Show file tree

Hide file tree

Showing 5 changed files with 122 additions and 144 deletions.
diff --git a/ugropy/__init__.py b/ugropy/__init__.py
@@ -5,7 +5,7 @@
 from .fragmentation_models.fragmentation_model import FragmentationModel
 from .fragmentation_models.implementations.unifac import unifac
 
-#from .groups import Groups
+# from .groups import Groups
 
 
 __all__ = [

diff --git a/ugropy/core/__init__.py b/ugropy/core/__init__.py
@@ -3,14 +3,12 @@
 FragmentationModel subgroups detection functions.
 """
 
-from .checks import (
-    check_has_overlapping_groups
-)
+from .checks import FragmentationSolutionChecker
 
 from .get_rdkit_object import instantiate_mol_object
 
 
 __all__ = [
-    "check_has_overlapping_groups",
+    "FragmentationSolutionChecker",
     "instantiate_mol_object",
 ]
diff --git a/ugropy/core/checks.py b/ugropy/core/checks.py
@@ -4,123 +4,100 @@
 algorithm to obtain the molecule's FragmentationModel subgroups.
 """
 
+from abc import ABC
+
 import numpy as np
 
-from rdkit import Chem
+import pandas as pd
 
-# from ugropy.fragmentation_models.fragmentation_model import FragmentationModel
-
-
-# def check_has_molecular_weight_right(
-#     mol_object: Chem.rdchem.Mol,
-#     mol_subgroups: dict,
-#     model: FragmentationModel,
-# ) -> bool:
-#     """Check the molecular weight of the molecule using its functional groups.
-
-#     Compares the RDKit molecular weight of the molecule to the computed
-#     molecular weight from the functional groups. Returns True if both molecular
-#     weights are equal with 0.5 u (half hydrogen atom) as atol of
-#     numpy.allclose(). Also, the method will check if the molecule has negative
-#     occurrences on its functional groups, also returning False in that case.
-
-#     Parameters
-#     ----------
-#     mol_object : Chem.rdchem.Mol
-#         RDKit Chem object
-#     mol_subgroups : dict
-#         FragmentationModel subgroups of the mol_object
-#     model: FragmentationModel
-#         FragmentationModel object.
-
-#     Returns
-#     -------
-#     bool
-#         True if RDKit and ugropy molecular weight are equal with a tolerance.
-#     """
-#     # check for negative occurrences
-#     if not all(occurrence > 0 for occurrence in mol_subgroups.values()):
-#         return False
-
-#     # rdkit molecular weight
-#     rdkit_mw = Descriptors.MolWt(mol_object)
-
-#     # Molecular weight from functional groups
-#     mws = model.subgroups.loc[
-#         list(mol_subgroups.keys()), "molecular_weight"
-#     ].to_numpy()
-
-#     func_group_mw = np.dot(mws, list(mol_subgroups.values()))
-
-#     return np.allclose(rdkit_mw, func_group_mw, atol=0.5)
-
-
-# def check_can_fit_atoms(
-#     mol_object: Chem.rdchem.Mol,
-#     mol_subgroups: dict,
-#     model: FragmentationModel,
-# ) -> bool:
-#     """Check if a solution can be fitted in the mol_object atoms.
-
-#     Parameters
-#     ----------
-#     mol_object : Chem.rdchem.Mol
-#         RDKit Mol object.
-#     mol_subgroups : dict
-#         Subgroups of mol_object.
-#     model: FragmentationModel
-#         FragmentationModel object.
-
-#     Returns
-#     -------
-#     bool
-#         True if the solution can be fitted.
-#     """
-#     if fit_atoms(mol_object, mol_subgroups, model):
-#         return True
-#     else:
-#         return False
-
-
-def check_has_overlapping_groups(
-    mol_object: Chem.rdchem.Mol,
-    mol_subgroups: dict,
-) -> tuple[bool, np.ndarray]:
-    """Check if the groups detection overlapping groups.
-
-    Parameters
-    ----------
-    mol_object : Chem.rdchem.Mol
-        RDKit Mol object.
-    mol_subgroups : dict
-        Subgroups of mol_object with the atoms indexes of each detection.
-    model: FragmentationModel
-        FragmentationModel object.
-
-    Returns
-    -------
-    tuple[bool, np.ndarray]
-        True if the groups detection has overlapping groups and the indexes of
-        the overlapped atoms.
-    """
-    n_atoms = mol_object.GetNumAtoms()
-
-    # Count the number of times an atom is in a group
-    atoms = np.zeros(n_atoms)
-
-    for indexes in mol_subgroups.values():
-        np.add.at(atoms, np.array(indexes).flatten(), 1)
-
-    overlapped_atoms = np.argwhere(atoms > 1).flatten()
-
-    # Separate the fragments that participate in the overlapped atoms
-    fragments = {}
-
-    for oatom in overlapped_atoms:
-        ...
-
-
-    if np.size(overlapped_atoms) > 0:
-        return True, overlapped_atoms
-    else:
-        return False, np.array([])
+from rdkit import Chem
+from rdkit.Chem import Descriptors
+
+
+class FragmentationSolutionChecker(ABC):
+    def __init__(self, mol_subgroups: pd.DataFrame) -> None:
+        self.mol_subgroups = mol_subgroups
+
+    def check_atoms_fragments_presence(
+        self, molecule: Chem.rdchem.Mol, fragments: dict
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """Find overlapped atoms and free atoms.
+
+        Check the detected fragments to find the atoms that appear in more
+        than one fragment (overlapping), and the atoms that are not present in
+        any fragment (free atoms). Returns two np.ndarray with the indexes of
+        the overlapping and free atoms.
+
+        Example of a `fragments` dictionary that does not present overlapping
+        atoms:
+
+        N-hexane:
+
+        {
+            'CH3_0': (0,),
+            'CH3_1': (5,),
+            'CH2_0': (1,),
+            'CH2_1': (2,),
+            'CH2_2': (3,),
+            'CH2_3': (4,)
+        }
+
+        Example of a `fragments` dictionary that presents overlapping atoms:
+
+        Toluene:
+
+        {
+            'CH3_0': (0,),
+            'ACH_0': (2,),
+            'ACH_1': (3,),
+            'ACH_2': (4,),
+            'ACH_3': (5,),
+            'ACH_4': (6,),
+            'AC_0': (1,),
+            'ACCH3_0': (1, 0)
+        }
+
+        Parameters
+        ----------
+        molecule : Chem.rdchem.Mol
+            RDKit molecule object.
+        fragments : dict
+            Dictionary containing the fragments detected in the molecule. The
+            keys are the group names and the values are the indexes of the
+            atoms in the group.
+
+        Returns
+        -------
+        tuple[np.ndarray, np.ndarray]
+            Overlapping atoms indexes and free atoms indexes.
+        """
+        n_atoms = molecule.GetNumAtoms()
+
+        # Count the number of times an atom is in a group. Also find the atoms
+        # that are not present in any fragment.
+        atoms = np.zeros(n_atoms, dtype=int)
+
+        for indexes in fragments.values():
+            np.add.at(atoms, np.array(indexes).flatten(), 1)
+
+        overlapped_atoms = np.argwhere(atoms > 1).flatten()
+        free_atoms = np.argwhere(atoms == 0).flatten()
+
+        return overlapped_atoms, free_atoms
+
+
+    # def check_has_molecular_weight_right(
+    #     self, mol_object: Chem.rdchem.Mol, fragments: dict
+    # ) -> bool:
+
+    #     # rdkit molecular weight
+    #     rdkit_mw = Descriptors.MolWt(mol_object)
+
+    #     # Molecular weight from functional groups
+    #     mws = self.mol_subgroups.loc[
+    #         list(fragments.keys()), "molecular_weight"
+    #     ].to_numpy()
+
+    #     func_group_mw = np.dot(mws, list(mol_subgroups.values()))
+
+    #     return np.allclose(rdkit_mw, func_group_mw, atol=0.5)
diff --git a/ugropy/fragmentation_models/__init__.py b/ugropy/fragmentation_models/__init__.py
@@ -2,7 +2,8 @@
 
 from .fragmentation_model import FragmentationModel
 from .gibbs_model import GibbsModel
-#from .joback import Joback
+
+# from .joback import Joback
 from . import implementations
 
 

diff --git a/ugropy/fragmentation_models/fragmentation_model.py b/ugropy/fragmentation_models/fragmentation_model.py
@@ -4,7 +4,7 @@
 FragmentationModule class.
 """
 
-from typing import Union
+from typing import List, Union
 
 import pandas as pd
 
@@ -13,11 +13,11 @@
 
 import numpy as np
 
-from ugropy.core.checks import check_has_overlapping_groups
+from ugropy.core.checks import FragmentationSolutionChecker
 from ugropy.core.get_rdkit_object import instantiate_mol_object
 
 
-class FragmentationModel:
+class FragmentationModel(FragmentationSolutionChecker):
     """FragmentationModel class.
 
     All ugropy supported models are an instance of this class. This class must
@@ -57,7 +57,7 @@ def __init__(
         self.detection_mols = {}
 
         for group, row in self.subgroups.iterrows():
-            self.detection_mols[group] = (Chem.MolFromSmarts(row["smarts"]))
+            self.detection_mols[group] = Chem.MolFromSmarts(row["smarts"])
 
     def get_groups(
         self,
@@ -66,46 +66,48 @@ def get_groups(
         ilp_solver: str = "cbc",
     ) -> "FragmentationResult":
 
-        # RDKit Mol object
+        # Instantiate a RDKit Mol object
         mol_object = instantiate_mol_object(identifier, identifier_type)
 
         # Direct detection of fragments presence and its atoms indexes
         detections = self.detect_fragments(mol_object)
 
         # First return
-        if detections == {}:  # No groups detected
-            return self.set_fragmentation_result(mol_object, {}, {})
+        # No groups have been detected, a strange but possible case. We have
+        # a fast path to return the result and avoid the rest of the code.
+        if detections == {}:
+            return self.set_fragmentation_result(mol_object, [{}])
 
-        # Check overlapping groups
-        has_overlap, overlapping_atoms = check_has_overlapping_groups(
+        # Check overlapping atoms and free atoms
+        overlapping_atoms, free_atoms = self.check_atoms_fragments_presence(
             mol_object, detections
         )
 
         # Second return
-        if not has_overlap:
-            return self.set_fragmentation_result(mol_object, detections, overlapping_atoms)
-
+        # If there are free atoms in the molecule, we can't fragment it with
+        # the current model.
+        if np.size(free_atoms) > 0:
+            return self.set_fragmentation_result(mol_object, [{}])
 
     def set_fragmentation_result(
         self,
         molecule: Chem.rdchem.Mol,
-        subgroups_occurrences: dict,
-        subgroups_atoms_indexes: dict,
+        solutions_fragments: List[dict],
     ) -> "FragmentationResult":
 
-        result = FragmentationResult(
-            molecule, subgroups_occurrences, subgroups_atoms_indexes
-        )
+        # result = FragmentationResult(
+        #     molecule, subgroups_occurrences, subgroups_atoms_indexes
+        # )
 
-        return result
+        return []
 
     def detect_fragments(self, molecule: Chem.rdchem.Mol) -> dict:
         """Detect all the fragments in the molecule.
 
         Return a dictionary with the detected fragments as keys and a tuple
         with the atoms indexes of the fragment as values. For example, n-hexane
         for the UNIFAC model will return:
-        
+
         {
             'CH3_0': (0,),
             'CH3_1': (5,),
@@ -115,7 +117,7 @@ def detect_fragments(self, molecule: Chem.rdchem.Mol) -> dict:
             'CH2_3': (4,)
         }
 
-        You may note that multiple occurrence of a fragment name will be 
+        You may note that multiple occurrences of a fragment name will be
         indexed. The convention is always: <fragment_name>_i where `i` is the
         index of the occurrence.
 
@@ -130,7 +132,7 @@ def detect_fragments(self, molecule: Chem.rdchem.Mol) -> dict:
             Detected fragments in the molecule.
         """
         detected_fragments = {}
-        
+
         for fragment_name, mol in self.detection_mols.items():
             matches = molecule.GetSubstructMatches(mol)