Skip to content

Commit

Permalink
keep working later
Browse files Browse the repository at this point in the history
  • Loading branch information
SalvadorBrandolin committed Oct 7, 2024
1 parent 2b7c56c commit d64e2d4
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 144 deletions.
2 changes: 1 addition & 1 deletion ugropy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .fragmentation_models.fragmentation_model import FragmentationModel
from .fragmentation_models.implementations.unifac import unifac

#from .groups import Groups
# from .groups import Groups


__all__ = [
Expand Down
6 changes: 2 additions & 4 deletions ugropy/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
FragmentationModel subgroups detection functions.
"""

from .checks import (
check_has_overlapping_groups
)
from .checks import FragmentationSolutionChecker

from .get_rdkit_object import instantiate_mol_object


__all__ = [
"check_has_overlapping_groups",
"FragmentationSolutionChecker",
"instantiate_mol_object",
]
211 changes: 94 additions & 117 deletions ugropy/core/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,123 +4,100 @@
algorithm to obtain the molecule's FragmentationModel subgroups.
"""

from abc import ABC

import numpy as np

from rdkit import Chem
import pandas as pd

# from ugropy.fragmentation_models.fragmentation_model import FragmentationModel


# def check_has_molecular_weight_right(
# mol_object: Chem.rdchem.Mol,
# mol_subgroups: dict,
# model: FragmentationModel,
# ) -> bool:
# """Check the molecular weight of the molecule using its functional groups.

# Compares the RDKit molecular weight of the molecule to the computed
# molecular weight from the functional groups. Returns True if both molecular
# weights are equal with 0.5 u (half hydrogen atom) as atol of
# numpy.allclose(). Also, the method will check if the molecule has negative
# occurrences on its functional groups, also returning False in that case.

# Parameters
# ----------
# mol_object : Chem.rdchem.Mol
# RDKit Chem object
# mol_subgroups : dict
# FragmentationModel subgroups of the mol_object
# model: FragmentationModel
# FragmentationModel object.

# Returns
# -------
# bool
# True if RDKit and ugropy molecular weight are equal with a tolerance.
# """
# # check for negative occurrences
# if not all(occurrence > 0 for occurrence in mol_subgroups.values()):
# return False

# # rdkit molecular weight
# rdkit_mw = Descriptors.MolWt(mol_object)

# # Molecular weight from functional groups
# mws = model.subgroups.loc[
# list(mol_subgroups.keys()), "molecular_weight"
# ].to_numpy()

# func_group_mw = np.dot(mws, list(mol_subgroups.values()))

# return np.allclose(rdkit_mw, func_group_mw, atol=0.5)


# def check_can_fit_atoms(
# mol_object: Chem.rdchem.Mol,
# mol_subgroups: dict,
# model: FragmentationModel,
# ) -> bool:
# """Check if a solution can be fitted in the mol_object atoms.

# Parameters
# ----------
# mol_object : Chem.rdchem.Mol
# RDKit Mol object.
# mol_subgroups : dict
# Subgroups of mol_object.
# model: FragmentationModel
# FragmentationModel object.

# Returns
# -------
# bool
# True if the solution can be fitted.
# """
# if fit_atoms(mol_object, mol_subgroups, model):
# return True
# else:
# return False


def check_has_overlapping_groups(
    mol_object: Chem.rdchem.Mol,
    mol_subgroups: dict,
) -> tuple[bool, np.ndarray]:
    """Check whether the detected groups overlap on any atom.

    An atom is considered overlapped when it appears in more than one of the
    detections stored in `mol_subgroups`.

    Parameters
    ----------
    mol_object : Chem.rdchem.Mol
        RDKit Mol object.
    mol_subgroups : dict
        Subgroups of mol_object with the atoms indexes of each detection.

    Returns
    -------
    tuple[bool, np.ndarray]
        True if the groups detection has overlapping groups, and the indexes
        of the overlapped atoms (an empty array when there is no overlap).
    """
    n_atoms = mol_object.GetNumAtoms()

    # Count the number of times an atom is in a group.
    atoms = np.zeros(n_atoms, dtype=int)

    for indexes in mol_subgroups.values():
        # Force an integer dtype: an empty detection would otherwise become a
        # float array, which NumPy refuses to use as an index.
        np.add.at(atoms, np.asarray(indexes, dtype=int).ravel(), 1)

    overlapped_atoms = np.argwhere(atoms > 1).flatten()

    return overlapped_atoms.size > 0, overlapped_atoms
from rdkit import Chem
from rdkit.Chem import Descriptors


class FragmentationSolutionChecker(ABC):
    """Base class providing checks over detected molecule fragments.

    Parameters
    ----------
    mol_subgroups : pd.DataFrame
        Stored unchanged on the instance as `mol_subgroups`.
        NOTE(review): presumably the subgroups table of the fragmentation
        model; confirm against the subclasses.
    """

    def __init__(self, mol_subgroups: pd.DataFrame) -> None:
        self.mol_subgroups = mol_subgroups

    def check_atoms_fragments_presence(
        self, molecule: Chem.rdchem.Mol, fragments: dict
    ) -> tuple[np.ndarray, np.ndarray]:
        """Find overlapped atoms and free atoms.

        Check the detected fragments to find the atoms that appear in more
        than one fragment (overlapping), and the atoms that are not present in
        any fragment (free atoms). Returns two np.ndarray with the indexes of
        the overlapping and free atoms.

        Example of a `fragments` dictionary that does not present overlapping
        atoms:

        N-hexane:

        {
            'CH3_0': (0,),
            'CH3_1': (5,),
            'CH2_0': (1,),
            'CH2_1': (2,),
            'CH2_2': (3,),
            'CH2_3': (4,)
        }

        Example of a `fragments` dictionary that presents overlapping atoms:

        Toluene:

        {
            'CH3_0': (0,),
            'ACH_0': (2,),
            'ACH_1': (3,),
            'ACH_2': (4,),
            'ACH_3': (5,),
            'ACH_4': (6,),
            'AC_0': (1,),
            'ACCH3_0': (1, 0)
        }

        Parameters
        ----------
        molecule : Chem.rdchem.Mol
            RDKit molecule object.
        fragments : dict
            Dictionary containing the fragments detected in the molecule. The
            keys are the group names and the values are the indexes of the
            atoms in the group.

        Returns
        -------
        tuple[np.ndarray, np.ndarray]
            Overlapping atoms indexes and free atoms indexes.
        """
        n_atoms = molecule.GetNumAtoms()

        # Count the number of times an atom is in a group. Atoms whose count
        # stays at zero are not present in any fragment.
        atoms = np.zeros(n_atoms, dtype=int)

        for indexes in fragments.values():
            # Force an integer dtype: an empty tuple of indexes would
            # otherwise become a float array, which NumPy refuses to use as
            # an index.
            np.add.at(atoms, np.asarray(indexes, dtype=int).ravel(), 1)

        overlapped_atoms = np.argwhere(atoms > 1).flatten()
        free_atoms = np.argwhere(atoms == 0).flatten()

        return overlapped_atoms, free_atoms


# def check_has_molecular_weight_right(
# self, mol_object: Chem.rdchem.Mol, fragments: dict
# ) -> bool:

# # rdkit molecular weight
# rdkit_mw = Descriptors.MolWt(mol_object)

# # Molecular weight from functional groups
# mws = self.mol_subgroups.loc[
# list(fragments.keys()), "molecular_weight"
# ].to_numpy()

# func_group_mw = np.dot(mws, list(mol_subgroups.values()))

# return np.allclose(rdkit_mw, func_group_mw, atol=0.5)
3 changes: 2 additions & 1 deletion ugropy/fragmentation_models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from .fragmentation_model import FragmentationModel
from .gibbs_model import GibbsModel
#from .joback import Joback

# from .joback import Joback
from . import implementations


Expand Down
44 changes: 23 additions & 21 deletions ugropy/fragmentation_models/fragmentation_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
FragmentationModule class.
"""

from typing import Union
from typing import List, Union

import pandas as pd

Expand All @@ -13,11 +13,11 @@

import numpy as np

from ugropy.core.checks import check_has_overlapping_groups
from ugropy.core.checks import FragmentationSolutionChecker
from ugropy.core.get_rdkit_object import instantiate_mol_object


class FragmentationModel:
class FragmentationModel(FragmentationSolutionChecker):
"""FragmentationModel class.
All ugropy supported models are an instance of this class. This class must
Expand Down Expand Up @@ -57,7 +57,7 @@ def __init__(
self.detection_mols = {}

for group, row in self.subgroups.iterrows():
self.detection_mols[group] = (Chem.MolFromSmarts(row["smarts"]))
self.detection_mols[group] = Chem.MolFromSmarts(row["smarts"])

def get_groups(
self,
Expand All @@ -66,46 +66,48 @@ def get_groups(
ilp_solver: str = "cbc",
) -> "FragmentationResult":

# RDKit Mol object
# Instantiate a RDKit Mol object
mol_object = instantiate_mol_object(identifier, identifier_type)

# Direct detection of fragments presence and its atoms indexes
detections = self.detect_fragments(mol_object)

# First return
if detections == {}: # No groups detected
return self.set_fragmentation_result(mol_object, {}, {})
# No groups have been detected, a strange but possible case. We have
# a fast path to return the result and avoid the rest of the code.
if detections == {}:
return self.set_fragmentation_result(mol_object, [{}])

# Check overlapping groups
has_overlap, overlapping_atoms = check_has_overlapping_groups(
# Check overlapping atoms and free atoms
overlapping_atoms, free_atoms = self.check_atoms_fragments_presence(
mol_object, detections
)

# Second return
if not has_overlap:
return self.set_fragmentation_result(mol_object, detections, overlapping_atoms)

# If there are free atoms in the molecule, we can't fragment it with the
# current model.
if np.size(free_atoms) > 0:
return self.set_fragmentation_result(mol_object, [{}])

def set_fragmentation_result(
self,
molecule: Chem.rdchem.Mol,
subgroups_occurrences: dict,
subgroups_atoms_indexes: dict,
solutions_fragments: List[dict],
) -> "FragmentationResult":

result = FragmentationResult(
molecule, subgroups_occurrences, subgroups_atoms_indexes
)
# result = FragmentationResult(
# molecule, subgroups_occurrences, subgroups_atoms_indexes
# )

return result
return []

def detect_fragments(self, molecule: Chem.rdchem.Mol) -> dict:
"""Detect all the fragments in the molecule.
Return a dictionary with the detected fragments as keys and a tuple
with the atoms indexes of the fragment as values. For example, n-hexane
for the UNIFAC model will return:
{
'CH3_0': (0,),
'CH3_1': (5,),
Expand All @@ -115,7 +117,7 @@ def detect_fragments(self, molecule: Chem.rdchem.Mol) -> dict:
'CH2_3': (4,)
}
You may note that multiple occurrence of a fragment name will be
You may note that multiple occurrence of a fragment name will be
indexed. The convention is always: <fragment_name>_i where `i` is the
index of the occurrence.
Expand All @@ -130,7 +132,7 @@ def detect_fragments(self, molecule: Chem.rdchem.Mol) -> dict:
Detected fragments in the molecule.
"""
detected_fragments = {}

for fragment_name, mol in self.detection_mols.items():
matches = molecule.GetSubstructMatches(mol)

Expand Down

0 comments on commit d64e2d4

Please sign in to comment.