diff --git a/tests/agani.ipynb b/tests/agani.ipynb index ff81f31..43b736d 100644 --- a/tests/agani.ipynb +++ b/tests/agani.ipynb @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -247,6 +247,9 @@ } ], "source": [ + "from rdkit.Chem import Draw\n", + "from rdkit import Chem\n", + "\n", "smiles = \"C1=CC2=NOC=C2C=C1\"\n", "\n", "mol = instantiate_mol_object(smiles, \"smiles\")\n", @@ -286,428 +289,22 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
12345678910...211212213214215216217218219220
SMILES
C=CC(=O)NCCC1=NC=NC10100100000...0000000000
C=CC(=O)NCCCN(C)C1200100000...0000000000
C=CC(=O)NCCCN(CC)CC2300100000...0000000000
C=CC(=O)NCCCN1CCOCC10200100000...0000000000
C=CC(=O)NCCCN1CCSCC10200100000...0000000000
..................................................................
OCCNC(=O)CCN1C=CN=C1N(=O)=O0300000000...0000000000
OCCNC(=O)CCN1C=NC(=N1)N(=O)=O0300000000...0000000000
OCCNC(=O)CN1C=CN=C1N(=O)=O0200000000...0000000000
OCCNC(=O)CN1C=NC(=N1)N(=O)=O0200000000...0000000000
OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC(Cc2ccccc2)C(=O)N3CCC(OCOC)CC3)C(C)C3840000000...0000000000
\n", - "

198 rows × 220 columns

\n", - "
" - ], "text/plain": [ - " 1 2 3 4 5 \\\n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 1 0 0 1 \n", - "C=CC(=O)NCCCN(C)C 1 2 0 0 1 \n", - "C=CC(=O)NCCCN(CC)CC 2 3 0 0 1 \n", - "C=CC(=O)NCCCN1CCOCC1 0 2 0 0 1 \n", - "C=CC(=O)NCCCN1CCSCC1 0 2 0 0 1 \n", - "... ... ... ... ... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 3 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 3 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 2 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 2 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 3 8 4 0 0 \n", - "\n", - " 6 7 8 9 10 \\\n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(C)C 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(CC)CC 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCOCC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCSCC1 0 0 0 0 0 \n", - "... ... ... ... ... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 0 0 0 0 \n", - "\n", - " ... 211 212 213 214 \\\n", - "SMILES ... \n", - "C=CC(=O)NCCC1=NC=NC1 ... 0 0 0 0 \n", - "C=CC(=O)NCCCN(C)C ... 0 0 0 0 \n", - "C=CC(=O)NCCCN(CC)CC ... 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCOCC1 ... 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCSCC1 ... 0 0 0 0 \n", - "... ... ... ... ... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O ... 0 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O ... 0 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O ... 0 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O ... 0 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... ... 0 0 0 0 \n", - "\n", - " 215 216 217 218 219 \\\n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(C)C 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(CC)CC 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCOCC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCSCC1 0 0 0 0 0 \n", - "... ... ... ... ... ... 
\n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 0 0 0 0 \n", - "\n", - " 220 \n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 \n", - "C=CC(=O)NCCCN(C)C 0 \n", - "C=CC(=O)NCCCN(CC)CC 0 \n", - "C=CC(=O)NCCCN1CCOCC1 0 \n", - "C=CC(=O)NCCCN1CCSCC1 0 \n", - "... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 \n", - "\n", - "[198 rows x 220 columns]" + "'BrC'" ] }, - "execution_count": 9, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[df[88]>0]" + "df[df[1]>0].index[0]" ] }, { @@ -719,7 +316,7 @@ "data": { "image/png": "", "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -734,6 +331,77 @@ "\n", "mol" ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"abdulelah_gani_frags/log_p.csv\", index_col=\"SMILES\", sep=\"|\", comment=\"?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "61\n", + "2\n", + "OCCN(C(=O)C)c1c(I)c(C(=O)NCC(O)CO)c(I)c(C(=O)NCC(O)CO)c1I\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "group = \"241\"\n", + "idx = 55\n", + "\n", + "cantidad = df[df[group]>0].shape[0]\n", + "ocurr = df[df[group]>0][group].iloc[idx]\n", + "smiles = df[df[group]>0].index[idx]\n", + "\n", + "print(cantidad)\n", + "print(ocurr)\n", + "print(smiles)\n", + "\n", + "mol = instantiate_mol_object(smiles, \"smiles\")\n", + "mol" + ] + }, + { + 
"cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.int64(3)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[group]>1][group].iloc[0]" + ] } ], "metadata": { diff --git a/tests/agani_s.ipynb b/tests/agani_s.ipynb new file mode 100644 index 0000000..8381bee --- /dev/null +++ b/tests/agani_s.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from ugropy import abdulelah_gani_s, instantiate_mol_object\n", + "\n", + "import pandas as pd\n", + "\n", + "import numpy as np\n", + "\n", + "from rdkit.Chem import Draw\n", + "from rdkit import Chem" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"abdulelah_gani_frags/log_p.csv\", index_col=\"SMILES\", sep=\"|\", comment=\"?\")\n", + "\n", + "primary = np.linspace(221, 350, 130, dtype=int).astype(str)\n", + "\n", + "df = df[primary]\n", + "df.rename(columns=lambda col: int(col) if col.isdigit() else col, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BrC1=CC2=C(S1)N3C(C)=NN=C3CN=C2c4ccccc4Cl\n", + "11\n", + "Subgrupos:\n", + "{'(N=C)cyc-CH3': 2}\n", + "Subgrupos por número:\n", + "{346: 2}\n", + "Gani: \n", + "{324: 1, 346: 1}\n" + ] + } + ], + "source": [ + "for idx, smiles in enumerate(df.index):\n", + " solution = abdulelah_gani_s.get_groups(smiles, \"smiles\")\n", + " \n", + " # Filtrar las columnas de la fila que no sean iguales a 0\n", + " row = df.loc[smiles][df.loc[smiles] != 0]\n", + " \n", + " # Convertir la fila filtrada a diccionario\n", + " row_dict = row.to_dict()\n", + "\n", + " # Verificar si ninguna solución tiene subgroups_numbers igual a row_dict\n", + " if 
solution.subgroups_numbers != row_dict:\n", + " print(smiles)\n", + " print(idx)\n", + " print(\"Subgrupos:\")\n", + " print(str(solution.subgroups)) # Convertir a string y unir con saltos de línea\n", + " print(\"Subgrupos por número:\")\n", + " print(str(solution.subgroups_numbers)) # Convertir a string y unir con saltos de línea\n", + " print(\"Gani: \")\n", + " print(row_dict)\n", + " \n", + " wrong_smiles = smiles\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'(N=C)cyc-CH3_0': (6, 7, 8), '(N=C)cyc-CH3_1': (9, 7, 8)}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol = instantiate_mol_object(wrong_smiles, \"smiles\")\n", + "\n", + "abdulelah_gani_s.detect_fragments(mol)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "column_sums = df.sum()\n", + "\n", + "# Identificar columnas cuya suma es igual a 0\n", + "columns_with_zero_sum = column_sums[column_sums == 0].index.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[225, 238, 256, 264, 276, 280, 308, 314, 342]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns_with_zero_sum" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ugropy", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ugropy/__init__.py b/ugropy/__init__.py index ee9588b..747a8d4 100644 --- a/ugropy/__init__.py +++ b/ugropy/__init__.py @@ -7,11 +7,11 @@ """ from .core import instantiate_mol_object -from .core.frag_classes.abdulelah_gani.abdulelah_gani_p import ( - AbdulelahGaniPrimaryModel, +from .core.frag_classes.abdulelah_gani.abdulelah_gani_pst import ( + AbdulelahGaniPSTModel, ) -from .core.frag_classes.abdulelah_gani.abdulelah_gani_p_result import ( - AGaniPFragmentationResult, +from .core.frag_classes.abdulelah_gani.abdulelah_gani_pst_result import ( + AGaniPSTFragmentationResult, ) from .core.frag_classes.base.fragmentation_model import ( FragmentationModel, @@ -29,6 +29,7 @@ from .core.ilp_solvers.ilp_solver import ILPSolver from .groups import Groups from .models.abdulelah_gani_pmod import abdulelah_gani_p +from .models.abdulelah_gani_smod import abdulelah_gani_s from .models.jobackmod import joback from .models.psrkmod import psrk from .models.unifacmod import unifac @@ -38,8 +39,8 @@ "constants", "writers", "instantiate_mol_object", - "AbdulelahGaniPrimaryModel", - "AGaniPFragmentationResult", + "AbdulelahGaniPSTModel", + "AGaniPSTFragmentationResult", "FragmentationModel", "FragmentationResult", "GibbsModel", @@ -48,6 +49,7 @@ "JobackFragmentationResult", "Groups", "abdulelah_gani_p", + "abdulelah_gani_s", "joback", "unifac", "psrk", diff --git a/ugropy/core/frag_classes/abdulelah_gani/__init__.py b/ugropy/core/frag_classes/abdulelah_gani/__init__.py index 19da812..7e94fde 100644 --- a/ugropy/core/frag_classes/abdulelah_gani/__init__.py +++ b/ugropy/core/frag_classes/abdulelah_gani/__init__.py @@ -1,6 +1,6 @@ """Abdulelah-Gani frag classes module.""" -from . import abdulelah_gani_p, abdulelah_gani_p_result +from . 
import abdulelah_gani_pst, abdulelah_gani_pst_result -__all__ = ["abdulelah_gani_p", "abdulelah_gani_p_result"] +__all__ = ["abdulelah_gani_pst", "abdulelah_gani_pst_result"] diff --git a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py similarity index 94% rename from ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py rename to ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py index f72b048..41a1c54 100644 --- a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py +++ b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py @@ -6,8 +6,8 @@ from rdkit import Chem -from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_p_result import ( - AGaniPFragmentationResult, +from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst_result import ( + AGaniPSTFragmentationResult, ) from ugropy.core.frag_classes.base.fragmentation_model import ( FragmentationModel, @@ -16,7 +16,7 @@ from ugropy.core.ilp_solvers.ilp_solver import ILPSolver -class AbdulelahGaniPrimaryModel(FragmentationModel): +class AbdulelahGaniPSTModel(FragmentationModel): """Abdulelah-Gani model dedicated to properties estimation models. 
Class to construct the primary structures detector for the Abdulelah-Gani @@ -48,13 +48,17 @@ def __init__( self, subgroups: pd.DataFrame, subgroups_info: pd.DataFrame, + allow_overlapping: bool = False, + allow_free_atoms: bool = False, ) -> None: super().__init__( subgroups=subgroups, - allow_overlapping=False, - fragmentation_result=AGaniPFragmentationResult, + allow_overlapping=allow_overlapping, + allow_free_atoms=allow_free_atoms, + fragmentation_result=AGaniPSTFragmentationResult, ) + self.subgroups_info = subgroups_info def get_groups( @@ -63,7 +67,7 @@ def get_groups( identifier_type: str = "name", solver: ILPSolver = DefaultSolver, search_multiple_solutions: bool = False, - ) -> Union[AGaniPFragmentationResult, List[AGaniPFragmentationResult]]: + ) -> Union[AGaniPSTFragmentationResult, List[AGaniPSTFragmentationResult]]: """Get the groups of a molecule. Parameters diff --git a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py similarity index 96% rename from ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py rename to ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py index 67a02b5..8b114c8 100644 --- a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py +++ b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py @@ -9,7 +9,7 @@ ) -class AGaniPFragmentationResult(FragmentationResult): +class AGaniPSTFragmentationResult(FragmentationResult): """Abdulelah-Gani primary group contribution properties estimator. 
Parameters diff --git a/ugropy/core/frag_classes/base/fragmentation_model.py b/ugropy/core/frag_classes/base/fragmentation_model.py index 213379f..af48b55 100644 --- a/ugropy/core/frag_classes/base/fragmentation_model.py +++ b/ugropy/core/frag_classes/base/fragmentation_model.py @@ -54,10 +54,12 @@ def __init__( self, subgroups: pd.DataFrame, allow_overlapping: bool = False, + allow_free_atoms: bool = False, fragmentation_result: FragmentationResult = FragmentationResult, ) -> None: self.subgroups = subgroups self.allow_overlapping = allow_overlapping + self.allow_free_atoms = allow_free_atoms self.fragmentation_result = fragmentation_result # Instantiate all de mol object from their SMARTS representation @@ -122,7 +124,7 @@ def get_groups( ) # If there is free atoms in the molecule can't fragment with the model - if np.size(free_atoms) > 0: + if np.size(free_atoms) > 0 and not self.allow_free_atoms: return self.set_fragmentation_result( mol, [{}], search_multiple_solutions, **kwargs ) diff --git a/ugropy/groupscsv/abdulelah_gani/secondary/info.csv b/ugropy/groupscsv/abdulelah_gani/secondary/info.csv new file mode 100644 index 0000000..ae2da41 --- /dev/null +++ b/ugropy/groupscsv/abdulelah_gani/secondary/info.csv @@ -0,0 +1,131 @@ +group|group_number +(CH3)2CH|221 +(CH3)3C|222 +CH(CH3)CH(CH3)|223 +CH(CH3)C(CH3)2|224 +C(CH3)2C(CH3)2|225 +CHn=CHm-CHp=CHk (k,m,n,p in 0..2)|226 +CH3-CHm=CHn (m,n in 0..2)|227 +CH2-CHm=CHn (m,n in 0..2)|228 +CHp-CHm=CHn (m,n in 0..2; p in 0..1)|229 +CHCHO or CCHO|230 +CH3COCH2|231 +CH3COCH or CH3COC|232 +CHCOOH or CCOOH|233 +CH3COOCH or CH3COOC|234 +CO-O-CO|235 +CHOH|236 +COH|237 +CH3COCHnOH (n in 0..2)|238 +NCCHOH or NCCOH|239 +OH-CHn-COO (n in 0..2)|240 +CHm(OH)CHn(OH) (m,n in 0..2)|241 +CHm(OH)CHn(NHp) (m,n,p in 0..2)|242 +CHm(NH2)CHn(NH2) (m,n in 0..2)|243 +CHm(NH)CHn(NH2) (m,n in 1..2)|244 +H2NCOCHnCHmCONH2 (m,n in 1..2)|245 +CHm(NHn)-COOH (m,n in 0..2)|246 +HOOC-CHn-COOH (n in 1..2)|247 +HOOC-CHn-CHm-COOH (n, m in 1..2)|248 
+HO-CHn-COOH (n in 1..2)|249 +NH2-CHn-CHm-COOH (n, m in 1..2)|250 +CH3-O-CHn-COOH (n in 1..2)|251 +HS-CH-COOH|252 +HS-CHn-CHm-COOH (n, m in 1..2)|253 +NC-CHn-CHm-CN (n, m in 1..2)|254 +OH-CHn-CHm-CN (n, m in 1..2)|255 +HS-CHn-CHm-SH (n, m in 1..2)|256 +COO-CHn-CHm-OOC (n, m in 1..2)|257 +OOC-CHm-CHm-COO (n, m in 1..2)|258 +NC-CHn-COO (n in 1..2)|259 +COCHnCOO (n in 1..2)|260 +CHm-O-CHn=CHp (m,n,p in 0..3)|261 +CHm=CHn-F (m,n in 0..2)|262 +CHm=CHn-Br (m,n in 0..2)|263 +CHm=CHn-I (m,n in 0..2)|264 +CHm=CHn-Cl (m,n in 0..2)|265 +CHm=CHn-CN (m,n in 0..2)|266 +CHn=CHm-COO-CHp (m,n,p in 0..3)|267 +CHm=CHn-CHO (m,n in 0..2)|268 +CHm=CHn-COOH (m,n in 0..2)|269 +aC-CHn-X (n in 1..2) X: Halogen|270 +aC-CHn-NHm (n in 1..2; m in 0..2))|271 +aC-CHn-O- (n in 1..2)|272 +aC-CHn-OH (n in 1..2)|273 +aC-CHn-CN (n in 1..2)|274 +aC-CHn-CHO (n in 1..2)|275 +aC-CHn-SH (n in 1..2)|276 +aC-CHn-COOH (n in 1..2)|277 +aC-CHn-CO- (n in 1..2)|278 +aC-CHn-S- (n in 1..2)|279 +aC-CHn-OOC-H (n in 1..2)|280 +aC-CHm-NO2 (n in 1..2)|281 +aC-CHn-CONH2 (n in 1..2)|282 +aC-CHn-OOC (n in 1..2)|283 +aC-CHn-COO (n in 1..2)|284 +aC-SO2-OH|285 +aC-CH(CH3)2|286 +aC-C(CH3)3|287 +aC-CF3|288 +(CHn=C)(cyc)-CHO (n in 0..2)|289 +(CHn=C)cyc-COO-CHm (n,m in 0..3)|290 +(CHn=C)cyc-CO- (n in 0..2)|291 +(CHn=C)cyc-CH3 (n in 0..2)|292 +(CHn=C)cyc-CH2 (n in 0..2)|293 +(CHn=C)cyc-CN (n in 0..2)|294 +(CHn=C)cyc-Cl (n in 0..2)|295 +CHcyc-CH3|296 +CHcyc-CH2|297 +CHcyc-CH|298 +CHcyc-C|299 +CHcyc-CH=CHn (n in 1..2)|300 +CHcyc-C=CHn (n in 1..2)|301 +CHcyc-Cl|302 +CHcyc-F|303 +CHcyc-OH|304 +CHcyc-NH2|305 +CHcyc-NH-CHn (n in 0..3)|306 +CHcyc-N-CHn (n in 0..3)|307 +CHcyc-SH|308 +CHcyc-CN|309 +CHcyc-COOH|310 +CHcyc-CO|311 +CHcyc-NO2|312 +CHcyc-S-|313 +CHcyc-CHO|314 +CHcyc-O-|315 +CHcyc-OOCH|316 +CHcyc-COO|317 +CHcyc-OOC|318 +Ccyc-CH3|319 +Ccyc-CH2|320 +Ccyc-OH|321 +>Ncyc-CH3|322 +>Ncyc-CH2|323 +AROMRINGs1s2|324 +AROMRINGs1s3|325 +AROMRINGs1s4|326 +AROMRINGs1s2s3|327 +AROMRINGs1s2s4|328 +AROMRINGs1s3s5|329 +AROMRINGs1s2s3s4|330 
+AROMRINGs1s2s3s5|331 +AROMRINGs1s2s4s5|332 +PYRIDINEs2|333 +PYRIDINEs3|334 +PYRIDINEs4|335 +PYRIDINEs2s3|336 +PYRIDINEs2s4|337 +PYRIDINEs2s5|338 +PYRIDINEs2s6|339 +PYRIDINEs3s4|340 +PYRIDINEs3s5|341 +PYRIDINEs2s3s6|342 +(CHn=CHm)cyc-COOH|343 +AROMRINGs1s2s3s4s5|344 +aC-NHCOCH2N|345 +(N=C)cyc-CH3|346 +aC-CONH(CH2)2N|347 +aC-SO2NHn (n>=0;n<3)|348 +aC-SO2NHn (n>=0;n<3)|349 +aC-SO2NHn (n>=0;n<3)|350 \ No newline at end of file diff --git a/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv b/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv index 96f4ade..9c10aaa 100644 --- a/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv +++ b/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv @@ -1,130 +1,131 @@ -(CH3)2CH -(CH3)3C -CH(CH3)CH(CH3) -CH(CH3)C(CH3)2 -C(CH3)2C(CH3)2 -CHn=CHm-CHp=CHk (k,m,n,p in 0..2) -CH3-CHm=CHn (m,n in 0..2) -CH2-CHm=CHn (m,n in 0..2) -CHp-CHm=CHn (m,n in 0..2; p in 0..1) -CHCHO or CCHO -CH3COCH2 -CH3COCH or CH3COC -CHCOOH or CCOOH -CH3COOCH or CH3COOC -CO-O-CO -CHOH -COH -CH3COCHnOH (n in 0..2) -NCCHOH or NCCOH -OH-CHn-COO (n in 0..2) -CHm(OH)CHn(OH) (m,n in 0..2) -CHm(OH)CHn(NHp) (m,n,p in 0..2) -CHm(NH2)CHn(NH2) (m,n in 0..2) -CHm(NH)CHn(NH2) (m,n in 1..2) -H2NCOCHnCHmCONH2 (m,n in 1..2) -CHm(NHn)-COOH (m,n in 0..2) -HOOC-CHn-COOH (n in 1..2) -HOOC-CHn-CHm-COOH (n, m in 1..2) -HO-CHn-COOH (n in 1..2) -NH2-CHn-CHm-COOH (n, m in 1..2) -CH3-O-CHn-COOH (n in 1..2) -HS-CH-COOH -HS-CHn-CHm-COOH (n, m in 1..2) -NC-CHn-CHm-CN (n, m in 1..2) -OH-CHn-CHm-CN (n, m in 1..2) -HS-CHn-CHm-SH (n, m in 1..2) -COO-CHn-CHm-OOC (n, m in 1..2) -OOC-CHm-CHm-COO (n, m in 1..2) -NC-CHn-COO (n in 1..2) -COCHnCOO (n in 1..2) -CHm-O-CHn=CHp (m,n,p in 0..3) -CHm=CHn-F (m,n in 0..2) -CHm=CHn-Br (m,n in 0..2) -CHm=CHn-I (m,n in 0..2) -CHm=CHn-Cl (m,n in 0..2) -CHm=CHn-CN (m,n in 0..2) -CHn=CHm-COO-CHp (m,n,p in 0..3) -CHm=CHn-CHO (m,n in 0..2) -CHm=CHn-COOH (m,n in 0..2) -aC-CHn-X (n in 1..2) X: Halogen -aC-CHn-NHm (n in 1..2; m in 0..2)) -aC-CHn-O- (n 
in 1..2) -aC-CHn-OH (n in 1..2) -aC-CHn-CN (n in 1..2) -aC-CHn-CHO (n in 1..2) -aC-CHn-SH (n in 1..2) -aC-CHn-COOH (n in 1..2) -aC-CHn-CO- (n in 1..2) -aC-CHn-S- (n in 1..2) -aC-CHn-OOC-H (n in 1..2) -aC-CHm-NO2 (n in 1..2) -aC-CHn-CONH2 (n in 1..2) -aC-CHn-OOC (n in 1..2) -aC-CHn-COO (n in 1..2) -aC-SO2-OH -aC-CH(CH3)2 -aC-C(CH3)3 -aC-CF3 -(CHn=C)(cyc)-CHO (n in 0..2) -(CHn=C)cyc-COO-CHm (n,m in 0..3) -(CHn=C)cyc-CO- (n in 0..2) -(CHn=C)cyc-CH3 (n in 0..2) -(CHn=C)cyc-CH2 (n in 0..2) -(CHn=C)cyc-CN (n in 0..2) -(CHn=C)cyc-Cl (n in 0..2) -CHcyc-CH3 -CHcyc-CH2 -CHcyc-CH -CHcyc-C -CHcyc-CH=CHn (n in 1..2) -CHcyc-C=CHn (n in 1..2) -CHcyc-Cl -CHcyc-F -CHcyc-OH -CHcyc-NH2 -CHcyc-NH-CHn (n in 0..3) -CHcyc-N-CHn (n in 0..3) -CHcyc-SH -CHcyc-CN -CHcyc-COOH -CHcyc-CO -CHcyc-NO2 -CHcyc-S- -CHcyc-CHO -CHcyc-O- -CHcyc-OOCH -CHcyc-COO -CHcyc-OOC -Ccyc-CH3 -Ccyc-CH2 -Ccyc-OH ->Ncyc-CH3 ->Ncyc-CH2 -AROMRINGs1s2 -AROMRINGs1s3 -AROMRINGs1s4 -AROMRINGs1s2s3 -AROMRINGs1s2s4 -AROMRINGs1s3s5 -AROMRINGs1s2s3s4 -AROMRINGs1s2s3s5 -AROMRINGs1s2s4s5 -PYRIDINEs2 -PYRIDINEs3 -PYRIDINEs4 -PYRIDINEs2s3 -PYRIDINEs2s4 -PYRIDINEs2s5 -PYRIDINEs2s6 -PYRIDINEs3s4 -PYRIDINEs3s5 -PYRIDINEs2s3s6 -(CHn=CHm)cyc-COOH -AROMRINGs1s2s3s4s5 -aC-NHCOCH2N -(N=C)cyc-CH3 -aC-CONH(CH2)2N -aC-SO2NHn (n>=0;n<3) -aC-SO2NHn (n>=0;n<3) -aC-SO2NHn (n>=0;n<3) \ No newline at end of file +group|smarts +(CH3)2CH|[CH;!R]([CH3])[CH3] +?(CH3)3C| +?CH(CH3)CH(CH3)| +?CH(CH3)C(CH3)2| +????C(CH3)2C(CH3)2| +?CHn=CHm-CHp=CHk (k,m,n,p in 0..2)| +?CH3-CHm=CHn (m,n in 0..2)| +?CH2-CHm=CHn (m,n in 0..2)| +?CHp-CHm=CHn (m,n in 0..2; p in 0..1)| +?CHCHO or CCHO| +?CH3COCH2| +?CH3COCH or CH3COC| +?CHCOOH or CCOOH| +?CH3COOCH or CH3COOC| +?CO-O-CO| +?CHOH| +?COH| +????CH3COCHnOH (n in 0..2)| +?NCCHOH or NCCOH| +?OH-CHn-COO (n in 0..2)| +?CHm(OH)CHn(OH) (m,n in 0..2)| +?CHm(OH)CHn(NHp) (m,n,p in 0..2)| +?CHm(NH2)CHn(NH2) (m,n in 0..2)| +?CHm(NH)CHn(NH2) (m,n in 1..2)| +?H2NCOCHnCHmCONH2 (m,n in 1..2)| +?CHm(NHn)-COOH (m,n in 0..2)| 
+?HOOC-CHn-COOH (n in 1..2)| +?HOOC-CHn-CHm-COOH (n, m in 1..2)| +?HO-CHn-COOH (n in 1..2)| +?NH2-CHn-CHm-COOH (n, m in 1..2)| +?CH3-O-CHn-COOH (n in 1..2)| +?HS-CH-COOH| +?HS-CHn-CHm-COOH (n, m in 1..2)| +?NC-CHn-CHm-CN (n, m in 1..2)| +?OH-CHn-CHm-CN (n, m in 1..2)| +????HS-CHn-CHm-SH (n, m in 1..2)| +?COO-CHn-CHm-OOC (n, m in 1..2)| +?OOC-CHm-CHm-COO (n, m in 1..2)| +?NC-CHn-COO (n in 1..2)| +?COCHnCOO (n in 1..2)| +?CHm-O-CHn=CHp (m,n,p in 0..3)| +?CHm=CHn-F (m,n in 0..2)| +CHm=CHn-Br (m,n in 0..2)|[CH0,CH1,CH2;!R]=[CH0,CH1,CH2;!R][Br] +????CHm=CHn-I (m,n in 0..2)| +?CHm=CHn-Cl (m,n in 0..2)| +?CHm=CHn-CN (m,n in 0..2)| +?CHn=CHm-COO-CHp (m,n,p in 0..3)| +?CHm=CHn-CHO (m,n in 0..2)| +?CHm=CHn-COOH (m,n in 0..2)| +?aC-CHn-X (n in 1..2) X: Halogen| +?aC-CHn-NHm (n in 1..2; m in 0..2))| +?aC-CHn-O- (n in 1..2)| +?aC-CHn-OH (n in 1..2)| +?aC-CHn-CN (n in 1..2)| +?aC-CHn-CHO (n in 1..2)| +????aC-CHn-SH (n in 1..2)| +?aC-CHn-COOH (n in 1..2)| +?aC-CHn-CO- (n in 1..2)| +?aC-CHn-S- (n in 1..2)| +????aC-CHn-OOC-H (n in 1..2)| +?aC-CHm-NO2 (n in 1..2)| +?aC-CHn-CONH2 (n in 1..2)| +?aC-CHn-OOC (n in 1..2)| +?aC-CHn-COO (n in 1..2)| +?aC-SO2-OH| +?aC-CH(CH3)2| +?aC-C(CH3)3| +?aC-CF3| +?(CHn=C)(cyc)-CHO (n in 0..2)| +?(CHn=C)cyc-COO-CHm (n,m in 0..3)| +?(CHn=C)cyc-CO- (n in 0..2)| +?(CHn=C)cyc-CH3 (n in 0..2)| +?(CHn=C)cyc-CH2 (n in 0..2)| +?(CHn=C)cyc-CN (n in 0..2)| +?(CHn=C)cyc-Cl (n in 0..2)| +?CHcyc-CH3| +?CHcyc-CH2| +?CHcyc-CH| +?CHcyc-C| +CHcyc-CH=CHn (n in 1..2)|[CH;R][CH]=[CH1,CH2;!R] +?CHcyc-C=CHn (n in 1..2)| +?CHcyc-Cl| +?CHcyc-F| +?CHcyc-OH| +?CHcyc-NH2| +?CHcyc-NH-CHn (n in 0..3)| +?CHcyc-N-CHn (n in 0..3)| +????CHcyc-SH| +?CHcyc-CN| +?CHcyc-COOH| +?CHcyc-CO| +CHcyc-NO2|[CH;R][N+](=O)[O-] +?CHcyc-S-| +????CHcyc-CHO| +?CHcyc-O-| +?CHcyc-OOCH| +?CHcyc-COO| +?CHcyc-OOC| +?Ccyc-CH3| +?Ccyc-CH2| +?Ccyc-OH| +?>Ncyc-CH3| +?>Ncyc-CH2| +?AROMRINGs1s2| +?AROMRINGs1s3| +?AROMRINGs1s4| +?AROMRINGs1s2s3| +?AROMRINGs1s2s4| +?AROMRINGs1s3s5| +?AROMRINGs1s2s3s4| 
+?AROMRINGs1s2s3s5| +?AROMRINGs1s2s4s5| +?PYRIDINEs2| +?PYRIDINEs3| +?PYRIDINEs4| +?PYRIDINEs2s3| +?PYRIDINEs2s4| +?PYRIDINEs2s5| +?PYRIDINEs2s6| +?PYRIDINEs3s4| +?PYRIDINEs3s5| +????PYRIDINEs2s3s6| +?(CHn=CHm)cyc-COOH| +?AROMRINGs1s2s3s4s5| +?aC-NHCOCH2N| +(N=C)cyc-CH3|[#7;R]@[#6;R][CH3] +?aC-CONH(CH2)2N| +?aC-SO2NHn (n>=0;n<3)| +?aC-SO2NHn (n>=0;n<3)| +?aC-SO2NHn (n>=0;n<3)| \ No newline at end of file diff --git a/ugropy/models/abdulelah_gani_pmod.py b/ugropy/models/abdulelah_gani_pmod.py index ce5b20a..9f74e32 100644 --- a/ugropy/models/abdulelah_gani_pmod.py +++ b/ugropy/models/abdulelah_gani_pmod.py @@ -23,8 +23,8 @@ """ from ugropy.constants import _csvs -from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_p import ( - AbdulelahGaniPrimaryModel, +from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst import ( + AbdulelahGaniPSTModel, ) from ugropy.models.read_csv import _rd @@ -37,4 +37,4 @@ _ag_sg = _rd(_ag / "primary.csv", "group") _ag_info = _rd(_ag / "info.csv", "group") -abdulelah_gani_p = AbdulelahGaniPrimaryModel(_ag_sg, _ag_info) +abdulelah_gani_p = AbdulelahGaniPSTModel(_ag_sg, _ag_info, False, False) diff --git a/ugropy/models/abdulelah_gani_smod.py b/ugropy/models/abdulelah_gani_smod.py new file mode 100644 index 0000000..b5a2da8 --- /dev/null +++ b/ugropy/models/abdulelah_gani_smod.py @@ -0,0 +1,40 @@ +"""AbdulelahGani Secondary Structures FragmentationModel implementation. + +Import and use the AbdulelahGani Secondary Structures FragmentationModel with: + +.. 
code-block:: python + + from ugropy import abdulelah_gani_s + + # Get groups from molecule's name + tol = abdulelah_gani_s.get_groups("toluene") + + print(tol.subgroups) + + # Get groups from molecule's SMILES + eth = abdulelah_gani_s.get_groups("CCO", "smiles") + + print(eth.subgroups) + +Attributes +---------- +abdulelah_gani_s: AbdulelahGaniPSTModel + Abdulelah-Gani secondary structures FragmentationModel :cite:p:`gani` +""" + +from ugropy.constants import _csvs +from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst import ( + AbdulelahGaniPSTModel, +) +from ugropy.models.read_csv import _rd + + +# ============================================================================= +# Abdulelah Gani Secondary Structures FragmentationModel +# ============================================================================= +_ag = _csvs / "abdulelah_gani" / "secondary" + +_ag_sg = _rd(_ag / "secondary.csv", "group") +_ag_info = _rd(_ag / "info.csv", "group") + +abdulelah_gani_s = AbdulelahGaniPSTModel(_ag_sg, _ag_info, True, True)