diff --git a/tests/agani.ipynb b/tests/agani.ipynb
index ff81f31..43b736d 100644
--- a/tests/agani.ipynb
+++ b/tests/agani.ipynb
@@ -230,7 +230,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -247,6 +247,9 @@
}
],
"source": [
+ "from rdkit.Chem import Draw\n",
+ "from rdkit import Chem\n",
+ "\n",
"smiles = \"C1=CC2=NOC=C2C=C1\"\n",
"\n",
"mol = instantiate_mol_object(smiles, \"smiles\")\n",
@@ -286,428 +289,22 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " 1 | \n",
- " 2 | \n",
- " 3 | \n",
- " 4 | \n",
- " 5 | \n",
- " 6 | \n",
- " 7 | \n",
- " 8 | \n",
- " 9 | \n",
- " 10 | \n",
- " ... | \n",
- " 211 | \n",
- " 212 | \n",
- " 213 | \n",
- " 214 | \n",
- " 215 | \n",
- " 216 | \n",
- " 217 | \n",
- " 218 | \n",
- " 219 | \n",
- " 220 | \n",
- "
\n",
- " \n",
- " SMILES | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " C=CC(=O)NCCC1=NC=NC1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " C=CC(=O)NCCCN(C)C | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " C=CC(=O)NCCCN(CC)CC | \n",
- " 2 | \n",
- " 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " C=CC(=O)NCCCN1CCOCC1 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " C=CC(=O)NCCCN1CCSCC1 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 1 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " OCCNC(=O)CCN1C=CN=C1N(=O)=O | \n",
- " 0 | \n",
- " 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " OCCNC(=O)CCN1C=NC(=N1)N(=O)=O | \n",
- " 0 | \n",
- " 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " OCCNC(=O)CN1C=CN=C1N(=O)=O | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " OCCNC(=O)CN1C=NC(=N1)N(=O)=O | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC(Cc2ccccc2)C(=O)N3CCC(OCOC)CC3)C(C)C | \n",
- " 3 | \n",
- " 8 | \n",
- " 4 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " ... | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
198 rows × 220 columns
\n",
- "
"
- ],
"text/plain": [
- " 1 2 3 4 5 \\\n",
- "SMILES \n",
- "C=CC(=O)NCCC1=NC=NC1 0 1 0 0 1 \n",
- "C=CC(=O)NCCCN(C)C 1 2 0 0 1 \n",
- "C=CC(=O)NCCCN(CC)CC 2 3 0 0 1 \n",
- "C=CC(=O)NCCCN1CCOCC1 0 2 0 0 1 \n",
- "C=CC(=O)NCCCN1CCSCC1 0 2 0 0 1 \n",
- "... ... ... ... ... ... \n",
- "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 3 0 0 0 \n",
- "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 3 0 0 0 \n",
- "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 2 0 0 0 \n",
- "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 2 0 0 0 \n",
- "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 3 8 4 0 0 \n",
- "\n",
- " 6 7 8 9 10 \\\n",
- "SMILES \n",
- "C=CC(=O)NCCC1=NC=NC1 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN(C)C 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN(CC)CC 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN1CCOCC1 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN1CCSCC1 0 0 0 0 0 \n",
- "... ... ... ... ... ... \n",
- "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 0 0 0 0 \n",
- "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n",
- "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 0 0 0 0 \n",
- "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n",
- "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 0 0 0 0 \n",
- "\n",
- " ... 211 212 213 214 \\\n",
- "SMILES ... \n",
- "C=CC(=O)NCCC1=NC=NC1 ... 0 0 0 0 \n",
- "C=CC(=O)NCCCN(C)C ... 0 0 0 0 \n",
- "C=CC(=O)NCCCN(CC)CC ... 0 0 0 0 \n",
- "C=CC(=O)NCCCN1CCOCC1 ... 0 0 0 0 \n",
- "C=CC(=O)NCCCN1CCSCC1 ... 0 0 0 0 \n",
- "... ... ... ... ... ... \n",
- "OCCNC(=O)CCN1C=CN=C1N(=O)=O ... 0 0 0 0 \n",
- "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O ... 0 0 0 0 \n",
- "OCCNC(=O)CN1C=CN=C1N(=O)=O ... 0 0 0 0 \n",
- "OCCNC(=O)CN1C=NC(=N1)N(=O)=O ... 0 0 0 0 \n",
- "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... ... 0 0 0 0 \n",
- "\n",
- " 215 216 217 218 219 \\\n",
- "SMILES \n",
- "C=CC(=O)NCCC1=NC=NC1 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN(C)C 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN(CC)CC 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN1CCOCC1 0 0 0 0 0 \n",
- "C=CC(=O)NCCCN1CCSCC1 0 0 0 0 0 \n",
- "... ... ... ... ... ... \n",
- "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 0 0 0 0 \n",
- "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n",
- "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 0 0 0 0 \n",
- "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n",
- "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 0 0 0 0 \n",
- "\n",
- " 220 \n",
- "SMILES \n",
- "C=CC(=O)NCCC1=NC=NC1 0 \n",
- "C=CC(=O)NCCCN(C)C 0 \n",
- "C=CC(=O)NCCCN(CC)CC 0 \n",
- "C=CC(=O)NCCCN1CCOCC1 0 \n",
- "C=CC(=O)NCCCN1CCSCC1 0 \n",
- "... ... \n",
- "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 \n",
- "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 \n",
- "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 \n",
- "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 \n",
- "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 \n",
- "\n",
- "[198 rows x 220 columns]"
+ "'BrC'"
]
},
- "execution_count": 9,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "df[df[88]>0]"
+ "df[df[1]>0].index[0]"
]
},
{
@@ -719,7 +316,7 @@
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3de1xUdd4H8M8Mw00EQRBQUC4aiiKJZN4tAVtN3doIt9vg6hq+trXRbHMsd5/p6ea4XRzLVinreUZNV+zZlMxU1Gx1xdQEFWVdQkBUFImbyJ35PX/8RjRmQGBmzjnDfN8vX72U32HOFxo+/M453/M7MsYYCCGEdJdc7AIIIcS+UYwSQohFKEYJIcQiFKOEEGIRilFCCLEIxSghhFiEYpQQQixCMUoIIRahGCWEEIsoxC7AsZWW4tAhFBSgogJ+fhg8GFOnwttb7LIIIV1AMSqSK1egVmPrVhgMv/i4szOefx5vvQUfH5EqI4R0jYzuqRfBhQuYNg3FxVAoMHs2Jk6EpyfKy3HgAA4cAGMYPhwZGRgwQOxCCSH3RjEquPp6jBmDnBwMGoRduzBy5C9GDxxAYiKqqhAXh4wMyOnkNSFSRz+lgvv8c+TkwNkZX3/dNkMBxMdj82YAOHgQu3YJXx0hpKsoRgW3YQMAJCYiOtr8BrNmYdw4APj0U+GqIoR0F8WosKqqcPo0APz61x1t9pvfAMCRI20vQBFCpIdiVFgXLhiT0fRw/m58tLISJSVCVEUIsQDFqLDKy41/6du3o818fY1/qaiwbT2EEItRjAqrtS9CJutos9YL9HRQT4jkUfu9sFrvUKqqQv/+7W7WOgmlJnzi2IqLceECAERHw9/fzAZHj6K2FpGRCAoSuLQ7aDYqrCFDjH/Jze1os/PnAcDDo6OoJcQBfPUVpk3DtGlQKs1vMG8epk3D7t3ClvVLFKPC6tfPmKQZGR1txkfHjoWCDhcIAYB9+7Btm9hFtINiVHD8t+qWLbh2zfwGOTnYuxcA5s4VripCJMzNDQBeeglVVWKXYg7FqOBeeAH+/qiqwtNPo6am7ej163j6aTQ3Y/hw/Pa3YtRHiOTExmLCBJSU4M9/FrsUcyhGBefnB70erq44dAhRUfjoI2Rno7AQJ09Cq8XIkcjJgbc3Nm+Gq6vYtRIiCTIZVq2CTIZ163DihNjVmKBTb2KYPh179mDBAuTnQ6VqOxoTg40bERUlRmWESNSkSXjqKWzdigUL8OOP0rpqIKVaHMrDD+P8eezcif37kZ+Pykr4+iIiAjNmYPp0WtiJEFOrViE9HWfO4OOPsXix2NXchWJUPC4uSEpCUpLYdRBiHwYOxIoVeO01/OUvePJJMRtF26BZjxhefBF/+hNqa8WugxAJKSnB0aPYsgVvv40FC7Bpk5ltXn4ZkZG4eRNLlgheX/toNio4gwHr16OlBStXth1qaUFYGPr1w8mT97hblBC7VVGBq1dRUoKLF+/8uXChbd9Kc7OZlnsXF6xfj4cfxpdfIiMD06YJVnVHKEYFV1GB5mb07Qtn57ZDZWUoLkZ9PWUo6QFqalBYiIKCO3/4P6urzW/frx9CQxEWZvzv6NHmN5syxXit6aWXkJ1tu/K7gGJUcDduAEC/fl0bIkSqGhoaioqKCgoKCgoKrlxJ/s9/evG45G9nU15exqBs/cP/2bt3Z/f4/vv45hucO4fPP7fWF2ERilHBUYwSu1VRUXHRRFFRUUtLC9/gvvsm5uUZ19J1cUFwMMLD7/zp3x8DBiAszNLDrf798cYbWLIEr78OJycLvyYroBgVHMUokbySkhI+uywsLGz9S3FxcVNTk+nGzs7OISEhoaGhYWFhI0e6+vkZ55g2XVdn0SLo9cjKsuEuOo9iVHAUo0RKsrKytm3b5uvre3di1tfXm24pk8mCgoLCbuO5GRoaGhwcrBC8G97JCampGDdOEkvyUowKjmKUSEZmZmZCQkKtSe+dj49PeHh4eHh4//79BwwYwP8+bNgwDw8PUeo0a8wYzJuHzz4Tuw6KURFQjBLJWLduXW1trUKhWLBgQWRkZOs0s3fnL/fY2H33ISkJw4aZH9
VqUVMDgwHh4cKW9UsUo4KjGCWSkZWVBWD69Onr1q0TuxbzZszAjBntjvr54e9/F7CadtBdTIK7Z4yafVQCITZQU1MDIC4uTuxCuu/UKcybh1u3xKyBYlRwNBsl0mAwGMrKygAo23tAhz148UX87/9iwwYxa6AYFRzFKJGGvLy8mpqagQMH+vn5iV1L9y1fDgDvvYfGRtFqoBgVFmMoKwMA0zeuwYDycshkdx5ST4gt8ROjMTExYhdikVmzMHIkLl/Gli2i1UAxKqyqKjQ2wsvLzMr2/F57Hx9pLUhLeq7Tp08DGDVqlNiF3JvBgL/9DePHo66u7ZBMBrUaALRa0XpIKUaFRUf0RDL4bNQuYlQux+bNOHYM//M/ZkafegpDhuDCBezYIXhlAChGhUYxSiQjOzsbdhKjAF55BQBWrYLp/ahOTli6FADeeguMCV0YKEaFRjFKpKGkpOT69eve3t6hoaFi19Ipjz+OESNw6ZL5p9XPn4/+/ZGVhQMHBK+MYlRoFKNEGvhU9P7775fZyeK2MplxQvrOO2bOgbq6Gp/OZLoYugAoRoXVQVaWlrY7RIi12eNl+meeQUgIcnPx9ddmRv/wB3h74+BBZGYKXRjFqLBoNkqkgV+mv//++8UupAucnfHyywDwzjtmRr288MILAPDXvwpaFShGhdbB7Z4Uo0RA9jgbBfD88wgMxPHj+O47M6NLl8LDAzt3IidH0KooRoVFs1EiATU1Nfn5+S4uLpGRkWLX0jVubnjxRaCdc6C+vpg3D4wJPSGlGBUWxSiRgNOnTxsMhhEjRri4uIhdS5ctWgRvb2Rk4ORJM6PLlsHFBVu3orBQuJIoRoVFMUokwI4a7015eWHhQgDQas2MDhyIp59GczPee0+4kihGhdXeDfWM4eefzQ8RYm12dBuoWS+9BHd3fPUVzp83M7p8OeRyfPYZSkoEqodiVDg1NTU+jD0UHo5evdoMNVZXVw8b1jBihJl77QmxNruejQIICMDvfgeDwfyUc9gwPPYY6uvx0UcC1SNjotw85ZAKCgrCw8NDQ0MLCgraDOXl5UVERAwePPinn34SpTbiOJqbmz09PRsaGioqKvr06SN2Od1UUICICMhkyMtDSEjb0RMn8OCD8PJCURG8vW1eDM1GhXPjxg0A/cyd/exgiBDrys3Nra+vDw8Pt98MBRAWht/+Fk1NWL3azOiYMYiPR3U11q8XohiKUeHcM0b96fEhxPbsa0WSDqxYAbkcn35qvDrbxquvAsDq1WbW1rM6ilHh0GyUSEGPidHISMycidpa8+dA4+MxfjxKS82vrWddFKPCoRglUtBjYhTAihUA8OGHqKoyM8qXMnn3XTQ327YMm8Rok+mKgIRilEjDmTNn0FNidOxYPPwwqqrw2WdmMuexx/DrX5f06aP6+98327QM68doRkbG0KFD//nPf1r9le0dxSgRXVFRUVlZma+vb3BwsNi1WMeKFfWTJm3Q6YbV19e3GZLL8cQT+06f/uidd94x2PIBI9aP0TVr1hQUFMTFxb322muNIj6sT3ooRono+BH96NGjxS7EahIS3Orq1hcXX9y4caPp6DPPPBMSEpKbm7tr1y7b1WD9GN2xY4dWq5XL5StXroyNjeX/2wgoRokE9KQTo63UajWAVatWNZucBHV2dn755ZcBvP3227YrwPoxqlAo1Gr14cOHIyIicnJyxo8fv2rVqpaWFqvvyO5QjBLR9cgYTUxMjIyMvHjx4pdffmk6+vvf/97f3//48eOHDh2yUQG2ulI/duzY7OxslUrV0NCwfPnyKVOm5Ofn22hf9qKDrCwrKwPgRzfUExvrkTEql8v5lHPlypWmt2X26tVLpVLxUVtVwGxs7969QUFBALy8vFJTU229O8mqq6sD4Orqajp08+ZNAB4eHsJXRRxKRUWFTCZzd3dvamoSuxYra2xsHDRoEIBvvvnGdLSyspLfsnXixAlb7N3mfaOPPPJITk7Os88+W11dvX
DhwpkzZ5YItu6KlNARPRFdVlYWYyw6OlqhUIhdi5U5OzsvWbIEwJtvvmk62qdPn4ULFwJYtWqVLfZuhRjNzc1duXJlB2c/vb29N2/enJaW1rdv3927d48aNWrnzp2W79e+UIwS0fXII/pWKSkpfn5+x44dO3LkiOno0qVL3d3d//GPf5w3u7ieZSyNUcbY3LlzX3vttcmTJ3e8OlFSUlJWVlZcXFxpaenjjz+enJzMD2YdBMUoEV3PjlEPD49FixahnXOgAQEBc+fONRgM77//vtV3bWmMymSylStXDhw4MDMzMzo6es2aNaz9lfcGDRq0f//+1NTUXr16bdq0KTo62nG69ClGieh6dowCePHFFz09PXfv3n3q1CnT0WXLlikUik2bNl26dMm6+7XCQX18fPzZs2dTUlLq6uqWLFkyY8aMK1eutLexTCZLSUk5efJkbGxsYWHh1KlTFy9e3NDQYHkZEldaWgqKUSKehoaG3NxcuVweFRUldi220rdv3+effx7Au+++azoaFhY2Z86cpqam1WYX17OAdS4x9enTJzU1dfv27b6+vnv37o2Kivriiy862D4yMvLYsWNardbJyenDDz984IEHenyXPs1GibjOnTvX1NQUERHRu3dvsWuxoVdeecXNzW379u15eXmmoytWrJDL5Z988skNs4vrdZc1r9Q/+eST586dmz17dmVl5XPPPTdnzpzy8vL2Nna0Ln2KUSIuO30wfVcFBgY+99xzLS0t75l7wMjw4cNnzpxZW1v7kVUfMGLlhqeAgID09HS9Xt+7d+/t27fHxMR89913HWzvOF36FKNEXPwxdvfff7/Yhdjc8uXLnZyc9Hr91atXTUdXrFgBYO3atVa8xG2TvtHk5OQzZ85Mnjz50qVL8fHxCxcurK2tbW9jd3f3NWvW7NmzJygo6OjRo6NHj/7kk09sUZW4KEaJuBxkNgpg8ODBiYmJDQ0NZs+Bjh079qGHHqqoqEhNTbXaLm3R0881NTVptVoXFxcAw4cPP3nyZMfbV1RUPPvss7yqRx999OrVq7arTXhDhgwBcOHCBcZYZmbm2rVrW78hYWFhAPLz80UtkPRkBoPBy8sLwLVr18SuRQjZ2dkymczDw6OsrMx0dM+ePQACAwPr6uqssjub3wx6+vTp6OhoAAqFQqPRNDc3d7w979IH4O/vv2PHDluXJxh+L9rOnTsjIyP5rwq5XP7xxx8zxvgp/+rqarFrJD0Wv94yYMAAsQsRzvTp0wG88cYbZkdjY2MBWOv2dJvHKGOsrq5OrVbL5XI+o+Yzsg4UFRXFxcXxrFEqlT0gXxobG2W38a/L9fbz6B9//HG0c689Idayfft2fpAndiHC+f777wH4+vrevHnTdDQtLQ1AeHi4VZYXECJGuf379w8cOBCAu7u7TqczGAwdbGwwGHiXPoDQ0NDvv/9esDqt7vDhw/wXIwAXF5eYmJj9+/czxtLS0nx8fPjHfX19xS6T9GT8usqKFSvELkRQEydOlMlkZg9qW1paIiIiAGzdutXyHQkXo4yxysrKlJQUHhy/+tWvLl++3PH258+f53NvuVyuUqnq6+uFqdMqDAZDenr6+PHj+dfr6empUqnafMlFRUWTJ09unXeb/bVJiOVmzpwJIC0tTexCBPXjjz/m5OS0N7phwwYAQ4cObWlpsXBHgsYox7v0cXvJko435tepnJ2dAURFRfElaiSupaUlLS1txIgRPB/79eun0WjKy8vNbnz3vDssLMyu591EsgYMGAAgLy9P7EIkpKGhwc3NDcDatWstfCkRYpQxdu3atdmzZ/OUSUpK+vnnnzve/tixY3wG7ubmptVq73mdSiz19fV6vf6+++7jX1pISIhOp7t169Y9P/HcuXP88ThOTk5qtbqhoUGAaomD4Dcie3p6Wj7t6km2bt3Kr1Xs2bPHwpcSJ0Y53qUPYNCgQQcPHux449raWpVKxb/sCRMm/PTTT8IU2UnV1dU6nY7/zg
cwZMiQ1NTUxsbGzr9CU1OTRqNxcnICMHLkyOzsbNtVSxzK3r17AUyaNEnsQiRk48aNfNHV+Pj4jq/TdIaYMcoYu3jx4qRJk3B7yZJ7TtwkuJZ+aWmpRqNpvVgUExOj1+u7PV/OzMzkk1k+76bpA7EcX6t40aJFYhciFR999BGfkKnVaqu8oMgxyuy5S7+wsFClUvHTmgAmTpyYnp5u+cvePe+eOHGi1ObdosjLy4uOjvby8oqIiCgqKhK7HDvz9NNPA9iwYYPYhUiCVqvl87bVq1db6zXFj1HOvrr0c3JylEolv/All8tnzZqVmZlp3V3s2bOHnyKQzrxbFNnZ2U899RQ/18E5OTnxZW1JJw0bNgzAPScoPZ7BYFi6dCl/C33++edWfGWpxCizky79U6dOKZVK/lPt7OysVCrPnTtno32VlpY+8cQT/AtMTEy8ceOGjXYkTYcPH541axaflbu4uMyZM2fVqlWenp6dPwVEGGO3bt1ycnJSKBTWuvHRTjU3Ny9YsIC/l7788kvrvriEYpSTbJc+/6luvQcpJSVFmKPL1i59f3//nTt3CrBH0R0+fDghIYF/qz08PFQqVXFxMR+qq6tLSUnp/CkgkpmZCSA6Orq9DWpra4WsRxQNDQ1z5swB0KtXL8uvy5uSXIwyiXXpt7S0pKenjx079u4ueoFPyBYVFU2dOrXHd+nzb/WYMWP4V+rl5aVWq80uLXHmzJnWU0BqtbpLHRGOZt26dQCSk5PNjmZlZfn6+mq12h48V62vr+e3XPfp0+fIkSO22IUUY5QTvUu/sbFRr9e3riTi7++v0WgqKiosf+Vu4MvQ8m7hIUOGHDtmqzMJomhoaNDr9UOHDuXf6oCAAI1GU1lZ2cGndPUUkMPiDxb+4IMPzI4uW7aMf8/DwsK2bNlieeuP1Ny8eTM+Ph5A3759f/jhBxvtRboxyky69Nu7EajV0aNH+Xp0bm5u77//frf3W1NTo9Pp+LkFfrpAp9NJ4diHd+n7+Q0PCGhRq1kPaNK/efOmTqcLDg7m3+rw8HCdTtf5mVGXTgE5Jn4g1UFfdkZGRutazrGxsQcOHBCyPJsqLy8fN24cgP79+589e9Z2O5J0jLLbZz+70aX/xz/+sRu7q6qq0ul0gYGB/F0VFRWl1+utsgaMtTQ0NKxcednJiQFs9GjW/h3DUnfjxg2NRsPbLfjJu+7123b1FJBDaW5u5lcOOr5RkN++HBoayr+NCQkJPeDuj5KSEn7mJywszNZdg1KPUa6rXfr79u3r6mXc69evazQab29v/k4aPXp0WlqaZGc3mZnsvvsYwNzcmFbL7KtJv6SkRK1We3h48G8177e18FvdpVNAjmPfvn0AAgMDO7PxrVu3tFotXxhXLpcnJSUVFBTYuEBbKSws5LexREZGCvCb1T5ilHW9S7/zLl68qFKp3N3d7/6pttaL286tW0ylYjIZA9jEicwumvTz8/NVKhU/w8tnPf/617+s9eJdXaihZ8vLy0tJSXF2duYnPTo/wSwrK1Or1Xw93F69eqnV6o5PUktQbm4uP00UGxsrTJug3cQo19Uu/Y6dOXNGqVTyW2t5F/3x48etVaow9uxhAwYwgHl5MSk36f/449nExER+RUihUDz77LNnzpyxxY7uXqihJ53m67xjx4499thj/Fvt7Ow8YcIE/vgQJyen+fPnd3JqVlhYqFQqedMuv5RvL8tUnjp1ij/WbMqUKVVVVcLs1M5ilFnpEm2b1m6lUpmbm2v1UoVRWsqeeIIBDGCJiUxqTfqHD7NZs9iDD+5q/Vbb+qr6xYsX+Squjtal36a1WalU8pXxuj3BPH78+EMPPcRfMCQkRK/XS/Y0F/fDDz/ws+2PPvqokNeE7S9GuW5for37rdamtduupaUxHx8GsIAAJoUmfYOB7djBxo0z5nufPobXX3+3pKREmL3b7hSQBHWytbnbE8yMjIyRI0fyFx8zZsyhQ4ds8EVYwd69e/kJ9zlz5g
jcSmyvMcq6eIm2863d9quoiE2daowtpZKJ1aTf0sLS09no0cZK/PyYRsPu1atmE9Y9BSRB3Wht7t4Es6WlRa/X9+/fv/Wkto3OyXRbejqLjf2Xh4eHUqkUvrXGjmOUu/sS7RdffGG6QTdau+2XwcBSU1mvXgxgYWFM4KX06+uZXm9sIQBYSAjT6Zi4h9Q9tUu/vr4+NTV10KBB/F3d1dbm7k0wb968qdFo+KlnZ2eXZcsqhDq6uAe9nikUDGD//d/nRDntYPcxyn55iTYhIaGwsJB//MaNG5a0dtuvc+eMk0EnJyZMl351NdPpjBe7ADZ4MNPpmHSuSfSkLn3e2tw6Mex2a3O3J5g3btxQqVSTJq0FmIcHU6uZUBdyzPvb35hczgBmpbVDu6MnxCi73aXPz4woFIpXX311ypQprU8z5q3dkuqit7WmJqbRMN6lP3Iks10zdWkp02iMp2UBNmoU0+uZBI+ee0CXvi1am2tqarRaLb+Ur1AoUlJSOrleRG6u4bHHjP/TAwPZ+vVMlB8vrZYBTCZj770nwt5b9ZAY5Q4ePMiPOFqFhoZ+++23YtclGpt26RcWMpXKeAKB965Kv93WTrv0CwoKbNrazCeYvPPPw8NDrVZ3slUoM5NNnmx8AwwdygR+8Oh//ZfxkOvTTwXdr6keFaOMsbq6uri4OB8fn4CAgPXr14tdjvja69KfMYP5+LC+fdmJE2Y+6+xZ5uPDfHzMH6/l5bGUFObsbJwIzJrFrL1otQ3ZV5e+aWuz7dbX+Pe//52UlMSP4fz8/HQ6XScP4NLT2ZAhxjAdN44dPmyjAu8wGJhKxQDm7My2bbP57u6pp8UoMevrr1lgIG88Ylu3MsbYpEnG931srJlj8Oxs42ibS3GnTjGl0niuwNmZKZXMZotW21aXHqcoiiNHjojS2pyZmclvvAYwdOjQTj7avrGRpaaygADj22bWLGa7Zzk3N7Pf/Y4BzNWVffWVrfbSJRSjjqK1S58fAbXGKMBMH8lhGqO8i55/0NWVKZU2/DkRRlcXahCMaWvzpUuXhCzAYDBs27Zt8ODBvIaHHnr4+PFOXaa8eZNptax3b+Nv2ZQUdu2alWurrze+jXv3ZhkZVn7xbqMYdSx79xr/wmN0+nQGME9P1uaKy90x+u9/swceMP7T25utWMFKS4Uv3CYk1aUvtdbmxsbG1NTUgICA8eN1XZpgXr7MUlKMhyy9ezO12motzDU1bNo0BjAfH2mdR6IYdVA8RnU6FhfHAPbkk78YvTtGa2qYry/z92caDRNp0WrbEr1Ln7c28wfPSa21ubKy8s03q9zdGcBcXJhK1dm7jc+fZ0lJxndRUBBLTbW0f6Oigk2YYGwMOH3aopeyOopRB9Uaozk5xtblr7++M9rmoP74cdaz223F6tJvs2p1WFiYNFubuz3B3L//zv1skZHdv5R/7RobNcp4T8d//tPNF7EdilEH1RqjjLEXXjC+QWtqjKPtXWLq2YTs0i8rK9NoNLz7yl5am7s3wWxpYZs3s5AQ4ycmJHT5rHpREYuIYAAbNoxJcwEMilEHdXeM/vwz8/NjAHv1VeOoY8YoE6RL/9q1axqNhq+ODCutWi2k7k0wGxpYairr14+5u3ctCi9eZOHhDGAxMdI9KU8x6qDujlHG2IYNDGAKhfGsk8PGKGejLn3TVatt9KBKWzMYWFqaMd0AFh/PfvyxU5/488+sS483zskx3mH84INMyg2+FKMOqk2MGgzG8/dTpzLm8DHKuv44xY5lZ2crlUonJ6fWLvoTZm97sCutE0x+F0ZSEsvPt+brnzjBfH2N70mJP1OcYtRBtYlRxtiZM8ZrTf/3fxSjjHX9cYpmmS4Q3mNWmeLKy5lazdzcjJfyU1Ksc+h96BDz9GQAmz3bDi5vUow6KNMYZYwtXswAFh7OfviBYtSoe136BoMhPT19woQJfD7bu3dvlUplj+uhdNKlS3cu5fv4MK3WTPYVFrK0NJaW1u4lpg
MHWFoay81lFRWsTx8GsLlzpbjMjSmKUQdlNkarq42nopKTKUbv6FKXPu+ij42N5QHq5+en0Wgkfue+teTksJkzje+cgQPbXsrftMk4FBXFzC5Oz5+VoNUyxtjOnWzJEmYvF94oRh2U2RhljG3ZwgDjAo4Uo3e7Z5c+76KPiIjgARoYGKjVaqVzj6lgMjKMPZ4AGzGC7dpl/HhrjALsr38184l3x6h9oRh1UO3FKGPG+5ooRk3d3aUfFRW1b98+/vGrV6+uXr06KCiIB+jgwYN1Op29PErTFlpa2Oefs+Bg47toxgxWX2+MUQ8PJpMxDw92e3X1O+w3RuUg5JfWrYOrq9hFSJKbm5tWq923b19wcHBOTs4jjzwye/bsyZMnBwUFvfTSS1euXBk1apRer79w4cLixYtdHfibKJdj3jzk50Ong7c33N3vvKNCQpCYiFu38Ic/iFqiVVGMOihPT/j44HYL4y9EROCVV+DjAx8f3H6AALkjPj7+6NGjoaGhAHbt2sV7P4ODgzMyMrKyspKTk3ljE3FxweLFyMvDmjW/+PjKlXBxwbffYscOkSqzNopRB7V7N8rLsXCh+dE330R5OcrL4eUlbFl2YuDAgQUFBUuWLAkNDQ0KCvrss8+Ki4sTEhLErkuK/Pxwe80AoyFDsGQJAKhUqKkRpSgroxh1UFu2YNMmXLpkftRgwKZN2LQJ164JW5ZdWb16dUFBweXLl+fPny92LXbmz39G//4oLoZGI3Yp1kAx6qDmz0dyMk6cMD/a2IjkZCQn49w5YcsijsHTE+++CwAffoisLLGrsRjFKCFEBM88g6lT0dyMF14AY2JXYxmKUUKICGQyrF8PV1ccO4aNG8WuxjIUo4QQcUREGK81qdWorha7GgtQjBJCRPOXv2DQIFy/bjxVaqcoRgkhovHwgE4HAB98gOvXxa6muxRiF0DEdOkSzp418/HGRsFLIY7qN7/BrFnYtQsFBWKX0l0Uow5t6VKxKyAE0Omwfz/q68Wuo6j0vh0AAADdSURBVLsoRh3ayJHw9zfzcYMB330neDXEUQ0ejOXL8frrYtfRXRSjDk2jQWKimY/X18PdXfBqSI82YgTUavO/tgGo1WhqQnMzxo0TtixroBglhAghJgYxMe2OurnhrbcErMaq6Eo9IYRYhGKUEEIsQjFKCCEWoRglhBCLUIwSQohF6Eq9g1q6FE1NuP0Uy7YUCvzpTwAQEiJkUYTYJRmz96X+CCFEVHRQTwghFqEYJYQQi1CMEkKIRShGCSHEIhSjhBBiEYpRQgixCMUoIYRYhGKUEEIsQjFKCCEW+X8Aznzf1SB3pgAAAQx6VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wMy41AAB4nHu/b+09BiAQAGJGBgjggeIGRjaGDCDNzMQEZTADRTRADBYOCM3EDlUBlEgAMkAqYTREITuEZoabgBCAKORmYNRgYmRSYGLWYGJiUWBhZWBlY2BjZ2Dn0GDi4FTg5NJg4uJm4GZncAK5j5Wdg5OLW3wWknMZeDZUbzwQeaHBDsTZxNJ1YJGO1H4QO9ne9sBfD3lbEDsx2fSAR+gRexD7zKI7++0XBIDVGFnv2Wfyx3AfiB0zK9b+XWczWHy6AZ9DyNOjYHGNtCQHl1W9YPFYxSkOR5fN3wti51rVO0yeuRhs5g1GBYeNBt1gthgAKrs4m5ZxkiwAAAFielRYdE1PTCByZGtpdCAyMDI0LjAzLjUAAHicfZNdbsMgDMffcwpfoMgfgPFj21TTNDWRtm532Hvvr9mZOlIJzYkRkB9/sE0mCHuf377v8Gc8TxMA/vOaGXwJIk5XiA6cLi+vC5xvx9Nj5rx+LrcPII4X43lmj7f1+pghOMOhpJKrZANMXKiYr0i4WV/KAeYkJKgCB0wVmXMekBIkJyvVbPsuzW0AZlgDbJLJCCgVkVZkABZY4EBJWZRa7N0oWy4DssbemKRqFYseqTg8INVJF6qlioY4stasA7
A5yImZlOqm7WtotLf5KcWD0NxKKBJWLTYAyUsEOVU2VA5JVLUykiTaNK2p5cgQayMeRUO8nbIgEomD2DTzKJWXZX6q/++NOK3L3G8Eh/e6x4T04voAci8h+bD0QpF77dUgd+0pJ/fWE8vu1tNH4fsscTREu2zQ1vAu7Bh2zQhvH0yMH7+G96cf0yKlSN/jHp0AAAC3elRYdFNNSUxFUyByZGtpdCAyMDI0LjAzLjUAAHicJY47DoNADESvkhKkxfL/I0S1PbkQh483WK7ejMczrzm367vfc0667tlLn2c7DExdbCCwkdU4DwUhwRgHgiOzajOGMq9qF0r2/FGKUg0CE0lrQhAsweswSUsXQxAPl1qQQpqPs2U3l/5AgBzePgZmek+lRWoknRtay0TYGeNUcK63GEaUaS5XZZR2C44kXkmGSNQAM5TH/vwATM0y0DPnCj0AAAAASUVORK5CYII=",
"text/plain": [
- ""
+ ""
]
},
"execution_count": 10,
@@ -734,6 +331,77 @@
"\n",
"mol"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"abdulelah_gani_frags/log_p.csv\", index_col=\"SMILES\", sep=\"|\", comment=\"?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "61\n",
+ "2\n",
+ "OCCN(C(=O)C)c1c(I)c(C(=O)NCC(O)CO)c(I)c(C(=O)NCC(O)CO)c1I\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAeCklEQVR4nO3deVhUZfsH8O8AAmdYB8GN3AAxUQdRSHEp99dSK0rFNbdQMBGFVEI2F7BXXzHSXHBJC9PCt3zDMpOf+5qSCmIqiIBhyKJsM8MynOf3x3CRWRnOnGGG4f5c/XGucc59bopun3PO89yPiDEGQggh6jLSdQKEENK8URklhBCNUBklhBCNUBklhBCNUBklhBCNUBklhBCNUBklRF8xhoMHEReH27cB4OjR+s8bDoh+oDJKiF5SKhETA1NTzJiBVavw4AH276//o4YDoh9MdJ0AIS1IRkbGvn37OI6TyWRlZWUKhUIul7ubmq7IyYFCgdJSyOVQKFBWBj8/yOUIDweAKVNw6hRyc7FmDQBUVOj2pyBPoTJKSBO5du2ap6cnz/NPLR3kXV1x587T366qAs+DMYhEKC+HpSU6dkRICAAEBDRVyqRRqIwS0kS2bNlSV1fXpk0bPz8/Kysra2trjuMsLCwcJRKYmUEshq0tOA4cB4kEAL79FsuWwdMTycnYswdffw2O0/UPQf6CiNbUE9IEsrKy3NzceJ7PyMjo3r17Y097+BC//QY3N5ia4sEDdOgA4PcDoh9oNEpIU4iOjq6trX333Xefo4YCaNsWbdvWHzeUTqqheoZGo4RoXUZGhlQqNTExuXXrVteuXXWdDhEYTXgiROsiIyN5np8/fz7VUINEo1FCtCs1NdXLy8vc3DwzM9PR0VHX6RDh0WiUEO2KiIhgjAUGBlINNVQ0GiVEi86dOzd48GBLS8vs7GwHBwddp0O0gkajhGhRREQEgJCQEKqhBoxGo4Roy7Fjx0aPHi2RSLKzs21tbXWdDtEWGo0Soi2RkZEAQkNDqYYaNhqNEqIV33777RtvvNGuXbu7d++KxWJdp0O0iEajhAiPMRYVFQUgLCyMaqjBo9EoMQi1tWjVSnvheZ4/cOBAmzZtKioq5HL5k23uSktLFQqFQqF4/Pix6pOysrKysrKKigo7O7ucnBwzMzPtJUb0Aa2pJ81cTQ0WLkSHDsjNxdKlcHPTxkVGjRp1/Pjx5zrF0tKytrZWLpdTGTV4NBolzdz+/TA3h48PyssRHIydOwW/wtGjR8eMGdOqVatBgwbZ2tpyHGdlZWVlZcVxnKWlpbW1tVgsFovFqj/iOE4ikZibm0+bNu3kyZNz587dqYWUiF6h0SgBZDJYWABATQ1MTf9woP8KC+HtDQDW1qiq0sYVVE85Y2Nj33///caftX37dnd39927d0+ePHnkyJHaSIzoCXrF1LI9fIiZMxEfjxkz8OAB5s2r/7zhQP8NHIjvvgOAixfxXD3oGufQoUOXLl1q3779ggULnutEV1fXFStWMMbmzZsnk8kET4zoDyqjeubAgT8c3LuHqCisXo3CQq1cLiEB77+PsDCsWIFt2wCgpAQlJaip0crltMHLC1IpwsJw9iyWLsVnn2H6dKFi8zwfHR0NYMWKFWq8cF++fHnv3r3v3bsXo9pDiRgqRvTKzJm/H9TWsrffZhUVrLCQ+fpq5XJBQaywkDHGSktZQACbMYNt2MA2bGDe3lq5nFbdusV27WISCQPYvn2ChNy3bx+Azp07V1VVqRfh4sWL/u7uiu7dWWqqICkRPUSjUT1TXo6wMISFITsbeXno0QOWlnBwAMehulr4yw0ZgkOHAOCbb/DyyzAyQnAwgoPh6ir8tbTq3j306YNFi7B8OQAEBaGoSMOQSiU+/9xZKh0SGRmp9tv2/v37bx061Pz2bcyZg9paDVMi+onKqJ6xtkZsLGJj4eQEW1sUF9d/XlWllXc+gwbBzKx+F19fXzS8CWl2r0S6doWPD2Qy/PgjRo5EcTGWLtUw5Gef4Ycf+ldVnZ45c7ZGgVT/Na9fR3y8hikR/URlVI/Z2cHREStXIjQUQ4dCJBI4/uHD
cHJCfj7WrMGsWRCJfn+qKNzjxabz8cdwcMDx4xgzBhYW2LsXP/6odrCamvo94VeuhLGxZv/mxWJ88gkAREXh7l2NQhH9pOunCuSPZLKnD8rKWGUlu32bTZ/++4ea43nm4cEAFh8vWEyd27OHAax1axYezoC6Xr1klZXqRdq0iQGsVy9WVydQbtOmMYANG8Z4XqCIRF/QaFTPNLwObjiwtoaFBWbORGIioqIEu1BSEq5ehaMj/PwEi6lzM2di9GiUlCA7+9eJEwc+ehQRGalGGIUCH34IAGvWwEio/0U2boS9PR48wMOHAkUk+oLKaDOxdStMTLBxI1JTBYhWV4foaACIigLHCRBQfyQk1HXunJCb+/XgwT8XFcXHx1+5cuV5Y3zyCfLz4eWF118XLjEHB/z4I1JTceQIoqJw9SqA+vd7Tx6Q5kjXw2HSaMHBDGBSKaup0TTUp58ygLm4CBBK/8THxQHo3LlzUFAQAKlUWtO4H5PnWUUFY4z9+isLCGBHjtR/Xl0tXHLh4ez4cVZVxWbOZLm57J136j9vmOhGmiEajTYfq1fD2Rlpadi4UZMwtbW1uV9/DQBRUVrtiqQr7y1a5OnpmZubq1QqnZ2d09LSNmzY0JgTq6rQtSsKCiCXo1Mn7N9f/7mQS7qyszFsGMzMMGkSzpzBvXv189vKy4W7BmlqtKa++RCLsWUL/vWv2oSE/Lfe6uLiol6YXbt2BSQnR44du3LKFGET1BPGxsa7du3y9PTcunVrfHz8okWLoqOjX3755Xbt2pWXlysUCplMplD0LCtrL5fj8WMoFFAoUFqKiRMxejRWrcKSJfWhVHNPhZywyxjq6mBsjJIS2Nmha1fExgLArFnCXYM0NSqjzcro0enLl4/avLnn/PkpKSmi558CVVVVFRMTA8B9zhwYG2shxedz9781Dn1NrLsKfFcklUpDQkI+/PDDmJgYjuPkcvmgQYOe/MLLL6edPt3+qbO6dYODA1xckJJS/0liIgDk5AiX2ezZCApCnz44dQq7d+Orr4QLTXSGGuU1MyUlJW5uboWFhbt27ZozZ87znh4XFxcSEtKvX7/Lly+rUYUFd3NHdfshJpIXhS/oCoVCKpVmZWWJRCJTU1MrKytra2tLS0tVm7uePdcWF3taWMDGBhwHsRi2tujTB/v3Y8MGvPYahg3DL79g714AmDULe/YIl1lFBYqK0KULjIzw6BHs7AD8fkCaIRqNNjOtW7eOi4ubPn16cHDwmDFjOnTo0PhzKysr//3vfwNYs2aNPtRQreI4bvjw4VlZWR4eHleuXGnMz1tbi5wcGBsjNhZFRXjhhfrPR40SNDMrK1hZ1R83lE6qoc0ZjUabpddffz05OXnSpElffvnlM77WsKeFTCZTKBTbtm379NNPBw0adPbs2SZL9dlu7qiW9DAqSeN7LRC4RXxZWZmzs3NJScmxY8eo3SfRKhqNNkubN28+ceLEV199lZ2d3atXr7/bGuips0QiEcdxqmlAesJMYnRuSZW8kLfsZNRlnJDTBjZu3FhSUjJkyBCqoUTbaDTaXH344Yfr1q37c618krm5uWpPC9XmFr/99lt+fv64ceOSk5ObLM9/dGdfzcUwhbm96M3jVqY2wjxqKCkpcXJyKi8vP3Xq1MsvvyxITEL+DpXR5ooxtmfPnuvXr0ul0r/bGuipB4IFBQVubm6PHz8+cOCAr6+vrjJ/GsOPU2UF55UuvqYD1wmzpGrZsmXr169/7bXXvlM1xidEm6iMtiwJCQnz58+3t7f/5Zdf7O3tdZ1OvfJsPnlMRV0NRiVatB+s6YOmgoICZ2dnhUJx8eLFl156SZAMCXkGWsXUsvj5+Y0YMaK4uHjZsmW6zuV31k5GvQPNwXAhVKGUa/r3ekxMjFwuf+utt6iGkqZBo9EWJzMz093dvaqq6ujRo6MEnsujPl6J78ZVlt+ts1l4a1xQf7Xj5OXlubq61tbWXr16VSqVCpghIX+HRqMtTrdu
3cLDw5me7VhpZAKvD412mE73eX/wVVX3I7WsWrWqurp66tSpVENJk6Ey2hItW7bMw8MjJydn1apVus7ld+36iD1GuyiVytmzZ9eqtW1RZmbm3r17TUxMwlXbohDSJKiMtkQmJibbt283NjaOi4tLFaSBqUBiYmKcnJyuX78er9a2RVFRUUqlcs6cOd21sGE9IX+Hno22XMHBwRs3bnR3d798+XIrvemY98MPP7z66qtisTgtLc3Z2bnxJ2ZkZEil0latWmVmZnbs2FF7GRLyFCqjLZdcLu/du3d2dva6deuWaryPpoCmT5++b9++YcOG/d///d+TU18rKioUCkVlZeVfLtlKTEy8ceNGUFDQRx99pMPkSQtEZbRFUw39OI5LS0tzaXQD04QEDB0KV1d89hm8vFBRgZdeQkYGFAp4egqQVVFRUY8ePUpKSpydnRljlZWVCoWioqLi2WeJxWIjI6Pvv/9+yJAhAiRBSKNRGW3pZsyYkZiYOHTo0OPHjz+jDVJlJRQKVFSAsfpGwzt3ws8Pvr4oLMS0aThyBOXlEGpt1IgRI65du/bo0aMnP2xoc/fkki2O4ywsLGxsbI4fP56amjpgwIBz584ZCbYRHSH/jMpoS1dcXNyjR4/i4uLevXt7eHiobpBVd8p2ds43bnwll/9hhwtvb0ilGDUKCgVOnoSvLzZvRs+eyM6Gj48wZTQzM9PNzU0kEh08eLBXr16qta3W1tbPOKWysvKHH35YvHhxfn7+li1bAgICBMiDkEbSxQZQRL+EhYX95e9G9+59AKb6Ryxm9vbMyYm9/TabP58xxmbNYr6+7McfWWIiY4x9/z07cECYfFTr/efNmzd16tRz58794/fLy8u7dOliamoaFxcHwNra+v79+8KkQkgjUKM8gpiYmL59+969e7dNmzaqG2SO48RisZ1dG3NzcBxsbf/wfX9/AFixApMnC59Menp6UlKSubl5p06dwsPD09PTr1+//uymy1ZWVq+++urWrVsPHDjw5ptvHjp0yN/f//Dhw8InR8hf0nUdJ83Y1ats2DCWm8sYYzzPeF6AmK+//jqAwMBAR0dHAIcOHWrMWWVlZS+88AKANWvW2NraAvjqq68EyIaQRqAn8UR9MTE4cQILFwKASATN9yW5cuVKcnKyhYWFRCLJz8/v16+fqqr+I2tr623btgFYu3ZtSEgIgEWLFj27GSshQqEyStT38ceQSJCcjKQkYQJ+8MEHjLEFCxaoamJsbGzj94waO3bshAkTZDLZ+fPnhw8fXlBQoFeTYYkBozJK1Ne+PdauBYBFi6D5yO/s2bMpKSk2NjbGxsaFhYWDBw8ePXr0c0XYtGmTRCI5cuTI2LFjzc3Nd+/endKwVzIh2qPrpwqkeeN5Nnw4A9jcuZqGeuWVVwCEh4dLJBIAJ0+eVCPIjh07ANjb24eGhgLo1q2bXC7XNDNCnolGo0QjIhG2bIG5OY4eVZ4+fU3tOEeOHDl16pS9vb1MJnv8+PGYMWNUVfV5zZ07d+TIkcXFxQ8ePOjTp09mZqZedbEihknXdZwYgk2bbllYtHFxcVFv6MfzvKpT/cqVKy0tLQFcvHhR7WTu3LnDcRyAjz/+2NjY2MTEJDU19bki/Hau9sHZWrUTIC0NjUaJAPz9nV1dHbOystQb+n3zzTc//fRT+/btHz58WFlZ6ePj07+/+g3wu3XrFhkZCSAuLm7+/Pmq1nlKpfLJ75SWlj548KA4t6Ikva7wJ+WD08rc72rvJtXc/rzmxtZqRRGvrKTVfaSxaDEoEcbly5e9vb1FItGlS5f69u3b+BN5nvfw8EhLS4uNjV29enV1dfXPP//s7u6uSTJKpdLLy+vatWuhoaH79u27f/9+p06dTExMGlpDqb62441Lplf/ojNp/zWcuI2o47/0pXkg0XO0iokIw8vLa+HChfHx8XPmzHl2A9Py8nK5XN7Q5u7w4cNpaWmdO3fOyspSKBRTp07VsIYCMDEx2blz
586dO0NDQzMzM8vLy/Py8p78gmqllkhSZdfL2MQcxuYiUxuRibnImIOplcjYTMPrk5aFRqNEMKmpqS+99BLP805OTt7e3gqFory8vKHNXUO30D+faGNjExgY2K9fv+jo6KSkpG7dugmV0oMHD1xcXKqqqg4cOODp6dnQEerZZz28pARD2wE0yCCNQmWUCGbixIkHDx7kOE6hUDzja0+1uSsqKsrKyurZs+fPP/9samoqbEr+/v7bt2+fPHny/v37hY1MSAMqo0QY6enpffr0MTU1nTx58p49ezp27LhhwwaJRCIWizmOs7GxaegW+tSJ1dXVHh4ev/zyy6pVqyIiIgRMKScnp3v37nV1denp6T169BAwMiF/oNuJAsRgjB07FkBQUFC7du0AJCcnN/7ckydPikQiMzOzjIwMAVN65513AMyePVvAmIT8GU14IgL46aefvv/+e0tLS0tLy4KCAi8vL1VVbaRXXnll7ty51dXVc+fO5XlekJTu3LnzxRdftGrVijZbJtpGZZQIIHLFCsbYggULtm7dCmDt2rWNbymi8p///MfR0fHixYvbt28XJKXw8HClUunn5+fk5CRIQEL+DpVRorHjx7/Nyto0YgSAR48eDRkyZMSIEc8bw8bGRrWjZ2ho6K+//qphRunp6f/973/Nzc0/+OADDUMR8o+ojBKNhYeb5uQsHDAg6tixmP7916qaPj2/CRMm+Pj4lJeX+6va62vggw8+4Hl+4cKFql7OhGgVvaknmjl8GOPHw8EBU6ciPh6vvYbvvlM72G+//ebm5lZaWpqUlDRhwgT1gly+fLl///4WFhaqbVHUToaQRqLRKNEAY4iMBIDAQOzYAZEI0dGaxGvfvn1sbCyAwMBAtXvXh4aGMsaWLFmilRpaXIyaGuHDkuaMRqNEA0lJmDQJHTpg/Hhs344JEzTvg8/z/NChQ8+cOfPuu++qmocWFRVVVFSoVo6q9n+Wy+UKhaKsrEwmkzWslfL09FyyZMnp06dfeeUVW1vb7OxsVd9SwTAGf390746sLLz6KsaPFzI4adZ0O9+KNGNKJevRgwFs7VpmZsaMjNj164IEvn37trm5uUgkSklJYYypeuj9o3HjxjHGhgwZAiA2NlaQTP7g/Hn20UeMMcbzbNIk4eOTZotWDRN1GRkhOhp79+L2bVRXY8YMSKWCBHZ1dQ0LC4uMjAwICLh+/XqnTp2Ki4sbtn22tbXlOI7jONVg09jYmOM4xpiNjc3q1avPnDnj4OCwULXNnrBKS2FvDwAiEUzofxzyO/ptIOq6dAlyOTZvxrlzOH8eUVECxl6+fHlSUlJ6evqgQYO6d+/eu3dvmUxWVlZWUFCQnZ1dWloql8urqqr+fKKdnZ2Pj8+fl5wKwNMTwcHw9cWdO2jdWvj4pNmiZ6NELYmJePgQ48djzRpER6NrVwG2V/6jHTt2LF26tKys7BnfeXJkynHc48ePMzIyOnbsePPmTVUXfWGoxtrvvQcrK3z9NVq3hr8/OE6w+KSZozJK1DJtGhITIRLhwgWkpkILN9HDhw8/ceLEuHHjpkyZoupp0tDmruEG/6lT6urqBgwYcOXKlaCgINVkfmFs3ozAQEilmDQJI0dCg878xCBRGSVqmTEDn34KExOcPInbtzF/vrDhU1JSRo0aJZFIsrOzbW1tG39iWlqap6dnXV3dmTNnBg4cKEAqCgW6dUN+PjZuRHAwOA6//gph5wCQZo7mjRK1TJuGsDCcOIHt2/HWW4KHV3XMW758+XPVUABSqTQkJITneX9//xpBJnhu2oT8fHh64tgxMIb33qMaSp5Co1GirpwcZGWhXz/By8q33377xhtvODg4ZGdnq/GIU6FQuLu7Z2Vlxcf/LzBQs9mdlZVwckJRET76CIsXw9ISd++CVkaRP6LRKFFXly4YOVLwGsoYi4qKAhAREaHeayKO4xISPvXwSFm6dPytW5pls2EDiooweDD+9z8ACAmhGkr+jMoo0S+3
fijuJHLv2LHjvHnz1A4ydOggD4/h1dWYMwdq9y8tKSn59OxZpaMj3n4bJ07A1hZBQWqnRAwYlVGiR1gd7qw3f7skPjnuJzMzjfbn3LABjo64cAEJCWpGWL9+/ZyUlIl9+vzv6lW+bVssX05PRclfojJK9Ej21zVld3mrLka932ynYSgbG2zcCADLl0ON/qUFBQWbNm0SiUSDhw1787PPpGIxHxioYUrEUFEZJfqCVyLt42oA7kvMjYRYXjdxIt58E+XlCAh47nNjY2PlcrmPj09iYiKAeYsXG/3TtsykxaIySvRF5v6aijze1tW46+uthIq5ZQu6dsX48XiuCSl5eXkJCQlGRkbe3t7Xrl1zdHT08/MTKiVieKiMEr1QV430T6oB9AkxEwn3W9m+PTIzcfo0zp8HgNDQv7l6Xd3jx4/v379/586d1NTUgICA6urq4cOH7969G0BUVBRHSz/J36N5o0QvXFlddXNndevexmOTLSHw6nz4+aG6Grt2ISAAbdrg8mU8fgy5HAoFysogk6FXr1E//5zyl+e6uLjcvHmzVSvBBsjE8FCHJ6IXqkvZwPWcZScjwWsoAGNjzJuH+HgAuHEDKX8qmKamVhKJxMLCguM4a2trCwuLvLy8nJwcAMHBwVRDybNRGSX6QQSXSabaCz94MJKSUFqK6GgsWgRbW3AcOA4SCcRimJl9/edTRo8efezYsXPnzgWo8YqKtCT0bJQYuMpKvPgiAISHw8kJffti5Eh4eqJnTzg5QSLB381PTUhIsLCw2Ldv3+HDh5syYdLsUBklBm7CBBw6hLt34eCAdeue48QuXbqolqUuXLiwsrJSW/mR5o9eMRG9UJHHW3US/i/1s2cxZAhsbJCdDTu75z5dqVQOGDAgNTV1yZIlcXFxgqdHDAONRole0EYNBRARAQAhIerUUAAmJia7du1q1apVfHz8hQsXhM2NGAwqo8RgHT2KkyfRurVGHUXc3d2XLFnC8/z8+fNra2uFy44YDiqjxGCpNtkLDYW1tUZxoqOjXVxc0tPT169fL0hixMDQs1FimI4cyX377c62tsjKwp82bXpuJ0+eHD58uKmp6dWrV3v06CFEgsRw0GiUGCCe55cvH29t3Tc29p7mNRTA0KFDZ82aVV1d7e/vTyMP8hQqo8QAffnll+np6ebmj6ZM6SBUzI0bN3bo0OH06dM7duwQKiYxDFRGiaGpq6tbtWoVgMjISA17Pz/JxsZGNedp6dKl+fn5QoUlBoDKKDE0e/fuvXXrVrdu3d555x1hI/v6+o4fP768vDwkJETYyKRZozX1xKDU1NSsWbMGwMqVK01MhP/1/uSTT5RKpWp1EyEq9KaeGJSEhISlS5eamZkVFBQYGdHNFmkK9HtGDIqtrW1lZWVFRUVeXp6ucyEtBZVRYlAmTZo0ceLEqqoqf39/XedCWgq6qSeGpri42M3Nraio6PPPP58+fbqu0yGGj0ajxNDY29uvW7cOwOLFiwsLC3WdDjF8VEaJAZo1a9aoUaNKSkpoZhJpAnRTTwxTTk5Or169ZDJZcnLyuHHjdJ0OMWQ0GiWGiXrXkyZDZZQYrODg4H79+uXm5kZGRuo6F2LI6KaeGLLr1697eXnV1dWdPXvW29tb1+kQw0SjUWLIqHc9aQJURomBo971RNvopp4YPupdT7SKRqPE8DX0rvfx8eF5XtfpEENDo1HSIjx69Kht27Y8z2dkZLz44ou6TocYFBqNkhbhwoULSqVSIpG0bt1a17kQQ0NllBg+xlhERASA8PBwBwcHXadDDA3d1BPDd/DgwYkTJ3bo0CErK4vjOF2nQwwNjUaJgaurq1OtCo2KiqIaSrSByigxcF988cXNmzednJxmz56t61yIYaKbemLQamtL+vdfZ23d18/Pd9o0XWdDDBOVUWLQtm+Hvz+6d8eNG9DCRqGEgMooMWRVVXB1xf37SErChAm6zoYYLHo2SgzXtm24fx9SKd56S9epEENGo1Fi
oGQyODvj4UMcPoyxY3WdDTFkNBolBioxEQ8fwtubaijRNhqNkuYvLw+dOv1+oFQiIwN2drhwAS+8gIEDdZ0fMXA0GiXNX0RE/UFkJJRKzJqFmzexYwfkcqqhpAnQFBDS/JWV4fx5AKitxblzGDwYU6YAwOTJmDVLp5mRFoHKKGn+lEo8egQAPI+qKojF9Z/TRFHSJOimnjR/rVtj3DiMGwczMwwYgMOHUVCAlBR07KjrzEiLQK+YSPN37BhGjQKAlBSMHIm8PHzzDdq2xaRJMKKBAtE6KqOEEKIR+ruaEEI0QmWUEEI0QmWUEEI0QmWUEEI0QmWUEEI0QmWUEEI08v+WUloc33nRdgAAAg56VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wMy41AAB4nHu/b+09BiAQAGJGBghQgOIGRg6GBJA4IxuYZmKC0ewMGkCamZkNQrNwQGioPCMzmwNEHEabgs1jhAujaWNnyADTaPYwY9gP59PHXG5gkDAyMTAxA4UUWFg1mIA2MLMrsHNkMHFwMnBwJXBxK3DzaDBx8yrw8jHw8TPwCzDwCzIICjFwCWcwCYswCIsmiIopiIlrMIlJKEhIMkhKMUhJM0jJMMjIMojKZTDJyTPIsSc4gQKejV1OVJiLQ7yLERERDAr5Gkn2i389OADimGeetPeK2QZmBzxytnd82wtmq7vtsr8w1w3Mns1p66Bn7wRmS5tkOnxjubUfxF50tMBhfl8nWNxwjqp9ff1rsPibv5H7D9t8AbO/Bh/dLxLtD1bDWn5lvyujKlicS8nrwNlqMzC706fswLLbTGA1gvdqD5T+4bEHsX0erTtwg/m7HdhtB04cEK+9DxYXWL3pgN1vFwcQe8rmHwcu778GFj/jwXnwl60NWJzbpWJ/20d9sLjIrMf7n2z5CWYLhbHb8+c4gtmcDHPst01nBat/Ln/J7nhpGJjd80zXIfIbJ8QcwSgHbuN4MPvk7zkOaycmg9lnb25zmLtZCMzmFNzsYJA+B8z2ZXns0DhtHpgtobza3piHCexH4QpTh1Dlh/tAbDEAoOyQIIoSzB4AAALkelRYdE1PTCByZGtpdCAyMDI0LjAzLjUAAHicfVVbbttADPz3KfYCFvhc7n4mcVAERWygTXuH/vf+6FBJVg5AVLYIiRhzh6/xqeX14/L9z9+2LrmcTq3Rf75zzvZbiej02vKhPT5/e7m2p7eHx0/P0+3X9e1nU8kv5ecr9uHt9vrp4XZrtI2hJrOdYyMlnw4X7dfxU2lPjTcPddF29q3PEb0XQAWQtujOIe1sm+EXLAXQ2hURzV0Q56wbk+qogI6Isk3XYE4gqTNVR3cko1s3HeTtzFuQSlTAQEQFR6GuyVHm6J0K4NiT6dYn7xGHziGjAE4AzyiksysnctJgsgLJ1F4S4MZdPPMRnVNLKGdU3rrPgcqckwnqxRU0+5NF9IGcExosA08FVFEmQIcNE3CVjcy6V3Xi7BGgs/ceEwB3ijpoNgljgUAsmYsNsZppT2TfJNTQHHTJOlE1chxJ1DeklIOBxkcMLZHZJ0xvBE+khIpFhMwKOTPmQEwSzCfGylAnrQae3ls6+4yRLe8DqVcxceLeUbBjwxEYVQLlCin7PLmM7lnF8LwqoO7rJjwCRUSHRugogbYvsGHW4NZNnaaWJB2tFEwFStTxwO69V/2Rvi+HodEzI9oIm9VuSG6RYdAhCh1AZFIrAhbm1hxHG9LA0TJFuCxPbhH6PdFx6ANkKXyWGkOICLGaJuHJwR3NqYC811E1iEZ2E/ObClogBU0ULLibWyJNZ0jF8vl6+SKh76L6eLteDlHNjxzSiZemh0AybjtkkHH7IXaCux+SxniNQ7gSPA55wkubhwhx3vdSY+ngO0Hh3Sxu0A1Jw4seJ7/0LIacFNMskth23s3iyUk0zaLK8eFZbLGlvJtFGNtoyUfuty5fsVN322W756jnXlAQX5zRJ9nN4izJOT2LsyRnGFmcJTnD
yOIsyRlGFmcZH57FWbLKMHo/jZZ8dHHWrDOMLs4YL0vPCpxTdD8z+f75J47n0z/BvoD9diYvYQAAAXJ6VFh0U01JTEVTIHJka2l0IDIwMjQuMDMuNQAAeJxtUTmOw0AM+8qWNjAejG4JRipXaTaPyDfy+KWcdqcaEBRJUde1PV7773Zdr/1N7+25v7cvdF3ba2/0P4yeP59NZjgvH4dOrnQfJ88yCRqHzCVGNE6ZrpJAaMYSznHSVDOO5tASAbKgYxTcQirG1CQL/MZh0ysjmpQpyjWOgDZcgbh6kbV2SqWM8wDLyOT2qwXfEx9Tcm4/liq5IbfKHEdLIE3TkMZSpLEgTgRsLDUVozyXai8IqNw9x5pmK26WTUV8BqTJeov55BDNgaXVb0pr50AtEXfQmBFUDIZHRCMJzoIgOlTqopCkvCLbzBNedUeHACnmylYrIQinW4AU1q/LY8owKK2Mb8FKS+Eutkr7TNF1M1a23kqmYpcCQTMUNoryLHoCgt2+YUJXYYSL+0DYqLDTUJwlIIGjlCI9Js0yOoRILOo+UVRVtW2KqTWkUsEy9s8fXxCCFHzsehkAAAAASUVORK5CYII=",
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "group = \"241\"\n",
+ "idx = 55\n",
+ "\n",
+ "cantidad = df[df[group]>0].shape[0]\n",
+ "ocurr = df[df[group]>0][group].iloc[idx]\n",
+ "smiles = df[df[group]>0].index[idx]\n",
+ "\n",
+ "print(cantidad)\n",
+ "print(ocurr)\n",
+ "print(smiles)\n",
+ "\n",
+ "mol = instantiate_mol_object(smiles, \"smiles\")\n",
+ "mol"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "np.int64(3)"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df[df[group]>1][group].iloc[0]"
+ ]
}
],
"metadata": {
diff --git a/tests/agani_s.ipynb b/tests/agani_s.ipynb
new file mode 100644
index 0000000..8381bee
--- /dev/null
+++ b/tests/agani_s.ipynb
@@ -0,0 +1,175 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from ugropy import abdulelah_gani_s, instantiate_mol_object\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "import numpy as np\n",
+ "\n",
+ "from rdkit.Chem import Draw\n",
+ "from rdkit import Chem"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"abdulelah_gani_frags/log_p.csv\", index_col=\"SMILES\", sep=\"|\", comment=\"?\")\n",
+ "\n",
+ "primary = np.linspace(221, 350, 130, dtype=int).astype(str)\n",
+ "\n",
+ "df = df[primary]\n",
+ "df.rename(columns=lambda col: int(col) if col.isdigit() else col, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "BrC1=CC2=C(S1)N3C(C)=NN=C3CN=C2c4ccccc4Cl\n",
+ "11\n",
+ "Subgrupos:\n",
+ "{'(N=C)cyc-CH3': 2}\n",
+ "Subgrupos por número:\n",
+ "{346: 2}\n",
+ "Gani: \n",
+ "{324: 1, 346: 1}\n"
+ ]
+ }
+ ],
+ "source": [
+ "for idx, smiles in enumerate(df.index):\n",
+ " solution = abdulelah_gani_s.get_groups(smiles, \"smiles\")\n",
+ " \n",
+ " # Filtrar las columnas de la fila que no sean iguales a 0\n",
+ " row = df.loc[smiles][df.loc[smiles] != 0]\n",
+ " \n",
+ " # Convertir la fila filtrada a diccionario\n",
+ " row_dict = row.to_dict()\n",
+ "\n",
+ " # Verificar si ninguna solución tiene subgroups_numbers igual a row_dict\n",
+ " if solution.subgroups_numbers != row_dict:\n",
+ " print(smiles)\n",
+ " print(idx)\n",
+ " print(\"Subgrupos:\")\n",
+ " print(str(solution.subgroups)) # Convertir a string y unir con saltos de línea\n",
+ " print(\"Subgrupos por número:\")\n",
+ " print(str(solution.subgroups_numbers)) # Convertir a string y unir con saltos de línea\n",
+ " print(\"Gani: \")\n",
+ " print(row_dict)\n",
+ " \n",
+ " wrong_smiles = smiles\n",
+ " break"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'(N=C)cyc-CH3_0': (6, 7, 8), '(N=C)cyc-CH3_1': (9, 7, 8)}"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mol = instantiate_mol_object(wrong_smiles, \"smiles\")\n",
+ "\n",
+ "abdulelah_gani_s.detect_fragments(mol)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3dd1hTZ/sH8PskYYQRCCJ7OAAVgqiIiCiOarVWcYGbtmoVV6e+Rduq2FqrbW21ropapTgqVhxYUaGKOFHwJ0IYyhJkSQIEYpCQ5Pn9cSi1vq0KJDmJ7/25vHqF5HDuO1e9vj5nPQ9FCAGEEELtxWK6AYQQ0m8Yowgh1CEYowgh1CEYowgh1CEYowgh1CEYowgh1CEYowj9KyVRqkBFv/668mtmm0E6i8L7RhH6b9lPsheVLKpsrgSAfib99rjusbtrV9+nngKK6daQzsHRKELPUhLl9KLp0/jT8rzycr1yx1mMY7ojpNM4TDeAkM653Xj7serxos6LAIACapbVLKY7QjoNR6MIPatEXtLdqDsev6OXhDH6alIooK7urx8fPwa5nLlu9A2fzZcoJUx3gfQGxuirKTUV+Hw4c6blx8WLIS6O0Yb0ig/X5/6T+0XyIqYbQfoBY/SV5eYGy5fDkydM96GHOnE6rbZfPezesI1VG7dVbwspDGkiTfRHpfJSZntDOghj9JXVrRuMHg3r1zPdB0BDQ8PMmTPHjRt3+vRpfbnB7kObD8+4nenE7mRAGayyX2VEGe102Tnk3pBuwm70XVAItcIYfZVFRkJ0NOTmMtYAIeTo0aMCgeDw4cNJSUnjx4/38PDYuHFjTU0NYz29NC9jr3et3w23Dvfh+gDAbKvZNhwbBVEcqj3EdGtIt2CMvsosLOCrr+DDD1t+LC3V6oWmixcvDhgwYOrUqSUlJXZ2diNGjHBxccnPz1+xYoWLi0t4ePjdu3e11406hFmFAUCMOIbpRpBuwRh9xc2aBU+eQHIyAMD06eDuDlFRoFRqtmhOTs7UqVNHjBiRlpbm6Oi4a9euhw8fnjlzpqioKDExcdy4cTKZLCoqysfHp3///r/88ktzc7NmG1KTNy3etOZY32m8c7dRz/4BQJpF0KultpZcvEiuXCGvv97yjlBIDAzIrl1EICAABIAIBOTECY1ULysrW7BgAZvNBgAzM7OIiIj6+vr/3iw3N/f999+3sLCg/xLa29tv3HigrEwjLanXkpIlkA7/efgfphtBOgRj9JUiEpF+/YiRETl5kty+/df7t26RykqiVJLYWOLm1hKm/v7kjz/UVloqlW7YsMHc3BwADAwMFixYUFlZ+fxfaWxsjI6O9vHxAYAhQ4RsNhk3jiQmEpVKbV2p3Q3pDUgHh7sOCpWC6V6QrsAYfXVUVZHevQkA8fAgpaV/++jXX8myZaS6mhBC5HKyaxext28J05Ejya1bHaorl8t37dplZ2dHDy3HjRt37969Nu3h0qVLYWFyA4O/Bss7dpCGhg51pTk9hT0hHc5KzjLdCNIVGKOviMrKlmP2nj3JM0fHSiXp3p0AEB6PrF3bEk9SKVm3jlhaEgBCUSQ0lOTl1bSj7qlTp9zd3ekA9ff3T0lJafdXqKggGzYQZ+eWMDU3JwsWkMzMlk8vXSKJiS2vVSqyZ0+763TUlxVfQjrMKprFWAdIx2CMvgpKSoi7OwEgnp6kvPwfNsjKIqGhLfHUqRPZsIE0NhJCiFhMIiIIl0tsbcvMzMxCQ0MLCwtfsuiNGzeGDBlCB6iHh0dsbKxKHUfjcjk5coQEBbV0S1HktdfIyZNk/nxiYkIKCgghRKkkxsYdL9VOxU3FrHQW9/+4dYo6xppAugRjVO8VF7cMNvv2bTls/zfXrpGhQ1viydmZ7NpFmpsJIaSsjKxadcDAwAAAjI2Nly1bVv3cHeXl5YWGhlIUBQDW1tYbNmxoampS63cihJDcXPL++8TMjACQ6dPJ/Plk8mTyxhuEMB2jhJBh94ZBOvws+pnJJpDOwBjVb4WFpGtXAkD69ydi8Uv9Snx8yylUevR66lQyPYosLi5+5iJ7Xd2zo63q6uqIiAhDQ0MA
MDExiYiIkEgkav9ST6urI1u2kFu3yPz55NAhMnQoOXqU+RiNKY0ZdmjYlKVTmGwC6QyMUT2Wm0scHQkAGTyYtCnNVKqWS/Y+PskA4Ofnl/jnecesrKzQ0FD6UL1Tp04bNmxobGwkhDx+/HjDhg08Hg8AWCxWWFhY+T+ePtAYOkYzM0mXLkQiYThGJRKJiYkJRVFFRUVM9oF0A8aovsrObrnaPnRoOy9qy+Vk7944e3t7OjRHjhx58+ZN+qOUlJTAwED6fVdX17lz57ZuNn78eKFQqM5v8nLoGCWELFtGVqxgOEYJITNnzgSAr776iuE+kA7AGNVLt28Ta2sCQMaMITJZh3ZFDzP5fH5rmGZkZNAfJSYm9u3bFwAcHBzoQevFixc73nz7tMZoQwNxc2M+RhMSEuhrawz3gXQAxqhuUyjI1atkyxaydi1Zu5Zs307S09NuqaysCAB5882WC+4dV19f33rzPIvFCg0NLSgoIIQolcqAgAAAWLRokVouxLdba4wSQmJjmY9RpVLp6OgIAKmpqQy3gpiGz9TrsMePYccOSE6G2logBAiB6mpFQuLMcfU1NRASAsePg7GxekqZm5tHREQUFBREREQYGBgcPXr0ypUrAMBisRoaGgBg3rx59KV5pkRFwYwZLa89PeHGDQZ7AQBgsVjTp08HgJgYnKnkfx0usKyrCIGoKHj0qMeWxUGuD3YHxwPAxaIuG68O3jT2j515IzafduNoZkHCwsLCXbt2rV+/ns1mNzc3m5mZKRSKhoYGExMTjdR7OfX1kJcHfn5w4ACEhUFwMJw8yWA7AABZWVne3t5WVlbl5eVGRkYMd4OYg6NRXXXvHtTUgEoFAMnFXa6UuLR+4mVVsS0oliOu0lDlbt26bdy4kb7zKTc3Vy6Xd+/endkMffgQrK1h7FhQqeC114Ci4I8/oKmJwY4AAAQCgY+PT01NzdmzZxluBTEKY1RXZWa2Tg66/rU/Fv/+ZrPyqf9ZKpV2ZmPOysoCAG9vby3Ueg4nJ3BxAZEI0tLA3h5694bHj+HqVWabAgAICwsDPK7/n4cxqqvE4taXg5xL+9pVfH894K9PlUoQibTQhY7EKACMGQMAkJDw7GtmzZ49m8PhxMfHl5biGk3/uzBGddXfLx59+3rij6n+JRKLv97icrXQRWZmJgAIBAIt1Hq+N94AAKCPnukY1YUjaVtbW1dXV0tLSxcXFwcHh1GjRn3wwQe//PJLenr6E1xN8H+GZi5SoI5zd4eHD0GhoH+yMX38WdDlry4HdePXAgAYGUHXrlrogh6N6kKMDh8OxsZw8yZUV0NgIFhYQFYWlJSAi8uLf1dztmzZUlBQYGxsbGhoWFFRUVFRkZSURH9kaGjYq1cvLy8vb29vgUAgEAi6dOnCZK9IYzBGdVXfvnDp0tNvLOyfFn3Hh37dpGDvTup5ZS38+qsGW5BKpcXFxUZGRm5ubhos83JMTGDIEEhMhKQkmDEDRoyA48fh3DmYP5+xls6fP798+XKKovbu3UsvOSUUCtPT07Ozs4VCYW5ubkZGRkZGRuv25ubmHh4enp6eXl5enp6efn5+rZO0Ir2GNzzpsLw8OHbsbI7rUNdiroECAApr+QW1/KGuD5q5vK4/vF9dDRcuwPDhmqp/48aNgICAvn373r59W1M12uKHH+Djj+GttyA6GqKiIDwcJk+GY8eYaSY3NzcgIKCurm7NmjWRkZH/vYFcLr9//35rqmZnZxcWFj6zDZ/Pb01VX1/fPn36mJmZaaN7pFYYo7qtpASOHweZDBQKUKkO3vVedn70mqHJi/zS1j1asGqH/fDhcOGCporv3y//4YfG0aPLvvnGU1M12iInBzw9oXNnqKyE8nJwdgYeD0QiMDDQdic1NTX+/v75+flTpkw5evQo/WBCr169OnXq1JqJPj4+9FNhrUQiUWZmZlZWVlZWVmZmplAorK+vf3oDFovFZrOXLVv29ddfa/X7oI7BGNUHlZWQkQE3
bvyW7RkaG+psUZ///o9PzKy7bFhYWwspKfDn7Mlq9uGHsGULbNwIn3yikf23Q9euUFwMt26p+vdneXvD/ftNSUmNgwdbarOH5ubm0aNHX7x4sV+/fikpKaampgAgEok6d+78zJb29vatqerl5eXl5WX89yuH5eXlrWNVoVB469YtuVwOAMeOHZs8ebLWvhHqKGafRUVtEBWlWhPpbVsFQHYHnyKRkWsWVgL8tQKo2o0YQQDImTOa2n87rF4d5+7u/eWXXxJC1q3ba2pqGhERoeUewsPDAcDe3r707ytelZWVJSYmbt68ecGCBYGBgf/9wAKHw+nWrdu4cePWrFkTGxublZWlVCqf3kNjYyO9wN/SpUu1+51Qh2CM6o+cHBIZeWjKbwCkG7+mefUXdRt/ohdTunJFIwVtbAgAKSnRyM7b58SJEwAQGBhICKGvifv4+GizgU2bNgEAl8t94Ywkzc3NOTk5sbGxq1atmjx5spubG4v17P2F5ubm/v7+77777ubNmysqKggh169fBwAHBweFAlce1RsYo/pDpSI7dypWr+1hLQIg+yceJ5GRn85/RE/1pHaPHhEAYmGhW8sdS6VSIyMjNpstFoubmprMzc0piirT1gr3Z8+eZbPZFEUdap1sqi2ampqysrJiY2PXrFkzbty4bt26PT3bS+afq/f17NkTAM6dO6fW3pEG4e33+oOiICiIzSKfDrkMAOtSghQq1sfu8ebm8PvvcOuWmqvdvQsA4O0NjM7r9CxTU9PAwEClUpmUlGRoaDh8+HBCyLlz57RQOicnZ9q0aUqlMjIyckbrZFNtYWho6OXlFRoaGhkZGR8fX1BQIBKJkpOTt23btnDhQg8PD3qzWbNmAT5gqlcwRvVKr15gYzPL+657p5r8GqtYoVcnWeniqSJ///KoqE3qLZWVBQCgA/fdP2vMmDEAQM8GQr9O0PxjoWKxODg4WCKRhISErFq1Sl27tbKyGjp06JIlS3bu3EmvcAUAYWFhLBYrLi6OnqIQ6T6MUb1CUTB4MJtFIgKvAMC6lCAVoT7usT8z033v3v88fad3x+lsjL7xxhsAkJCQQAihXycmJir+fNxLE5qbm0NCQvLz8/v16xcdHa3peVddXV2HDBkik8mOMXVPLGojjFF9IxBAp05v+WR05dflVFsfy+5lI3u8IDSUELJu3To11snMBADQgTlJniUQCFxcXCorKzMyMrp06dKzZ8+6urrU1FTNVVy6dGlycrKDg8PJkye1M2EgThylXzBG9Q1FwZAhBmxVROCVfvZZB+6eIoRECARcLvfYsWP0TCIdJRKRu5nZWSoA8PJSw/7UbvTo0fDnsbymj+u//fbbqKgoLpd74sQJJycnDVV5RmhoqImJycWLF4uLi7VTEXUExqge6t0brKzm9L1ZLRt8Ku/Aidxcu8eP54WEEEI6+vTLo0fw008QFVV/LDHA4YGvY2Wngz+C7k0B9/TpUfq4XkMTJyckJKxcuZKiqH379vn5+WmixD/i8XgTJkwghBw+fFhrRVG7sf/xcWCk0ygKDAzY9+8bsNkJ9+/nicXhvr69nZ13pKRkZmZOmzbN2tq6PbstK4N9+y7d5UtkBq7mNWG9M2Z6Z17MsXMrvwz29mBlpe6v0X6Ojo4FBQXTpk3r06ePk5PT999/X1paWl1dXVNTo1KprKys6Kn7Oyg7O3vs2LGNjY3r1q1btGhRx3fYJiYmJgcPHiwtLX3vvfe0XBq1FT4Mqp9UKti69YlI5Pbjj2X19fEzZ47z8FiYl7fr8OG33norOjq6zTtUKmHzZpBKR/7yVk61dfaS7RbGTfk1VhMOTxcu2QGGhvDRR2pbP0+thEKhr6+vubm56M95rDkcjouLy9NTfvTs2bOtwSoWi/39/QsKCkJDQ48cOaL95fyUSqWzs3NFRcXNmze1ORBG7YAH9fqJxYLAQGMOZ1lAAABEJicTQj51cTHkcA4ePHj/3r2X2smTJ1BSAunpkJQE
+/fD48f02wOdHn5+YcTftlSp4M4dNX8FdWhqapo1a1ZTU5Ovr+/atWtDQ0Ppe9cLCwtPnz69cePGt99+WyAQ8Hg8Pz+/uXPnbtq0KTExsaKi4vm7bW5unjJlSkFBga+v7/79+xlZEpXNZtN3p+KFJt2H843qrb594cqVRX5+3167ll5efr6gYLSb22xvb6FIJI2JgRUrwNT0r42fPIFHj6C6GmprW/6IRNDc/PT+pHJDNqUCgA8Gps4/Nf5WmQOf++f87QoF5OfDwIHa+3YvJyIiIiMjw83N7ejRo63TKTU3N9+7d+/pKT9ycnLS0tLS0tJaf9HS0pKeK4QetPr4+Dw9scjixYsvXbqkzUvz/ygsLOz7778/dOjQd99913pXKdJBeFCvzy5cgMuXv7l6NSIxMcDZ+dq8eY3NzVwDA2CxwNgYvL2hrg5qaqCmBpTKp3+vScEua+AJH3XOru5cWMun/xTVWsZMPr7vTt+vRvwhbjRZfXH4oSnHJv06TbhkBwCAoyO8+y4zX/NfnD9/fsyYMRwO5/Lly/7+/s/Zsq6uLisrSygU3r17VygUZmZm1tTUPLONq6srPVP9gwcPfv31Vy6Xm5KS0r9/f01+gxfz8fG5e/fuiRMnJkyYwGwn6DlwNKrP8vMBYImf33fXrl0vLb1QVDSCXllEpQKZDFJTAUAql98Xi/NravJramoap6eWvX5fbFUp/Ye5gY05ironLWc/x7rf33O7377/6/PXx7p0iQkAqqur33nnHfpu2ednKABYWloOHjx48ODBre/U1tY+PVP9nTt3Hjx48ODBgzNnzlhaWrLZ7IMHDzKeoQAQFhb2n//8JyYmBmNUl2GM6i2JBKqrAcDU0PADf//PL1xYm5zcxdJS+OhRdnV1YW0t/aeotrb1cGOIq//lBy4AYMhWOvHqu/Fru/FrPTtXe9lUd+PXdrGsY1HkeG4veuPNY8722xVuayoFADA01Kkb8Qkh8+bNq6ioCAoKWrZsWTv2wOfznw5WpVJZWFiYmZmZkZGxadMmpVLZvXt3AEhPT9+5c+ekSZPefPNNdX6BlxYWFrZy5cr4+HixWNypUydGekAvhDGqt6qqgMOh17x7z99/45UrN8vKum/Z8sxWxhyOe6dOblZWblZWPraKtcOi3axqnHj1z141MTMDpTE0NbW+4WIhWT7oWkxGb2CzJaYOFjqwHFOr7du3x8fH8/n8mJgYtdzbxGaz3d3d3d3dJ0+eLBaLt2/fHhMT8+23316+fHnv3r0ikYipGLW1tR05cuTZs2ePHj26cOFCRnpAL4TnRvXW/ftw7BgdfFVSabctW1gU5WJh0cPamg5NdysrNysrJx7v2QvNxsbA57f86dwZbGzAygqMjEAuh19+gepqkMv/2tjQcK9wYETCsAsXqd69tfsF/0V2dnb//v0bGxuPHDkydepUte8/NTV14MCB9KzMYrGYfnKpvLy8nXfjdtihQ4dmzZo1aNCgq1evMtIAeiGMUb1VXw/bttFX299PSNiamjqxZ8/j06f/bZt/S8x/Qwjcvg2pqSAWAyFgaUn6+4V8NzDuOOXiAteugaOjhr/UizQ1Nfn7+2dkZMyfPz8qKkpDVXr16pWbm3v27NnRo0ePHz/+9OnTW7duXbp0qYbKPV9jY6O9vb1EIsnNze3RowcjPaDnw6eY9JaREeTkgFT6oK5uzsmThJDDISF2retKcjgQFASzZoGvL3h5QbduYGcH5ubAee5pHIoCBwfw84OhQ2HoUPD3p5ydJ0ygLl2CzEw4dw5mzmT4Hvxly5adOnXKzc0tLi7O6Dn/HnSMRCK5cOECIWTy5MkcDue3334Ti8XzGVrK2cDA4N69e3fu3LGyshquuWVgUQfg7ff6zMICAFZfvNikUIT5+Pi0LnrOZoOlJTx1YbojjI3h5Eno2ROEQpg06enTp9p27ty5rVu3GhgYHDx48JlFN9WrdcZPiUQSHBxsaWl58+bN3NxczVV8YT8A
EB0djceOugljVG+JRHDvXq5IdCgz05DNXj10aMv7hobg5ARz54I6rr3QrKwgMRGcnCA5Gd55B1Qqde24DVrvcFq/fv2AAQM0WsvFxSUoKKixsTEuLs7Y2Dg0NBQYfZpo2LBhXbt2ffDgwZUrV5jqAT0HxqjeSkoClWpFUpJCpQrv378bnw8WFuDvDzNnwjvvAJer3mpOTnDmDFhYwK+/wsqV6t33ixFC5s6dW1lZOXTo0I8//lgLFZ+e8bN1MKj8+1MMmvbDDz/cuHEDACiKoieyWrJkiTYbQC9L24s/IbUoKyORkTfnz6cATA0NK5YvJ199RRoaNF32wgViaEgAyObNmi71N1u2bAEAPp9foq11SiUSiYmJCUVRRUVFKpWKvo30jz/+0E51Qgg9H4qpqWlVVRUhZN68eQDg4OCgtQbQy8PRqH5KSgKAFUlJBOCjgQPtzMxg4EAw+4dnk9Rr+HCIigKKgq+/Vp06dVHT5WhCoXDFihUAsGvXLmdnZ+0U5fF4EydOJIQcOnSIoqjZs2eDFo/rb9++PWfOHELI+vXrbWxssrOzY2NjAYBexR7pHKZzHLVdfj6JjDwXFgYAfC63JiKCbNhAGhu1Vv+776QuLsO5XO6VK1c0XevJkye9e/cGgPDwcE3XegY9o76HhwchJD8/nx4bNmh+yF9eXk7frDp37lxCiFgsdnNzA4AJEyYolUpNV0ftgDGqb1Qqsnu3as0afycnAPhm1CgSGUmuXtVyF/RNlFZWVjk5OVoo5ObmpoX8eoZSqXR0dASA1NRUQkhgYCAAHDhwQKNFZTIZfQEtKCioqalJLpfTNzn169dPKpVqtDRqN4xRfSMUksjI2NBQAHAwN3/82Wdk0yYil2u5C4VCMWnSJABwcnIqLS3VUJWEhASKooyMjG7fvq2hEs9HP7C/dOlSQshPP/0EAKNHj9ZcOZVKNX36dADo2rVrdXU1ISQ8PBwA6EeqNFcXdRDGqF5Rqci2bYrVq3t17gwAu8aPJ5GRJC2NkV5kMtmgQYMAwNvbu66uTu37r6qqsrOzA4DvvvtO7Tt/SfQSgVZWVk+ePKmrq+NyuSwWS3OJtmrVKgDg8XhZWVmEkE2bNgEAl8ulh8NIZ2GM6pXbt0lk5J7gYABw79RJvmoV+fFHwtz5MpFIRD+eOHz48KamJjXuWaVS0bOBjBo1itkTgvRVnePHjxNCQkJCAOCbb77RRKHY2FiKoths9u+//04IOXv2LJvNpijq8OHDmiiH1AifqdcfCgVs2/ZELO6xdWuJRHI4JGS6QABTpoBAwGBTRUVFAQEBVVVVM2fOPHDgwEuutyGXy6VSaV1dXUNDQ0NDg1QqbWhooH+USqVSqTQ1NTUlJcXa2jojI8PBwUHT3+I5Nm3atHz58smTJx87duzUqVMTJkzw8vLKyspSb5X09PSgoCCZTLZt27YlS5bk5OQEBARIJJK1a9euXr1avbWQ2mGM6o/r1+H8+e+vX1927lxvW9v/W7iQZWcH4eHAxEpBT0tPTx82bJhUKn377bfHjh1bV1dXX19Pp2FDQ0NtbW3r6/r6eolEIpVKm17ikVJra+vFixevXbtWC1/hOaqqqpycnFgsVnl5uYWFhZOTU1VV1e3bt/v27auuEuXl5QMGDCgrK5s3b96ePXvEYvHAgQPz8/NDQkLoIaq6CiENwflG9YRcDlevSuXyjVeuAMCGkSNZFAUjRzKeoQDg6+t75MiR4ODg8+fPv+SipBwOx9zc3NLS0tzc3MzMzMzMzMLCwsLCgn5tbm6empoaFxcnFAo13fwL2drajho1KiEhITY2dtGiRdOmTfvxxx9jYmLUFaONjY0TJ04sKysLCgrasWNHc3NzSEhIfn6+r69vdHQ0ZqhewNGonkhOhkuXIpOT1yYnD3ZxuTx3Lri4wJw5TLfVQi6X8/l8mUw2fvx4e3v71jTk8Xh0ONJxSeemubm58YvmiaqqqnJ2dqYo6uHDh08vNseIw4cP
z5w5MyAg4Nq1a2lpaX5+fjY2Ng8fPjQwMOjgngkhM2bMOHLkSI8ePa5fv87n8xcsWLB7924HB4ebN286Mj4vIXo5OBrVbQ0NkJ4O+flQXi6SyX64fh0A1o0YAQAwYsQLfleLrl27JpPJevfuferUqedv2dzcLJVKy8vLW0+D1tfX19XVtR74BwYGTpgwwdbWdsyYMfHx8QcOHPjoo4+08y3+zcSJEy0sLK5fv56Xl9e/f3+BQJCVlZWYmDh27NgO7vnzzz8/cuQIn88/deoUn8//9ttvd+/ezeVyT5w4gRmqRzBGdVhGBvz+O6hU9LqeX6Wk1Dc1jfPwGNqlC3h4gKsr0/39JSkpCQBGjRpF/7hjx46MjIynLxlJJJL6+vqGhoYXnhVVKBT08m1z5syJj4/fu3cv4zHK5XKnTJny888/Hzx48Isvvpg3b96VK1c6PkaOjY39+uuvORzO0aNHPTw8EhISVq5cSVHUvn37/Pz81NI50g48qNdVeXlw7FjrUvJl9fXuW7c2KRTp4eF97OxgwQKwt2e2waf5+/vfvHkzISFhzJgxAPDmm2+eOXPmH7dks9k8Hs/S0rL1wN/CwoLH47Ue+AcGBtIrzSkUCmdn58rKytTUVE3PjPdCycnJw4cPd3V1LSoqUsv5yrS0NHouvh07dixatCg7O3vQoEESiWTdunWfffZZx/ePtAlHozpJpYJTp1ozFABWXbzY2Nw8q3fvPra2wGJBZaXuxGhdXV16erqhoeGQIUPod8LDwydMmNB6ycjMzIzP59MpyX3pGfw4HM6sWbM2bdq0b98+xmN06NChXbt2LSoqOnjwID1NSUeUl5dPnDixsbHxvffeW7RokVgsDg4OlkgkoaGhn376qVoaRtqEo1GdVFQER460TjR/Tyz22r6doqicJUu60+vFOzgAQ2ta/Le4uLgpU6YMHz78woUL6t1zXl5ez549LSwsysvLTUxM1Lvztho1alRSUhKbzQ4ODvb39xcIBAKBwLVdp1Zee7ZsyLcAAAp1SURBVO21CxcuvP766/Sd9qNGjbp06ZKvr29KSgrjXxO1A45GdVJ1NTw1Q3B6ebkhm/12nz4tGQoAYjEzjf2TxMREABg5cqTa99yjR4+BAwfeuHEjLi6u42PADvr666/T0tLq6uqOHz9+/Phx+k0ej+fu7u7p6enl5eXp6enn52fXupTLv9u1a9fy5cv379/P4XDmz59/6dIlBweHkydPYobqKRyN6qS0NDh3jl6DnlbR0MBhsTqbmrb8zOXCJ58w09t/cXd3z8/Pv3nzpiYujERFRYWHh2tiqNsOIpHo/PnzMplMKBRmZWVlZmZWVVU9s429vb1AIPD29qb/6+np+Zxw3LBhw8qVK7lcbkpKSv/+/TXcPtIUjFGdVFYGMTHPWz2uSxd4+20tNvSvHjx40KVLFz6fX11dzVbf6k+tpFKpvb3948eP79+/T09Br1Nqa2uFQmF2djb93zt37ohEome2sbe39/X1pYerXl5eAoGAXtP09OnTEydOVKlUv/7669SpU5loH6kHHtTrJAcHMDH51xg1MICAAO029K9ycy9bWZkMHz5CExkKAGZmZpMnT/7ll1+io6O/+OILTZToCD6fP3jw4MF/LsJKCCkuLs7KyhIKhXfv3hUKhbm5uRUVFadPnz59+jS9jaGhoaenZ0NDQ2FhISFk/fr1mKH6Dkejuqq8HPbvf/pifQsDA+jeHaZNY6Knf1BYOK2u7iSf/1PXru9oqMSlS5eGDRvm5ORUXFysobDWHIVCUVJSIhQK09PT6UFrbm6u6s+1Vd3c3O7fv89sh6jjMEZ1WGkpHDkCzc0glwMAcDhAUdC7N4wdCywdWURLlZFhp1BUCwQFRkbdNFSDEOLh4ZGfn996X6pek0qld+7c2bNnj5+f3zvvvGPaer4b6S2MUd2mVMK9e1BUBE1NYGMDvXpB68V6HSCTpefk9Dc07OLtXaTRQl999dXnn38+derU
I0eOaLQQQu2AMYrar7JyQ1nZSmvrBa6uuzRaqKyszNXVlc1ml5WVWVtba7QWQm2lI8eGSC/V1ycBAI83StOFHB0dR48eLZfLDx06pOlaCLUVxihqJ5XqyePH1wBY5ubDtFBuzpw5ALBnzx4t1EKoTTBGUTtJpSkqVaOJST8ORxtH2cHBwZ07d87MzExPT9dCOYReHsYoaietHdHTDA0NZ82aBQD79u3TTkWEXhLGKGqnhoZEAODx1P8o/b+ZN28eABw4cKCxsVFrRRF6IYxR1B4KhUgmu8ticU1Ntfc8lUAg8PPzk0gkJ06c0FpRhF4IYxS1B4dj1atXWpcu+1isl50/tN2kUml+fj79mr7Q9PPPP2u6KEIvD+8bRW2jUkkfPoyQSlMoysjWdpmV1QyNlissLJw4cWJ9ff2tW7c6d+4skUhsbGzkcvmlS5eCgoI0Whqhl4SjUdQ2VVWbVCppr17pHh4XzMwGabRWUlLSgAEDMjMzTUxMGhoaAKCkpEShUAAAPoqOdAfGKGobubyUzbakKEM2m2doqMFl9aKiosaOHSsWi8eOHXv9+vVu3brFx8cPHjxYpVKZmppOmTJFc6URahOMUdQ2NjYf1tWdys0NEIl2E6J48S+0XVNT09y5c8PDwxUKRURERHx8PI/H27hxI310P23atJKSEktLS02URqgd8NwoajNClA0NFysqvuRye7m4/KTenZeXl0+ePDk1NdXMzGzfvn0hISFSqXTOnDm//fYbRVGrV69es2aNWtbmREhdcNpm1GYUxebxRnI4VoWF0xQKUWHhNEfHDaamalhB5Nq1ayEhIRUVFc7OzsePH/f19S0tLZ00aVJ6erq5uXlMTAy9hD1COgVHo6htRKI9bDbPwMCpunoHRRmw2ZaPHm2mKI6t7cf29pEduf9p9+7dS5culcvlQUFBR48etbGxSUlJCQ0NffTokYeHx4kTJ3r16qXGL4KQuuC5UdQ2XK63THZHLP7Z1NTPxWW7o+N6O7sIAFJZ+U12tqChoT0LzykUihUrVixYsEAuly9YsCApKcnGxiYqKmrkyJGPHj0aO3ZsamoqZijSWTgaRWogk9158GCeTHYbgLK2nu/k9C2bzXvJ3xWJRNOmTbtw4YKRkdHOnTvnzJnT1NS0ePHin3/+maKoTz75ZP369Sxdme0foX+AMYrUg5Dmqqrvy8vXENJkYGDv4rLD0nLiC38rIyNj4sSJxcXFDg4OcXFx/v7+rZeYTE1N9+/fHxISooXmEeoIjFGkTo2NwgcP5j1+nAoAfH6oi8t2Dqfzv2185MiRuXPnymSyQYMG/fbbb/b29tevX58yZcrTl5i02DtC7YTHSkiduFyvHj2uOjl9z2KZ1NYeDQub8Y+rJymVyhUrVsyYMUMmk82ePfuPP/6wt7c/cODAiBEjKioqgoKC0tLSMEOR3iAIacCTJwWHDs2j/44FBweXlZU9/enRo0cBwMDAYPv27YSQ5ubmiIgIemP6QhNDXSPUHhijSFNUKlV0dLSVlRUAWFhYbN68WalUtn76wQcfJCcnE0JEItGIESMAwMjIaO/evcz1i1A74blRpFmVlZVLliyJi4sDgCFDhuzZs8fDw6P104yMjEmTJhUVFbVeYmKuU4TaCc+NIs2ys7M7duxYbGysjY3N5cuX+/Tps3HjRqVSCQCxsbGBgYFFRUUBAQFpaWmYoUhP4WgUaUltbe2KFSuioqIAoG/fvgMGDIiKiiKEzJ49OyoqisvV+PTPCGkIxijSqjNnzixcuLC0tNTY2FipVH7//fdLly5luimEOgRjFGlbfX39gAED8vLy1qxZExkZyXQ7CHUUnhtF2sbj8QYNGgQALi4uTPeCkBpgjCIGmJubAwC9LghC+g5jFDGAjtH6+nqmG0FIDTBGEQNwNIpeJRijiAE8Hg8wRtGrAmMUMQAP6tGrBGMUMQBHo+hVgjGKGIDnRtGrBGMUMQAP6tGrBBdYRgzobGU5YfQwJwdb
phtBSA0wRhEDOlma9zXIM1PUMt0IQmqAB/WIAYYmZgDQJMNzo+hVgDGKGGBgZEKx2M1NjSqlguleEOoojFHEAIqiDLmmACB/ImO6F4Q6CmMUMcOIjtFGKdONINRRGKOIGYZcPD2KXhEYo4gZRibmANCEo1Gk/zBGETP+PKh/zHQjCHUUxihiBh7Uo1cGxihiBh7Uo1cGLmmHmFFRkCmtfWTbxZNnbc90Lwh1CD4MihgQs2p6beUDjqGxbZder721wtIW17ZDegwP6hED6sUVs9ceWrQ1yb674PednzLdDkIdgjGKGEOx2J0c3eirTNGfhZ756bOtCwYpFc1M94VQ2+BBPWLGic0fsDmGVcU50z/fBwAPc9OGzVj2RviXFIX/tCM9gzGKmDF89ieWnZ2Ks26c3PLR4u3JAODiNQAzFOkj/FuLmGFh7ci379Jn5DSZRCyrr2G6HYTaD0ejiBn30y+YFXQuunvV2sndjG/DdDsItR/GKGJAv9dnNtRUyuprXDwHjJm/lqKogIkLASim+0KoPfD2e4QQ6hA8N4oQQh2CMYoQQh2CMYoQQh2CMYoQQh2CMYoQQh2CMYoQQh3y//shrlVAlUWhAAABt3pUWHRyZGtpdFBLTCByZGtpdCAyMDI0LjAzLjUAAHice79v7T0GIBAAYkYGCBADYkkgbmBUZlAAibMxaAApZhY2hgwQzYwkAKEFIDQTO4RmRsgngPQzw8SxyzNh6nOA0TAL8TEgagUhbuUGeoORKYOJiTmBmSWDiYU1gYVNgY09gZ2DgZ0zg4mTK4GLO4OJm4eBh5eBl0+DiY9fgV8gg0lAMEFQKINJSDhBWCSDSUSUgZUxgY9ZgZstQYQ/wYkFaDIrIxMzCysrOxs3Fyc7DzcbCzMfL5uAoJCwCL+4ECMi9BjEVEqF99cfnnAAxDnXoL6/foUjmM3zhcn+kQMrmD1pyuU9sg16+0FsP+7D+493O4HZaoe/7F8fwwtWs8ZM8YBM1047EHudfMOBJTskwez3fdMOPGydBlaf6zL1wLKp++1B7PWLUg9kLFFyALFNjggcONv9Gizu5ly0P/qRMlh8+737dq6z/4LFbc5H2J89zwlmF50QcbjDPQFsfsoVUwdWxwlg85tNuhy+qG4Es+/8WOFga563D8R+XDTDQaR2GlivUmCQA3PSdjDbwNXQgeGyKdguMQBL4WpwRrbCDgAAAi16VFh0TU9MIHJka2l0IDIwMjQuMDMuNQAAeJx9VFtuGzEM/PcpeIEIfImiPmM7KIoiNtC4vUOBfBW9P0ru1paCCN31Eitpdvga+gB5fT9/+/UHHhefDwcA/M+v9w4/BREPr5AvcHz58vUCp9vz8b5zuv643N6AGbjGN3F/xD7frq/3HYLjb3jCUpshGjxpqSxuClhwu8bHDKdEWlXBDk9SkGuPt89ICWRQElUJSi7ojF4XQN0pg8jZN3InU14gayIpg2vOiWy1d11xGrwlshMz1vQevvObz8gGlzyv5N3yXCxSXwE9nWtsR2VanFNFIl8A+w5sqF09o6AI02iBJEznWszUUICKdmt1VXaiREqp7hiAiFeJl9WkrUORsFFvNThdkXHJKXvhu5pIT05D68s4NbxjUWmsFpTdSXAJrFvTXYMna1jFhdsKaAHkIqxNMjYORsUVsG1AV0Lfi8nWfZlONkiLRCOjroEM370to8wO1fCJoaOsAAtHCVZqx42zWdRQYeuk9ZU4mAIoQWmMnI0MxraaipjH03sk1DIL/5cZrny/XM4fhnQf2+P1ch5jmzeP0QxhgIz503zGlOVdxyjFAmzMC8WyjaHQeHxIn+LpQ+CaW7OMNQ3RJFdNQzzJkjYjk/xoMzrJjNNQnfREm7FJOJqG2qQQTUM+KUHTUJ86rml47qymYZpaSGmYp15pVGasZY/mETBnSSOpRyycZU0im7s49yzX97/peD/8BatDHPVL7xMjAAABHXpUWHRTTUlMRVMgcmRraXQgMjAy
NC4wMy41AAB4nCVQO25DMQy7SscEcAz9LeEhS71n6RGydurcw5dyPRgQQfGjvfn5ej23bHy3t7776f6+b8X8tW+fP/fn1vuL5eP39rC5yMrHgycvrxgXICLNNWiyE3MeKMJi8LSK5QB0eib5kOnG0hSZElwJThoJAJploVrNCYoaF03TJUenkrWBtCgWWLmmyhqXTBVbCkRAsQbSmOoElIDBZVM5ixvBUmHJQUbkAU9RgWmTVnSOU6vNFZzo2cDI1l1Q6wbQP3FJPGWtVonksC7B00XXUcZ1yvxgiCzwR2XKlWc53LRTasvUv6CvIIqB60Eko/s6s2v0KqWA33dztAkU1gBLxv33D7xAXEcM2UGcAAAAAElFTkSuQmCC",
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mol"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "column_sums = df.sum()\n",
+ "\n",
+ "# Identificar columnas cuya suma es igual a 0\n",
+ "columns_with_zero_sum = column_sums[column_sums == 0].index.tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[225, 238, 256, 264, 276, 280, 308, 314, 342]"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "columns_with_zero_sum"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "ugropy",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/ugropy/__init__.py b/ugropy/__init__.py
index ee9588b..747a8d4 100644
--- a/ugropy/__init__.py
+++ b/ugropy/__init__.py
@@ -7,11 +7,11 @@
"""
from .core import instantiate_mol_object
-from .core.frag_classes.abdulelah_gani.abdulelah_gani_p import (
- AbdulelahGaniPrimaryModel,
+from .core.frag_classes.abdulelah_gani.abdulelah_gani_pst import (
+ AbdulelahGaniPSTModel,
)
-from .core.frag_classes.abdulelah_gani.abdulelah_gani_p_result import (
- AGaniPFragmentationResult,
+from .core.frag_classes.abdulelah_gani.abdulelah_gani_pst_result import (
+ AGaniPSTFragmentationResult,
)
from .core.frag_classes.base.fragmentation_model import (
FragmentationModel,
@@ -29,6 +29,7 @@
from .core.ilp_solvers.ilp_solver import ILPSolver
from .groups import Groups
from .models.abdulelah_gani_pmod import abdulelah_gani_p
+from .models.abdulelah_gani_smod import abdulelah_gani_s
from .models.jobackmod import joback
from .models.psrkmod import psrk
from .models.unifacmod import unifac
@@ -38,8 +39,8 @@
"constants",
"writers",
"instantiate_mol_object",
- "AbdulelahGaniPrimaryModel",
- "AGaniPFragmentationResult",
+ "AbdulelahGaniPSTModel",
+ "AGaniPSTFragmentationResult",
"FragmentationModel",
"FragmentationResult",
"GibbsModel",
@@ -48,6 +49,7 @@
"JobackFragmentationResult",
"Groups",
"abdulelah_gani_p",
+ "abdulelah_gani_s",
"joback",
"unifac",
"psrk",
diff --git a/ugropy/core/frag_classes/abdulelah_gani/__init__.py b/ugropy/core/frag_classes/abdulelah_gani/__init__.py
index 19da812..7e94fde 100644
--- a/ugropy/core/frag_classes/abdulelah_gani/__init__.py
+++ b/ugropy/core/frag_classes/abdulelah_gani/__init__.py
@@ -1,6 +1,6 @@
"""Abdulelah-Gani frag classes module."""
-from . import abdulelah_gani_p, abdulelah_gani_p_result
+from . import abdulelah_gani_pst, abdulelah_gani_pst_result
-__all__ = ["abdulelah_gani_p", "abdulelah_gani_p_result"]
+__all__ = ["abdulelah_gani_pst", "abdulelah_gani_pst_result"]
diff --git a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py
similarity index 94%
rename from ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py
rename to ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py
index f72b048..41a1c54 100644
--- a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py
+++ b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py
@@ -6,8 +6,8 @@
from rdkit import Chem
-from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_p_result import (
- AGaniPFragmentationResult,
+from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst_result import (
+ AGaniPSTFragmentationResult,
)
from ugropy.core.frag_classes.base.fragmentation_model import (
FragmentationModel,
@@ -16,7 +16,7 @@
from ugropy.core.ilp_solvers.ilp_solver import ILPSolver
-class AbdulelahGaniPrimaryModel(FragmentationModel):
+class AbdulelahGaniPSTModel(FragmentationModel):
"""Abdulelah-Gani model dedicated to properties estimation models.
Class to construct the primary structures detector for the Abdulelah-Gani
@@ -48,13 +48,17 @@ def __init__(
self,
subgroups: pd.DataFrame,
subgroups_info: pd.DataFrame,
+ allow_overlapping: bool = False,
+ allow_free_atoms: bool = False,
) -> None:
super().__init__(
subgroups=subgroups,
- allow_overlapping=False,
- fragmentation_result=AGaniPFragmentationResult,
+ allow_overlapping=allow_overlapping,
+ allow_free_atoms=allow_free_atoms,
+ fragmentation_result=AGaniPSTFragmentationResult,
)
+
self.subgroups_info = subgroups_info
def get_groups(
@@ -63,7 +67,7 @@ def get_groups(
identifier_type: str = "name",
solver: ILPSolver = DefaultSolver,
search_multiple_solutions: bool = False,
- ) -> Union[AGaniPFragmentationResult, List[AGaniPFragmentationResult]]:
+ ) -> Union[AGaniPSTFragmentationResult, List[AGaniPSTFragmentationResult]]:
"""Get the groups of a molecule.
Parameters
diff --git a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py
similarity index 96%
rename from ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py
rename to ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py
index 67a02b5..8b114c8 100644
--- a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py
+++ b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py
@@ -9,7 +9,7 @@
)
-class AGaniPFragmentationResult(FragmentationResult):
+class AGaniPSTFragmentationResult(FragmentationResult):
"""Abdulelah-Gani primary group contribution properties estimator.
Parameters
diff --git a/ugropy/core/frag_classes/base/fragmentation_model.py b/ugropy/core/frag_classes/base/fragmentation_model.py
index 213379f..af48b55 100644
--- a/ugropy/core/frag_classes/base/fragmentation_model.py
+++ b/ugropy/core/frag_classes/base/fragmentation_model.py
@@ -54,10 +54,12 @@ def __init__(
self,
subgroups: pd.DataFrame,
allow_overlapping: bool = False,
+ allow_free_atoms: bool = False,
fragmentation_result: FragmentationResult = FragmentationResult,
) -> None:
self.subgroups = subgroups
self.allow_overlapping = allow_overlapping
+ self.allow_free_atoms = allow_free_atoms
self.fragmentation_result = fragmentation_result
# Instantiate all de mol object from their SMARTS representation
@@ -122,7 +124,7 @@ def get_groups(
)
# If there is free atoms in the molecule can't fragment with the model
- if np.size(free_atoms) > 0:
+ if np.size(free_atoms) > 0 and not self.allow_free_atoms:
return self.set_fragmentation_result(
mol, [{}], search_multiple_solutions, **kwargs
)
diff --git a/ugropy/groupscsv/abdulelah_gani/secondary/info.csv b/ugropy/groupscsv/abdulelah_gani/secondary/info.csv
new file mode 100644
index 0000000..ae2da41
--- /dev/null
+++ b/ugropy/groupscsv/abdulelah_gani/secondary/info.csv
@@ -0,0 +1,131 @@
+group|group_number
+(CH3)2CH|221
+(CH3)3C|222
+CH(CH3)CH(CH3)|223
+CH(CH3)C(CH3)2|224
+C(CH3)2C(CH3)2|225
+CHn=CHm-CHp=CHk (k,m,n,p in 0..2)|226
+CH3-CHm=CHn (m,n in 0..2)|227
+CH2-CHm=CHn (m,n in 0..2)|228
+CHp-CHm=CHn (m,n in 0..2; p in 0..1)|229
+CHCHO or CCHO|230
+CH3COCH2|231
+CH3COCH or CH3COC|232
+CHCOOH or CCOOH|233
+CH3COOCH or CH3COOC|234
+CO-O-CO|235
+CHOH|236
+COH|237
+CH3COCHnOH (n in 0..2)|238
+NCCHOH or NCCOH|239
+OH-CHn-COO (n in 0..2)|240
+CHm(OH)CHn(OH) (m,n in 0..2)|241
+CHm(OH)CHn(NHp) (m,n,p in 0..2)|242
+CHm(NH2)CHn(NH2) (m,n in 0..2)|243
+CHm(NH)CHn(NH2) (m,n in 1..2)|244
+H2NCOCHnCHmCONH2 (m,n in 1..2)|245
+CHm(NHn)-COOH (m,n in 0..2)|246
+HOOC-CHn-COOH (n in 1..2)|247
+HOOC-CHn-CHm-COOH (n, m in 1..2)|248
+HO-CHn-COOH (n in 1..2)|249
+NH2-CHn-CHm-COOH (n, m in 1..2)|250
+CH3-O-CHn-COOH (n in 1..2)|251
+HS-CH-COOH|252
+HS-CHn-CHm-COOH (n, m in 1..2)|253
+NC-CHn-CHm-CN (n, m in 1..2)|254
+OH-CHn-CHm-CN (n, m in 1..2)|255
+HS-CHn-CHm-SH (n, m in 1..2)|256
+COO-CHn-CHm-OOC (n, m in 1..2)|257
+OOC-CHm-CHm-COO (n, m in 1..2)|258
+NC-CHn-COO (n in 1..2)|259
+COCHnCOO (n in 1..2)|260
+CHm-O-CHn=CHp (m,n,p in 0..3)|261
+CHm=CHn-F (m,n in 0..2)|262
+CHm=CHn-Br (m,n in 0..2)|263
+CHm=CHn-I (m,n in 0..2)|264
+CHm=CHn-Cl (m,n in 0..2)|265
+CHm=CHn-CN (m,n in 0..2)|266
+CHn=CHm-COO-CHp (m,n,p in 0..3)|267
+CHm=CHn-CHO (m,n in 0..2)|268
+CHm=CHn-COOH (m,n in 0..2)|269
+aC-CHn-X (n in 1..2) X: Halogen|270
+aC-CHn-NHm (n in 1..2; m in 0..2))|271
+aC-CHn-O- (n in 1..2)|272
+aC-CHn-OH (n in 1..2)|273
+aC-CHn-CN (n in 1..2)|274
+aC-CHn-CHO (n in 1..2)|275
+aC-CHn-SH (n in 1..2)|276
+aC-CHn-COOH (n in 1..2)|277
+aC-CHn-CO- (n in 1..2)|278
+aC-CHn-S- (n in 1..2)|279
+aC-CHn-OOC-H (n in 1..2)|280
+aC-CHm-NO2 (n in 1..2)|281
+aC-CHn-CONH2 (n in 1..2)|282
+aC-CHn-OOC (n in 1..2)|283
+aC-CHn-COO (n in 1..2)|284
+aC-SO2-OH|285
+aC-CH(CH3)2|286
+aC-C(CH3)3|287
+aC-CF3|288
+(CHn=C)(cyc)-CHO (n in 0..2)|289
+(CHn=C)cyc-COO-CHm (n,m in 0..3)|290
+(CHn=C)cyc-CO- (n in 0..2)|291
+(CHn=C)cyc-CH3 (n in 0..2)|292
+(CHn=C)cyc-CH2 (n in 0..2)|293
+(CHn=C)cyc-CN (n in 0..2)|294
+(CHn=C)cyc-Cl (n in 0..2)|295
+CHcyc-CH3|296
+CHcyc-CH2|297
+CHcyc-CH|298
+CHcyc-C|299
+CHcyc-CH=CHn (n in 1..2)|300
+CHcyc-C=CHn (n in 1..2)|301
+CHcyc-Cl|302
+CHcyc-F|303
+CHcyc-OH|304
+CHcyc-NH2|305
+CHcyc-NH-CHn (n in 0..3)|306
+CHcyc-N-CHn (n in 0..3)|307
+CHcyc-SH|308
+CHcyc-CN|309
+CHcyc-COOH|310
+CHcyc-CO|311
+CHcyc-NO2|312
+CHcyc-S-|313
+CHcyc-CHO|314
+CHcyc-O-|315
+CHcyc-OOCH|316
+CHcyc-COO|317
+CHcyc-OOC|318
+Ccyc-CH3|319
+Ccyc-CH2|320
+Ccyc-OH|321
+>Ncyc-CH3|322
+>Ncyc-CH2|323
+AROMRINGs1s2|324
+AROMRINGs1s3|325
+AROMRINGs1s4|326
+AROMRINGs1s2s3|327
+AROMRINGs1s2s4|328
+AROMRINGs1s3s5|329
+AROMRINGs1s2s3s4|330
+AROMRINGs1s2s3s5|331
+AROMRINGs1s2s4s5|332
+PYRIDINEs2|333
+PYRIDINEs3|334
+PYRIDINEs4|335
+PYRIDINEs2s3|336
+PYRIDINEs2s4|337
+PYRIDINEs2s5|338
+PYRIDINEs2s6|339
+PYRIDINEs3s4|340
+PYRIDINEs3s5|341
+PYRIDINEs2s3s6|342
+(CHn=CHm)cyc-COOH|343
+AROMRINGs1s2s3s4s5|344
+aC-NHCOCH2N|345
+(N=C)cyc-CH3|346
+aC-CONH(CH2)2N|347
+aC-SO2NHn (n>=0;n<3)|348
+aC-SO2NHn (n>=0;n<3)|349
+aC-SO2NHn (n>=0;n<3)|350
\ No newline at end of file
diff --git a/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv b/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv
index 96f4ade..9c10aaa 100644
--- a/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv
+++ b/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv
@@ -1,130 +1,131 @@
-(CH3)2CH
-(CH3)3C
-CH(CH3)CH(CH3)
-CH(CH3)C(CH3)2
-C(CH3)2C(CH3)2
-CHn=CHm-CHp=CHk (k,m,n,p in 0..2)
-CH3-CHm=CHn (m,n in 0..2)
-CH2-CHm=CHn (m,n in 0..2)
-CHp-CHm=CHn (m,n in 0..2; p in 0..1)
-CHCHO or CCHO
-CH3COCH2
-CH3COCH or CH3COC
-CHCOOH or CCOOH
-CH3COOCH or CH3COOC
-CO-O-CO
-CHOH
-COH
-CH3COCHnOH (n in 0..2)
-NCCHOH or NCCOH
-OH-CHn-COO (n in 0..2)
-CHm(OH)CHn(OH) (m,n in 0..2)
-CHm(OH)CHn(NHp) (m,n,p in 0..2)
-CHm(NH2)CHn(NH2) (m,n in 0..2)
-CHm(NH)CHn(NH2) (m,n in 1..2)
-H2NCOCHnCHmCONH2 (m,n in 1..2)
-CHm(NHn)-COOH (m,n in 0..2)
-HOOC-CHn-COOH (n in 1..2)
-HOOC-CHn-CHm-COOH (n, m in 1..2)
-HO-CHn-COOH (n in 1..2)
-NH2-CHn-CHm-COOH (n, m in 1..2)
-CH3-O-CHn-COOH (n in 1..2)
-HS-CH-COOH
-HS-CHn-CHm-COOH (n, m in 1..2)
-NC-CHn-CHm-CN (n, m in 1..2)
-OH-CHn-CHm-CN (n, m in 1..2)
-HS-CHn-CHm-SH (n, m in 1..2)
-COO-CHn-CHm-OOC (n, m in 1..2)
-OOC-CHm-CHm-COO (n, m in 1..2)
-NC-CHn-COO (n in 1..2)
-COCHnCOO (n in 1..2)
-CHm-O-CHn=CHp (m,n,p in 0..3)
-CHm=CHn-F (m,n in 0..2)
-CHm=CHn-Br (m,n in 0..2)
-CHm=CHn-I (m,n in 0..2)
-CHm=CHn-Cl (m,n in 0..2)
-CHm=CHn-CN (m,n in 0..2)
-CHn=CHm-COO-CHp (m,n,p in 0..3)
-CHm=CHn-CHO (m,n in 0..2)
-CHm=CHn-COOH (m,n in 0..2)
-aC-CHn-X (n in 1..2) X: Halogen
-aC-CHn-NHm (n in 1..2; m in 0..2))
-aC-CHn-O- (n in 1..2)
-aC-CHn-OH (n in 1..2)
-aC-CHn-CN (n in 1..2)
-aC-CHn-CHO (n in 1..2)
-aC-CHn-SH (n in 1..2)
-aC-CHn-COOH (n in 1..2)
-aC-CHn-CO- (n in 1..2)
-aC-CHn-S- (n in 1..2)
-aC-CHn-OOC-H (n in 1..2)
-aC-CHm-NO2 (n in 1..2)
-aC-CHn-CONH2 (n in 1..2)
-aC-CHn-OOC (n in 1..2)
-aC-CHn-COO (n in 1..2)
-aC-SO2-OH
-aC-CH(CH3)2
-aC-C(CH3)3
-aC-CF3
-(CHn=C)(cyc)-CHO (n in 0..2)
-(CHn=C)cyc-COO-CHm (n,m in 0..3)
-(CHn=C)cyc-CO- (n in 0..2)
-(CHn=C)cyc-CH3 (n in 0..2)
-(CHn=C)cyc-CH2 (n in 0..2)
-(CHn=C)cyc-CN (n in 0..2)
-(CHn=C)cyc-Cl (n in 0..2)
-CHcyc-CH3
-CHcyc-CH2
-CHcyc-CH
-CHcyc-C
-CHcyc-CH=CHn (n in 1..2)
-CHcyc-C=CHn (n in 1..2)
-CHcyc-Cl
-CHcyc-F
-CHcyc-OH
-CHcyc-NH2
-CHcyc-NH-CHn (n in 0..3)
-CHcyc-N-CHn (n in 0..3)
-CHcyc-SH
-CHcyc-CN
-CHcyc-COOH
-CHcyc-CO
-CHcyc-NO2
-CHcyc-S-
-CHcyc-CHO
-CHcyc-O-
-CHcyc-OOCH
-CHcyc-COO
-CHcyc-OOC
-Ccyc-CH3
-Ccyc-CH2
-Ccyc-OH
->Ncyc-CH3
->Ncyc-CH2
-AROMRINGs1s2
-AROMRINGs1s3
-AROMRINGs1s4
-AROMRINGs1s2s3
-AROMRINGs1s2s4
-AROMRINGs1s3s5
-AROMRINGs1s2s3s4
-AROMRINGs1s2s3s5
-AROMRINGs1s2s4s5
-PYRIDINEs2
-PYRIDINEs3
-PYRIDINEs4
-PYRIDINEs2s3
-PYRIDINEs2s4
-PYRIDINEs2s5
-PYRIDINEs2s6
-PYRIDINEs3s4
-PYRIDINEs3s5
-PYRIDINEs2s3s6
-(CHn=CHm)cyc-COOH
-AROMRINGs1s2s3s4s5
-aC-NHCOCH2N
-(N=C)cyc-CH3
-aC-CONH(CH2)2N
-aC-SO2NHn (n>=0;n<3)
-aC-SO2NHn (n>=0;n<3)
-aC-SO2NHn (n>=0;n<3)
\ No newline at end of file
+group|smarts
+(CH3)2CH|[CH;!R]([CH3])[CH3]
+?(CH3)3C|
+?CH(CH3)CH(CH3)|
+?CH(CH3)C(CH3)2|
+????C(CH3)2C(CH3)2|
+?CHn=CHm-CHp=CHk (k,m,n,p in 0..2)|
+?CH3-CHm=CHn (m,n in 0..2)|
+?CH2-CHm=CHn (m,n in 0..2)|
+?CHp-CHm=CHn (m,n in 0..2; p in 0..1)|
+?CHCHO or CCHO|
+?CH3COCH2|
+?CH3COCH or CH3COC|
+?CHCOOH or CCOOH|
+?CH3COOCH or CH3COOC|
+?CO-O-CO|
+?CHOH|
+?COH|
+????CH3COCHnOH (n in 0..2)|
+?NCCHOH or NCCOH|
+?OH-CHn-COO (n in 0..2)|
+?CHm(OH)CHn(OH) (m,n in 0..2)|
+?CHm(OH)CHn(NHp) (m,n,p in 0..2)|
+?CHm(NH2)CHn(NH2) (m,n in 0..2)|
+?CHm(NH)CHn(NH2) (m,n in 1..2)|
+?H2NCOCHnCHmCONH2 (m,n in 1..2)|
+?CHm(NHn)-COOH (m,n in 0..2)|
+?HOOC-CHn-COOH (n in 1..2)|
+?HOOC-CHn-CHm-COOH (n, m in 1..2)|
+?HO-CHn-COOH (n in 1..2)|
+?NH2-CHn-CHm-COOH (n, m in 1..2)|
+?CH3-O-CHn-COOH (n in 1..2)|
+?HS-CH-COOH|
+?HS-CHn-CHm-COOH (n, m in 1..2)|
+?NC-CHn-CHm-CN (n, m in 1..2)|
+?OH-CHn-CHm-CN (n, m in 1..2)|
+????HS-CHn-CHm-SH (n, m in 1..2)|
+?COO-CHn-CHm-OOC (n, m in 1..2)|
+?OOC-CHm-CHm-COO (n, m in 1..2)|
+?NC-CHn-COO (n in 1..2)|
+?COCHnCOO (n in 1..2)|
+?CHm-O-CHn=CHp (m,n,p in 0..3)|
+?CHm=CHn-F (m,n in 0..2)|
+CHm=CHn-Br (m,n in 0..2)|[CH0,CH1,CH2;!R]=[CH0,CH1,CH2;!R][Br]
+????CHm=CHn-I (m,n in 0..2)|
+?CHm=CHn-Cl (m,n in 0..2)|
+?CHm=CHn-CN (m,n in 0..2)|
+?CHn=CHm-COO-CHp (m,n,p in 0..3)|
+?CHm=CHn-CHO (m,n in 0..2)|
+?CHm=CHn-COOH (m,n in 0..2)|
+?aC-CHn-X (n in 1..2) X: Halogen|
+?aC-CHn-NHm (n in 1..2; m in 0..2))|
+?aC-CHn-O- (n in 1..2)|
+?aC-CHn-OH (n in 1..2)|
+?aC-CHn-CN (n in 1..2)|
+?aC-CHn-CHO (n in 1..2)|
+????aC-CHn-SH (n in 1..2)|
+?aC-CHn-COOH (n in 1..2)|
+?aC-CHn-CO- (n in 1..2)|
+?aC-CHn-S- (n in 1..2)|
+????aC-CHn-OOC-H (n in 1..2)|
+?aC-CHm-NO2 (n in 1..2)|
+?aC-CHn-CONH2 (n in 1..2)|
+?aC-CHn-OOC (n in 1..2)|
+?aC-CHn-COO (n in 1..2)|
+?aC-SO2-OH|
+?aC-CH(CH3)2|
+?aC-C(CH3)3|
+?aC-CF3|
+?(CHn=C)(cyc)-CHO (n in 0..2)|
+?(CHn=C)cyc-COO-CHm (n,m in 0..3)|
+?(CHn=C)cyc-CO- (n in 0..2)|
+?(CHn=C)cyc-CH3 (n in 0..2)|
+?(CHn=C)cyc-CH2 (n in 0..2)|
+?(CHn=C)cyc-CN (n in 0..2)|
+?(CHn=C)cyc-Cl (n in 0..2)|
+?CHcyc-CH3|
+?CHcyc-CH2|
+?CHcyc-CH|
+?CHcyc-C|
+CHcyc-CH=CHn (n in 1..2)|[CH;R][CH]=[CH1,CH2;!R]
+?CHcyc-C=CHn (n in 1..2)|
+?CHcyc-Cl|
+?CHcyc-F|
+?CHcyc-OH|
+?CHcyc-NH2|
+?CHcyc-NH-CHn (n in 0..3)|
+?CHcyc-N-CHn (n in 0..3)|
+????CHcyc-SH|
+?CHcyc-CN|
+?CHcyc-COOH|
+?CHcyc-CO|
+CHcyc-NO2|[CH;R][N+](=O)[O-]
+?CHcyc-S-|
+????CHcyc-CHO|
+?CHcyc-O-|
+?CHcyc-OOCH|
+?CHcyc-COO|
+?CHcyc-OOC|
+?Ccyc-CH3|
+?Ccyc-CH2|
+?Ccyc-OH|
+?>Ncyc-CH3|
+?>Ncyc-CH2|
+?AROMRINGs1s2|
+?AROMRINGs1s3|
+?AROMRINGs1s4|
+?AROMRINGs1s2s3|
+?AROMRINGs1s2s4|
+?AROMRINGs1s3s5|
+?AROMRINGs1s2s3s4|
+?AROMRINGs1s2s3s5|
+?AROMRINGs1s2s4s5|
+?PYRIDINEs2|
+?PYRIDINEs3|
+?PYRIDINEs4|
+?PYRIDINEs2s3|
+?PYRIDINEs2s4|
+?PYRIDINEs2s5|
+?PYRIDINEs2s6|
+?PYRIDINEs3s4|
+?PYRIDINEs3s5|
+????PYRIDINEs2s3s6|
+?(CHn=CHm)cyc-COOH|
+?AROMRINGs1s2s3s4s5|
+?aC-NHCOCH2N|
+(N=C)cyc-CH3|[#7;R]@[#6;R][CH3]
+?aC-CONH(CH2)2N|
+?aC-SO2NHn (n>=0;n<3)|
+?aC-SO2NHn (n>=0;n<3)|
+?aC-SO2NHn (n>=0;n<3)|
\ No newline at end of file
diff --git a/ugropy/models/abdulelah_gani_pmod.py b/ugropy/models/abdulelah_gani_pmod.py
index ce5b20a..9f74e32 100644
--- a/ugropy/models/abdulelah_gani_pmod.py
+++ b/ugropy/models/abdulelah_gani_pmod.py
@@ -23,8 +23,8 @@
"""
from ugropy.constants import _csvs
-from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_p import (
- AbdulelahGaniPrimaryModel,
+from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst import (
+ AbdulelahGaniPSTModel,
)
from ugropy.models.read_csv import _rd
@@ -37,4 +37,4 @@
_ag_sg = _rd(_ag / "primary.csv", "group")
_ag_info = _rd(_ag / "info.csv", "group")
-abdulelah_gani_p = AbdulelahGaniPrimaryModel(_ag_sg, _ag_info)
+abdulelah_gani_p = AbdulelahGaniPSTModel(_ag_sg, _ag_info, False, False)
diff --git a/ugropy/models/abdulelah_gani_smod.py b/ugropy/models/abdulelah_gani_smod.py
new file mode 100644
index 0000000..b5a2da8
--- /dev/null
+++ b/ugropy/models/abdulelah_gani_smod.py
@@ -0,0 +1,40 @@
+"""AbdulelahGani Secondary Structures FragmentationModel implementation.
+
+Import and use the AbdulelahGani Secondary Structures FragmentationModel with:
+
+.. code-block:: python
+
+ from ugropy import abdulelah_gani_s
+
+ # Get groups from molecule's name
+ tol = abdulelah_gani_s.get_groups("toluene")
+
+ print(tol.subgroups)
+
+ # Get groups from molecule's SMILES
+ eth = abdulelah_gani_s.get_groups("CCO", "smiles")
+
+ print(eth.subgroups)
+
+Attributes
+----------
+abdulelah_gani_s: AbdulelahGaniPSTModel
+ AbdulelahGaniPSTModel FragmentationModel :cite:p:`gani`
+"""
+
+from ugropy.constants import _csvs
+from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst import (
+ AbdulelahGaniPSTModel,
+)
+from ugropy.models.read_csv import _rd
+
+
+# =============================================================================
+# Abdulelah Gani Secondary Structures FragmentationModel
+# =============================================================================
+_ag = _csvs / "abdulelah_gani" / "secondary"
+
+_ag_sg = _rd(_ag / "secondary.csv", "group")
+_ag_info = _rd(_ag / "info.csv", "group")
+
+abdulelah_gani_s = AbdulelahGaniPSTModel(_ag_sg, _ag_info, True, True)