diff --git a/tests/agani.ipynb b/tests/agani.ipynb index ff81f31..43b736d 100644 --- a/tests/agani.ipynb +++ b/tests/agani.ipynb @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -247,6 +247,9 @@ } ], "source": [ + "from rdkit.Chem import Draw\n", + "from rdkit import Chem\n", + "\n", "smiles = \"C1=CC2=NOC=C2C=C1\"\n", "\n", "mol = instantiate_mol_object(smiles, \"smiles\")\n", @@ -286,428 +289,22 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
12345678910...211212213214215216217218219220
SMILES
C=CC(=O)NCCC1=NC=NC10100100000...0000000000
C=CC(=O)NCCCN(C)C1200100000...0000000000
C=CC(=O)NCCCN(CC)CC2300100000...0000000000
C=CC(=O)NCCCN1CCOCC10200100000...0000000000
C=CC(=O)NCCCN1CCSCC10200100000...0000000000
..................................................................
OCCNC(=O)CCN1C=CN=C1N(=O)=O0300000000...0000000000
OCCNC(=O)CCN1C=NC(=N1)N(=O)=O0300000000...0000000000
OCCNC(=O)CN1C=CN=C1N(=O)=O0200000000...0000000000
OCCNC(=O)CN1C=NC(=N1)N(=O)=O0200000000...0000000000
OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC(Cc2ccccc2)C(=O)N3CCC(OCOC)CC3)C(C)C3840000000...0000000000
\n", - "

198 rows × 220 columns

\n", - "
" - ], "text/plain": [ - " 1 2 3 4 5 \\\n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 1 0 0 1 \n", - "C=CC(=O)NCCCN(C)C 1 2 0 0 1 \n", - "C=CC(=O)NCCCN(CC)CC 2 3 0 0 1 \n", - "C=CC(=O)NCCCN1CCOCC1 0 2 0 0 1 \n", - "C=CC(=O)NCCCN1CCSCC1 0 2 0 0 1 \n", - "... ... ... ... ... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 3 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 3 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 2 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 2 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 3 8 4 0 0 \n", - "\n", - " 6 7 8 9 10 \\\n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(C)C 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(CC)CC 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCOCC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCSCC1 0 0 0 0 0 \n", - "... ... ... ... ... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 0 0 0 0 \n", - "\n", - " ... 211 212 213 214 \\\n", - "SMILES ... \n", - "C=CC(=O)NCCC1=NC=NC1 ... 0 0 0 0 \n", - "C=CC(=O)NCCCN(C)C ... 0 0 0 0 \n", - "C=CC(=O)NCCCN(CC)CC ... 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCOCC1 ... 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCSCC1 ... 0 0 0 0 \n", - "... ... ... ... ... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O ... 0 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O ... 0 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O ... 0 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O ... 0 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... ... 0 0 0 0 \n", - "\n", - " 215 216 217 218 219 \\\n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(C)C 0 0 0 0 0 \n", - "C=CC(=O)NCCCN(CC)CC 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCOCC1 0 0 0 0 0 \n", - "C=CC(=O)NCCCN1CCSCC1 0 0 0 0 0 \n", - "... ... ... ... ... ... 
\n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 0 0 0 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 0 0 0 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 0 0 0 0 \n", - "\n", - " 220 \n", - "SMILES \n", - "C=CC(=O)NCCC1=NC=NC1 0 \n", - "C=CC(=O)NCCCN(C)C 0 \n", - "C=CC(=O)NCCCN(CC)CC 0 \n", - "C=CC(=O)NCCCN1CCOCC1 0 \n", - "C=CC(=O)NCCCN1CCSCC1 0 \n", - "... ... \n", - "OCCNC(=O)CCN1C=CN=C1N(=O)=O 0 \n", - "OCCNC(=O)CCN1C=NC(=N1)N(=O)=O 0 \n", - "OCCNC(=O)CN1C=CN=C1N(=O)=O 0 \n", - "OCCNC(=O)CN1C=NC(=N1)N(=O)=O 0 \n", - "OCCNCCCNC(=O)C(CC(O)C(CC1CCCCC1)NC(=O)C(CCCC)NC... 0 \n", - "\n", - "[198 rows x 220 columns]" + "'BrC'" ] }, - "execution_count": 9, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[df[88]>0]" + "df[df[1]>0].index[0]" ] }, { @@ -719,7 +316,7 @@ "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3de1xUdd4H8M8Mw00EQRBQUC4aiiKJZN4tAVtN3doIt9vg6hq+trXRbHMsd5/p6ea4XRzLVinreUZNV+zZlMxU1Gx1xdQEFWVdQkBUFImbyJ35PX/8RjRmQGBmzjnDfN8vX72U32HOFxo+/M453/M7MsYYCCGEdJdc7AIIIcS+UYwSQohFKEYJIcQiFKOEEGIRilFCCLEIxSghhFiEYpQQQixCMUoIIRahGCWEEIsoxC7AsZWW4tAhFBSgogJ+fhg8GFOnwttb7LIIIV1AMSqSK1egVmPrVhgMv/i4szOefx5vvQUfH5EqI4R0jYzuqRfBhQuYNg3FxVAoMHs2Jk6EpyfKy3HgAA4cAGMYPhwZGRgwQOxCCSH3RjEquPp6jBmDnBwMGoRduzBy5C9GDxxAYiKqqhAXh4wMyOnkNSFSRz+lgvv8c+TkwNkZX3/dNkMBxMdj82YAOHgQu3YJXx0hpKsoRgW3YQMAJCYiOtr8BrNmYdw4APj0U+GqIoR0F8WosKqqcPo0APz61x1t9pvfAMCRI20vQBFCpIdiVFgXLhiT0fRw/m58tLISJSVCVEUIsQDFqLDKy41/6du3o818fY1/qaiwbT2EEItRjAqrtS9CJutos9YL9HRQT4jkUfu9sFrvUKqqQv/+7W7WOgmlJnzi2IqLceECAERHw9/fzAZHj6K2FpGRCAoSuLQ7aDYqrCFDjH/Jze1os/PnAcDDo6OoJcQBfPUVpk3DtGlQKs1vMG8epk3D7t3ClvVLFKPC6tfPmKQZGR1txkfHjoWCDhcIAYB9+7Btm9hFtINiVHD8t+qWLbh2zfwGOTnYuxcA5s4VripCJMzNDQBeeglVVWKXYg7FqOBeeAH+/qiqwtNPo6am7ej163j6aTQ3Y/hw/Pa3YtRHiOTExmLCBJSU4M9/FrsUcyhGBefnB70erq44dAhRUfjoI2Rno7AQJ09Cq8XIkcjJgbc3Nm+Gq6v
YtRIiCTIZVq2CTIZ163DihNjVmKBTb2KYPh179mDBAuTnQ6VqOxoTg40bERUlRmWESNSkSXjqKWzdigUL8OOP0rpqIKVaHMrDD+P8eezcif37kZ+Pykr4+iIiAjNmYPp0WtiJEFOrViE9HWfO4OOPsXix2NXchWJUPC4uSEpCUpLYdRBiHwYOxIoVeO01/OUvePJJMRtF26BZjxhefBF/+hNqa8WugxAJKSnB0aPYsgVvv40FC7Bpk5ltXn4ZkZG4eRNLlgheX/toNio4gwHr16OlBStXth1qaUFYGPr1w8mT97hblBC7VVGBq1dRUoKLF+/8uXChbd9Kc7OZlnsXF6xfj4cfxpdfIiMD06YJVnVHKEYFV1GB5mb07Qtn57ZDZWUoLkZ9PWUo6QFqalBYiIKCO3/4P6urzW/frx9CQxEWZvzv6NHmN5syxXit6aWXkJ1tu/K7gGJUcDduAEC/fl0bIkSqGhoaioqKCgoKCgoKrlxJ/s9/evG45G9nU15exqBs/cP/2bt3Z/f4/vv45hucO4fPP7fWF2ERilHBUYwSu1VRUXHRRFFRUUtLC9/gvvsm5uUZ19J1cUFwMMLD7/zp3x8DBiAszNLDrf798cYbWLIEr78OJycLvyYroBgVHMUokbySkhI+uywsLGz9S3FxcVNTk+nGzs7OISEhoaGhYWFhI0e6+vkZ55g2XVdn0SLo9cjKsuEuOo9iVHAUo0RKsrKytm3b5uvre3di1tfXm24pk8mCgoLCbuO5GRoaGhwcrBC8G97JCampGDdOEkvyUowKjmKUSEZmZmZCQkKtSe+dj49PeHh4eHh4//79BwwYwP8+bNgwDw8PUeo0a8wYzJuHzz4Tuw6KURFQjBLJWLduXW1trUKhWLBgQWRkZOs0s3fnL/fY2H33ISkJw4aZH9VqUVMDgwHh4cKW9UsUo4KjGCWSkZWVBWD69Onr1q0TuxbzZszAjBntjvr54e9/F7CadtBdTIK7Z4yafVQCITZQU1MDIC4uTuxCuu/UKcybh1u3xKyBYlRwNBsl0mAwGMrKygAo23tAhz148UX87/9iwwYxa6AYFRzFKJGGvLy8mpqagQMH+vn5iV1L9y1fDgDvvYfGRtFqoBgVFmMoKwMA0zeuwYDycshkdx5ST4gt8ROjMTExYhdikVmzMHIkLl/Gli2i1UAxKqyqKjQ2wsvLzMr2/F57Hx9pLUhLeq7Tp08DGDVqlNiF3JvBgL/9DePHo66u7ZBMBrUaALRa0XpIKUaFRUf0RDL4bNQuYlQux+bNOHYM//M/ZkafegpDhuDCBezYIXhlAChGhUYxSiQjOzsbdhKjAF55BQBWrYLp/ahOTli6FADeeguMCV0YKEaFRjFKpKGkpOT69eve3t6hoaFi19Ipjz+OESNw6ZL5p9XPn4/+/ZGVhQMHBK+MYlRoFKNEGvhU9P7775fZyeK2MplxQvrOO2bOgbq6Gp/OZLoYugAoRoXVQVaWlrY7RIi12eNl+meeQUgIcnPx9ddmRv/wB3h74+BBZGYKXRjFqLBoNkqkgV+mv//++8UupAucnfHyywDwzjtmRr288MILAPDXvwpaFShGhdbB7Z4Uo0RA9jgbBfD88wgMxPHj+O47M6NLl8LDAzt3IidH0KooRoVFs1EiATU1Nfn5+S4uLpGRkWLX0jVubnjxRaCdc6C+vpg3D4wJPSGlGBUWxSiRgNOnTxsMhhEjRri4uIhdS5ctWgRvb2Rk4ORJM6PLlsHFBVu3orBQuJIoRoVFMUokwI4a7015eWHhQgDQas2MDhyIp59GczPee0+4kihGhdXeDfWM4eefzQ8RYm12dBuoWS+9BHd3fPUVzp83M7p8OeRyfPYZSkoEqodiVDg1NTU+jD0UHo5evdoMNVZXVw8b1jBihJl77QmxNruejQIICMDvfgeDwfyUc9gwPPYY6uvx0UcC1SNjotw85ZAKCgrCw8NDQ0MLCgraDOXl5UVERAwePPinn34
SpTbiOJqbmz09PRsaGioqKvr06SN2Od1UUICICMhkyMtDSEjb0RMn8OCD8PJCURG8vW1eDM1GhXPjxg0A/cyd/exgiBDrys3Nra+vDw8Pt98MBRAWht/+Fk1NWL3azOiYMYiPR3U11q8XohiKUeHcM0b96fEhxPbsa0WSDqxYAbkcn35qvDrbxquvAsDq1WbW1rM6ilHh0GyUSEGPidHISMycidpa8+dA4+MxfjxKS82vrWddFKPCoRglUtBjYhTAihUA8OGHqKoyM8qXMnn3XTQ327YMm8Rok+mKgIRilEjDmTNn0FNidOxYPPwwqqrw2WdmMuexx/DrX5f06aP6+98327QM68doRkbG0KFD//nPf1r9le0dxSgRXVFRUVlZma+vb3BwsNi1WMeKFfWTJm3Q6YbV19e3GZLL8cQT+06f/uidd94x2PIBI9aP0TVr1hQUFMTFxb322muNIj6sT3ooRono+BH96NGjxS7EahIS3Orq1hcXX9y4caPp6DPPPBMSEpKbm7tr1y7b1WD9GN2xY4dWq5XL5StXroyNjeX/2wgoRokE9KQTo63UajWAVatWNZucBHV2dn755ZcBvP3227YrwPoxqlAo1Gr14cOHIyIicnJyxo8fv2rVqpaWFqvvyO5QjBLR9cgYTUxMjIyMvHjx4pdffmk6+vvf/97f3//48eOHDh2yUQG2ulI/duzY7OxslUrV0NCwfPnyKVOm5Ofn22hf9qKDrCwrKwPgRzfUExvrkTEql8v5lHPlypWmt2X26tVLpVLxUVtVwGxs7969QUFBALy8vFJTU229O8mqq6sD4Orqajp08+ZNAB4eHsJXRRxKRUWFTCZzd3dvamoSuxYra2xsHDRoEIBvvvnGdLSyspLfsnXixAlb7N3mfaOPPPJITk7Os88+W11dvXDhwpkzZ5YItu6KlNARPRFdVlYWYyw6OlqhUIhdi5U5OzsvWbIEwJtvvmk62qdPn4ULFwJYtWqVLfZuhRjNzc1duXJlB2c/vb29N2/enJaW1rdv3927d48aNWrnzp2W79e+UIwS0fXII/pWKSkpfn5+x44dO3LkiOno0qVL3d3d//GPf5w3u7ieZSyNUcbY3LlzX3vttcmTJ3e8OlFSUlJWVlZcXFxpaenjjz+enJzMD2YdBMUoEV3PjlEPD49FixahnXOgAQEBc+fONRgM77//vtV3bWmMymSylStXDhw4MDMzMzo6es2aNaz9lfcGDRq0f//+1NTUXr16bdq0KTo62nG69ClGieh6dowCePHFFz09PXfv3n3q1CnT0WXLlikUik2bNl26dMm6+7XCQX18fPzZs2dTUlLq6uqWLFkyY8aMK1eutLexTCZLSUk5efJkbGxsYWHh1KlTFy9e3NDQYHkZEldaWgqKUSKehoaG3NxcuVweFRUldi220rdv3+effx7Au+++azoaFhY2Z86cpqam1WYX17OAdS4x9enTJzU1dfv27b6+vnv37o2Kivriiy862D4yMvLYsWNardbJyenDDz984IEHenyXPs1GibjOnTvX1NQUERHRu3dvsWuxoVdeecXNzW379u15eXmmoytWrJDL5Z988skNs4vrdZc1r9Q/+eST586dmz17dmVl5XPPPTdnzpzy8vL2Nna0Ln2KUSIuO30wfVcFBgY+99xzLS0t75l7wMjw4cNnzpxZW1v7kVUfMGLlhqeAgID09HS9Xt+7d+/t27fHxMR89913HWzvOF36FKNEXPwxdvfff7/Yhdjc8uXLnZyc9Hr91atXTUdXrFgBYO3atVa8xG2TvtHk5OQzZ85Mnjz50qVL8fHxCxcurK2tbW9jd3f3NWvW7NmzJygo6OjRo6NHj/7kk09sUZW4KEaJuBxkNgpg8ODBiYmJDQ0NZs+Bjh079qGHHqqoqEhNTbXaLm3R0881NTVptVoXFxcAw4cPP3nyZMfbV1RUPPvss7yqRx999OrVq7arTXhDhgwBcOHCBcZYZmbm2rV
rW78hYWFhAPLz80UtkPRkBoPBy8sLwLVr18SuRQjZ2dkymczDw6OsrMx0dM+ePQACAwPr6uqssjub3wx6+vTp6OhoAAqFQqPRNDc3d7w979IH4O/vv2PHDluXJxh+L9rOnTsjIyP5rwq5XP7xxx8zxvgp/+rqarFrJD0Wv94yYMAAsQsRzvTp0wG88cYbZkdjY2MBWOv2dJvHKGOsrq5OrVbL5XI+o+Yzsg4UFRXFxcXxrFEqlT0gXxobG2W38a/L9fbz6B9//HG0c689Idayfft2fpAndiHC+f777wH4+vrevHnTdDQtLQ1AeHi4VZYXECJGuf379w8cOBCAu7u7TqczGAwdbGwwGHiXPoDQ0NDvv/9esDqt7vDhw/wXIwAXF5eYmJj9+/czxtLS0nx8fPjHfX19xS6T9GT8usqKFSvELkRQEydOlMlkZg9qW1paIiIiAGzdutXyHQkXo4yxysrKlJQUHhy/+tWvLl++3PH258+f53NvuVyuUqnq6+uFqdMqDAZDenr6+PHj+dfr6empUqnafMlFRUWTJ09unXeb/bVJiOVmzpwJIC0tTexCBPXjjz/m5OS0N7phwwYAQ4cObWlpsXBHgsYox7v0cXvJko435tepnJ2dAURFRfElaiSupaUlLS1txIgRPB/79eun0WjKy8vNbnz3vDssLMyu591EsgYMGAAgLy9P7EIkpKGhwc3NDcDatWstfCkRYpQxdu3atdmzZ/OUSUpK+vnnnzve/tixY3wG7ubmptVq73mdSiz19fV6vf6+++7jX1pISIhOp7t169Y9P/HcuXP88ThOTk5qtbqhoUGAaomD4Dcie3p6Wj7t6km2bt3Kr1Xs2bPHwpcSJ0Y53qUPYNCgQQcPHux449raWpVKxb/sCRMm/PTTT8IU2UnV1dU6nY7/zgcwZMiQ1NTUxsbGzr9CU1OTRqNxcnICMHLkyOzsbNtVSxzK3r17AUyaNEnsQiRk48aNfNHV+Pj4jq/TdIaYMcoYu3jx4qRJk3B7yZJ7TtwkuJZ+aWmpRqNpvVgUExOj1+u7PV/OzMzkk1k+76bpA7EcX6t40aJFYhciFR999BGfkKnVaqu8oMgxyuy5S7+wsFClUvHTmgAmTpyYnp5u+cvePe+eOHGi1ObdosjLy4uOjvby8oqIiCgqKhK7HDvz9NNPA9iwYYPYhUiCVqvl87bVq1db6zXFj1HOvrr0c3JylEolv/All8tnzZqVmZlp3V3s2bOHnyKQzrxbFNnZ2U899RQ/18E5OTnxZW1JJw0bNgzAPScoPZ7BYFi6dCl/C33++edWfGWpxCizky79U6dOKZVK/lPt7OysVCrPnTtno32VlpY+8cQT/AtMTEy8ceOGjXYkTYcPH541axaflbu4uMyZM2fVqlWenp6dPwVEGGO3bt1ycnJSKBTWuvHRTjU3Ny9YsIC/l7788kvrvriEYpSTbJc+/6luvQcpJSVFmKPL1i59f3//nTt3CrBH0R0+fDghIYF/qz08PFQqVXFxMR+qq6tLSUnp/CkgkpmZCSA6Orq9DWpra4WsRxQNDQ1z5swB0KtXL8uvy5uSXIwyiXXpt7S0pKenjx079u4ueoFPyBYVFU2dOrXHd+nzb/WYMWP4V+rl5aVWq80uLXHmzJnWU0BqtbpLHRGOZt26dQCSk5PNjmZlZfn6+mq12h48V62vr+e3XPfp0+fIkSO22IUUY5QTvUu/sbFRr9e3riTi7++v0WgqKiosf+Vu4MvQ8m7hIUOGHDtmqzMJomhoaNDr9UOHDuXf6oCAAI1GU1lZ2cGndPUUkMPiDxb+4IMPzI4uW7aMf8/DwsK2bNlieeuP1Ny8eTM+Ph5A3759f/jhBxvtRboxyky69Nu7EajV0aNH+Xp0bm5u77//frf3W1NTo9Pp+LkFfrpAp9NJ4diHd+n7+Q0PCGhRq1kPaNK/efOmTqcLDg7m3+rw8HCdTtf5mVGXTgE5Jn4g1UFfdkZGRutazrG
xsQcOHBCyPJsqLy8fN24cgP79+589e9Z2O5J0jLLbZz+70aX/xz/+sRu7q6qq0ul0gYGB/F0VFRWl1+utsgaMtTQ0NKxcednJiQFs9GjW/h3DUnfjxg2NRsPbLfjJu+7123b1FJBDaW5u5lcOOr5RkN++HBoayr+NCQkJPeDuj5KSEn7mJywszNZdg1KPUa6rXfr79u3r6mXc69evazQab29v/k4aPXp0WlqaZGc3mZnsvvsYwNzcmFbL7KtJv6SkRK1We3h48G8177e18FvdpVNAjmPfvn0AAgMDO7PxrVu3tFotXxhXLpcnJSUVFBTYuEBbKSws5LexREZGCvCb1T5ilHW9S7/zLl68qFKp3N3d7/6pttaL286tW0ylYjIZA9jEicwumvTz8/NVKhU/w8tnPf/617+s9eJdXaihZ8vLy0tJSXF2duYnPTo/wSwrK1Or1Xw93F69eqnV6o5PUktQbm4uP00UGxsrTJug3cQo19Uu/Y6dOXNGqVTyW2t5F/3x48etVaow9uxhAwYwgHl5MSk36f/449nExER+RUihUDz77LNnzpyxxY7uXqihJ53m67xjx4499thj/Fvt7Ow8YcIE/vgQJyen+fPnd3JqVlhYqFQqedMuv5RvL8tUnjp1ij/WbMqUKVVVVcLs1M5ilFnpEm2b1m6lUpmbm2v1UoVRWsqeeIIBDGCJiUxqTfqHD7NZs9iDD+5q/Vbb+qr6xYsX+Squjtal36a1WalU8pXxuj3BPH78+EMPPcRfMCQkRK/XS/Y0F/fDDz/ws+2PPvqokNeE7S9GuW5for37rdamtduupaUxHx8GsIAAJoUmfYOB7djBxo0z5nufPobXX3+3pKREmL3b7hSQBHWytbnbE8yMjIyRI0fyFx8zZsyhQ4ds8EVYwd69e/kJ9zlz5gjcSmyvMcq6eIm2863d9quoiE2daowtpZKJ1aTf0sLS09no0cZK/PyYRsPu1atmE9Y9BSRB3Wht7t4Es6WlRa/X9+/fv/Wkto3OyXRbejqLjf2Xh4eHUqkUvrXGjmOUu/sS7RdffGG6QTdau+2XwcBSU1mvXgxgYWFM4KX06+uZXm9sIQBYSAjT6Zi4h9Q9tUu/vr4+NTV10KBB/F3d1dbm7k0wb968qdFo+KlnZ2eXZcsqhDq6uAe9nikUDGD//d/nRDntYPcxyn55iTYhIaGwsJB//MaNG5a0dtuvc+eMk0EnJyZMl351NdPpjBe7ADZ4MNPpmHSuSfSkLn3e2tw6Mex2a3O3J5g3btxQqVSTJq0FmIcHU6uZUBdyzPvb35hczgBmpbVDu6MnxCi73aXPz4woFIpXX311ypQprU8z5q3dkuqit7WmJqbRMN6lP3Iks10zdWkp02iMp2UBNmoU0+uZBI+ee0CXvi1am2tqarRaLb+Ur1AoUlJSOrleRG6u4bHHjP/TAwPZ+vVMlB8vrZYBTCZj770nwt5b9ZAY5Q4ePMiPOFqFhoZ+++23YtclGpt26RcWMpXKeAKB965Kv93WTrv0CwoKbNrazCeYvPPPw8NDrVZ3slUoM5NNnmx8AwwdygR+8Oh//ZfxkOvTTwXdr6keFaOMsbq6uri4OB8fn4CAgPXr14tdjvja69KfMYP5+LC+fdmJE2Y+6+xZ5uPDfHzMH6/l5bGUFObsbJwIzJrFrL1otQ3ZV5e+aWuz7dbX+Pe//52UlMSP4fz8/HQ6XScP4NLT2ZAhxjAdN44dPmyjAu8wGJhKxQDm7My2bbP57u6pp8UoMevrr1lgIG88Ylu3MsbYpEnG931srJlj8Oxs42ibS3GnTjGl0niuwNmZKZXMZotW21aXHqcoiiNHjojS2pyZmclvvAYwdOjQTj7avrGRpaaygADj22bWLGa7Zzk3N7Pf/Y4BzNWVffWVrfbSJRSjjqK1S58fAbXGKMBMH8lhGqO8i55/0NWVKZU2/DkRRlcXahCMaWvzpUuXhCzAYDBs27Zt8ODBvIaHHnr4+PF
OXaa8eZNptax3b+Nv2ZQUdu2alWurrze+jXv3ZhkZVn7xbqMYdSx79xr/wmN0+nQGME9P1uaKy90x+u9/swceMP7T25utWMFKS4Uv3CYk1aUvtdbmxsbG1NTUgICA8eN1XZpgXr7MUlKMhyy9ezO12motzDU1bNo0BjAfH2mdR6IYdVA8RnU6FhfHAPbkk78YvTtGa2qYry/z92caDRNp0WrbEr1Ln7c28wfPSa21ubKy8s03q9zdGcBcXJhK1dm7jc+fZ0lJxndRUBBLTbW0f6Oigk2YYGwMOH3aopeyOopRB9Uaozk5xtblr7++M9rmoP74cdaz223F6tJvs2p1WFiYNFubuz3B3L//zv1skZHdv5R/7RobNcp4T8d//tPNF7EdilEH1RqjjLEXXjC+QWtqjKPtXWLq2YTs0i8rK9NoNLz7yl5am7s3wWxpYZs3s5AQ4ycmJHT5rHpREYuIYAAbNoxJcwEMilEHdXeM/vwz8/NjAHv1VeOoY8YoE6RL/9q1axqNhq+ODCutWi2k7k0wGxpYairr14+5u3ctCi9eZOHhDGAxMdI9KU8x6qDujlHG2IYNDGAKhfGsk8PGKGejLn3TVatt9KBKWzMYWFqaMd0AFh/PfvyxU5/488+sS483zskx3mH84INMyg2+FKMOqk2MGgzG8/dTpzLm8DHKuv44xY5lZ2crlUonJ6fWLvoTZm97sCutE0x+F0ZSEsvPt+brnzjBfH2N70mJP1OcYtRBtYlRxtiZM8ZrTf/3fxSjjHX9cYpmmS4Q3mNWmeLKy5lazdzcjJfyU1Ksc+h96BDz9GQAmz3bDi5vUow6KNMYZYwtXswAFh7OfviBYtSoe136BoMhPT19woQJfD7bu3dvlUplj+uhdNKlS3cu5fv4MK3WTPYVFrK0NJaW1u4lpgMHWFoay81lFRWsTx8GsLlzpbjMjSmKUQdlNkarq42nopKTKUbv6FKXPu+ij42N5QHq5+en0Wgkfue+teTksJkzje+cgQPbXsrftMk4FBXFzC5Oz5+VoNUyxtjOnWzJEmYvF94oRh2U2RhljG3ZwgDjAo4Uo3e7Z5c+76KPiIjgARoYGKjVaqVzj6lgMjKMPZ4AGzGC7dpl/HhrjALsr38184l3x6h9oRh1UO3FKGPG+5ooRk3d3aUfFRW1b98+/vGrV6+uXr06KCiIB+jgwYN1Op29PErTFlpa2Oefs+Bg47toxgxWX2+MUQ8PJpMxDw92e3X1O+w3RuUg5JfWrYOrq9hFSJKbm5tWq923b19wcHBOTs4jjzwye/bsyZMnBwUFvfTSS1euXBk1apRer79w4cLixYtdHfibKJdj3jzk50Ong7c33N3vvKNCQpCYiFu38Ic/iFqiVVGMOihPT/j44HYL4y9EROCVV+DjAx8f3H6AALkjPj7+6NGjoaGhAHbt2sV7P4ODgzMyMrKyspKTk3ljE3FxweLFyMvDmjW/+PjKlXBxwbffYscOkSqzNopRB7V7N8rLsXCh+dE330R5OcrL4eUlbFl2YuDAgQUFBUuWLAkNDQ0KCvrss8+Ki4sTEhLErkuK/Pxwe80AoyFDsGQJAKhUqKkRpSgroxh1UFu2YNMmXLpkftRgwKZN2LQJ164JW5ZdWb16dUFBweXLl+fPny92LXbmz39G//4oLoZGI3Yp1kAx6qDmz0dyMk6cMD/a2IjkZCQn49w5YcsijsHTE+++CwAffoisLLGrsRjFKCFEBM88g6lT0dyMF14AY2JXYxmKUUKICGQyrF8PV1ccO4aNG8WuxjIUo4QQcUREGK81qdWorha7GgtQjBJCRPOXv2DQIFy/bjxVaqcoRgkhovHwgE4HAB98gOvXxa6muxRiF0DEdOkSzp418/HGRsFLIY7qN7/BrFnYtQsFBWKX0l0Uow5t6VKxKyAE0Omwfz/q68Wuo6j0vh0AAADdSURBVLsoRh3ayJHw9zfzcYMB330neDXEUQ0ejOXL8frrYtfRXRS
jDk2jQWKimY/X18PdXfBqSI82YgTUavO/tgGo1WhqQnMzxo0TtixroBglhAghJgYxMe2OurnhrbcErMaq6Eo9IYRYhGKUEEIsQjFKCCEWoRglhBCLUIwSQohF6Eq9g1q6FE1NuP0Uy7YUCvzpTwAQEiJkUYTYJRmz96X+CCFEVHRQTwghFqEYJYQQi1CMEkKIRShGCSHEIhSjhBBiEYpRQgixCMUoIYRYhGKUEEIsQjFKCCEW+X8Aznzf1SB3pgAAAQx6VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wMy41AAB4nHu/b+09BiAQAGJGBgjggeIGRjaGDCDNzMQEZTADRTRADBYOCM3EDlUBlEgAMkAqYTREITuEZoabgBCAKORmYNRgYmRSYGLWYGJiUWBhZWBlY2BjZ2Dn0GDi4FTg5NJg4uJm4GZncAK5j5Wdg5OLW3wWknMZeDZUbzwQeaHBDsTZxNJ1YJGO1H4QO9ne9sBfD3lbEDsx2fSAR+gRexD7zKI7++0XBIDVGFnv2Wfyx3AfiB0zK9b+XWczWHy6AZ9DyNOjYHGNtCQHl1W9YPFYxSkOR5fN3wti51rVO0yeuRhs5g1GBYeNBt1gthgAKrs4m5ZxkiwAAAFielRYdE1PTCByZGtpdCAyMDI0LjAzLjUAAHicfZNdbsMgDMffcwpfoMgfgPFj21TTNDWRtm532Hvvr9mZOlIJzYkRkB9/sE0mCHuf377v8Gc8TxMA/vOaGXwJIk5XiA6cLi+vC5xvx9Nj5rx+LrcPII4X43lmj7f1+pghOMOhpJKrZANMXKiYr0i4WV/KAeYkJKgCB0wVmXMekBIkJyvVbPsuzW0AZlgDbJLJCCgVkVZkABZY4EBJWZRa7N0oWy4DssbemKRqFYseqTg8INVJF6qlioY4stasA7A5yImZlOqm7WtotLf5KcWD0NxKKBJWLTYAyUsEOVU2VA5JVLUykiTaNK2p5cgQayMeRUO8nbIgEomD2DTzKJWXZX6q/++NOK3L3G8Eh/e6x4T04voAci8h+bD0QpF77dUgd+0pJ/fWE8vu1tNH4fsscTREu2zQ1vAu7Bh2zQhvH0yMH7+G96cf0yKlSN/jHp0AAAC3elRYdFNNSUxFUyByZGtpdCAyMDI0LjAzLjUAAHicJY47DoNADESvkhKkxfL/I0S1PbkQh483WK7ejMczrzm367vfc0667tlLn2c7DExdbCCwkdU4DwUhwRgHgiOzajOGMq9qF0r2/FGKUg0CE0lrQhAsweswSUsXQxAPl1qQQpqPs2U3l/5AgBzePgZmek+lRWoknRtay0TYGeNUcK63GEaUaS5XZZR2C44kXkmGSNQAM5TH/vwATM0y0DPnCj0AAAAASUVORK5CYII=", "text/plain": [ - "" + "" ] }, "execution_count": 10, @@ -734,6 +331,77 @@ "\n", "mol" ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"abdulelah_gani_frags/log_p.csv\", index_col=\"SMILES\", sep=\"|\", comment=\"?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "61\n", + "2\n", + "OCCN(C(=O)C)c1c(I)c(C(=O)NCC(O)CO)c(I)c(C(=O)NCC(O)CO)c1I\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAeCklEQVR4nO3deVhUZfsH8O8AAmdYB8GN3AAxUQdRSHEp99dSK0rFNbdQMBGFVEI2F7BXXzHSXHBJC9PCt3zDMpOf+5qSCmIqiIBhyKJsM8MynOf3x3CRWRnOnGGG4f5c/XGucc59bopun3PO89yPiDEGQggh6jLSdQKEENK8URklhBCNUBklhBCNUBklhBCNUBklhBCNUBklhBCNUBklRF8xhoMHEReH27cB4OjR+s8bDoh+oDJKiF5SKhETA1NTzJiBVavw4AH276//o4YDoh9MdJ0AIS1IRkbGvn37OI6TyWRlZWUKhUIul7ubmq7IyYFCgdJSyOVQKFBWBj8/yOUIDweAKVNw6hRyc7FmDQBUVOj2pyBPoTJKSBO5du2ap6cnz/NPLR3kXV1x587T366qAs+DMYhEKC+HpSU6dkRICAAEBDRVyqRRqIwS0kS2bNlSV1fXpk0bPz8/Kysra2trjuMsLCwcJRKYmUEshq0tOA4cB4kEAL79FsuWwdMTycnYswdffw2O0/UPQf6CiNbUE9IEsrKy3NzceJ7PyMjo3r17Y097+BC//QY3N5ia4sEDdOgA4PcDoh9oNEpIU4iOjq6trX333Xefo4YCaNsWbdvWHzeUTqqheoZGo4RoXUZGhlQqNTExuXXrVteuXXWdDhEYTXgiROsiIyN5np8/fz7VUINEo1FCtCs1NdXLy8vc3DwzM9PR0VHX6RDh0WiUEO2KiIhgjAUGBlINNVQ0GiVEi86dOzd48GBLS8vs7GwHBwddp0O0gkajhGhRREQEgJCQEKqhBoxGo4Roy7Fjx0aPHi2RSLKzs21tbXWdDtEWGo0Soi2RkZEAQkNDqYYaNhqNEqIV33777RtvvNGuXbu7d++KxWJdp0O0iEajhAiPMRYVFQUgLCyMaqjBo9EoMQi1tWjVSnvheZ4/cOBAmzZtKioq5HL5k23uSktLFQqFQqF4/Pix6pOysrKysrKKigo7O7ucnBwzMzPtJUb0Aa2pJ81cTQ0WLkSHDsjNxdKlcHPTxkVGjRp1/Pjx5zrF0tKytrZWLpdTGTV4NBolzdz+/TA3h48PyssRHIydOwW/wtGjR8eMGdOqVatBgwbZ2tpyHGdlZWVlZcVxnKWlpbW1tVgsFovFqj/iOE4ikZibm0+bNu3kyZNz587dqYWUiF6h0SgBZDJYWABATQ1MTf9woP8KC+HtDQDW1qiq0sYVVE85Y2Nj33///caftX37dnd39927d0+ePHnkyJHaSIzoCXrF1LI9fIiZMxEfjxkz8OAB5s2r/7zhQP8NHIjvvgOAixfxXD3oGufQoUOXLl1q3779ggULnutEV1fXFStWMMbmzZsnk8kET4zoDyqjeubAgT8c3LuHqCisXo3CQq1cLiEB77+PsDCsWIFt2wCgpAQlJaip0crltMHLC1IpwsJw9iyWLsVnn2H6dKFi8zwfHR0NYMWKFWq8cF++fHnv3r3v3bsXo9pDiRgqRvTKzJm/H9TWsrffZhUVrLCQ+fpq5XJBQaywkDHGSktZQACbMYNt2MA2bGDe3lq5nFbdusV27WISCQPYvn2ChNy3bx+Azp07V1VVqRfh4sWL/u7uiu7dWWqqICkRPUSjUT1TXo6wMISFITsbeXno0QOWlnBwAMehulr4yw0ZgkOHAOCbb/DyyzAyQnAwgoPh6ir8tbTq3j306YNFi7B8OQAEBaGoSMOQSiU+/9xZKh0SGRmp9tv2/v37bx061Pz2bcyZg9paDVMi+onKqJ6xtkZsLGJj4eQEW1sUF9d/XlWllXc+gwbBzKx+F19fXzS8CWl2r0S6doWPD2Qy/PgjRo5EcTGWLtUw5Gef4Ycf+ldVnZ45c7ZGgVT/Na9fR3y8hikR/URlVI/Z2cHREStXIjQUQ4dCJBI4/uHDcHJCfj7WrMGsWRC
Jfn+qKNzjxabz8cdwcMDx4xgzBhYW2LsXP/6odrCamvo94VeuhLGxZv/mxWJ88gkAREXh7l2NQhH9pOunCuSPZLKnD8rKWGUlu32bTZ/++4ea43nm4cEAFh8vWEyd27OHAax1axYezoC6Xr1klZXqRdq0iQGsVy9WVydQbtOmMYANG8Z4XqCIRF/QaFTPNLwObjiwtoaFBWbORGIioqIEu1BSEq5ehaMj/PwEi6lzM2di9GiUlCA7+9eJEwc+ehQRGalGGIUCH34IAGvWwEio/0U2boS9PR48wMOHAkUk+oLKaDOxdStMTLBxI1JTBYhWV4foaACIigLHCRBQfyQk1HXunJCb+/XgwT8XFcXHx1+5cuV5Y3zyCfLz4eWF118XLjEHB/z4I1JTceQIoqJw9SqA+vd7Tx6Q5kjXw2HSaMHBDGBSKaup0TTUp58ygLm4CBBK/8THxQHo3LlzUFAQAKlUWtO4H5PnWUUFY4z9+isLCGBHjtR/Xl0tXHLh4ez4cVZVxWbOZLm57J136j9vmOhGmiEajTYfq1fD2Rlpadi4UZMwtbW1uV9/DQBRUVrtiqQr7y1a5OnpmZubq1QqnZ2d09LSNmzY0JgTq6rQtSsKCiCXo1Mn7N9f/7mQS7qyszFsGMzMMGkSzpzBvXv189vKy4W7BmlqtKa++RCLsWUL/vWv2oSE/Lfe6uLiol6YXbt2BSQnR44du3LKFGET1BPGxsa7du3y9PTcunVrfHz8okWLoqOjX3755Xbt2pWXlysUCplMplD0LCtrL5fj8WMoFFAoUFqKiRMxejRWrcKSJfWhVHNPhZywyxjq6mBsjJIS2Nmha1fExgLArFnCXYM0NSqjzcro0enLl4/avLnn/PkpKSmi558CVVVVFRMTA8B9zhwYG2shxedz9781Dn1NrLsKfFcklUpDQkI+/PDDmJgYjuPkcvmgQYOe/MLLL6edPt3+qbO6dYODA1xckJJS/0liIgDk5AiX2ezZCApCnz44dQq7d+Orr4QLTXSGGuU1MyUlJW5uboWFhbt27ZozZ87znh4XFxcSEtKvX7/Lly+rUYUFd3NHdfshJpIXhS/oCoVCKpVmZWWJRCJTU1MrKytra2tLS0tVm7uePdcWF3taWMDGBhwHsRi2tujTB/v3Y8MGvPYahg3DL79g714AmDULe/YIl1lFBYqK0KULjIzw6BHs7AD8fkCaIRqNNjOtW7eOi4ubPn16cHDwmDFjOnTo0PhzKysr//3vfwNYs2aNPtRQreI4bvjw4VlZWR4eHleuXGnMz1tbi5wcGBsjNhZFRXjhhfrPR40SNDMrK1hZ1R83lE6qoc0ZjUabpddffz05OXnSpElffvnlM77WsKeFTCZTKBTbtm379NNPBw0adPbs2SZL9dlu7qiW9DAqSeN7LRC4RXxZWZmzs3NJScmxY8eo3SfRKhqNNkubN28+ceLEV199lZ2d3atXr7/bGuips0QiEcdxqmlAesJMYnRuSZW8kLfsZNRlnJDTBjZu3FhSUjJkyBCqoUTbaDTaXH344Yfr1q37c618krm5uWpPC9XmFr/99lt+fv64ceOSk5ObLM9/dGdfzcUwhbm96M3jVqY2wjxqKCkpcXJyKi8vP3Xq1MsvvyxITEL+DpXR5ooxtmfPnuvXr0ul0r/bGuipB4IFBQVubm6PHz8+cOCAr6+vrjJ/GsOPU2UF55UuvqYD1wmzpGrZsmXr169/7bXXvlM1xidEm6iMtiwJCQnz58+3t7f/5Zdf7O3tdZ1OvfJsPnlMRV0NRiVatB+s6YOmgoICZ2dnhUJx8eLFl156SZAMCXkGWsXUsvj5+Y0YMaK4uHjZsmW6zuV31k5GvQPNwXAhVKGUa/r3ekxMjFwuf+utt6iGkqZBo9EWJzMz093dvaqq6ujRo6MEnsujPl6J78ZVlt+ts1l4a1xQf7Xj5OXlubq61tbWXr16VSqVCpghIX+HRqMtTrdu3cLDw5me7VhpZAK
vD412mE73eX/wVVX3I7WsWrWqurp66tSpVENJk6Ey2hItW7bMw8MjJydn1apVus7ld+36iD1GuyiVytmzZ9eqtW1RZmbm3r17TUxMwlXbohDSJKiMtkQmJibbt283NjaOi4tLFaSBqUBiYmKcnJyuX78er9a2RVFRUUqlcs6cOd21sGE9IX+Hno22XMHBwRs3bnR3d798+XIrvemY98MPP7z66qtisTgtLc3Z2bnxJ2ZkZEil0latWmVmZnbs2FF7GRLyFCqjLZdcLu/du3d2dva6deuWaryPpoCmT5++b9++YcOG/d///d+TU18rKioUCkVlZeVfLtlKTEy8ceNGUFDQRx99pMPkSQtEZbRFUw39OI5LS0tzaXQD04QEDB0KV1d89hm8vFBRgZdeQkYGFAp4egqQVVFRUY8ePUpKSpydnRljlZWVCoWioqLi2WeJxWIjI6Pvv/9+yJAhAiRBSKNRGW3pZsyYkZiYOHTo0OPHjz+jDVJlJRQKVFSAsfpGwzt3ws8Pvr4oLMS0aThyBOXlEGpt1IgRI65du/bo0aMnP2xoc/fkki2O4ywsLGxsbI4fP56amjpgwIBz584ZCbYRHSH/jMpoS1dcXNyjR4/i4uLevXt7eHiobpBVd8p2ds43bnwll/9hhwtvb0ilGDUKCgVOnoSvLzZvRs+eyM6Gj48wZTQzM9PNzU0kEh08eLBXr16qta3W1tbPOKWysvKHH35YvHhxfn7+li1bAgICBMiDkEbSxQZQRL+EhYX95e9G9+59AKb6Ryxm9vbMyYm9/TabP58xxmbNYr6+7McfWWIiY4x9/z07cECYfFTr/efNmzd16tRz58794/fLy8u7dOliamoaFxcHwNra+v79+8KkQkgjUKM8gpiYmL59+969e7dNmzaqG2SO48RisZ1dG3NzcBxsbf/wfX9/AFixApMnC59Menp6UlKSubl5p06dwsPD09PTr1+//uymy1ZWVq+++urWrVsPHDjw5ptvHjp0yN/f//Dhw8InR8hf0nUdJ83Y1ats2DCWm8sYYzzPeF6AmK+//jqAwMBAR0dHAIcOHWrMWWVlZS+88AKANWvW2NraAvjqq68EyIaQRqAn8UR9MTE4cQILFwKASATN9yW5cuVKcnKyhYWFRCLJz8/v16+fqqr+I2tr623btgFYu3ZtSEgIgEWLFj27GSshQqEyStT38ceQSJCcjKQkYQJ+8MEHjLEFCxaoamJsbGzj94waO3bshAkTZDLZ+fPnhw8fXlBQoFeTYYkBozJK1Ne+PdauBYBFi6D5yO/s2bMpKSk2NjbGxsaFhYWDBw8ePXr0c0XYtGmTRCI5cuTI2LFjzc3Nd+/endKwVzIh2qPrpwqkeeN5Nnw4A9jcuZqGeuWVVwCEh4dLJBIAJ0+eVCPIjh07ANjb24eGhgLo1q2bXC7XNDNCnolGo0QjIhG2bIG5OY4eVZ4+fU3tOEeOHDl16pS9vb1MJnv8+PGYMWNUVfV5zZ07d+TIkcXFxQ8ePOjTp09mZqZedbEihknXdZwYgk2bbllYtHFxcVFv6MfzvKpT/cqVKy0tLQFcvHhR7WTu3LnDcRyAjz/+2NjY2MTEJDU19bki/Hau9sHZWrUTIC0NjUaJAPz9nV1dHbOystQb+n3zzTc//fRT+/btHz58WFlZ6ePj07+/+g3wu3XrFhkZCSAuLm7+/Pmq1nlKpfLJ75SWlj548KA4t6Ikva7wJ+WD08rc72rvJtXc/rzmxtZqRRGvrKTVfaSxaDEoEcbly5e9vb1FItGlS5f69u3b+BN5nvfw8EhLS4uNjV29enV1dfXPP//s7u6uSTJKpdLLy+vatWuhoaH79u27f/9+p06dTExMGlpDqb62441Lplf/ojNp/zWcuI2o47/0pXkg0XO0iokIw8vLa+HChfHx8XPmzHl2A9Py8nK5XN7Q5u7w4cNpaWmdO3fOyspSKBRTp07VsIYCMDEx2blz586dO0NDQzMzM8v
Ly/Py8p78gmqllkhSZdfL2MQcxuYiUxuRibnImIOplcjYTMPrk5aFRqNEMKmpqS+99BLP805OTt7e3gqFory8vKHNXUO30D+faGNjExgY2K9fv+jo6KSkpG7dugmV0oMHD1xcXKqqqg4cOODp6dnQEerZZz28pARD2wE0yCCNQmWUCGbixIkHDx7kOE6hUDzja0+1uSsqKsrKyurZs+fPP/9samoqbEr+/v7bt2+fPHny/v37hY1MSAMqo0QY6enpffr0MTU1nTx58p49ezp27LhhwwaJRCIWizmOs7GxaegW+tSJ1dXVHh4ev/zyy6pVqyIiIgRMKScnp3v37nV1denp6T169BAwMiF/oNuJAsRgjB07FkBQUFC7du0AJCcnN/7ckydPikQiMzOzjIwMAVN65513AMyePVvAmIT8GU14IgL46aefvv/+e0tLS0tLy4KCAi8vL1VVbaRXXnll7ty51dXVc+fO5XlekJTu3LnzxRdftGrVijZbJtpGZZQIIHLFCsbYggULtm7dCmDt2rWNbymi8p///MfR0fHixYvbt28XJKXw8HClUunn5+fk5CRIQEL+DpVRorHjx7/Nyto0YgSAR48eDRkyZMSIEc8bw8bGRrWjZ2ho6K+//qphRunp6f/973/Nzc0/+OADDUMR8o+ojBKNhYeb5uQsHDAg6tixmP7916qaPj2/CRMm+Pj4lJeX+6va62vggw8+4Hl+4cKFql7OhGgVvaknmjl8GOPHw8EBU6ciPh6vvYbvvlM72G+//ebm5lZaWpqUlDRhwgT1gly+fLl///4WFhaqbVHUToaQRqLRKNEAY4iMBIDAQOzYAZEI0dGaxGvfvn1sbCyAwMBAtXvXh4aGMsaWLFmilRpaXIyaGuHDkuaMRqNEA0lJmDQJHTpg/Hhs344JEzTvg8/z/NChQ8+cOfPuu++qmocWFRVVVFSoVo6q9n+Wy+UKhaKsrEwmkzWslfL09FyyZMnp06dfeeUVW1vb7OxsVd9SwTAGf390746sLLz6KsaPFzI4adZ0O9+KNGNKJevRgwFs7VpmZsaMjNj164IEvn37trm5uUgkSklJYYypeuj9o3HjxjHGhgwZAiA2NlaQTP7g/Hn20UeMMcbzbNIk4eOTZotWDRN1GRkhOhp79+L2bVRXY8YMSKWCBHZ1dQ0LC4uMjAwICLh+/XqnTp2Ki4sbtn22tbXlOI7jONVg09jYmOM4xpiNjc3q1avPnDnj4OCwULXNnrBKS2FvDwAiEUzofxzyO/ptIOq6dAlyOTZvxrlzOH8eUVECxl6+fHlSUlJ6evqgQYO6d+/eu3dvmUxWVlZWUFCQnZ1dWloql8urqqr+fKKdnZ2Pj8+fl5wKwNMTwcHw9cWdO2jdWvj4pNmiZ6NELYmJePgQ48djzRpER6NrVwG2V/6jHTt2LF26tKys7BnfeXJkynHc48ePMzIyOnbsePPmTVUXfWGoxtrvvQcrK3z9NVq3hr8/OE6w+KSZozJK1DJtGhITIRLhwgWkpkILN9HDhw8/ceLEuHHjpkyZoupp0tDmruEG/6lT6urqBgwYcOXKlaCgINVkfmFs3ozAQEilmDQJI0dCg878xCBRGSVqmTEDn34KExOcPInbtzF/vrDhU1JSRo0aJZFIsrOzbW1tG39iWlqap6dnXV3dmTNnBg4cKEAqCgW6dUN+PjZuRHAwOA6//gph5wCQZo7mjRK1TJuGsDCcOIHt2/HWW4KHV3XMW758+XPVUABSqTQkJITneX9//xpBJnhu2oT8fHh64tgxMIb33qMaSp5Co1GirpwcZGWhXz/By8q33377xhtvODg4ZGdnq/GIU6FQuLu7Z2Vlxcf/LzBQs9mdlZVwckJRET76CIsXw9ISd++CVkaRP6LRKFFXly4YOVLwGsoYi4qKAhAREaHeayKO4xISPvXwSFm6dPytW5pls2EDiooweDD+9z8ACAmhGkr+jMoo0S+3fijuJHLv2LHjvHn
z1A4ydOggD4/h1dWYMwdq9y8tKSn59OxZpaMj3n4bJ07A1hZBQWqnRAwYlVGiR1gd7qw3f7skPjnuJzMzjfbn3LABjo64cAEJCWpGWL9+/ZyUlIl9+vzv6lW+bVssX05PRclfojJK9Ej21zVld3mrLka932ynYSgbG2zcCADLl0ON/qUFBQWbNm0SiUSDhw1787PPpGIxHxioYUrEUFEZJfqCVyLt42oA7kvMjYRYXjdxIt58E+XlCAh47nNjY2PlcrmPj09iYiKAeYsXG/3TtsykxaIySvRF5v6aijze1tW46+uthIq5ZQu6dsX48XiuCSl5eXkJCQlGRkbe3t7Xrl1zdHT08/MTKiVieKiMEr1QV430T6oB9AkxEwn3W9m+PTIzcfo0zp8HgNDQv7l6Xd3jx4/v379/586d1NTUgICA6urq4cOH7969G0BUVBRHSz/J36N5o0QvXFlddXNndevexmOTLSHw6nz4+aG6Grt2ISAAbdrg8mU8fgy5HAoFysogk6FXr1E//5zyl+e6uLjcvHmzVSvBBsjE8FCHJ6IXqkvZwPWcZScjwWsoAGNjzJuH+HgAuHEDKX8qmKamVhKJxMLCguM4a2trCwuLvLy8nJwcAMHBwVRDybNRGSX6QQSXSabaCz94MJKSUFqK6GgsWgRbW3AcOA4SCcRimJl9/edTRo8efezYsXPnzgWo8YqKtCT0bJQYuMpKvPgiAISHw8kJffti5Eh4eqJnTzg5QSLB381PTUhIsLCw2Ldv3+HDh5syYdLsUBklBm7CBBw6hLt34eCAdeue48QuXbqolqUuXLiwsrJSW/mR5o9eMRG9UJHHW3US/i/1s2cxZAhsbJCdDTu75z5dqVQOGDAgNTV1yZIlcXFxgqdHDAONRole0EYNBRARAQAhIerUUAAmJia7du1q1apVfHz8hQsXhM2NGAwqo8RgHT2KkyfRurVGHUXc3d2XLFnC8/z8+fNra2uFy44YDiqjxGCpNtkLDYW1tUZxoqOjXVxc0tPT169fL0hixMDQs1FimI4cyX377c62tsjKwp82bXpuJ0+eHD58uKmp6dWrV3v06CFEgsRw0GiUGCCe55cvH29t3Tc29p7mNRTA0KFDZ82aVV1d7e/vTyMP8hQqo8QAffnll+np6ebmj6ZM6SBUzI0bN3bo0OH06dM7duwQKiYxDFRGiaGpq6tbtWoVgMjISA17Pz/JxsZGNedp6dKl+fn5QoUlBoDKKDE0e/fuvXXrVrdu3d555x1hI/v6+o4fP768vDwkJETYyKRZozX1xKDU1NSsWbMGwMqVK01MhP/1/uSTT5RKpWp1EyEq9KaeGJSEhISlS5eamZkVFBQYGdHNFmkK9HtGDIqtrW1lZWVFRUVeXp6ucyEtBZVRYlAmTZo0ceLEqqoqf39/XedCWgq6qSeGpri42M3Nraio6PPPP58+fbqu0yGGj0ajxNDY29uvW7cOwOLFiwsLC3WdDjF8VEaJAZo1a9aoUaNKSkpoZhJpAnRTTwxTTk5Or169ZDJZcnLyuHHjdJ0OMWQ0GiWGiXrXkyZDZZQYrODg4H79+uXm5kZGRuo6F2LI6KaeGLLr1697eXnV1dWdPXvW29tb1+kQw0SjUWLIqHc9aQJURomBo971RNvopp4YPupdT7SKRqPE8DX0rvfx8eF5XtfpEENDo1HSIjx69Kht27Y8z2dkZLz44ou6TocYFBqNkhbhwoULSqVSIpG0bt1a17kQQ0NllBg+xlhERASA8PBwBwcHXadDDA3d1BPDd/DgwYkTJ3bo0CErK4vjOF2nQwwNjUaJgaurq1OtCo2KiqIaSrSByigxcF988cXNmzednJxmz56t61yIYaKbemLQamtL+vdfZ23d18/Pd9o0XWdDDBOVUWLQtm+Hvz+6d8eNG9DCRqGEgMooMWRVVXB1xf37SErChAm6zoYYLHo2SgzXtm24fx9SKd56S9epEENGo1FioGQyODvj4UMcPoy
xY3WdDTFkNBolBioxEQ8fwtubaijRNhqNkuYvLw+dOv1+oFQiIwN2drhwAS+8gIEDdZ0fMXA0GiXNX0RE/UFkJJRKzJqFmzexYwfkcqqhpAnQFBDS/JWV4fx5AKitxblzGDwYU6YAwOTJmDVLp5mRFoHKKGn+lEo8egQAPI+qKojF9Z/TRFHSJOimnjR/rVtj3DiMGwczMwwYgMOHUVCAlBR07KjrzEiLQK+YSPN37BhGjQKAlBSMHIm8PHzzDdq2xaRJMKKBAtE6KqOEEKIR+ruaEEI0QmWUEEI0QmWUEEI0QmWUEEI0QmWUEEI0QmWUEEI08v+WUloc33nRdgAAAg56VFh0cmRraXRQS0wgcmRraXQgMjAyNC4wMy41AAB4nHu/b+09BiAQAGJGBghQgOIGRg6GBJA4IxuYZmKC0ewMGkCamZkNQrNwQGioPCMzmwNEHEabgs1jhAujaWNnyADTaPYwY9gP59PHXG5gkDAyMTAxA4UUWFg1mIA2MLMrsHNkMHFwMnBwJXBxK3DzaDBx8yrw8jHw8TPwCzDwCzIICjFwCWcwCYswCIsmiIopiIlrMIlJKEhIMkhKMUhJM0jJMMjIMojKZTDJyTPIsSc4gQKejV1OVJiLQ7yLERERDAr5Gkn2i389OADimGeetPeK2QZmBzxytnd82wtmq7vtsr8w1w3Mns1p66Bn7wRmS5tkOnxjubUfxF50tMBhfl8nWNxwjqp9ff1rsPibv5H7D9t8AbO/Bh/dLxLtD1bDWn5lvyujKlicS8nrwNlqMzC706fswLLbTGA1gvdqD5T+4bEHsX0erTtwg/m7HdhtB04cEK+9DxYXWL3pgN1vFwcQe8rmHwcu778GFj/jwXnwl60NWJzbpWJ/20d9sLjIrMf7n2z5CWYLhbHb8+c4gtmcDHPst01nBat/Ln/J7nhpGJjd80zXIfIbJ8QcwSgHbuN4MPvk7zkOaycmg9lnb25zmLtZCMzmFNzsYJA+B8z2ZXns0DhtHpgtobza3piHCexH4QpTh1Dlh/tAbDEAoOyQIIoSzB4AAALkelRYdE1PTCByZGtpdCAyMDI0LjAzLjUAAHicfVVbbttADPz3KfYCFvhc7n4mcVAERWygTXuH/vf+6FBJVg5AVLYIiRhzh6/xqeX14/L9z9+2LrmcTq3Rf75zzvZbiej02vKhPT5/e7m2p7eHx0/P0+3X9e1nU8kv5ecr9uHt9vrp4XZrtI2hJrOdYyMlnw4X7dfxU2lPjTcPddF29q3PEb0XQAWQtujOIe1sm+EXLAXQ2hURzV0Q56wbk+qogI6Isk3XYE4gqTNVR3cko1s3HeTtzFuQSlTAQEQFR6GuyVHm6J0K4NiT6dYn7xGHziGjAE4AzyiksysnctJgsgLJ1F4S4MZdPPMRnVNLKGdU3rrPgcqckwnqxRU0+5NF9IGcExosA08FVFEmQIcNE3CVjcy6V3Xi7BGgs/ceEwB3ijpoNgljgUAsmYsNsZppT2TfJNTQHHTJOlE1chxJ1DeklIOBxkcMLZHZJ0xvBE+khIpFhMwKOTPmQEwSzCfGylAnrQae3ls6+4yRLe8DqVcxceLeUbBjwxEYVQLlCin7PLmM7lnF8LwqoO7rJjwCRUSHRugogbYvsGHW4NZNnaaWJB2tFEwFStTxwO69V/2Rvi+HodEzI9oIm9VuSG6RYdAhCh1AZFIrAhbm1hxHG9LA0TJFuCxPbhH6PdFx6ANkKXyWGkOICLGaJuHJwR3NqYC811E1iEZ2E/ObClogBU0ULLibWyJNZ0jF8vl6+SKh76L6eLteDlHNjxzSiZemh0AybjtkkHH7IXaCux+SxniNQ7gSPA55wkubhwhx3vdSY+ngO0Hh3Sxu0A1Jw4seJ7/0LIacFNMskth23s3iyUk0zaLK8eFZbLGlvJtFGNtoyUfuty5fsVN322W756jnXlAQX5zRJ9nN4izJOT2LsyRnGFmcJTnDyOIsyRlGFmcZH57
FWbLKMHo/jZZ8dHHWrDOMLs4YL0vPCpxTdD8z+f75J47n0z/BvoD9diYvYQAAAXJ6VFh0U01JTEVTIHJka2l0IDIwMjQuMDMuNQAAeJxtUTmOw0AM+8qWNjAejG4JRipXaTaPyDfy+KWcdqcaEBRJUde1PV7773Zdr/1N7+25v7cvdF3ba2/0P4yeP59NZjgvH4dOrnQfJ88yCRqHzCVGNE6ZrpJAaMYSznHSVDOO5tASAbKgYxTcQirG1CQL/MZh0ysjmpQpyjWOgDZcgbh6kbV2SqWM8wDLyOT2qwXfEx9Tcm4/liq5IbfKHEdLIE3TkMZSpLEgTgRsLDUVozyXai8IqNw9x5pmK26WTUV8BqTJeov55BDNgaXVb0pr50AtEXfQmBFUDIZHRCMJzoIgOlTqopCkvCLbzBNedUeHACnmylYrIQinW4AU1q/LY8owKK2Mb8FKS+Eutkr7TNF1M1a23kqmYpcCQTMUNoryLHoCgt2+YUJXYYSL+0DYqLDTUJwlIIGjlCI9Js0yOoRILOo+UVRVtW2KqTWkUsEy9s8fXxCCFHzsehkAAAAASUVORK5CYII=", + "text/plain": [ + "" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "group = \"241\"\n", + "idx = 55\n", + "\n", + "cantidad = df[df[group]>0].shape[0]\n", + "ocurr = df[df[group]>0][group].iloc[idx]\n", + "smiles = df[df[group]>0].index[idx]\n", + "\n", + "print(cantidad)\n", + "print(ocurr)\n", + "print(smiles)\n", + "\n", + "mol = instantiate_mol_object(smiles, \"smiles\")\n", + "mol" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "np.int64(3)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df[group]>1][group].iloc[0]" + ] } ], "metadata": { diff --git a/tests/agani_s.ipynb b/tests/agani_s.ipynb new file mode 100644 index 0000000..8381bee --- /dev/null +++ b/tests/agani_s.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from ugropy import abdulelah_gani_s, instantiate_mol_object\n", + "\n", + "import pandas as pd\n", + "\n", + "import numpy as np\n", + "\n", + "from rdkit.Chem import Draw\n", + "from rdkit import Chem" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"abdulelah_gani_frags/log_p.csv\", 
index_col=\"SMILES\", sep=\"|\", comment=\"?\")\n", + "\n", + "primary = np.linspace(221, 350, 130, dtype=int).astype(str)\n", + "\n", + "df = df[primary]\n", + "df.rename(columns=lambda col: int(col) if col.isdigit() else col, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BrC1=CC2=C(S1)N3C(C)=NN=C3CN=C2c4ccccc4Cl\n", + "11\n", + "Subgrupos:\n", + "{'(N=C)cyc-CH3': 2}\n", + "Subgrupos por número:\n", + "{346: 2}\n", + "Gani: \n", + "{324: 1, 346: 1}\n" + ] + } + ], + "source": [ + "for idx, smiles in enumerate(df.index):\n", + " solution = abdulelah_gani_s.get_groups(smiles, \"smiles\")\n", + " \n", + " # Filtrar las columnas de la fila que no sean iguales a 0\n", + " row = df.loc[smiles][df.loc[smiles] != 0]\n", + " \n", + " # Convertir la fila filtrada a diccionario\n", + " row_dict = row.to_dict()\n", + "\n", + " # Verificar si ninguna solución tiene subgroups_numbers igual a row_dict\n", + " if solution.subgroups_numbers != row_dict:\n", + " print(smiles)\n", + " print(idx)\n", + " print(\"Subgrupos:\")\n", + " print(str(solution.subgroups)) # Convertir a string y unir con saltos de línea\n", + " print(\"Subgrupos por número:\")\n", + " print(str(solution.subgroups_numbers)) # Convertir a string y unir con saltos de línea\n", + " print(\"Gani: \")\n", + " print(row_dict)\n", + " \n", + " wrong_smiles = smiles\n", + " break" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'(N=C)cyc-CH3_0': (6, 7, 8), '(N=C)cyc-CH3_1': (9, 7, 8)}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol = instantiate_mol_object(wrong_smiles, \"smiles\")\n", + "\n", + "abdulelah_gani_s.detect_fragments(mol)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + 
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcIAAACWCAIAAADCEh9HAAAABmJLR0QA/wD/AP+gvaeTAAAgAElEQVR4nO3dd1hTZ/sH8PskYYQRCCJ7OAAVgqiIiCiOarVWcYGbtmoVV6e+Rduq2FqrbW21ropapTgqVhxYUaGKOFHwJ0IYyhJkSQIEYpCQ5Pn9cSi1vq0KJDmJ7/25vHqF5HDuO1e9vj5nPQ9FCAGEEELtxWK6AYQQ0m8Yowgh1CEYowgh1CEYowgh1CEYowgh1CEYowgh1CEYowj9KyVRqkBFv/668mtmm0E6i8L7RhH6b9lPsheVLKpsrgSAfib99rjusbtrV9+nngKK6daQzsHRKELPUhLl9KLp0/jT8rzycr1yx1mMY7ojpNM4TDeAkM653Xj7serxos6LAIACapbVLKY7QjoNR6MIPatEXtLdqDsev6OXhDH6alIooK7urx8fPwa5nLlu9A2fzZcoJUx3gfQGxuirKTUV+Hw4c6blx8WLIS6O0Yb0ig/X5/6T+0XyIqYbQfoBY/SV5eYGy5fDkydM96GHOnE6rbZfPezesI1VG7dVbwspDGkiTfRHpfJSZntDOghj9JXVrRuMHg3r1zPdB0BDQ8PMmTPHjRt3+vRpfbnB7kObD8+4nenE7mRAGayyX2VEGe102Tnk3pBuwm70XVAItcIYfZVFRkJ0NOTmMtYAIeTo0aMCgeDw4cNJSUnjx4/38PDYuHFjTU0NYz29NC9jr3et3w23Dvfh+gDAbKvZNhwbBVEcqj3EdGtIt2CMvsosLOCrr+DDD1t+LC3V6oWmixcvDhgwYOrUqSUlJXZ2diNGjHBxccnPz1+xYoWLi0t4ePjdu3e11406hFmFAUCMOIbpRpBuwRh9xc2aBU+eQHIyAMD06eDuDlFRoFRqtmhOTs7UqVNHjBiRlpbm6Oi4a9euhw8fnjlzpqioKDExcdy4cTKZLCoqysfHp3///r/88ktzc7NmG1KTNy3etOZY32m8c7dRz/4BQJpF0KultpZcvEiuXCGvv97yjlBIDAzIrl1EICAABIAIBOTECY1ULysrW7BgAZvNBgAzM7OIiIj6+vr/3iw3N/f999+3sLCg/xLa29tv3HigrEwjLanXkpIlkA7/efgfphtBOgRj9JUiEpF+/YiRETl5kty+/df7t26RykqiVJLYWOLm1hKm/v7kjz/UVloqlW7YsMHc3BwADAwMFixYUFlZ+fxfaWxsjI6O9vHxAYAhQ4RsNhk3jiQmEpVKbV2p3Q3pDUgHh7sOCpWC6V6QrsAYfXVUVZHevQkA8fAgpaV/++jXX8myZaS6mhBC5HKyaxext28J05Ejya1bHaorl8t37dplZ2dHDy3HjRt37969Nu3h0qVLYWFyA4O/Bss7dpCGhg51pTk9hT0hHc5KzjLdCNIVGKOviMrKlmP2nj3JM0fHSiXp3p0AEB6PrF3bEk9SKVm3jlhaEgBCUSQ0lOTl1bSj7qlTp9zd3ekA9ff3T0lJafdXqKggGzYQZ+eWMDU3JwsWkMzMlk8vXSKJiS2vVSqyZ0+763TUlxVfQjrMKprFWAdIx2CMvgpKSoi7OwEgnp6kvPwfNsjKIqGhLfHUqRPZsIE0NhJCiFhMIiIIl0tsbcvMzMxCQ0MLCwtfsuiNGzeGDBlCB6iHh0dsbKxKHUfjcjk5coQEBbV0S1HktdfIyZNk/nxiYkIKCgghRKkkxsYdL9VOxU3FrHQW9/+4dYo6xppAugRjVO8VF7cMNvv2bTls/zfXrpGhQ1viydmZ7NpFmpsJIaSsjKxadcDAwAAAjI2Nly1bVv3cHeXl5YWGhlIUBQDW1tYbNmxoampS63cihJDcXPL++8TMjACQ6dPJ/Plk8mTyxhuEMB2jhJBh94ZBOvws+pnJJpDOwBjVb4WFpGtXAkD69ydi8Uv9Snx8yylUevR66lQyPYosLi5+5iJ7Xd2zo63q6uqIiAhDQ0MAMD
ExiYiIkEgkav9ST6urI1u2kFu3yPz55NAhMnQoOXqU+RiNKY0ZdmjYlKVTmGwC6QyMUT2Wm0scHQkAGTyYtCnNVKqWS/Y+PskA4Ofnl/jnecesrKzQ0FD6UL1Tp04bNmxobGwkhDx+/HjDhg08Hg8AWCxWWFhY+T+ePtAYOkYzM0mXLkQiYThGJRKJiYkJRVFFRUVM9oF0A8aovsrObrnaPnRoOy9qy+Vk7944e3t7OjRHjhx58+ZN+qOUlJTAwED6fVdX17lz57ZuNn78eKFQqM5v8nLoGCWELFtGVqxgOEYJITNnzgSAr776iuE+kA7AGNVLt28Ta2sCQMaMITJZh3ZFDzP5fH5rmGZkZNAfJSYm9u3bFwAcHBzoQevFixc73nz7tMZoQwNxc2M+RhMSEuhrawz3gXQAxqhuUyjI1atkyxaydi1Zu5Zs307S09NuqaysCAB5882WC+4dV19f33rzPIvFCg0NLSgoIIQolcqAgAAAWLRokVouxLdba4wSQmJjmY9RpVLp6OgIAKmpqQy3gpiGz9TrsMePYccOSE6G2logBAiB6mpFQuLMcfU1NRASAsePg7GxekqZm5tHREQUFBREREQYGBgcPXr0ypUrAMBisRoaGgBg3rx59KV5pkRFwYwZLa89PeHGDQZ7AQBgsVjTp08HgJgYnKnkfx0usKyrCIGoKHj0qMeWxUGuD3YHxwPAxaIuG68O3jT2j515IzafduNoZkHCwsLCXbt2rV+/ns1mNzc3m5mZKRSKhoYGExMTjdR7OfX1kJcHfn5w4ACEhUFwMJw8yWA7AABZWVne3t5WVlbl5eVGRkYMd4OYg6NRXXXvHtTUgEoFAMnFXa6UuLR+4mVVsS0oliOu0lDlbt26bdy4kb7zKTc3Vy6Xd+/endkMffgQrK1h7FhQqeC114Ci4I8/oKmJwY4AAAQCgY+PT01NzdmzZxluBTEKY1RXZWa2Tg66/rU/Fv/+ZrPyqf9ZKpV2ZmPOysoCAG9vby3Ueg4nJ3BxAZEI0tLA3h5694bHj+HqVWabAgAICwsDPK7/n4cxqqvE4taXg5xL+9pVfH894K9PlUoQibTQhY7EKACMGQMAkJDw7GtmzZ49m8PhxMfHl5biGk3/uzBGddXfLx59+3rij6n+JRKLv97icrXQRWZmJgAIBAIt1Hq+N94AAKCPnukY1YUjaVtbW1dXV0tLSxcXFwcHh1GjRn3wwQe//PJLenr6E1xN8H+GZi5SoI5zd4eHD0GhoH+yMX38WdDlry4HdePXAgAYGUHXrlrogh6N6kKMDh8OxsZw8yZUV0NgIFhYQFYWlJSAi8uLf1dztmzZUlBQYGxsbGhoWFFRUVFRkZSURH9kaGjYq1cvLy8vb29vgUAgEAi6dOnCZK9IYzBGdVXfvnDp0tNvLOyfFn3Hh37dpGDvTup5ZS38+qsGW5BKpcXFxUZGRm5ubhos83JMTGDIEEhMhKQkmDEDRoyA48fh3DmYP5+xls6fP798+XKKovbu3UsvOSUUCtPT07Ozs4VCYW5ubkZGRkZGRuv25ubmHh4enp6eXl5enp6efn5+rZO0Ir2GNzzpsLw8OHbsbI7rUNdiroECAApr+QW1/KGuD5q5vK4/vF9dDRcuwPDhmqp/48aNgICAvn373r59W1M12uKHH+Djj+GttyA6GqKiIDwcJk+GY8eYaSY3NzcgIKCurm7NmjWRkZH/vYFcLr9//35rqmZnZxcWFj6zDZ/Pb01VX1/fPn36mJmZaaN7pFYYo7qtpASOHweZDBQKUKkO3vVedn70mqHJi/zS1j1asGqH/fDhcOGCporv3y//4YfG0aPLvvnGU1M12iInBzw9oXNnqKyE8nJwdgYeD0QiMDDQdic1NTX+/v75+flTpkw5evQo/WBCr169OnXq1JqJPj4+9FNhrUQiUWZmZlZWVlZWVmZmplAorK+vf3oDFovFZrOXLVv29ddfa/X7oI7BGNUHlZWQkQE3bv
yW7RkaG+psUZ///o9PzKy7bFhYWwspKfDn7Mlq9uGHsGULbNwIn3yikf23Q9euUFwMt26p+vdneXvD/ftNSUmNgwdbarOH5ubm0aNHX7x4sV+/fikpKaampgAgEok6d+78zJb29vatqerl5eXl5WX89yuH5eXlrWNVoVB469YtuVwOAMeOHZs8ebLWvhHqKGafRUVtEBWlWhPpbVsFQHYHnyKRkWsWVgL8tQKo2o0YQQDImTOa2n87rF4d5+7u/eWXXxJC1q3ba2pqGhERoeUewsPDAcDe3r707ytelZWVJSYmbt68ecGCBYGBgf/9wAKHw+nWrdu4cePWrFkTGxublZWlVCqf3kNjYyO9wN/SpUu1+51Qh2CM6o+cHBIZeWjKbwCkG7+mefUXdRt/ohdTunJFIwVtbAgAKSnRyM7b58SJEwAQGBhICKGvifv4+GizgU2bNgEAl8t94Ywkzc3NOTk5sbGxq1atmjx5spubG4v17P2F5ubm/v7+77777ubNmysqKggh169fBwAHBweFAlce1RsYo/pDpSI7dypWr+1hLQIg+yceJ5GRn85/RE/1pHaPHhEAYmGhW8sdS6VSIyMjNpstFoubmprMzc0piirT1gr3Z8+eZbPZFEUdap1sqi2ampqysrJiY2PXrFkzbty4bt26PT3bS+afq/f17NkTAM6dO6fW3pEG4e33+oOiICiIzSKfDrkMAOtSghQq1sfu8ebm8PvvcOuWmqvdvQsA4O0NjM7r9CxTU9PAwEClUpmUlGRoaDh8+HBCyLlz57RQOicnZ9q0aUqlMjIyckbrZFNtYWho6OXlFRoaGhkZGR8fX1BQIBKJkpOTt23btnDhQg8PD3qzWbNmAT5gqlcwRvVKr15gYzPL+657p5r8GqtYoVcnWeniqSJ///KoqE3qLZWVBQCgA/fdP2vMmDEAQM8GQr9O0PxjoWKxODg4WCKRhISErFq1Sl27tbKyGjp06JIlS3bu3EmvcAUAYWFhLBYrLi6OnqIQ6T6MUb1CUTB4MJtFIgKvAMC6lCAVoT7usT8z033v3v88fad3x+lsjL7xxhsAkJCQQAihXycmJir+fNxLE5qbm0NCQvLz8/v16xcdHa3peVddXV2HDBkik8mOMXVPLGojjFF9IxBAp05v+WR05dflVFsfy+5lI3u8IDSUELJu3To11snMBADQgTlJniUQCFxcXCorKzMyMrp06dKzZ8+6urrU1FTNVVy6dGlycrKDg8PJkye1M2EgThylXzBG9Q1FwZAhBmxVROCVfvZZB+6eIoRECARcLvfYsWP0TCIdJRKRu5nZWSoA8PJSw/7UbvTo0fDnsbymj+u//fbbqKgoLpd74sQJJycnDVV5RmhoqImJycWLF4uLi7VTEXUExqge6t0brKzm9L1ZLRt8Ku/Aidxcu8eP54WEEEI6+vTLo0fw008QFVV/LDHA4YGvY2Wngz+C7k0B9/TpUfq4XkMTJyckJKxcuZKiqH379vn5+WmixD/i8XgTJkwghBw+fFhrRVG7sf/xcWCk0ygKDAzY9+8bsNkJ9+/nicXhvr69nZ13pKRkZmZOmzbN2tq6PbstK4N9+y7d5UtkBq7mNWG9M2Z6Z17MsXMrvwz29mBlpe6v0X6Ojo4FBQXTpk3r06ePk5PT999/X1paWl1dXVNTo1KprKys6Kn7Oyg7O3vs2LGNjY3r1q1btGhRx3fYJiYmJgcPHiwtLX3vvfe0XBq1FT4Mqp9UKti69YlI5Pbjj2X19fEzZ47z8FiYl7fr8OG33norOjq6zTtUKmHzZpBKR/7yVk61dfaS7RbGTfk1VhMOTxcu2QGGhvDRR2pbP0+thEKhr6+vubm56M95rDkcjouLy9NTfvTs2bOtwSoWi/39/QsKCkJDQ48cOaL95fyUSqWzs3NFRcXNmze1ORBG7YAH9fqJxYLAQGMOZ1lAAABEJicTQj51cTHkcA4ePHj/3r2X2smTJ1BSAunpkJQE+/
fD48f02wOdHn5+YcTftlSp4M4dNX8FdWhqapo1a1ZTU5Ovr+/atWtDQ0Ppe9cLCwtPnz69cePGt99+WyAQ8Hg8Pz+/uXPnbtq0KTExsaKi4vm7bW5unjJlSkFBga+v7/79+xlZEpXNZtN3p+KFJt2H843qrb594cqVRX5+3167ll5efr6gYLSb22xvb6FIJI2JgRUrwNT0r42fPIFHj6C6GmprW/6IRNDc/PT+pHJDNqUCgA8Gps4/Nf5WmQOf++f87QoF5OfDwIHa+3YvJyIiIiMjw83N7ejRo63TKTU3N9+7d+/pKT9ycnLS0tLS0tJaf9HS0pKeK4QetPr4+Dw9scjixYsvXbqkzUvz/ygsLOz7778/dOjQd99913pXKdJBeFCvzy5cgMuXv7l6NSIxMcDZ+dq8eY3NzVwDA2CxwNgYvL2hrg5qaqCmBpTKp3+vScEua+AJH3XOru5cWMun/xTVWsZMPr7vTt+vRvwhbjRZfXH4oSnHJv06TbhkBwCAoyO8+y4zX/NfnD9/fsyYMRwO5/Lly/7+/s/Zsq6uLisrSygU3r17VygUZmZm1tTUPLONq6srPVP9gwcPfv31Vy6Xm5KS0r9/f01+gxfz8fG5e/fuiRMnJkyYwGwn6DlwNKrP8vMBYImf33fXrl0vLb1QVDSCXllEpQKZDFJTAUAql98Xi/NravJramoap6eWvX5fbFUp/Ye5gY05ironLWc/x7rf33O7377/6/PXx7p0iQkAqqur33nnHfpu2ednKABYWloOHjx48ODBre/U1tY+PVP9nTt3Hjx48ODBgzNnzlhaWrLZ7IMHDzKeoQAQFhb2n//8JyYmBmNUl2GM6i2JBKqrAcDU0PADf//PL1xYm5zcxdJS+OhRdnV1YW0t/aeotrb1cGOIq//lBy4AYMhWOvHqu/Fru/FrPTtXe9lUd+PXdrGsY1HkeG4veuPNY8722xVuayoFADA01Kkb8Qkh8+bNq6ioCAoKWrZsWTv2wOfznw5WpVJZWFiYmZmZkZGxadMmpVLZvXt3AEhPT9+5c+ekSZPefPNNdX6BlxYWFrZy5cr4+HixWNypUydGekAvhDGqt6qqgMOh17x7z99/45UrN8vKum/Z8sxWxhyOe6dOblZWblZWPraKtcOi3axqnHj1z141MTMDpTE0NbW+4WIhWT7oWkxGb2CzJaYOFjqwHFOr7du3x8fH8/n8mJgYtdzbxGaz3d3d3d3dJ0+eLBaLt2/fHhMT8+23316+fHnv3r0ikYipGLW1tR05cuTZs2ePHj26cOFCRnpAL4TnRvXW/ftw7BgdfFVSabctW1gU5WJh0cPamg5NdysrNysrJx7v2QvNxsbA57f86dwZbGzAygqMjEAuh19+gepqkMv/2tjQcK9wYETCsAsXqd69tfsF/0V2dnb//v0bGxuPHDkydepUte8/NTV14MCB9KzMYrGYfnKpvLy8nXfjdtihQ4dmzZo1aNCgq1evMtIAeiGMUb1VXw/bttFX299PSNiamjqxZ8/j06f/bZt/S8x/Qwjcvg2pqSAWAyFgaUn6+4V8NzDuOOXiAteugaOjhr/UizQ1Nfn7+2dkZMyfPz8qKkpDVXr16pWbm3v27NnRo0ePHz/+9OnTW7duXbp0qYbKPV9jY6O9vb1EIsnNze3RowcjPaDnw6eY9JaREeTkgFT6oK5uzsmThJDDISF2retKcjgQFASzZoGvL3h5QbduYGcH5ubAee5pHIoCBwfw84OhQ2HoUPD3p5ydJ0ygLl2CzEw4dw5mzmT4Hvxly5adOnXKzc0tLi7O6Dn/HnSMRCK5cOECIWTy5MkcDue3334Ti8XzGVrK2cDA4N69e3fu3LGyshquuWVgUQfg7ff6zMICAFZfvNikUIT5+Pi0LnrOZoOlJTx1YbojjI3h5Eno2ROEQpg06enTp9p27ty5rVu3GhgYHDx48JlFN9WrdcZPiUQSHBxsaWl58+bN3NxczVV8YT8AEB
0djceOugljVG+JRHDvXq5IdCgz05DNXj10aMv7hobg5ARz54I6rr3QrKwgMRGcnCA5Gd55B1Qqde24DVrvcFq/fv2AAQM0WsvFxSUoKKixsTEuLs7Y2Dg0NBQYfZpo2LBhXbt2ffDgwZUrV5jqAT0HxqjeSkoClWpFUpJCpQrv378bnw8WFuDvDzNnwjvvAJer3mpOTnDmDFhYwK+/wsqV6t33ixFC5s6dW1lZOXTo0I8//lgLFZ+e8bN1MKj8+1MMmvbDDz/cuHEDACiKoieyWrJkiTYbQC9L24s/IbUoKyORkTfnz6cATA0NK5YvJ199RRoaNF32wgViaEgAyObNmi71N1u2bAEAPp9foq11SiUSiYmJCUVRRUVFKpWKvo30jz/+0E51Qgg9H4qpqWlVVRUhZN68eQDg4OCgtQbQy8PRqH5KSgKAFUlJBOCjgQPtzMxg4EAw+4dnk9Rr+HCIigKKgq+/Vp06dVHT5WhCoXDFihUAsGvXLmdnZ+0U5fF4EydOJIQcOnSIoqjZs2eDFo/rb9++PWfOHELI+vXrbWxssrOzY2NjAYBexR7pHKZzHLVdfj6JjDwXFgYAfC63JiKCbNhAGhu1Vv+776QuLsO5XO6VK1c0XevJkye9e/cGgPDwcE3XegY9o76HhwchJD8/nx4bNmh+yF9eXk7frDp37lxCiFgsdnNzA4AJEyYolUpNV0ftgDGqb1Qqsnu3as0afycnAPhm1CgSGUmuXtVyF/RNlFZWVjk5OVoo5ObmpoX8eoZSqXR0dASA1NRUQkhgYCAAHDhwQKNFZTIZfQEtKCioqalJLpfTNzn169dPKpVqtDRqN4xRfSMUksjI2NBQAHAwN3/82Wdk0yYil2u5C4VCMWnSJABwcnIqLS3VUJWEhASKooyMjG7fvq2hEs9HP7C/dOlSQshPP/0EAKNHj9ZcOZVKNX36dADo2rVrdXU1ISQ8PBwA6EeqNFcXdRDGqF5Rqci2bYrVq3t17gwAu8aPJ5GRJC2NkV5kMtmgQYMAwNvbu66uTu37r6qqsrOzA4DvvvtO7Tt/SfQSgVZWVk+ePKmrq+NyuSwWS3OJtmrVKgDg8XhZWVmEkE2bNgEAl8ulh8NIZ2GM6pXbt0lk5J7gYABw79RJvmoV+fFHwtz5MpFIRD+eOHz48KamJjXuWaVS0bOBjBo1itkTgvRVnePHjxNCQkJCAOCbb77RRKHY2FiKoths9u+//04IOXv2LJvNpijq8OHDmiiH1AifqdcfCgVs2/ZELO6xdWuJRHI4JGS6QABTpoBAwGBTRUVFAQEBVVVVM2fOPHDgwEuutyGXy6VSaV1dXUNDQ0NDg1QqbWhooH+USqVSqTQ1NTUlJcXa2jojI8PBwUHT3+I5Nm3atHz58smTJx87duzUqVMTJkzw8vLKyspSb5X09PSgoCCZTLZt27YlS5bk5OQEBARIJJK1a9euXr1avbWQ2mGM6o/r1+H8+e+vX1927lxvW9v/W7iQZWcH4eHAxEpBT0tPTx82bJhUKn377bfHjh1bV1dXX19Pp2FDQ0NtbW3r6/r6eolEIpVKm17ikVJra+vFixevXbtWC1/hOaqqqpycnFgsVnl5uYWFhZOTU1VV1e3bt/v27auuEuXl5QMGDCgrK5s3b96ePXvEYvHAgQPz8/NDQkLoIaq6CiENwflG9YRcDlevSuXyjVeuAMCGkSNZFAUjRzKeoQDg6+t75MiR4ODg8+fPv+SipBwOx9zc3NLS0tzc3MzMzMzMzMLCwsLCgn5tbm6empoaFxcnFAo13fwL2drajho1KiEhITY2dtGiRdOmTfvxxx9jYmLUFaONjY0TJ04sKysLCgrasWNHc3NzSEhIfn6+r69vdHQ0ZqhewNGonkhOhkuXIpOT1yYnD3ZxuTx3Lri4wJw5TLfVQi6X8/l8mUw2fvx4e3v71jTk8Xh0ONJxSeemubm58YvmiaqqqnJ2dqYo6uHDh08vNseIw4cPz5
w5MyAg4Nq1a2lpaX5+fjY2Ng8fPjQwMOjgngkhM2bMOHLkSI8ePa5fv87n8xcsWLB7924HB4ebN286Mj4vIXo5OBrVbQ0NkJ4O+flQXi6SyX64fh0A1o0YAQAwYsQLfleLrl27JpPJevfuferUqedv2dzcLJVKy8vLW0+D1tfX19XVtR74BwYGTpgwwdbWdsyYMfHx8QcOHPjoo4+08y3+zcSJEy0sLK5fv56Xl9e/f3+BQJCVlZWYmDh27NgO7vnzzz8/cuQIn88/deoUn8//9ttvd+/ezeVyT5w4gRmqRzBGdVhGBvz+O6hU9LqeX6Wk1Dc1jfPwGNqlC3h4gKsr0/39JSkpCQBGjRpF/7hjx46MjIynLxlJJJL6+vqGhoYXnhVVKBT08m1z5syJj4/fu3cv4zHK5XKnTJny888/Hzx48Isvvpg3b96VK1c6PkaOjY39+uuvORzO0aNHPTw8EhISVq5cSVHUvn37/Pz81NI50g48qNdVeXlw7FjrUvJl9fXuW7c2KRTp4eF97OxgwQKwt2e2waf5+/vfvHkzISFhzJgxAPDmm2+eOXPmH7dks9k8Hs/S0rL1wN/CwoLH47Ue+AcGBtIrzSkUCmdn58rKytTUVE3PjPdCycnJw4cPd3V1LSoqUsv5yrS0NHouvh07dixatCg7O3vQoEESiWTdunWfffZZx/ePtAlHozpJpYJTp1ozFABWXbzY2Nw8q3fvPra2wGJBZaXuxGhdXV16erqhoeGQIUPod8LDwydMmNB6ycjMzIzP59MpyX3pGfw4HM6sWbM2bdq0b98+xmN06NChXbt2LSoqOnjwID1NSUeUl5dPnDixsbHxvffeW7RokVgsDg4OlkgkoaGhn376qVoaRtqEo1GdVFQER460TjR/Tyz22r6doqicJUu60+vFOzgAQ2ta/Le4uLgpU6YMHz78woUL6t1zXl5ez549LSwsysvLTUxM1Lvztho1alRSUhKbzQ4ODvb39xcIBAKBwLVdp1Zee7ZsyLcAAAp1SURBVO21CxcuvP766/Sd9qNGjbp06ZKvr29KSgrjXxO1A45GdVJ1NTw1Q3B6ebkhm/12nz4tGQoAYjEzjf2TxMREABg5cqTa99yjR4+BAwfeuHEjLi6u42PADvr666/T0tLq6uqOHz9+/Phx+k0ej+fu7u7p6enl5eXp6enn52fXupTLv9u1a9fy5cv379/P4XDmz59/6dIlBweHkydPYobqKRyN6qS0NDh3jl6DnlbR0MBhsTqbmrb8zOXCJ58w09t/cXd3z8/Pv3nzpiYujERFRYWHh2tiqNsOIpHo/PnzMplMKBRmZWVlZmZWVVU9s429vb1AIPD29qb/6+np+Zxw3LBhw8qVK7lcbkpKSv/+/TXcPtIUjFGdVFYGMTHPWz2uSxd4+20tNvSvHjx40KVLFz6fX11dzVbf6k+tpFKpvb3948eP79+/T09Br1Nqa2uFQmF2djb93zt37ohEome2sbe39/X1pYerXl5eAoGAXtP09OnTEydOVKlUv/7669SpU5loH6kHHtTrJAcHMDH51xg1MICAAO029K9ycy9bWZkMHz5CExkKAGZmZpMnT/7ll1+io6O/+OILTZToCD6fP3jw4MF/LsJKCCkuLs7KyhIKhXfv3hUKhbm5uRUVFadPnz59+jS9jaGhoaenZ0NDQ2FhISFk/fr1mKH6Dkejuqq8HPbvf/pifQsDA+jeHaZNY6Knf1BYOK2u7iSf/1PXru9oqMSlS5eGDRvm5ORUXFysobDWHIVCUVJSIhQK09PT6UFrbm6u6s+1Vd3c3O7fv89sh6jjMEZ1WGkpHDkCzc0glwMAcDhAUdC7N4wdCywdWURLlZFhp1BUCwQFRkbdNFSDEOLh4ZGfn996X6pek0qld+7c2bNnj5+f3zvvvGPaer4b6S2MUd2mVMK9e1BUBE1NYGMDvXpB68V6HSCTpefk9Dc07OLtXaTRQl999dXnn38+derUI0
eOaLQQQu2AMYrar7JyQ1nZSmvrBa6uuzRaqKyszNXVlc1ml5WVWVtba7QWQm2lI8eGSC/V1ycBAI83StOFHB0dR48eLZfLDx06pOlaCLUVxihqJ5XqyePH1wBY5ubDtFBuzpw5ALBnzx4t1EKoTTBGUTtJpSkqVaOJST8ORxtH2cHBwZ07d87MzExPT9dCOYReHsYoaietHdHTDA0NZ82aBQD79u3TTkWEXhLGKGqnhoZEAODx1P8o/b+ZN28eABw4cKCxsVFrRRF6IYxR1B4KhUgmu8ticU1Ntfc8lUAg8PPzk0gkJ06c0FpRhF4IYxS1B4dj1atXWpcu+1isl50/tN2kUml+fj79mr7Q9PPPP2u6KEIvD+8bRW2jUkkfPoyQSlMoysjWdpmV1QyNlissLJw4cWJ9ff2tW7c6d+4skUhsbGzkcvmlS5eCgoI0Whqhl4SjUdQ2VVWbVCppr17pHh4XzMwGabRWUlLSgAEDMjMzTUxMGhoaAKCkpEShUAAAPoqOdAfGKGobubyUzbakKEM2m2doqMFl9aKiosaOHSsWi8eOHXv9+vVu3brFx8cPHjxYpVKZmppOmTJFc6URahOMUdQ2NjYf1tWdys0NEIl2E6J48S+0XVNT09y5c8PDwxUKRURERHx8PI/H27hxI310P23atJKSEktLS02URqgd8NwoajNClA0NFysqvuRye7m4/KTenZeXl0+ePDk1NdXMzGzfvn0hISFSqXTOnDm//fYbRVGrV69es2aNWtbmREhdcNpm1GYUxebxRnI4VoWF0xQKUWHhNEfHDaamalhB5Nq1ayEhIRUVFc7OzsePH/f19S0tLZ00aVJ6erq5uXlMTAy9hD1COgVHo6htRKI9bDbPwMCpunoHRRmw2ZaPHm2mKI6t7cf29pEduf9p9+7dS5culcvlQUFBR48etbGxSUlJCQ0NffTokYeHx4kTJ3r16qXGL4KQuuC5UdQ2XK63THZHLP7Z1NTPxWW7o+N6O7sIAFJZ+U12tqChoT0LzykUihUrVixYsEAuly9YsCApKcnGxiYqKmrkyJGPHj0aO3ZsamoqZijSWTgaRWogk9158GCeTHYbgLK2nu/k9C2bzXvJ3xWJRNOmTbtw4YKRkdHOnTvnzJnT1NS0ePHin3/+maKoTz75ZP369Sxdme0foX+AMYrUg5Dmqqrvy8vXENJkYGDv4rLD0nLiC38rIyNj4sSJxcXFDg4OcXFx/v7+rZeYTE1N9+/fHxISooXmEeoIjFGkTo2NwgcP5j1+nAoAfH6oi8t2Dqfzv2185MiRuXPnymSyQYMG/fbbb/b29tevX58yZcrTl5i02DtC7YTHSkiduFyvHj2uOjl9z2KZ1NYeDQub8Y+rJymVyhUrVsyYMUMmk82ePfuPP/6wt7c/cODAiBEjKioqgoKC0tLSMEOR3iAIacCTJwWHDs2j/44FBweXlZU9/enRo0cBwMDAYPv27YSQ5ubmiIgIemP6QhNDXSPUHhijSFNUKlV0dLSVlRUAWFhYbN68WalUtn76wQcfJCcnE0JEItGIESMAwMjIaO/evcz1i1A74blRpFmVlZVLliyJi4sDgCFDhuzZs8fDw6P104yMjEmTJhUVFbVeYmKuU4TaCc+NIs2ys7M7duxYbGysjY3N5cuX+/Tps3HjRqVSCQCxsbGBgYFFRUUBAQFpaWmYoUhP4WgUaUltbe2KFSuioqIAoG/fvgMGDIiKiiKEzJ49OyoqisvV+PTPCGkIxijSqjNnzixcuLC0tNTY2FipVH7//fdLly5luimEOgRjFGlbfX39gAED8vLy1qxZExkZyXQ7CHUUnhtF2sbj8QYNGgQALi4uTPeCkBpgjCIGmJubAwC9LghC+g5jFDGAjtH6+nqmG0FIDTBGEQNwNIpeJRijiAE8Hg8wRtGrAmMUMQAP6tGrBGMUMQBHo+hVgjGKGIDnRtGrBGMUMQAP6tGrBBdYRgzobGU5YfQwJwdbph
tBSA0wRhEDOlma9zXIM1PUMt0IQmqAB/WIAYYmZgDQJMNzo+hVgDGKGGBgZEKx2M1NjSqlguleEOoojFHEAIqiDLmmACB/ImO6F4Q6CmMUMcOIjtFGKdONINRRGKOIGYZcPD2KXhEYo4gZRibmANCEo1Gk/zBGETP+PKh/zHQjCHUUxihiBh7Uo1cGxihiBh7Uo1cGLmmHmFFRkCmtfWTbxZNnbc90Lwh1CD4MihgQs2p6beUDjqGxbZder721wtIW17ZDegwP6hED6sUVs9ceWrQ1yb674PednzLdDkIdgjGKGEOx2J0c3eirTNGfhZ756bOtCwYpFc1M94VQ2+BBPWLGic0fsDmGVcU50z/fBwAPc9OGzVj2RviXFIX/tCM9gzGKmDF89ieWnZ2Ks26c3PLR4u3JAODiNQAzFOkj/FuLmGFh7ci379Jn5DSZRCyrr2G6HYTaD0ejiBn30y+YFXQuunvV2sndjG/DdDsItR/GKGJAv9dnNtRUyuprXDwHjJm/lqKogIkLASim+0KoPfD2e4QQ6hA8N4oQQh2CMYoQQh2CMYoQQh2CMYoQQh2CMYoQQh2CMYoQQh3y//shrlVAlUWhAAABt3pUWHRyZGtpdFBLTCByZGtpdCAyMDI0LjAzLjUAAHice79v7T0GIBAAYkYGCBADYkkgbmBUZlAAibMxaAApZhY2hgwQzYwkAKEFIDQTO4RmRsgngPQzw8SxyzNh6nOA0TAL8TEgagUhbuUGeoORKYOJiTmBmSWDiYU1gYVNgY09gZ2DgZ0zg4mTK4GLO4OJm4eBh5eBl0+DiY9fgV8gg0lAMEFQKINJSDhBWCSDSUSUgZUxgY9ZgZstQYQ/wYkFaDIrIxMzCysrOxs3Fyc7DzcbCzMfL5uAoJCwCL+4ECMi9BjEVEqF99cfnnAAxDnXoL6/foUjmM3zhcn+kQMrmD1pyuU9sg16+0FsP+7D+493O4HZaoe/7F8fwwtWs8ZM8YBM1047EHudfMOBJTskwez3fdMOPGydBlaf6zL1wLKp++1B7PWLUg9kLFFyALFNjggcONv9Gizu5ly0P/qRMlh8+737dq6z/4LFbc5H2J89zwlmF50QcbjDPQFsfsoVUwdWxwlg85tNuhy+qG4Es+/8WOFga563D8R+XDTDQaR2GlivUmCQA3PSdjDbwNXQgeGyKdguMQBL4WpwRrbCDgAAAi16VFh0TU9MIHJka2l0IDIwMjQuMDMuNQAAeJx9VFtuGzEM/PcpeIEIfImiPmM7KIoiNtC4vUOBfBW9P0ru1paCCN31Eitpdvga+gB5fT9/+/UHHhefDwcA/M+v9w4/BREPr5AvcHz58vUCp9vz8b5zuv643N6AGbjGN3F/xD7frq/3HYLjb3jCUpshGjxpqSxuClhwu8bHDKdEWlXBDk9SkGuPt89ICWRQElUJSi7ojF4XQN0pg8jZN3InU14gayIpg2vOiWy1d11xGrwlshMz1vQevvObz8gGlzyv5N3yXCxSXwE9nWtsR2VanFNFIl8A+w5sqF09o6AI02iBJEznWszUUICKdmt1VXaiREqp7hiAiFeJl9WkrUORsFFvNThdkXHJKXvhu5pIT05D68s4NbxjUWmsFpTdSXAJrFvTXYMna1jFhdsKaAHkIqxNMjYORsUVsG1AV0Lfi8nWfZlONkiLRCOjroEM370to8wO1fCJoaOsAAtHCVZqx42zWdRQYeuk9ZU4mAIoQWmMnI0MxraaipjH03sk1DIL/5cZrny/XM4fhnQf2+P1ch5jmzeP0QxhgIz503zGlOVdxyjFAmzMC8WyjaHQeHxIn+LpQ+CaW7OMNQ3RJFdNQzzJkjYjk/xoMzrJjNNQnfREm7FJOJqG2qQQTUM+KUHTUJ86rml47qymYZpaSGmYp15pVGasZY/mETBnSSOpRyycZU0im7s49yzX97/peD/8BatDHPVL7xMjAAABHXpUWHRTTUlMRVMgcmRraXQgMjAyNC
4wMy41AAB4nCVQO25DMQy7SscEcAz9LeEhS71n6RGydurcw5dyPRgQQfGjvfn5ej23bHy3t7776f6+b8X8tW+fP/fn1vuL5eP39rC5yMrHgycvrxgXICLNNWiyE3MeKMJi8LSK5QB0eib5kOnG0hSZElwJThoJAJploVrNCYoaF03TJUenkrWBtCgWWLmmyhqXTBVbCkRAsQbSmOoElIDBZVM5ixvBUmHJQUbkAU9RgWmTVnSOU6vNFZzo2cDI1l1Q6wbQP3FJPGWtVonksC7B00XXUcZ1yvxgiCzwR2XKlWc53LRTasvUv6CvIIqB60Eko/s6s2v0KqWA33dztAkU1gBLxv33D7xAXEcM2UGcAAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mol" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "column_sums = df.sum()\n", + "\n", + "# Identificar columnas cuya suma es igual a 0\n", + "columns_with_zero_sum = column_sums[column_sums == 0].index.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[225, 238, 256, 264, 276, 280, 308, 314, 342]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns_with_zero_sum" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ugropy", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ugropy/__init__.py b/ugropy/__init__.py index ee9588b..747a8d4 100644 --- a/ugropy/__init__.py +++ b/ugropy/__init__.py @@ -7,11 +7,11 @@ """ from .core import instantiate_mol_object -from .core.frag_classes.abdulelah_gani.abdulelah_gani_p import ( - AbdulelahGaniPrimaryModel, +from .core.frag_classes.abdulelah_gani.abdulelah_gani_pst import ( + AbdulelahGaniPSTModel, ) -from .core.frag_classes.abdulelah_gani.abdulelah_gani_p_result import ( - 
AGaniPFragmentationResult, +from .core.frag_classes.abdulelah_gani.abdulelah_gani_pst_result import ( + AGaniPSTFragmentationResult, ) from .core.frag_classes.base.fragmentation_model import ( FragmentationModel, @@ -29,6 +29,7 @@ from .core.ilp_solvers.ilp_solver import ILPSolver from .groups import Groups from .models.abdulelah_gani_pmod import abdulelah_gani_p +from .models.abdulelah_gani_smod import abdulelah_gani_s from .models.jobackmod import joback from .models.psrkmod import psrk from .models.unifacmod import unifac @@ -38,8 +39,8 @@ "constants", "writers", "instantiate_mol_object", - "AbdulelahGaniPrimaryModel", - "AGaniPFragmentationResult", + "AbdulelahGaniPSTModel", + "AGaniPSTFragmentationResult", "FragmentationModel", "FragmentationResult", "GibbsModel", @@ -48,6 +49,7 @@ "JobackFragmentationResult", "Groups", "abdulelah_gani_p", + "abdulelah_gani_s", "joback", "unifac", "psrk", diff --git a/ugropy/core/frag_classes/abdulelah_gani/__init__.py b/ugropy/core/frag_classes/abdulelah_gani/__init__.py index 19da812..7e94fde 100644 --- a/ugropy/core/frag_classes/abdulelah_gani/__init__.py +++ b/ugropy/core/frag_classes/abdulelah_gani/__init__.py @@ -1,6 +1,6 @@ """Abdulelah-Gani frag classes module.""" -from . import abdulelah_gani_p, abdulelah_gani_p_result +from . 
import abdulelah_gani_pst, abdulelah_gani_pst_result -__all__ = ["abdulelah_gani_p", "abdulelah_gani_p_result"] +__all__ = ["abdulelah_gani_pst", "abdulelah_gani_pst_result"] diff --git a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py similarity index 94% rename from ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py rename to ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py index f72b048..41a1c54 100644 --- a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p.py +++ b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst.py @@ -6,8 +6,8 @@ from rdkit import Chem -from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_p_result import ( - AGaniPFragmentationResult, +from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst_result import ( + AGaniPSTFragmentationResult, ) from ugropy.core.frag_classes.base.fragmentation_model import ( FragmentationModel, @@ -16,7 +16,7 @@ from ugropy.core.ilp_solvers.ilp_solver import ILPSolver -class AbdulelahGaniPrimaryModel(FragmentationModel): +class AbdulelahGaniPSTModel(FragmentationModel): """Abdulelah-Gani model dedicated to properties estimation models. 
Class to construct the primary structures detector for the Abdulelah-Gani @@ -48,13 +48,17 @@ def __init__( self, subgroups: pd.DataFrame, subgroups_info: pd.DataFrame, + allow_overlapping: bool = False, + allow_free_atoms: bool = False, ) -> None: super().__init__( subgroups=subgroups, - allow_overlapping=False, - fragmentation_result=AGaniPFragmentationResult, + allow_overlapping=allow_overlapping, + allow_free_atoms=allow_free_atoms, + fragmentation_result=AGaniPSTFragmentationResult, ) + self.subgroups_info = subgroups_info def get_groups( @@ -63,7 +67,7 @@ def get_groups( identifier_type: str = "name", solver: ILPSolver = DefaultSolver, search_multiple_solutions: bool = False, - ) -> Union[AGaniPFragmentationResult, List[AGaniPFragmentationResult]]: + ) -> Union[AGaniPSTFragmentationResult, List[AGaniPSTFragmentationResult]]: """Get the groups of a molecule. Parameters diff --git a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py similarity index 96% rename from ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py rename to ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py index 67a02b5..8b114c8 100644 --- a/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_p_result.py +++ b/ugropy/core/frag_classes/abdulelah_gani/abdulelah_gani_pst_result.py @@ -9,7 +9,7 @@ ) -class AGaniPFragmentationResult(FragmentationResult): +class AGaniPSTFragmentationResult(FragmentationResult): """Abdulelah-Gani primary group contribution properties estimator. 
Parameters diff --git a/ugropy/core/frag_classes/base/fragmentation_model.py b/ugropy/core/frag_classes/base/fragmentation_model.py index 213379f..af48b55 100644 --- a/ugropy/core/frag_classes/base/fragmentation_model.py +++ b/ugropy/core/frag_classes/base/fragmentation_model.py @@ -54,10 +54,12 @@ def __init__( self, subgroups: pd.DataFrame, allow_overlapping: bool = False, + allow_free_atoms: bool = False, fragmentation_result: FragmentationResult = FragmentationResult, ) -> None: self.subgroups = subgroups self.allow_overlapping = allow_overlapping + self.allow_free_atoms = allow_free_atoms self.fragmentation_result = fragmentation_result # Instantiate all de mol object from their SMARTS representation @@ -122,7 +124,7 @@ def get_groups( ) # If there is free atoms in the molecule can't fragment with the model - if np.size(free_atoms) > 0: + if np.size(free_atoms) > 0 and not self.allow_free_atoms: return self.set_fragmentation_result( mol, [{}], search_multiple_solutions, **kwargs ) diff --git a/ugropy/groupscsv/abdulelah_gani/secondary/info.csv b/ugropy/groupscsv/abdulelah_gani/secondary/info.csv new file mode 100644 index 0000000..ae2da41 --- /dev/null +++ b/ugropy/groupscsv/abdulelah_gani/secondary/info.csv @@ -0,0 +1,131 @@ +group|group_number +(CH3)2CH|221 +(CH3)3C|222 +CH(CH3)CH(CH3)|223 +CH(CH3)C(CH3)2|224 +C(CH3)2C(CH3)2|225 +CHn=CHm-CHp=CHk (k,m,n,p in 0..2)|226 +CH3-CHm=CHn (m,n in 0..2)|227 +CH2-CHm=CHn (m,n in 0..2)|228 +CHp-CHm=CHn (m,n in 0..2; p in 0..1)|229 +CHCHO or CCHO|230 +CH3COCH2|231 +CH3COCH or CH3COC|232 +CHCOOH or CCOOH|233 +CH3COOCH or CH3COOC|234 +CO-O-CO|235 +CHOH|236 +COH|237 +CH3COCHnOH (n in 0..2)|238 +NCCHOH or NCCOH|239 +OH-CHn-COO (n in 0..2)|240 +CHm(OH)CHn(OH) (m,n in 0..2)|241 +CHm(OH)CHn(NHp) (m,n,p in 0..2)|242 +CHm(NH2)CHn(NH2) (m,n in 0..2)|243 +CHm(NH)CHn(NH2) (m,n in 1..2)|244 +H2NCOCHnCHmCONH2 (m,n in 1..2)|245 +CHm(NHn)-COOH (m,n in 0..2)|246 +HOOC-CHn-COOH (n in 1..2)|247 +HOOC-CHn-CHm-COOH (n, m in 1..2)|248 
+HO-CHn-COOH (n in 1..2)|249 +NH2-CHn-CHm-COOH (n, m in 1..2)|250 +CH3-O-CHn-COOH (n in 1..2)|251 +HS-CH-COOH|252 +HS-CHn-CHm-COOH (n, m in 1..2)|253 +NC-CHn-CHm-CN (n, m in 1..2)|254 +OH-CHn-CHm-CN (n, m in 1..2)|255 +HS-CHn-CHm-SH (n, m in 1..2)|256 +COO-CHn-CHm-OOC (n, m in 1..2)|257 +OOC-CHm-CHm-COO (n, m in 1..2)|258 +NC-CHn-COO (n in 1..2)|259 +COCHnCOO (n in 1..2)|260 +CHm-O-CHn=CHp (m,n,p in 0..3)|261 +CHm=CHn-F (m,n in 0..2)|262 +CHm=CHn-Br (m,n in 0..2)|263 +CHm=CHn-I (m,n in 0..2)|264 +CHm=CHn-Cl (m,n in 0..2)|265 +CHm=CHn-CN (m,n in 0..2)|266 +CHn=CHm-COO-CHp (m,n,p in 0..3)|267 +CHm=CHn-CHO (m,n in 0..2)|268 +CHm=CHn-COOH (m,n in 0..2)|269 +aC-CHn-X (n in 1..2) X: Halogen|270 +aC-CHn-NHm (n in 1..2; m in 0..2))|271 +aC-CHn-O- (n in 1..2)|272 +aC-CHn-OH (n in 1..2)|273 +aC-CHn-CN (n in 1..2)|274 +aC-CHn-CHO (n in 1..2)|275 +aC-CHn-SH (n in 1..2)|276 +aC-CHn-COOH (n in 1..2)|277 +aC-CHn-CO- (n in 1..2)|278 +aC-CHn-S- (n in 1..2)|279 +aC-CHn-OOC-H (n in 1..2)|280 +aC-CHm-NO2 (n in 1..2)|281 +aC-CHn-CONH2 (n in 1..2)|282 +aC-CHn-OOC (n in 1..2)|283 +aC-CHn-COO (n in 1..2)|284 +aC-SO2-OH|285 +aC-CH(CH3)2|286 +aC-C(CH3)3|287 +aC-CF3|288 +(CHn=C)(cyc)-CHO (n in 0..2)|289 +(CHn=C)cyc-COO-CHm (n,m in 0..3)|290 +(CHn=C)cyc-CO- (n in 0..2)|291 +(CHn=C)cyc-CH3 (n in 0..2)|292 +(CHn=C)cyc-CH2 (n in 0..2)|293 +(CHn=C)cyc-CN (n in 0..2)|294 +(CHn=C)cyc-Cl (n in 0..2)|295 +CHcyc-CH3|296 +CHcyc-CH2|297 +CHcyc-CH|298 +CHcyc-C|299 +CHcyc-CH=CHn (n in 1..2)|300 +CHcyc-C=CHn (n in 1..2)|301 +CHcyc-Cl|302 +CHcyc-F|303 +CHcyc-OH|304 +CHcyc-NH2|305 +CHcyc-NH-CHn (n in 0..3)|306 +CHcyc-N-CHn (n in 0..3)|307 +CHcyc-SH|308 +CHcyc-CN|309 +CHcyc-COOH|310 +CHcyc-CO|311 +CHcyc-NO2|312 +CHcyc-S-|313 +CHcyc-CHO|314 +CHcyc-O-|315 +CHcyc-OOCH|316 +CHcyc-COO|317 +CHcyc-OOC|318 +Ccyc-CH3|319 +Ccyc-CH2|320 +Ccyc-OH|321 +>Ncyc-CH3|322 +>Ncyc-CH2|323 +AROMRINGs1s2|324 +AROMRINGs1s3|325 +AROMRINGs1s4|326 +AROMRINGs1s2s3|327 +AROMRINGs1s2s4|328 +AROMRINGs1s3s5|329 +AROMRINGs1s2s3s4|330 
+AROMRINGs1s2s3s5|331 +AROMRINGs1s2s4s5|332 +PYRIDINEs2|333 +PYRIDINEs3|334 +PYRIDINEs4|335 +PYRIDINEs2s3|336 +PYRIDINEs2s4|337 +PYRIDINEs2s5|338 +PYRIDINEs2s6|339 +PYRIDINEs3s4|340 +PYRIDINEs3s5|341 +PYRIDINEs2s3s6|342 +(CHn=CHm)cyc-COOH|343 +AROMRINGs1s2s3s4s5|344 +aC-NHCOCH2N|345 +(N=C)cyc-CH3|346 +aC-CONH(CH2)2N|347 +aC-SO2NHn (n>=0;n<3)|348 +aC-SO2NHn (n>=0;n<3)|349 +aC-SO2NHn (n>=0;n<3)|350 \ No newline at end of file diff --git a/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv b/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv index 96f4ade..9c10aaa 100644 --- a/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv +++ b/ugropy/groupscsv/abdulelah_gani/secondary/secondary.csv @@ -1,130 +1,131 @@ -(CH3)2CH -(CH3)3C -CH(CH3)CH(CH3) -CH(CH3)C(CH3)2 -C(CH3)2C(CH3)2 -CHn=CHm-CHp=CHk (k,m,n,p in 0..2) -CH3-CHm=CHn (m,n in 0..2) -CH2-CHm=CHn (m,n in 0..2) -CHp-CHm=CHn (m,n in 0..2; p in 0..1) -CHCHO or CCHO -CH3COCH2 -CH3COCH or CH3COC -CHCOOH or CCOOH -CH3COOCH or CH3COOC -CO-O-CO -CHOH -COH -CH3COCHnOH (n in 0..2) -NCCHOH or NCCOH -OH-CHn-COO (n in 0..2) -CHm(OH)CHn(OH) (m,n in 0..2) -CHm(OH)CHn(NHp) (m,n,p in 0..2) -CHm(NH2)CHn(NH2) (m,n in 0..2) -CHm(NH)CHn(NH2) (m,n in 1..2) -H2NCOCHnCHmCONH2 (m,n in 1..2) -CHm(NHn)-COOH (m,n in 0..2) -HOOC-CHn-COOH (n in 1..2) -HOOC-CHn-CHm-COOH (n, m in 1..2) -HO-CHn-COOH (n in 1..2) -NH2-CHn-CHm-COOH (n, m in 1..2) -CH3-O-CHn-COOH (n in 1..2) -HS-CH-COOH -HS-CHn-CHm-COOH (n, m in 1..2) -NC-CHn-CHm-CN (n, m in 1..2) -OH-CHn-CHm-CN (n, m in 1..2) -HS-CHn-CHm-SH (n, m in 1..2) -COO-CHn-CHm-OOC (n, m in 1..2) -OOC-CHm-CHm-COO (n, m in 1..2) -NC-CHn-COO (n in 1..2) -COCHnCOO (n in 1..2) -CHm-O-CHn=CHp (m,n,p in 0..3) -CHm=CHn-F (m,n in 0..2) -CHm=CHn-Br (m,n in 0..2) -CHm=CHn-I (m,n in 0..2) -CHm=CHn-Cl (m,n in 0..2) -CHm=CHn-CN (m,n in 0..2) -CHn=CHm-COO-CHp (m,n,p in 0..3) -CHm=CHn-CHO (m,n in 0..2) -CHm=CHn-COOH (m,n in 0..2) -aC-CHn-X (n in 1..2) X: Halogen -aC-CHn-NHm (n in 1..2; m in 0..2)) -aC-CHn-O- (n 
in 1..2) -aC-CHn-OH (n in 1..2) -aC-CHn-CN (n in 1..2) -aC-CHn-CHO (n in 1..2) -aC-CHn-SH (n in 1..2) -aC-CHn-COOH (n in 1..2) -aC-CHn-CO- (n in 1..2) -aC-CHn-S- (n in 1..2) -aC-CHn-OOC-H (n in 1..2) -aC-CHm-NO2 (n in 1..2) -aC-CHn-CONH2 (n in 1..2) -aC-CHn-OOC (n in 1..2) -aC-CHn-COO (n in 1..2) -aC-SO2-OH -aC-CH(CH3)2 -aC-C(CH3)3 -aC-CF3 -(CHn=C)(cyc)-CHO (n in 0..2) -(CHn=C)cyc-COO-CHm (n,m in 0..3) -(CHn=C)cyc-CO- (n in 0..2) -(CHn=C)cyc-CH3 (n in 0..2) -(CHn=C)cyc-CH2 (n in 0..2) -(CHn=C)cyc-CN (n in 0..2) -(CHn=C)cyc-Cl (n in 0..2) -CHcyc-CH3 -CHcyc-CH2 -CHcyc-CH -CHcyc-C -CHcyc-CH=CHn (n in 1..2) -CHcyc-C=CHn (n in 1..2) -CHcyc-Cl -CHcyc-F -CHcyc-OH -CHcyc-NH2 -CHcyc-NH-CHn (n in 0..3) -CHcyc-N-CHn (n in 0..3) -CHcyc-SH -CHcyc-CN -CHcyc-COOH -CHcyc-CO -CHcyc-NO2 -CHcyc-S- -CHcyc-CHO -CHcyc-O- -CHcyc-OOCH -CHcyc-COO -CHcyc-OOC -Ccyc-CH3 -Ccyc-CH2 -Ccyc-OH ->Ncyc-CH3 ->Ncyc-CH2 -AROMRINGs1s2 -AROMRINGs1s3 -AROMRINGs1s4 -AROMRINGs1s2s3 -AROMRINGs1s2s4 -AROMRINGs1s3s5 -AROMRINGs1s2s3s4 -AROMRINGs1s2s3s5 -AROMRINGs1s2s4s5 -PYRIDINEs2 -PYRIDINEs3 -PYRIDINEs4 -PYRIDINEs2s3 -PYRIDINEs2s4 -PYRIDINEs2s5 -PYRIDINEs2s6 -PYRIDINEs3s4 -PYRIDINEs3s5 -PYRIDINEs2s3s6 -(CHn=CHm)cyc-COOH -AROMRINGs1s2s3s4s5 -aC-NHCOCH2N -(N=C)cyc-CH3 -aC-CONH(CH2)2N -aC-SO2NHn (n>=0;n<3) -aC-SO2NHn (n>=0;n<3) -aC-SO2NHn (n>=0;n<3) \ No newline at end of file +group|smarts +(CH3)2CH|[CH;!R]([CH3])[CH3] +?(CH3)3C| +?CH(CH3)CH(CH3)| +?CH(CH3)C(CH3)2| +????C(CH3)2C(CH3)2| +?CHn=CHm-CHp=CHk (k,m,n,p in 0..2)| +?CH3-CHm=CHn (m,n in 0..2)| +?CH2-CHm=CHn (m,n in 0..2)| +?CHp-CHm=CHn (m,n in 0..2; p in 0..1)| +?CHCHO or CCHO| +?CH3COCH2| +?CH3COCH or CH3COC| +?CHCOOH or CCOOH| +?CH3COOCH or CH3COOC| +?CO-O-CO| +?CHOH| +?COH| +????CH3COCHnOH (n in 0..2)| +?NCCHOH or NCCOH| +?OH-CHn-COO (n in 0..2)| +?CHm(OH)CHn(OH) (m,n in 0..2)| +?CHm(OH)CHn(NHp) (m,n,p in 0..2)| +?CHm(NH2)CHn(NH2) (m,n in 0..2)| +?CHm(NH)CHn(NH2) (m,n in 1..2)| +?H2NCOCHnCHmCONH2 (m,n in 1..2)| +?CHm(NHn)-COOH (m,n in 0..2)| 
+?HOOC-CHn-COOH (n in 1..2)| +?HOOC-CHn-CHm-COOH (n, m in 1..2)| +?HO-CHn-COOH (n in 1..2)| +?NH2-CHn-CHm-COOH (n, m in 1..2)| +?CH3-O-CHn-COOH (n in 1..2)| +?HS-CH-COOH| +?HS-CHn-CHm-COOH (n, m in 1..2)| +?NC-CHn-CHm-CN (n, m in 1..2)| +?OH-CHn-CHm-CN (n, m in 1..2)| +????HS-CHn-CHm-SH (n, m in 1..2)| +?COO-CHn-CHm-OOC (n, m in 1..2)| +?OOC-CHm-CHm-COO (n, m in 1..2)| +?NC-CHn-COO (n in 1..2)| +?COCHnCOO (n in 1..2)| +?CHm-O-CHn=CHp (m,n,p in 0..3)| +?CHm=CHn-F (m,n in 0..2)| +CHm=CHn-Br (m,n in 0..2)|[CH0,CH1,CH2;!R]=[CH0,CH1,CH2;!R][Br] +????CHm=CHn-I (m,n in 0..2)| +?CHm=CHn-Cl (m,n in 0..2)| +?CHm=CHn-CN (m,n in 0..2)| +?CHn=CHm-COO-CHp (m,n,p in 0..3)| +?CHm=CHn-CHO (m,n in 0..2)| +?CHm=CHn-COOH (m,n in 0..2)| +?aC-CHn-X (n in 1..2) X: Halogen| +?aC-CHn-NHm (n in 1..2; m in 0..2))| +?aC-CHn-O- (n in 1..2)| +?aC-CHn-OH (n in 1..2)| +?aC-CHn-CN (n in 1..2)| +?aC-CHn-CHO (n in 1..2)| +????aC-CHn-SH (n in 1..2)| +?aC-CHn-COOH (n in 1..2)| +?aC-CHn-CO- (n in 1..2)| +?aC-CHn-S- (n in 1..2)| +????aC-CHn-OOC-H (n in 1..2)| +?aC-CHm-NO2 (n in 1..2)| +?aC-CHn-CONH2 (n in 1..2)| +?aC-CHn-OOC (n in 1..2)| +?aC-CHn-COO (n in 1..2)| +?aC-SO2-OH| +?aC-CH(CH3)2| +?aC-C(CH3)3| +?aC-CF3| +?(CHn=C)(cyc)-CHO (n in 0..2)| +?(CHn=C)cyc-COO-CHm (n,m in 0..3)| +?(CHn=C)cyc-CO- (n in 0..2)| +?(CHn=C)cyc-CH3 (n in 0..2)| +?(CHn=C)cyc-CH2 (n in 0..2)| +?(CHn=C)cyc-CN (n in 0..2)| +?(CHn=C)cyc-Cl (n in 0..2)| +?CHcyc-CH3| +?CHcyc-CH2| +?CHcyc-CH| +?CHcyc-C| +CHcyc-CH=CHn (n in 1..2)|[CH;R][CH]=[CH1,CH2;!R] +?CHcyc-C=CHn (n in 1..2)| +?CHcyc-Cl| +?CHcyc-F| +?CHcyc-OH| +?CHcyc-NH2| +?CHcyc-NH-CHn (n in 0..3)| +?CHcyc-N-CHn (n in 0..3)| +????CHcyc-SH| +?CHcyc-CN| +?CHcyc-COOH| +?CHcyc-CO| +CHcyc-NO2|[CH;R][N+](=O)[O-] +?CHcyc-S-| +????CHcyc-CHO| +?CHcyc-O-| +?CHcyc-OOCH| +?CHcyc-COO| +?CHcyc-OOC| +?Ccyc-CH3| +?Ccyc-CH2| +?Ccyc-OH| +?>Ncyc-CH3| +?>Ncyc-CH2| +?AROMRINGs1s2| +?AROMRINGs1s3| +?AROMRINGs1s4| +?AROMRINGs1s2s3| +?AROMRINGs1s2s4| +?AROMRINGs1s3s5| +?AROMRINGs1s2s3s4| 
+?AROMRINGs1s2s3s5| +?AROMRINGs1s2s4s5| +?PYRIDINEs2| +?PYRIDINEs3| +?PYRIDINEs4| +?PYRIDINEs2s3| +?PYRIDINEs2s4| +?PYRIDINEs2s5| +?PYRIDINEs2s6| +?PYRIDINEs3s4| +?PYRIDINEs3s5| +????PYRIDINEs2s3s6| +?(CHn=CHm)cyc-COOH| +?AROMRINGs1s2s3s4s5| +?aC-NHCOCH2N| +(N=C)cyc-CH3|[#7;R]@[#6;R][CH3] +?aC-CONH(CH2)2N| +?aC-SO2NHn (n>=0;n<3)| +?aC-SO2NHn (n>=0;n<3)| +?aC-SO2NHn (n>=0;n<3)| \ No newline at end of file diff --git a/ugropy/models/abdulelah_gani_pmod.py b/ugropy/models/abdulelah_gani_pmod.py index ce5b20a..9f74e32 100644 --- a/ugropy/models/abdulelah_gani_pmod.py +++ b/ugropy/models/abdulelah_gani_pmod.py @@ -23,8 +23,8 @@ """ from ugropy.constants import _csvs -from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_p import ( - AbdulelahGaniPrimaryModel, +from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst import ( + AbdulelahGaniPSTModel, ) from ugropy.models.read_csv import _rd @@ -37,4 +37,4 @@ _ag_sg = _rd(_ag / "primary.csv", "group") _ag_info = _rd(_ag / "info.csv", "group") -abdulelah_gani_p = AbdulelahGaniPrimaryModel(_ag_sg, _ag_info) +abdulelah_gani_p = AbdulelahGaniPSTModel(_ag_sg, _ag_info, False, False) diff --git a/ugropy/models/abdulelah_gani_smod.py b/ugropy/models/abdulelah_gani_smod.py new file mode 100644 index 0000000..b5a2da8 --- /dev/null +++ b/ugropy/models/abdulelah_gani_smod.py @@ -0,0 +1,40 @@ +"""AbdulelahGani Secondary Structures FragmentationModel implementation. + +Import and use the AbdulelahGani Secondary Structures FragmentationModel with: + +.. 
code-block:: python + + from ugropy import abdulelah_gani_s + + # Get groups from molecule's name + tol = abdulelah_gani_s.get_groups("toluene") + + print(tol.subgroups) + + # Get groups from molecule's SMILES + eth = abdulelah_gani_s.get_groups("CCO", "smiles") + + print(eth.subgroups) + +Attributes +---------- +abdulelah_gani_s: AbdulelahGaniPSTModel + AbdulelahGaniPSTModel FragmentationModel :cite:p:`gani` +""" + +from ugropy.constants import _csvs +from ugropy.core.frag_classes.abdulelah_gani.abdulelah_gani_pst import ( + AbdulelahGaniPSTModel, +) +from ugropy.models.read_csv import _rd + + +# ============================================================================= +# Abdulelah Gani Secondary Structures FragmentationModel +# ============================================================================= +_ag = _csvs / "abdulelah_gani" / "secondary" + +_ag_sg = _rd(_ag / "secondary.csv", "group") +_ag_info = _rd(_ag / "info.csv", "group") + +abdulelah_gani_s = AbdulelahGaniPSTModel(_ag_sg, _ag_info, True, True)