more Dortmund groups

ipqa-research · Nov 2, 2023 · 8fbd233 · 8fbd233
1 parent 5bd9af6
commit 8fbd233
Show file tree

Hide file tree

Showing 7 changed files with 260 additions and 130 deletions.
diff --git a/coso.ipynb b/coso.ipynb
diff --git a/tests/dortmund_new_groups/test_14_oh(s).py b/tests/dortmund_new_groups/test_14_oh(s).py
@@ -0,0 +1,20 @@
+import pytest
+
+import ugropy as ug
+
+
+# =============================================================================
+# 14 - OH (P), OH (S), OH (T)
+# =============================================================================
+# Dortmund
+trials_dortmund = [
+
+]
+
+
+@pytest.mark.OH
+@pytest.mark.DORTMUND
+@pytest.mark.parametrize("identifier, result, identifier_type", trials_dortmund)
+def test_unifac_ch2(identifier, result, identifier_type):
+    groups = ug.Groups(identifier, identifier_type)
+
diff --git a/tools/build_dortmund_matrix.ipynb b/tools/build_dortmund_matrix.ipynb
@@ -0,0 +1,80 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(\"../ugropy/groupscsv/dortmund/dortmund_subgroups.csv\", mode='r') as f:\n",
+    "    df = pd.read_csv(f, sep='|', index_col=\"group\", comment=\"?\")\n",
+    "\n",
+    "# indexes of the groups matrix\n",
+    "index = df.index.to_numpy()\n",
+    "\n",
+    "# filled with zeros at start\n",
+    "matrix = np.zeros((len(index), len(index)), dtype=int)\n",
+    "\n",
+    "# build the matrix\n",
+    "dfm = pd.DataFrame(matrix, index=index, columns=index).rename_axis(\"group\")\n",
+    "\n",
+    "# fill the matrix\n",
+    "for group in df.index:\n",
+    "    # JSON text that maps subgroup names to their contribution counts\n",
+    "    string_contribution = df.loc[group].contribute\n",
+    "\n",
+    "    try:\n",
+    "        contribution = json.loads(string_contribution)  # transform into dict\n",
+    "    except (json.JSONDecodeError, TypeError) as err:\n",
+    "        # Abort instead of breaking out of the loop: carrying on would save\n",
+    "        # a partially filled matrix to disk further down.\n",
+    "        raise ValueError(\n",
+    "            f\"Invalid contribute entry for group {group!r}: \"\n",
+    "            f\"{string_contribution!r}\"\n",
+    "        ) from err\n",
+    "\n",
+    "    for k, value in contribution.items():\n",
+    "        # An unknown key would otherwise be dropped or create a new column\n",
+    "        if k not in dfm.columns:\n",
+    "            raise KeyError(\n",
+    "                f\"Unknown subgroup {k!r} in the contribution of {group!r}\"\n",
+    "            )\n",
+    "\n",
+    "        # One .loc call with both labels: the chained dfm.loc[group][k]\n",
+    "        # form may assign into a temporary copy and leave dfm unchanged.\n",
+    "        dfm.loc[group, k] = value  # fill contribution\n",
+    "\n",
+    "# save the matrix\n",
+    "dfm.to_csv(\"../ugropy/groupscsv/dortmund/dortmund_matrix.csv\", sep=\"|\")\n",
+    "    "
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "ugropy",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/ugropy/constants.py b/ugropy/constants.py
@@ -78,8 +78,8 @@
         f, sep="|", index_col="group", comment="?"
     )
 
-# with open(f"{here}/groupscsv/dortmund/dortmund_matrix.csv", mode="r") as f:
-#     dortmund_matrix = pd.read_csv(f, sep="|", index_col="group", comment="?")
+with open(f"{here}/groupscsv/dortmund/dortmund_matrix.csv", mode="r") as f:
+    dortmund_matrix = pd.read_csv(f, sep="|", index_col="group", comment="?")
 
 # with open(f"{here}/groupscsv/dortmund/ch2_hideouts.csv", mode="r") as f:
 #     dortmund_ch2_hideouts = pd.read_csv(

diff --git a/ugropy/groups.py b/ugropy/groups.py
@@ -4,6 +4,8 @@
 from rdkit import Chem
 
 from .constants import (
+    dortmund_matrix,
+    dortmund_subgroups,
     problematic_structures,
     psrk_ch2_hideouts,
     psrk_ch_hideouts,
@@ -35,6 +37,9 @@ class Groups:
     psrk : bool, optional
         If True the algorithm will try to get the PSRK groups. If False this
         will be skipped, by default "True".
+    dortmund : bool, optional
+        If True the algorithm will try to get the Dortmund groups. If False
+        this will be skipped, by default "True".
 
     Attributes
     ----------
@@ -55,6 +60,7 @@ def __init__(
         identifier_type: str = "name",
         unifac: bool = True,
         psrk: bool = True,
+        dortmund: bool = True,
     ) -> None:
         self.identifier = identifier.lower()
         self.identifier_type = identifier_type.lower()
@@ -69,6 +75,9 @@ def __init__(
             self.smiles = pcp_object.canonical_smiles
             self.chem_object = Chem.MolFromSmiles(self.smiles)
 
+        # =====================================================================
+        # UNIFAC groups
+        # =====================================================================
         if unifac:
             self.unifac_groups = get_groups(
                 self.chem_object,
@@ -81,6 +90,9 @@ def __init__(
         else:
             self.unifac_groups = {}
 
+        # =====================================================================
+        # PSRK groups
+        # =====================================================================
         if psrk:
             self.psrk_groups = get_groups(
                 self.chem_object,
@@ -92,3 +104,18 @@ def __init__(
             )
         else:
             self.psrk_groups = {}
+
+        # =====================================================================
+        # Dortmund groups
+        # =====================================================================
+        if dortmund:
+            self.dortmund_groups = get_groups(
+                self.chem_object,
+                dortmund_subgroups,
+                dortmund_matrix,
+                psrk_ch2_hideouts,
+                psrk_ch_hideouts,
+                problematic_structures,
+            )
+        else:
+            self.dortmund_groups = {}