Skip to content

Commit

Permalink
more dormund groups
Browse files Browse the repository at this point in the history
  • Loading branch information
SalvadorBrandolin committed Nov 2, 2023
1 parent 5bd9af6 commit 8fbd233
Show file tree
Hide file tree
Showing 7 changed files with 260 additions and 130 deletions.
88 changes: 0 additions & 88 deletions coso.ipynb

This file was deleted.

20 changes: 20 additions & 0 deletions tests/dortmund_new_groups/test_14_oh(s).py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest

import ugropy as ug


# =============================================================================
# 14 - OH (P), OH (S), OH (T)
# =============================================================================
# Dortmund
# Each trial is a tuple (identifier, result, identifier_type); none have been
# added yet.
trials_dortmund = []


@pytest.mark.OH
@pytest.mark.DORTMUND
@pytest.mark.parametrize("identifier, result, identifier_type", trials_dortmund)
def test_unifac_ch2(identifier, result, identifier_type):
    """Check the Dortmund groups obtained for one OH trial molecule.

    Parameters
    ----------
    identifier : str
        Molecule identifier passed to ``ug.Groups``.
    result : dict
        Expected Dortmund groups for the molecule.
    identifier_type : str
        Kind of identifier, forwarded to ``ug.Groups``.
    """
    # NOTE(review): the function name looks copied from the UNIFAC CH2 tests;
    # it is kept unchanged so existing test selections keep working.
    groups = ug.Groups(identifier, identifier_type)

    # Without this comparison the test could never fail.
    assert groups.dortmund_groups == result

80 changes: 80 additions & 0 deletions tools/build_dortmund_matrix.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"import pandas as pd\n",
"\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"with open(\"../ugropy/groupscsv/dortmund/dortmund_subgroups.csv\", mode='r') as f:\n",
"    df = pd.read_csv(f, sep='|', index_col=\"group\", comment=\"?\")\n",
"\n",
"# indexes of the groups matrix\n",
"index = df.index.to_numpy()\n",
"\n",
"# filled with zeros at start\n",
"matrix = np.zeros((len(index), len(index)), dtype=int)\n",
"\n",
"# build the matrix\n",
"dfm = pd.DataFrame(matrix, index=index, columns=index).rename_axis(\"group\")\n",
"\n",
"# fill the matrix\n",
"for group in df.index:\n",
"    string_contribution = df.loc[group].contribute\n",
"\n",
"    try:\n",
"        contribution = json.loads(string_contribution)  # transform into dict\n",
"    except (json.JSONDecodeError, TypeError):\n",
"        # show the offending row, then abort so no incomplete matrix is saved\n",
"        print(group)\n",
"        print(string_contribution)\n",
"        raise\n",
"\n",
"    for k, value in contribution.items():\n",
"        # one .loc[row, col] call: chained dfm.loc[group][k] may write to a copy\n",
"        dfm.loc[group, k] = value  # fill contribution\n",
"\n",
"# save the matrix\n",
"dfm.to_csv(\"../ugropy/groupscsv/dortmund/dortmund_matrix.csv\", sep=\"|\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ugropy",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
4 changes: 2 additions & 2 deletions ugropy/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@
f, sep="|", index_col="group", comment="?"
)

# with open(f"{here}/groupscsv/dortmund/dortmund_matrix.csv", mode="r") as f:
# dortmund_matrix = pd.read_csv(f, sep="|", index_col="group", comment="?")
with open(f"{here}/groupscsv/dortmund/dortmund_matrix.csv", mode="r") as f:
dortmund_matrix = pd.read_csv(f, sep="|", index_col="group", comment="?")

# with open(f"{here}/groupscsv/dortmund/ch2_hideouts.csv", mode="r") as f:
# dortmund_ch2_hideouts = pd.read_csv(
Expand Down
27 changes: 27 additions & 0 deletions ugropy/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from rdkit import Chem

from .constants import (
dortmund_matrix,
dortmund_subgroups,
problematic_structures,
psrk_ch2_hideouts,
psrk_ch_hideouts,
Expand Down Expand Up @@ -35,6 +37,9 @@ class Groups:
psrk : bool, optional
If True the algorithm will try to get the PSRK groups. If False this
will be skipped, by default "True".
dortmund : bool, optional
If True the algorithm will try to get the Dortmund groups. If False
this will be skipped, by default "True".
Attributes
----------
Expand All @@ -55,6 +60,7 @@ def __init__(
identifier_type: str = "name",
unifac: bool = True,
psrk: bool = True,
dortmund: bool = True,
) -> None:
self.identifier = identifier.lower()
self.identifier_type = identifier_type.lower()
Expand All @@ -69,6 +75,9 @@ def __init__(
self.smiles = pcp_object.canonical_smiles
self.chem_object = Chem.MolFromSmiles(self.smiles)

# =====================================================================
# UNIFAC groups
# =====================================================================
if unifac:
self.unifac_groups = get_groups(
self.chem_object,
Expand All @@ -81,6 +90,9 @@ def __init__(
else:
self.unifac_groups = {}

# =====================================================================
# PSRK groups
# =====================================================================
if psrk:
self.psrk_groups = get_groups(
self.chem_object,
Expand All @@ -92,3 +104,18 @@ def __init__(
)
else:
self.psrk_groups = {}

# =====================================================================
# Dortmund groups
# =====================================================================
if dortmund:
self.dortmund_groups = get_groups(
self.chem_object,
dortmund_subgroups,
dortmund_matrix,
psrk_ch2_hideouts,
psrk_ch_hideouts,
problematic_structures,
)
else:
self.dortmund_groups = {}
Loading

0 comments on commit 8fbd233

Please sign in to comment.