Skip to content

Commit

Permalink
more joback test (aldehydes, esters, ketones), first problematic stru…
Browse files Browse the repository at this point in the history
…ctures for Joback detected and added to list
  • Loading branch information
SalvadorBrandolin committed Nov 7, 2023
1 parent 7dd4054 commit 61e5d35
Show file tree
Hide file tree
Showing 11 changed files with 458 additions and 12 deletions.
81 changes: 79 additions & 2 deletions tests/joback/test_alcohols.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,79 @@
# Gastrodigenin
# ("C1=CC(=CC=C1CO)O", {"ring=CH-": 4, "ACOH": 1, "ACCH2": 1, "OH": 1}, "smiles"),
import pytest

import ugropy as ug


# =============================================================================
# -OH (alcohol), -OH (phenol)
# =============================================================================
# Joback
# Each trial is a tuple of (identifier, expected Joback group counts,
# identifier type); the three fields are the arguments of the parametrized
# test that consumes this list.
trials = [
# 1,2-Cyclohexanediol, 4-tert-butyl-1-phenyl-, stereoisomer
(
"CC(C)(C)C1CCC(C(C1)O)(C2=CC=CC=C2)O",
{
"-CH3": 3,
">C<": 1,
"ring-CH2-": 3,
"ring>CH-": 2,
"ring>C<": 1,
"-OH (alcohol)": 2,
"ring=C<": 1,
"ring=CH-": 5,
},
"smiles",
),
# (2S,3S)-2-Methyl-1,3-hexanediol
(
"CCCC(C(C)CO)O",
{"-CH3": 2, "-CH2-": 3, ">CH-": 2, "-OH (alcohol)": 2},
"smiles",
),
# 2-propanol
("CC(C)O", {"-CH3": 2, ">CH-": 1, "-OH (alcohol)": 1}, "smiles"),
# carbonic acid: expected as one acid group plus one alcohol -OH
("OC(O)=O", {"-COOH (acid)": 1, "-OH (alcohol)": 1}, "smiles"),
# Phenanthrene-3,4-diol
(
"C1=CC=C2C(=C1)C=CC3=C2C(=C(C=C3)O)O",
{"ring=CH-": 8, "ring=C<": 6, "-OH (phenol)": 2},
"smiles",
),
# 3-(tert-butyl)benzene-1,2-diol
(
"CC(C)(C)C1=C(C(=CC=C1)O)O",
{"ring=CH-": 3, "ring=C<": 3, "-OH (phenol)": 2, "-CH3": 3, ">C<": 1},
"smiles",
),
# [1,1'-Biphenyl]-2,3',4-triol
(
"C1=CC(=CC(=C1)O)C2=C(C=C(C=C2)O)O",
{"ring=CH-": 7, "ring=C<": 5, "-OH (phenol)": 3},
"smiles",
),
# phenol
(
"C1=CC=C(C=C1)O",
{"ring=CH-": 5, "ring=C<": 1, "-OH (phenol)": 1},
"smiles",
),
# methanol
("CO", {"-CH3": 1, "-OH (alcohol)": 1}, "smiles"),
# Gastrodigenin: carries both a phenol -OH and a benzylic alcohol -OH
(
"C1=CC(=CC=C1CO)O",
{
"ring=CH-": 4,
"ring=C<": 2,
"-OH (phenol)": 1,
"-CH2-": 1,
"-OH (alcohol)": 1,
},
"smiles",
),
]


@pytest.mark.Joback
@pytest.mark.parametrize("identifier, result, identifier_type", trials)
def test_joback_alcohols(identifier, result, identifier_type):
    """Check the Joback groups returned for alcohol and phenol structures.

    Each parametrized case passes ``identifier`` and ``identifier_type`` to
    ``ug.get_joback_groups`` and requires the returned value to equal the
    expected ``result`` mapping exactly.
    """
    detected_groups = ug.get_joback_groups(identifier, identifier_type)
    assert detected_groups == result
72 changes: 72 additions & 0 deletions tests/joback/test_aldehydes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import pytest

import ugropy as ug


# =============================================================================
# O=CH- (aldehyde)
# =============================================================================
# Joback
# Each trial is a tuple of (identifier, expected Joback group counts,
# identifier type); the three fields are the arguments of the parametrized
# test that consumes this list.
trials = [
# glyoxal (ethanedial)
("C(=O)C=O", {"O=CH- (aldehyde)": 2}, "smiles"),
# salicylaldehyde
(
"C1=CC=C(C(=C1)C=O)O",
{
"ring=CH-": 4,
"-OH (phenol)": 1,
"ring=C<": 2,
"O=CH- (aldehyde)": 1,
},
"smiles",
),
# 2-Methyl-3-butenal
(
"CC(C=C)C=O",
{"-CH3": 1, ">CH-": 1, "CH2=CH-": 1, "O=CH- (aldehyde)": 1},
"smiles",
),
# Cinnamaldehyde
(
"C1=CC=C(C=C1)C=CC=O",
{"ring=CH-": 5, "ring=C<": 1, "-CH=CH-": 1, "O=CH- (aldehyde)": 1},
"smiles",
),
# benzaldehyde
(
"C1=CC=C(C=C1)C=O",
{"ring=CH-": 5, "ring=C<": 1, "O=CH- (aldehyde)": 1},
"smiles",
),
# cyclohexanecarbaldehyde
(
"C1CCC(CC1)C=O",
{
"ring-CH2-": 5,
"ring>CH-": 1,
"O=CH- (aldehyde)": 1,
},
"smiles",
),
# pentanal
("CCCCC=O", {"-CH3": 1, "-CH2-": 3, "O=CH- (aldehyde)": 1}, "smiles"),
# 3-methylbutanal
(
"CC(C)CC=O",
{"-CH3": 2, "-CH2-": 1, ">CH-": 1, "O=CH- (aldehyde)": 1},
"smiles",
),
# acetaldehyde
("CC=O", {"-CH3": 1, "O=CH- (aldehyde)": 1}, "smiles"),
# alpha-hexylcinnamaldehyde
# NOTE(review): the expected result is an empty dict; presumably this is a
# structure the Joback fragmentation cannot resolve yet -- confirm.
(
r"CCCCCC\C(C=O)=C/C1=CC=CC=C1",
{},
"smiles",
),
]


@pytest.mark.Joback
@pytest.mark.parametrize("identifier, result, identifier_type", trials)
def test_joback_aldehydes(identifier, result, identifier_type):
    """Check the Joback groups returned for aldehyde structures.

    Each parametrized case passes ``identifier`` and ``identifier_type`` to
    ``ug.get_joback_groups`` and requires the returned value to equal the
    expected ``result`` mapping exactly.
    """
    detected_groups = ug.get_joback_groups(identifier, identifier_type)
    assert detected_groups == result
76 changes: 76 additions & 0 deletions tests/joback/test_esters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pytest

import ugropy as ug


# =============================================================================
# -COO- (ester)
# =============================================================================
# Joback
# Each trial is a tuple of (identifier, expected Joback group counts,
# identifier type); the three fields are the arguments of the parametrized
# test that consumes this list.
trials = [
# Ascorbic acid
(
"OCC(O)C1OC(=O)C(O)=C1O",
{
"-COO- (ester)": 1,
"ring=C<": 2,
"-OH (alcohol)": 4,
">CH-": 1,
"ring>CH-": 1,
"-CH2-": 1,
},
"smiles",
),
# Procaine
(
"CCN(CC)CCOC(=O)C1=CC=C(N)C=C1",
{
"-NH2": 1,
"ring=CH-": 4,
"ring=C<": 2,
"-COO- (ester)": 1,
"-CH2-": 4,
"-CH3": 2,
">N- (non-ring)": 1,
},
"smiles",
),
# Cocaine
# NOTE(review): the expected result is an empty dict; presumably this is a
# structure the Joback fragmentation cannot resolve yet -- confirm.
(
"COC(=O)C1C2CCC(CC1OC(=O)C1=CC=CC=C1)N2C",
{},
"smiles",
),
# Methyl acrylate
("COC(=O)C=C", {"-CH3": 1, "CH2=CH-": 1, "-COO- (ester)": 1}, "smiles"),
# Aspirin
(
"CC(=O)OC1=CC=CC=C1C(=O)O",
{
"-CH3": 1,
"-COO- (ester)": 1,
"ring=C<": 2,
"ring=CH-": 4,
"-COOH (acid)": 1,
},
"smiles",
),
# Tert-butyl acetate
("CC(=O)OC(C)(C)C", {"-COO- (ester)": 1, "-CH3": 4, ">C<": 1}, "smiles"),
# triacetin
(
"CC(=O)OCC(COC(=O)C)OC(=O)C",
{"-CH3": 3, "-COO- (ester)": 3, "-CH2-": 2, ">CH-": 1},
"smiles",
),
# butyl propanoate
("CCCCOC(=O)CC", {"-CH3": 2, "-CH2-": 4, "-COO- (ester)": 1}, "smiles"),
# butyl acetate
("CCCCOC(=O)C", {"-CH3": 2, "-CH2-": 3, "-COO- (ester)": 1}, "smiles"),
]


@pytest.mark.Joback
@pytest.mark.parametrize("identifier, result, identifier_type", trials)
def test_joback_esters(identifier, result, identifier_type):
    """Check the Joback groups returned for ester structures.

    Each parametrized case passes ``identifier`` and ``identifier_type`` to
    ``ug.get_joback_groups`` and requires the returned value to equal the
    expected ``result`` mapping exactly.
    """
    detected_groups = ug.get_joback_groups(identifier, identifier_type)
    assert detected_groups == result
128 changes: 128 additions & 0 deletions tests/joback/test_ethers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import pytest

import ugropy as ug


# =============================================================================
# -O- (non-ring), -O- (ring)
# =============================================================================
# Joback
# NOTE(review): this list has no active cases -- every entry below is
# commented out. The expected dicts use group keys such as "ACH", "CH2O" or
# "THF" rather than the Joback names used in the other Joback test modules
# (e.g. "ring=CH-", "-CH2-"), so they presumably still need to be translated
# to Joback groups before being enabled -- confirm.
trials = [
# # 4-flavanol
# (
# "OC1CC(OC2=CC=CC=C12)C1=CC=CC=C1",
# {"ACH": 9, "AC": 2, "ACCH": 1, "CHO": 1, "CH2": 1, "OH": 1},
# "smiles",
# ),
# (
# "O[C@@H]1CO[C@H](O)[C@@H](O)[C@@H]1O",
# {"CH2O": 1, "CH": 4, "OH": 4},
# "smiles",
# ),
# ("C1COCCOCCOCCOC1", {"CH2O": 4, "CH2": 5}, "smiles"),
# ("C1COCCO1", {"CH2O": 2, "CH2": 2}, "smiles"),
# ("CCOCOCC", {"CH3": 2, "CH2O": 2, "CH2": 1}, "smiles"),
# ("C1COCO1", {"CH2O": 2, "CH2": 1}, "smiles"),
# ("C1COCCOCCOCCOCCOCCO1", {"CH2O": 6, "CH2": 6}, "smiles"),
# # tetrahydrofuran
# ("C1CCOC1", {"THF": 1}, "smiles"),
# # diisopropyl ether
# ("CC(C)OC(C)C", {"CH3": 4, "CH": 1, "CHO": 1}, "smiles"),
# # diethyl ether
# ("CCOCC", {"CH3": 2, "CH2": 1, "CH2O": 1}, "smiles"),
# # dimethyl ether
# ("COC", {"CH3": 1, "CH3O": 1}, "smiles"),
# # 2H-Pyran, 2-(cyclohexyloxy)tetrahydro-
# (
# "C1CCC(CC1)OC2CCCCO2",
# {"CH2": 8, "CH": 1, "CH2O": 1, "CHO": 1},
# "smiles",
# ),
# # Problematic ones
# (
# "COC(=O)OC1=CC=CC=C1",
# {"CH3O": 1, "COO": 1, "AC": 1, "ACH": 5},
# "smiles",
# ),
# (
# "CCOC(=O)OC1=CC=CC=C1",
# {"CH3": 1, "CH2O": 1, "COO": 1, "AC": 1, "ACH": 5},
# "smiles",
# ),
# (
# "CC(C)OC(=O)OC1=CC=CC=C1",
# {"CH3": 2, "CHO": 1, "COO": 1, "AC": 1, "ACH": 5},
# "smiles",
# ),
# ("CC(C)(C)OC(=O)OC1=CC=CC=C1", {}, "smiles"),
# # Benzyl 2-hydroxyethyl carbonate
# (
# "C1=CC=C(C=C1)COC(=O)OCCO",
# {"ACCH2": 1, "ACH": 5, "COO": 1, "C2H5O2": 1},
# "smiles",
# ),
# # tert-Butyl ethyl carbonate
# ("CCOC(=O)OC(C)(C)C", {"CH3": 4, "C": 1, "COO": 1, "CH2O": 1}, "smiles"),
# # Ethyl phenyl carbonate
# (
# "CCOC(=O)OC1=CC=CC=C1",
# {"CH3": 1, "AC": 1, "ACH": 5, "COO": 1, "CH2O": 1},
# "smiles",
# ),
# # Carbonic acid, ethyl 2,3,6-trimethylcyclohexyl ester
# (
# "CCOC(=O)OC1C(CCC(C1C)C)C",
# {"CH3": 4, "CH2": 2, "CH": 4, "COO": 1, "CH2O": 1},
# "smiles",
# ),
# # Diethyl carbonate
# ("CCOC(=O)OCC", {"CH3": 2, "CH2": 1, "COO": 1, "CH2O": 1}, "smiles"),
# # Methyl phenyl carbonate
# (
# "COC(=O)OC1=CC=CC=C1",
# {"AC": 1, "ACH": 5, "COO": 1, "CH3O": 1},
# "smiles",
# ),
# # tert-Butyl methyl carbonate
# ("CC(C)(C)OC(=O)OC", {"CH3": 3, "C": 1, "COO": 1, "CH3O": 1}, "smiles"),
# # Methyl isopropyl carbonate
# ("CC(C)OC(=O)OC", {"CH3": 2, "CH": 1, "COO": 1, "CH3O": 1}, "smiles"),
# # Ethyl methyl carbonate
# ("CCOC(=O)OC", {"CH3": 1, "CH2": 1, "COO": 1, "CH3O": 1}, "smiles"),
# # Dimethyl carbonate
# ("COC(=O)OC", {"CH3": 1, "COO": 1, "CH3O": 1}, "smiles"),
# # I hate ether group
# ("COCOC(C)OCOC", {"CH3O": 2, "CH2O": 2, "CH": 1, "CH3": 1}, "smiles"),
# (
# "CC(C)OCOC(C)OCOC(C)C",
# {"CH3": 5, "CH": 1, "HCO": 2, "CH2O": 2},
# "smiles",
# ),
# (
# "CC(C)OCOCC(OCOC(C)C)OCOC(C)C",
# {"CH3": 6, "CH": 2, "CH2O": 4, "HCO": 2},
# "smiles",
# ),
# (
# "CC(C)OCOC(OCOC(C)C)OCOC(C)C",
# {"CH3": 6, "HCO": 3, "CH2O": 3, "CH": 1},
# "smiles",
# ),
# ("CC(C)OCOC(C)C", {"CH3": 4, "HCO": 1, "CH2O": 1, "CH": 1}, "smiles"),
# ("CCOCOCC", {"CH3": 2, "CH2O": 2, "CH2": 1}, "smiles"),
# ("COCOC", {"CH3O": 2, "CH2": 1}, "smiles"),
# # Problematics with acids
# ("COC(O)=O", {"COOH": 1, "CH3O": 1}, "smiles"),
# ("CCOC(O)=O", {"COOH": 1, "CH2O": 1, "CH3": 1}, "smiles"),
# ("CC(C)OC(O)=O", {"COOH": 1, "CHO": 1, "CH3": 2}, "smiles"),
# ("CC(C)(C)OC(O)=O", {"OH": 1, "COO": 1, "C": 1, "CH3": 3}, "smiles"),
# ("OC(=O)OC1=CC=CC=C1", {"OH": 1, "COO": 1, "AC": 1, "ACH": 5}, "smiles"),
# # Impossibles
# ("C1COCON1", {}, "smiles"),
]


@pytest.mark.Joback
@pytest.mark.parametrize("identifier, result, identifier_type", trials)
def test_joback_ethers(identifier, result, identifier_type):
    """Check the Joback groups returned for ether structures.

    Each parametrized case passes ``identifier`` and ``identifier_type`` to
    ``ug.get_joback_groups`` and requires the returned value to equal the
    expected ``result`` mapping exactly.

    NOTE(review): ``trials`` currently contains no active cases, so pytest
    receives an empty parameter set for this test.
    """
    detected_groups = ug.get_joback_groups(identifier, identifier_type)
    assert detected_groups == result
Loading

0 comments on commit 61e5d35

Please sign in to comment.