Skip to content

Commit

Permalink
add rdDetermineBonds inferer
Browse files Browse the repository at this point in the history
  • Loading branch information
cbouy committed Dec 16, 2023
1 parent 77e5b35 commit 0009427
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 33 deletions.
2 changes: 1 addition & 1 deletion .github/actions/setup-deps/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ inputs:
pytng:
default: 'pytng>=0.2.3'
rdkit:
default: 'rdkit>=2020.03.1'
default: 'rdkit>=2022.09.1'
scikit-learn:
default: 'scikit-learn'
seaborn:
Expand Down
2 changes: 1 addition & 1 deletion azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ jobs:
parmed
pytng>=0.2.3
tidynamics>=1.0.0
rdkit>=2020.03.1
rdkit>=2022.09.1
displayName: 'Install additional dependencies for 64-bit tests'
condition: and(succeeded(), eq(variables['PYTHON_ARCH'], 'x64'))
- script: >-
Expand Down
20 changes: 20 additions & 0 deletions package/MDAnalysis/converters/RDKitInferring.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@
RDBONDORDER.update({str(key): value for key, value in RDBONDORDER.items()})
PERIODIC_TABLE = Chem.GetPeriodicTable()

with suppress(ImportError):
from rdkit.Chem.rdDetermineBonds import (
DetermineBondOrders, # available since 2022.09.1
)


def reorder_atoms(
mol: "Chem.Mol", field: str = "_MDAnalysis_index"
Expand Down Expand Up @@ -674,3 +679,18 @@ def assign_from_template_with_adjusted_hydrogens(
sanitize_mol(new)
# reorder atoms as input atomgroup (through _MDAnalysis_index)
return reorder_atoms(new, field=index_field)


@dataclass(frozen=True)
class RDKitInferer:
"""Uses RDKit's :func:`~rdkit.Chem.rdDetermineBonds.DetermineBondOrders`
to infer bond orders and formal charges. This is the same algorithm used
by the :ref:`xyz2mol <https://github.com/jensengroup/xyz2mol>` package.
"""

charge: int = 0

def __call__(self, mol: "Chem.Mol") -> "Chem.Mol":
new = Chem.Mol(mol)
DetermineBondOrders(new, charge=self.charge)
return new
2 changes: 1 addition & 1 deletion package/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ extra_formats = [
"pyedr>=0.7.0",
"pytng>=0.2.3",
"gsd>3.0.0",
"rdkit>=2020.03.1",
"rdkit>=2021.09.1",
]
analysis = [
"biopython>=1.80",
Expand Down
2 changes: 1 addition & 1 deletion package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ def long_description(readme):
'chemfiles>=0.10', # multiple formats supported by chemfiles
'pyedr>=0.7.0', # EDR files for the EDR AuxReader
'gsd>3.0.0', # GSD
'rdkit>=2020.03.1', # RDKit converter
'rdkit>=2021.09.1', # RDKit converter
'parmed', # ParmEd converter
],
'analysis': [
Expand Down
115 changes: 86 additions & 29 deletions testsuite/MDAnalysisTests/converters/test_rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
)
from MDAnalysis.converters.RDKitInferring import (
MDAnalysisInferer,
RDKitInferer,
TemplateInferer,
reorder_atoms,
sanitize_mol,
Expand Down Expand Up @@ -499,8 +500,7 @@ def test_reorder_atoms_key_error(self):
reorder_atoms(mol)


@requires_rdkit
class TestRDKitMDAnalysisInferer:
class BaseInferer:
def add_Hs_remove_bo_and_charges(self, mol):
"""Add hydrogens and remove bond orders and charges from a molecule"""
mH = Chem.AddHs(mol)
Expand All @@ -514,25 +514,6 @@ def add_Hs_remove_bo_and_charges(self, mol):
mH.UpdatePropertyCache(strict=False)
return mH

def enumerate_reordered_mol(self, mol):
"""Enumerates all possible starting atoms for a given molecule"""
# go through each possible starting atom
for root_atom in mol.GetAtoms():
smi = Chem.MolToSmiles(mol, rootedAtAtom=root_atom.GetIdx())
reordered_mol = Chem.MolFromSmiles(smi, sanitize=False)
for atom in reordered_mol.GetAtoms():
atom.SetNoImplicit(True)
reordered_mol.UpdatePropertyCache(strict=False)
yield reordered_mol

def assign_bond_orders_and_charges(self, mol):
"""Returns a sanitized molecule with infered bond orders and charges"""
inferer = MDAnalysisInferer()
inferer._infer_bo_and_charges(mol)
mol = inferer._standardize_patterns(mol)
Chem.SanitizeMol(mol)
return mol

def assert_isomorphic_resonance_structure(self, mol, ref):
"""Checks if 2 molecules are isomorphic using their resonance
structures
Expand All @@ -546,6 +527,24 @@ def assert_isomorphic_resonance_structure(self, mol, ref):
isomorphic
), f"{Chem.MolToSmiles(ref)} != {Chem.MolToSmiles(mol)}"


@requires_rdkit
class TestRDKitMDAnalysisInferer(BaseInferer):
def enumerate_reordered_mol(self, mol):
"""Enumerates all possible starting atoms for a given molecule"""
# go through each possible starting atom
for root_atom in mol.GetAtoms():
smi = Chem.MolToSmiles(mol, rootedAtAtom=root_atom.GetIdx())
reordered_mol = Chem.MolFromSmiles(smi, sanitize=False)
for atom in reordered_mol.GetAtoms():
atom.SetNoImplicit(True)
reordered_mol.UpdatePropertyCache(strict=False)
yield reordered_mol

@pytest.fixture(scope="class")
def inferer(self):
return MDAnalysisInferer()

@pytest.mark.parametrize(
"smi, out",
[
Expand Down Expand Up @@ -624,10 +623,10 @@ def test_infer_charges(self, smi, atom_idx, charge):
("[H]-[C]-[C]-[C]-[C]-[H]", "C#CC#C"),
],
)
def test_standardize_patterns(self, smi, out):
def test_standardize_patterns(self, inferer, smi, out):
mol = Chem.MolFromSmiles(smi, sanitize=False)
mol.UpdatePropertyCache(strict=False)
mol = self.assign_bond_orders_and_charges(mol)
mol = inferer(mol)
mol = Chem.RemoveHs(mol)
molref = Chem.MolFromSmiles(out)
assert is_isomorphic(mol, molref), "{} != {}".format(
Expand Down Expand Up @@ -671,7 +670,7 @@ def test_ignore_prop(self):
"O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]",
],
)
def test_transfer_properties(self, smi):
def test_transfer_properties(self, inferer, smi):
mol = Chem.MolFromSmiles(smi)
mol = self.add_Hs_remove_bo_and_charges(mol)
old = {}
Expand All @@ -680,7 +679,7 @@ def test_transfer_properties(self, smi):
atom.SetIntProp("_MDAnalysis_index", ix)
atom.SetProp("dummy", f"foo_{ix}")
old[ix] = {"_MDAnalysis_index": ix, "dummy": f"foo_{ix}"}
newmol = self.assign_bond_orders_and_charges(mol)
newmol = inferer(mol)
new = {}
for a in newmol.GetAtoms():
ix = a.GetIntProp("_MDAnalysis_index")
Expand Down Expand Up @@ -766,13 +765,13 @@ def test_transfer_properties(self, smi):
"N#Cc1c[nH]c(C(=O)Nc2ccc(-c3cccc[n+]3[O-])cc2C2=CCCCC2)n1 CHEMBL1172116",
],
)
def test_order_independant(self, smi):
def test_order_independant(self, inferer, smi):
# generate mol with hydrogens but without bond orders
ref = Chem.MolFromSmiles(smi)
stripped_mol = self.add_Hs_remove_bo_and_charges(ref)
# enumerate mols with different starting atoms
for m in self.enumerate_reordered_mol(stripped_mol):
m = self.assign_bond_orders_and_charges(m)
m = inferer(m)
m = Chem.RemoveHs(m)
self.assert_isomorphic_resonance_structure(m, ref)

Expand Down Expand Up @@ -830,10 +829,10 @@ def test_deprecation_warning_max_iter(self):
"[Na+].[Cl-]",
],
)
def test_ions(self, smi):
def test_ions(self, inferer, smi):
ref = Chem.MolFromSmiles(smi)
stripped_mol = self.add_Hs_remove_bo_and_charges(ref)
mol = self.assign_bond_orders_and_charges(stripped_mol)
mol = inferer(stripped_mol)
assert is_isomorphic(mol, ref)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -956,3 +955,61 @@ def test_adjust_hydrogens_bigger_mol(self):
mol = ag.convert_to.rdkit(inferer=inferer)
xyz = mol.GetConformer().GetPositions()
assert_array_equal(xyz, ag.positions)


@requires_rdkit
class TestRDKitInferer(BaseInferer):
@pytest.mark.parametrize(
"smi",
[
"c1ccc(cc1)-c1ccccc1-c1ccccc1",
"c1cc[nH]c1",
"c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1",
"c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21",
"c1csc(c1)-c1ccoc1-c1cc[nH]c1",
"C1=C2C(=NC=N1)N=CN2",
"c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1",
"C=CC=CC=CC=CC=CC=C",
"NCCCCC([NH3+])C(=O)[O-]",
"CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C",
"C#CC=C",
# amino acids
"C[C@H](N)C(=O)O", # A
"NCC(=O)O", # G
"CC[C@H](C)[C@H](N)C(=O)O", # I
"CC(C)C[C@H](N)C(=O)O", # L
"O=C(O)[C@@H]1CCCN1", # P
"CC(C)[C@H](N)C(=O)O", # V
"N[C@@H](Cc1ccccc1)C(=O)O", # F
"N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O", # W
"N[C@@H](Cc1ccc(O)cc1)C(=O)O", # Y
"N[C@@H](CC(=O)O)C(=O)O", # D
"N[C@@H](CCC(=O)O)C(=O)O", # E
"N=C(N)NCCC[C@H](N)C(=O)O", # R
"N[C@@H](Cc1c[nH]cn1)C(=O)O", # H
"NCCCC[C@H](N)C(=O)O", # K
"N[C@@H](CO)C(=O)O", # S
"C[C@@H](O)[C@H](N)C(=O)O", # T
"N[C@@H](CS)C(=O)O", # C
"CSCC[C@H](N)C(=O)O", # M
"NC(=O)C[C@H](N)C(=O)O", # N
"NC(=O)CC[C@H](N)C(=O)O", # Q
# nucleic acids
"Nc1ncnc2c1ncn2[C@H]1C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O1", # A
"Cc1cn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]c1=O", # T
"O=c1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # U
"Nc1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)n1", # C
"Nc1nc2c(ncn2[C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # G
],
)
def test_inferring(self, smi):
# generate mol with hydrogens but without bond orders
ref = Chem.MolFromSmiles(smi)
charge = sum(atom.GetFormalCharge() for atom in ref.GetAtoms())
inferer = RDKitInferer(charge=charge)
mol = Chem.AddHs(ref)
AllChem.EmbedMolecule(mol, randomSeed=0xAC1D1C)
stripped_mol = self.add_Hs_remove_bo_and_charges(mol)
new = inferer(stripped_mol)
new = Chem.RemoveHs(new)
self.assert_isomorphic_resonance_structure(new, ref)

0 comments on commit 0009427

Please sign in to comment.