diff --git a/.github/actions/setup-deps/action.yaml b/.github/actions/setup-deps/action.yaml index 1fab1db8823..1a5377fabc3 100644 --- a/.github/actions/setup-deps/action.yaml +++ b/.github/actions/setup-deps/action.yaml @@ -73,7 +73,7 @@ inputs: pytng: default: 'pytng>=0.2.3' rdkit: - default: 'rdkit>=2020.03.1' + default: 'rdkit>=2022.09.1' scikit-learn: default: 'scikit-learn' seaborn: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 61d58be7e95..82fd7083a56 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -113,7 +113,7 @@ jobs: parmed pytng>=0.2.3 tidynamics>=1.0.0 - rdkit>=2020.03.1 + rdkit>=2022.09.1 displayName: 'Install additional dependencies for 64-bit tests' condition: and(succeeded(), eq(variables['PYTHON_ARCH'], 'x64')) - script: >- diff --git a/package/MDAnalysis/converters/RDKitInferring.py b/package/MDAnalysis/converters/RDKitInferring.py index 008a98383dc..81ea6b2dcf1 100644 --- a/package/MDAnalysis/converters/RDKitInferring.py +++ b/package/MDAnalysis/converters/RDKitInferring.py @@ -64,6 +64,11 @@ RDBONDORDER.update({str(key): value for key, value in RDBONDORDER.items()}) PERIODIC_TABLE = Chem.GetPeriodicTable() +with suppress(ImportError): + from rdkit.Chem.rdDetermineBonds import ( + DetermineBondOrders, # available since 2022.09.1 + ) + def reorder_atoms( mol: "Chem.Mol", field: str = "_MDAnalysis_index" @@ -674,3 +679,18 @@ def assign_from_template_with_adjusted_hydrogens( sanitize_mol(new) # reorder atoms as input atomgroup (through _MDAnalysis_index) return reorder_atoms(new, field=index_field) + + +@dataclass(frozen=True) +class RDKitInferer: + """Uses RDKit's :func:`~rdkit.Chem.rdDetermineBonds.DetermineBondOrders` + to infer bond orders and formal charges. This is the same algorithm used + by the :ref:`xyz2mol ` package. + """ + + charge: int = 0 + + def __call__(self, mol: "Chem.Mol") -> "Chem.Mol": + new = Chem.Mol(mol) + DetermineBondOrders(new, charge=self.charge) + return new diff --git a/package/pyproject.toml b/package/pyproject.toml index 34d00fa43d5..f418b80c838 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -81,7 +81,7 @@ extra_formats = [ "pyedr>=0.7.0", "pytng>=0.2.3", "gsd>3.0.0", - "rdkit>=2020.03.1", + "rdkit>=2021.09.1", ] analysis = [ "biopython>=1.80", diff --git a/package/setup.py b/package/setup.py index 20833734c99..908dd2d84d9 100755 --- a/package/setup.py +++ b/package/setup.py @@ -654,7 +654,7 @@ def long_description(readme): 'chemfiles>=0.10', # multiple formats supported by chemfiles 'pyedr>=0.7.0', # EDR files for the EDR AuxReader 'gsd>3.0.0', # GSD - 'rdkit>=2020.03.1', # RDKit converter + 'rdkit>=2021.09.1', # RDKit converter 'parmed', # ParmEd converter ], 'analysis': [ diff --git a/testsuite/MDAnalysisTests/converters/test_rdkit.py b/testsuite/MDAnalysisTests/converters/test_rdkit.py index 7e49ccf2fad..ad1e2682a51 100644 --- a/testsuite/MDAnalysisTests/converters/test_rdkit.py +++ b/testsuite/MDAnalysisTests/converters/test_rdkit.py @@ -50,6 +50,7 @@ ) from MDAnalysis.converters.RDKitInferring import ( MDAnalysisInferer, + RDKitInferer, TemplateInferer, reorder_atoms, sanitize_mol, @@ -499,8 +500,7 @@ def test_reorder_atoms_key_error(self): reorder_atoms(mol) -@requires_rdkit -class TestRDKitMDAnalysisInferer: +class BaseInferer: def add_Hs_remove_bo_and_charges(self, mol): """Add hydrogens and remove bond orders and charges from a molecule""" mH = Chem.AddHs(mol) @@ -514,25 +514,6 @@ def add_Hs_remove_bo_and_charges(self, mol): mH.UpdatePropertyCache(strict=False) return mH - def enumerate_reordered_mol(self, mol): - """Enumerates all possible starting atoms for a given molecule""" - # go through each possible starting atom - for root_atom in mol.GetAtoms(): - smi = Chem.MolToSmiles(mol, rootedAtAtom=root_atom.GetIdx()) - reordered_mol = Chem.MolFromSmiles(smi, sanitize=False) - for atom in reordered_mol.GetAtoms(): - atom.SetNoImplicit(True) - reordered_mol.UpdatePropertyCache(strict=False) - yield reordered_mol - - def assign_bond_orders_and_charges(self, mol): - """Returns a sanitized molecule with infered bond orders and charges""" - inferer = MDAnalysisInferer() - inferer._infer_bo_and_charges(mol) - mol = inferer._standardize_patterns(mol) - Chem.SanitizeMol(mol) - return mol - def assert_isomorphic_resonance_structure(self, mol, ref): """Checks if 2 molecules are isomorphic using their resonance structures @@ -546,6 +527,24 @@ def assert_isomorphic_resonance_structure(self, mol, ref): isomorphic ), f"{Chem.MolToSmiles(ref)} != {Chem.MolToSmiles(mol)}" + +@requires_rdkit +class TestRDKitMDAnalysisInferer(BaseInferer): + def enumerate_reordered_mol(self, mol): + """Enumerates all possible starting atoms for a given molecule""" + # go through each possible starting atom + for root_atom in mol.GetAtoms(): + smi = Chem.MolToSmiles(mol, rootedAtAtom=root_atom.GetIdx()) + reordered_mol = Chem.MolFromSmiles(smi, sanitize=False) + for atom in reordered_mol.GetAtoms(): + atom.SetNoImplicit(True) + reordered_mol.UpdatePropertyCache(strict=False) + yield reordered_mol + + @pytest.fixture(scope="class") + def inferer(self): + return MDAnalysisInferer() + @pytest.mark.parametrize( "smi, out", [ @@ -624,10 +623,10 @@ def test_infer_charges(self, smi, atom_idx, charge): ("[H]-[C]-[C]-[C]-[C]-[H]", "C#CC#C"), ], ) - def test_standardize_patterns(self, smi, out): + def test_standardize_patterns(self, inferer, smi, out): mol = Chem.MolFromSmiles(smi, sanitize=False) mol.UpdatePropertyCache(strict=False) - mol = self.assign_bond_orders_and_charges(mol) + mol = inferer(mol) mol = Chem.RemoveHs(mol) molref = Chem.MolFromSmiles(out) assert is_isomorphic(mol, molref), "{} != {}".format( @@ -671,7 +670,7 @@ def test_ignore_prop(self): "O=C([C@H](CC1=C[NH1+]=CN1)[NH3+])[O-]", ], ) - def test_transfer_properties(self, smi): + def test_transfer_properties(self, inferer, smi): mol = Chem.MolFromSmiles(smi) mol = self.add_Hs_remove_bo_and_charges(mol) old = {} @@ -680,7 +679,7 @@ def test_transfer_properties(self, smi): atom.SetIntProp("_MDAnalysis_index", ix) atom.SetProp("dummy", f"foo_{ix}") old[ix] = {"_MDAnalysis_index": ix, "dummy": f"foo_{ix}"} - newmol = self.assign_bond_orders_and_charges(mol) + newmol = inferer(mol) new = {} for a in newmol.GetAtoms(): ix = a.GetIntProp("_MDAnalysis_index") @@ -766,13 +765,13 @@ def test_transfer_properties(self, smi): "N#Cc1c[nH]c(C(=O)Nc2ccc(-c3cccc[n+]3[O-])cc2C2=CCCCC2)n1 CHEMBL1172116", ], ) - def test_order_independant(self, smi): + def test_order_independant(self, inferer, smi): # generate mol with hydrogens but without bond orders ref = Chem.MolFromSmiles(smi) stripped_mol = self.add_Hs_remove_bo_and_charges(ref) # enumerate mols with different starting atoms for m in self.enumerate_reordered_mol(stripped_mol): - m = self.assign_bond_orders_and_charges(m) + m = inferer(m) m = Chem.RemoveHs(m) self.assert_isomorphic_resonance_structure(m, ref) @@ -830,10 +829,10 @@ def test_deprecation_warning_max_iter(self): "[Na+].[Cl-]", ], ) - def test_ions(self, smi): + def test_ions(self, inferer, smi): ref = Chem.MolFromSmiles(smi) stripped_mol = self.add_Hs_remove_bo_and_charges(ref) - mol = self.assign_bond_orders_and_charges(stripped_mol) + mol = inferer(stripped_mol) assert is_isomorphic(mol, ref) @pytest.mark.parametrize( @@ -956,3 +955,61 @@ def test_adjust_hydrogens_bigger_mol(self): mol = ag.convert_to.rdkit(inferer=inferer) xyz = mol.GetConformer().GetPositions() assert_array_equal(xyz, ag.positions) + + +@requires_rdkit +class TestRDKitInferer(BaseInferer): + @pytest.mark.parametrize( + "smi", + [ + "c1ccc(cc1)-c1ccccc1-c1ccccc1", + "c1cc[nH]c1", + "c1ccc(cc1)-c1ccc(-c2ccccc2)c(-c2ccccc2)c1-c1ccccc1", + "c1ccc2c(c1)c1ccccc1c1ccccc1c1ccccc1c1ccccc21", + "c1csc(c1)-c1ccoc1-c1cc[nH]c1", + "C1=C2C(=NC=N1)N=CN2", + "c1c[nH]c(c1)-c1ccc(s1)-c1ccoc1-c1c[nH]cc1-c1ccccc1", + "C=CC=CC=CC=CC=CC=C", + "NCCCCC([NH3+])C(=O)[O-]", + "CC(C=CC1=C(C)CCCC1(C)C)=CC=CC(C)=CC=[NH+]C", + "C#CC=C", + # amino acids + "C[C@H](N)C(=O)O", # A + "NCC(=O)O", # G + "CC[C@H](C)[C@H](N)C(=O)O", # I + "CC(C)C[C@H](N)C(=O)O", # L + "O=C(O)[C@@H]1CCCN1", # P + "CC(C)[C@H](N)C(=O)O", # V + "N[C@@H](Cc1ccccc1)C(=O)O", # F + "N[C@@H](Cc1c[nH]c2ccccc12)C(=O)O", # W + "N[C@@H](Cc1ccc(O)cc1)C(=O)O", # Y + "N[C@@H](CC(=O)O)C(=O)O", # D + "N[C@@H](CCC(=O)O)C(=O)O", # E + "N=C(N)NCCC[C@H](N)C(=O)O", # R + "N[C@@H](Cc1c[nH]cn1)C(=O)O", # H + "NCCCC[C@H](N)C(=O)O", # K + "N[C@@H](CO)C(=O)O", # S + "C[C@@H](O)[C@H](N)C(=O)O", # T + "N[C@@H](CS)C(=O)O", # C + "CSCC[C@H](N)C(=O)O", # M + "NC(=O)C[C@H](N)C(=O)O", # N + "NC(=O)CC[C@H](N)C(=O)O", # Q + # nucleic acids + "Nc1ncnc2c1ncn2[C@H]1C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O1", # A + "Cc1cn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]c1=O", # T + "O=c1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # U + "Nc1ccn([C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)n1", # C + "Nc1nc2c(ncn2[C@H]2C[C@H](OP(=O)(O)O)[C@@H](COP(=O)(O)O)O2)c(=O)[nH]1", # G + ], + ) + def test_inferring(self, smi): + # generate mol with hydrogens but without bond orders + ref = Chem.MolFromSmiles(smi) + charge = sum(atom.GetFormalCharge() for atom in ref.GetAtoms()) + inferer = RDKitInferer(charge=charge) + mol = Chem.AddHs(ref) + AllChem.EmbedMolecule(mol, randomSeed=0xAC1D1C) + stripped_mol = self.add_Hs_remove_bo_and_charges(mol) + new = inferer(stripped_mol) + new = Chem.RemoveHs(new) + self.assert_isomorphic_resonance_structure(new, ref)