Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve basic support for elements (single-atom residues, metal, noble gas) that are unsupported by ad4 #259

Draft
wants to merge 12 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 29 additions & 39 deletions meeko/chemtempgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,23 @@
import atexit
import os

from meeko.utils.rdkitutils import covalent_radius
from meeko.utils.autodock4_atom_types_elements import autodock4_atom_types_elements
from meeko.utils.utils import is_metal
autodock4_elements = {v for k,v in autodock4_atom_types_elements.items()}

from rdkit import Chem
from rdkit.Chem import rdmolops
periodic_table = Chem.GetPeriodicTable()

from rdkit import RDLogger
import sys, logging

logger = logging.getLogger(__name__)


list_of_AD_elements_as_AtomicNum = list(covalent_radius.keys())
metal_AtomicNums = {12, 20, 25, 26, 30} # Mg: 12, Ca: 20, Mn: 25, Fe: 26, Zn: 30
list_of_AD_elements_as_AtomicNum = list(periodic_table.GetAtomicNumber(element) for element in autodock4_elements)
metal_AtomicNums = {atomic_num for atomic_num in list_of_AD_elements_as_AtomicNum if is_metal(atomic_num)}

# Utility Functions
def mol_contains_unexpected_element(mol: Chem.Mol, allowed_elements: list[str] = list_of_AD_elements_as_AtomicNum) -> bool:
"""Check if mol contains unexpected elements"""
for atom in mol.GetAtoms():
if atom.GetAtomicNum() not in allowed_elements:
return True
return False


def get_atom_idx_by_names(mol: Chem.Mol, wanted_names: set[str] = set()) -> set[int]:

if not wanted_names:
Expand Down Expand Up @@ -380,11 +374,6 @@ def from_cif(cls, source_cif: str, resname: str):
rdkit_atom.SetFormalCharge(int(target_charge)) # this needs to be int for rdkit
rwmol.AddAtom(rdkit_atom)

# Check if rwmol contains unexpected elements
if mol_contains_unexpected_element(rwmol):
logger.warning(f"Molecule contains unexpected elements -> template for {resname} will be None. ")
return None

# Map atom_id (atom names) with rdkit idx
name_to_idx_mapping = {atom.GetProp('atom_id'): idx for (idx, atom) in enumerate(rwmol.GetAtoms())}

Expand All @@ -395,28 +384,29 @@ def from_cif(cls, source_cif: str, resname: str):
'value_order', # bond order
]
bond_table = block.find(bond_category, bond_attributes)
bond_cols = {attr: bond_table.find_column(f"{bond_category}{attr}") for attr in bond_attributes}

# Connect atoms by bonds
bond_type_mapping = {
'SING': Chem.BondType.SINGLE,
'DOUB': Chem.BondType.DOUBLE,
'TRIP': Chem.BondType.TRIPLE,
'AROM': Chem.BondType.AROMATIC
}
bond_types = bond_cols['value_order']

for bond_i, bond_type in enumerate(bond_types):
rwmol.AddBond(name_to_idx_mapping[bond_cols['atom_id_1'][bond_i].strip('"')],
name_to_idx_mapping[bond_cols['atom_id_2'][bond_i].strip('"')],
bond_type_mapping.get(bond_type, Chem.BondType.UNSPECIFIED))

# Try recharging mol (for metals)
try:
rwmol = recharge(rwmol)
except Exception as e:
logger.error(f"Failed to recharge rdkitmol. Error: {e} -> template for {resname} will be None. ")
return None
if bond_table:
bond_cols = {attr: bond_table.find_column(f"{bond_category}{attr}") for attr in bond_attributes}

# Connect atoms by bonds
bond_type_mapping = {
'SING': Chem.BondType.SINGLE,
'DOUB': Chem.BondType.DOUBLE,
'TRIP': Chem.BondType.TRIPLE,
'AROM': Chem.BondType.AROMATIC
}
bond_types = bond_cols['value_order']

for bond_i, bond_type in enumerate(bond_types):
rwmol.AddBond(name_to_idx_mapping[bond_cols['atom_id_1'][bond_i].strip('"')],
name_to_idx_mapping[bond_cols['atom_id_2'][bond_i].strip('"')],
bond_type_mapping.get(bond_type, Chem.BondType.UNSPECIFIED))

# Try recharging mol (for metals)
try:
rwmol = recharge(rwmol)
except Exception as e:
logger.error(f"Failed to recharge rdkitmol. Error: {e} -> template for {resname} will be None. ")
return None

# Finish eidting mol
try:
Expand Down
9 changes: 8 additions & 1 deletion meeko/cli/mk_prepare_receptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,10 @@ def main():
except PolymerCreationError as e:
print(e)
sys.exit(1)
else:
print(f"Unsupported input file format '{ext}'. Supported formats: {SUPPORTED_PRODY_FORMATS.keys()}.")
sys.exit(1)

else:
with open(args.read_pdb) as f:
pdb_string = f.read()
Expand Down Expand Up @@ -628,7 +632,10 @@ def main():
written_files_log["description"].append("parameterized receptor")

if args.write_pdb is not None:
fn = args.write_pdb[0]
if args.write_pdb:
fn = args.write_pdb[0]
else:
fn = str(outpath) + ".pdb"
with open(fn, "w") as f:
f.write(polymer.to_pdb())
written_files_log["filename"].append(fn)
Expand Down
5 changes: 2 additions & 3 deletions meeko/export_flexres.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
from rdkit.Chem import rdDetermineBonds
from rdkit.Geometry import Point3D
from .utils.utils import parse_begin_res
from .utils.utils import mini_periodic_table
from .rdkit_mol_create import RDKitMolCreate
from .polymer import Polymer

periodic_table = Chem.GetPeriodicTable()

mini_periodic_table = {v: k for k, v in mini_periodic_table.items()}

def sidechain_to_mol(pdbqt_atoms):
positions = []
Expand All @@ -21,7 +20,7 @@ def sidechain_to_mol(pdbqt_atoms):
element = row["name"][1]
elif len(row["name"]) > 0:
element = row["name"][0]
atomic_nr = mini_periodic_table[element]
atomic_nr = periodic_table.GetAtomicNumber(element)
atom = Chem.Atom(atomic_nr)
mol.AddAtom(atom)
x, y, z = row["xyz"]
Expand Down
16 changes: 14 additions & 2 deletions meeko/molecule_pdbqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,9 +319,21 @@ def _identify_bonds(atom_idx, positions, atom_types):

for atom_i, position, atom_type in zip(atom_idx, positions, atom_types):
distances, indices = KDTree.query(position, k=k)
if atom_type not in autodock4_atom_types_elements:
continue
if autodock4_atom_types_elements[atom_type] not in covalent_radius:
continue
r_cov = covalent_radius[autodock4_atom_types_elements[atom_type]]

optimal_distances = [bond_allowance_factor * (r_cov + covalent_radius[autodock4_atom_types_elements[atom_types[i]]]) for i in indices[1:]]

nei_cov_list = []
for i in indices[1:]:
nei_atom_type = atom_types[i]
nei_cov = 0.
if nei_atom_type in autodock4_atom_types_elements:
if autodock4_atom_types_elements[nei_atom_type] in covalent_radius:
nei_cov = covalent_radius[autodock4_atom_types_elements[nei_atom_type]]
nei_cov_list.append(nei_cov)
optimal_distances = [bond_allowance_factor * (r_cov + nei_cov) for nei_cov in nei_cov_list]
bonds[atom_i] = atom_idx[indices[1:][np.where(distances[1:] < optimal_distances)]].tolist()

return bonds
Expand Down
4 changes: 2 additions & 2 deletions meeko/molsetup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@
else:
_has_misctools = True


from .utils import rdkitutils
periodic_table = Chem.GetPeriodicTable()

# region DEFAULT VALUES
DEFAULT_PDBINFO = None
Expand Down Expand Up @@ -1254,7 +1254,7 @@ def write_coord_string(self) -> str:
if self.atoms[index].is_dummy:
continue
if not self.atoms[index].is_pseudo_atom:
element = utils.mini_periodic_table[self.atoms[index].atomic_num]
element = periodic_table.GetElementSymbol(self.atoms[index].atomic_num)
x, y, z = self.atoms[index].coord
output_string += "%3s %12.6f %12.6f %12.6f\n" % (element, x, y, z)
return output_string
Expand Down
7 changes: 3 additions & 4 deletions meeko/openff_xml_parser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from rdkit import Chem
import math
import pathlib
import xml.etree.ElementTree as ET

from rdkit import Chem

from .utils.utils import mini_periodic_table
periodic_table = Chem.GetPeriodicTable()


def load_openff():
Expand Down Expand Up @@ -222,7 +221,7 @@ def assign_atypes(vdw_list, use_openff_id=True, force_uppercase=True):
atom = mol.GetAtomWithIdx(
v["IDX"][0]
) # consider only the first if multiple IDX
element = mini_periodic_table[atom.GetAtomicNum()]
element = periodic_table.GetElementSymbol(atom.GetAtomicNum())
atomic_numbers.append(atom.GetAtomicNum())
used_numbers.setdefault(element, set())
off_id = v["id"]
Expand Down
50 changes: 35 additions & 15 deletions meeko/polymer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
from .molsetup import RDKitMoleculeSetup
from .molsetup import MoleculeSetupEncoder
from .utils.jsonutils import rdkit_mol_from_json
from .utils.rdkitutils import mini_periodic_table
from .utils.rdkitutils import react_and_map
from .utils.rdkitutils import AtomField
from .utils.rdkitutils import build_one_rdkit_mol_per_altloc
from .utils.rdkitutils import _aux_altloc_mol_build
from .utils.rdkitutils import covalent_radius
from .utils.covalent_radius_table import covalent_radius
from .utils.autodock4_atom_types_elements import autodock4_atom_types_elements
from .utils.utils import is_metal, is_noble
autodock4_elements = {v for k,v in autodock4_atom_types_elements.items()}
from .utils.pdbutils import PDBAtomInfo
from .chemtempgen import export_chem_templates_to_json
from .chemtempgen import build_noncovalent_CC
Expand Down Expand Up @@ -93,7 +95,6 @@ def prody_to_rdkit(*args):
],
}


def find_graph_paths(graph, start_node, end_nodes, current_path=(), paths_found=()):
"""
Recursively finds all paths between start and end nodes.
Expand Down Expand Up @@ -135,9 +136,7 @@ def find_inter_mols_bonds(mols_dict):
"""

allowance = 1.2 # vina uses 1.1 but covalent radii are shorter here
max_possible_covalent_radius = (
2 * allowance * max([r for k, r in covalent_radius.items()])
)
max_possible_covalent_radius = (2 * allowance * covalent_radius["I"])
cubes_min = []
cubes_max = []
for key, (mol, _) in mols_dict.items():
Expand All @@ -148,6 +147,16 @@ def find_inter_mols_bonds(mols_dict):
pairs_to_consider = []
keys = list(mols_dict)
for i in range(len(mols_dict)):
atomic_nums = [atom.GetAtomicNum() for atom in mols_dict[keys[i]][0].GetAtoms()]
elements = [periodic_table.GetElementSymbol(atomic_num) for atomic_num in atomic_nums]
unsupported_element_idx = [i for i, element in enumerate(elements) if element not in autodock4_elements]
unbound_element_idx = [i for i, atomic_num in enumerate(atomic_nums) if is_metal(atomic_num) or is_noble(atomic_num)]
if unsupported_element_idx:
logger.warning(f"Input residue {keys[i]}:{mols_dict[keys[i]][1]} atoms #{unsupported_element_idx} do not have an AutoDock4 atom type. ")
if unbound_element_idx:
logger.warning(f"Input residue {keys[i]}:{mols_dict[keys[i]][1]} atoms #{unbound_element_idx} is metal or noble gas. " + eol +
"No intermol bond perception will be made involving a metal or noble gas. ")
continue
for j in range(i + 1, len(mols_dict)):
do_consider = True
for d in range(3):
Expand All @@ -164,18 +173,18 @@ def find_inter_mols_bonds(mols_dict):
p1 = mols_dict[keys[i]][0].GetConformer().GetPositions()
p2 = mols_dict[keys[j]][0].GetConformer().GetPositions()
for a1 in mols_dict[keys[i]][0].GetAtoms():
if is_metal(a1.GetAtomicNum()) or is_noble(a1.GetAtomicNum()):
continue
for a2 in mols_dict[keys[j]][0].GetAtoms():
if is_metal(a2.GetAtomicNum()) or is_noble(a2.GetAtomicNum()):
continue

vec = p1[a1.GetIdx()] - p2[a2.GetIdx()]
distsqr = np.dot(vec, vec)

# check if atom has implemented covalent radius
for atom in [a1, a2]:
if atom.GetAtomicNum() not in covalent_radius:
raise RuntimeError(f"Element {periodic_table.GetElementSymbol(atom.GetAtomicNum())} doesn't have an implemented covalent radius, which was required for the perception of intermolecular bonds. ")

cov_dist = (
covalent_radius[a1.GetAtomicNum()]
+ covalent_radius[a2.GetAtomicNum()]
covalent_radius[periodic_table.GetElementSymbol(a1.GetAtomicNum())]
+ covalent_radius[periodic_table.GetElementSymbol(a2.GetAtomicNum())]
)
if distsqr < (allowance * cov_dist) ** 2:
key = (keys[i], keys[j])
Expand Down Expand Up @@ -842,6 +851,17 @@ def __init__(

bonded_unknown_res = {res_id: all_unknown_res[res_id] for res_id in all_unknown_res
if any(res_id in respair for respair in bonds)}
single_atom_res= set()
for res_id in bonded_unknown_res:
raw_input_mol = raw_input_mols[res_id][0]
atoms_in_mol = [atom for atom in raw_input_mol.GetAtoms()]
if len(atoms_in_mol)==1:
atom = atoms_in_mol[0]
atomic_num = atom.GetAtomicNum()
if is_metal(atomic_num) or is_noble(atomic_num):
single_atom_res.add(res_id)
for res_id in single_atom_res:
bonded_unknown_res.pop(res_id)

unbound_unknown_res = all_unknown_res.copy()
for key in bonded_unknown_res:
Expand Down Expand Up @@ -1784,7 +1804,7 @@ def to_pdb(self, new_positions: Optional[dict]=None):

pdbout = ""
atom_count = 0
pdb_line = "{:6s}{:5d} {:^4s} {:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f} {:2s} "
pdb_line = "{:6s}{:5d} {:^4s} {:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f} {:2s} "
pdb_line += eol
for res_id in self.get_valid_monomers():
rdkit_mol = self.monomers[res_id].rdkit_mol
Expand All @@ -1809,7 +1829,7 @@ def to_pdb(self, new_positions: Optional[dict]=None):
props = atom.GetPropsAsDict()
atom_name = self.monomers[res_id].atom_names[i]
x, y, z = positions[i]
element = mini_periodic_table[atom.GetAtomicNum()]
element = periodic_table.GetElementSymbol(atom.GetAtomicNum())
pdbout += pdb_line.format(
"ATOM",
atom_count,
Expand Down
16 changes: 14 additions & 2 deletions meeko/receptor_pdbqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,21 @@ def _identify_bonds(atom_idx, positions, atom_types):

for atom_i, position, atom_type in zip(atom_idx, positions, atom_types):
distances, indices = KDTree.query(position, k=k)
if atom_type not in autodock4_atom_types_elements:
continue
if autodock4_atom_types_elements[atom_type] not in covalent_radius:
continue
r_cov = covalent_radius[autodock4_atom_types_elements[atom_type]]

optimal_distances = [bond_allowance_factor * (r_cov + covalent_radius[autodock4_atom_types_elements[atom_types[i]]]) for i in indices[1:]]

nei_cov_list = []
for i in indices[1:]:
nei_atom_type = atom_types[i]
nei_cov = 0.
if nei_atom_type in autodock4_atom_types_elements:
if autodock4_atom_types_elements[nei_atom_type] in covalent_radius:
nei_cov = covalent_radius[autodock4_atom_types_elements[nei_atom_type]]
nei_cov_list.append(nei_cov)
optimal_distances = [bond_allowance_factor * (r_cov + nei_cov) for nei_cov in nei_cov_list]
bonds[atom_i] = atom_idx[indices[1:][np.where(distances[1:] < optimal_distances)]].tolist()

return bonds
Expand Down
29 changes: 2 additions & 27 deletions meeko/utils/rdkitutils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from rdkit import Chem
from rdkit.Chem import rdChemReactions
from .utils import mini_periodic_table
from .pdbutils import PDBAtomInfo
from rdkit.Geometry import Point3D
from rdkit.Chem import rdDetermineBonds
from .pdbutils import PDBAtomInfo

periodic_table = Chem.GetPeriodicTable()

Expand All @@ -30,7 +29,7 @@ def getPdbInfoNoNull(atom):
if atomic_number == 0:
name = "%-2s" % "*"
else:
name = "%-2s" % mini_periodic_table[atomic_number]
name = "%-2s" % periodic_table.GetElementSymbol(atomic_number)
chain = " "
resNum = 1
icode = ""
Expand Down Expand Up @@ -249,27 +248,3 @@ def react_and_map(reactants: tuple[Chem.Mol], rxn: rdChemReactions.ChemicalReact
outcomes.append((product, index_map))

return outcomes


covalent_radius = { # from wikipedia
1: 0.31,
5: 0.84,
6: 0.76,
7: 0.71,
8: 0.66,
9: 0.57,
12: 0.00, # hack to avoid bonds with metals
14: 1.11,
15: 1.07,
16: 1.05,
17: 1.02,
# 19: 2.03,
20: 0.00,
# 24: 1.39,
25: 0.00, # hack to avoid bonds with metals
26: 0.00,
30: 0.00, # hack to avoid bonds with metals
# 34: 1.20,
35: 1.20,
53: 1.39,
}
Loading