Skip to content

Commit

Permalink
improved clustenm fit incl replace filtered
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesmkrieger committed Oct 26, 2023
1 parent c13f73c commit b5b4b1b
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 36 deletions.
16 changes: 8 additions & 8 deletions prody/apps/prody_apps/prody_clustenm.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
('write_params', 'whether to write parameters', False),
('fitmap', 'map to fit by filtering conformations like MDeNMD-EMFit', None),
('fit_resolution', 'resolution for blurring structures for fitting cc', 5),
('map_cutoff', 'min_cutoff for passing map for fitting', 0)]:
('map_cutoff', 'min_cutoff for passing map for fitting', 0),
('replace_filtered', 'whether to keep sampling again to replace filtered conformers', False)]:

DEFAULTS[key] = val
HELPTEXT[key] = txt
Expand Down Expand Up @@ -143,12 +144,6 @@ def prody_clustenm(pdb, **kwargs):

nproc = kwargs.get('nproc')
if nproc:
try:
from threadpoolctl import threadpool_limits
except ImportError:
raise ImportError('Please install threadpoolctl to control threads')

with threadpool_limits(limits=nproc, user_api="blas"):
ens = prody.ClustENM(pdb.getTitle())
ens.setAtoms(select)
ens.run(n_gens=ngens, n_modes=nmodes,
Expand All @@ -161,7 +156,8 @@ def prody_clustenm(pdb, **kwargs):
outlier=outlier, mzscore=mzscore,
sparse=sparse, kdtree=kdtree, turbo=turbo,
parallel=parallel, fitmap=fitmap,
fit_resolution=fit_resolution, **kwargs)
fit_resolution=fit_resolution,
n_proc=nproc, **kwargs)
else:
ens = prody.ClustENM(pdb.getTitle())
ens.setAtoms(select)
Expand Down Expand Up @@ -352,6 +348,10 @@ def addCommand(commands):
group.add_argument('-u', '--map_cutoff', dest='map_cutoff', type=float,
default=DEFAULTS['map_cutoff'], metavar='FLOAT',
help=HELPTEXT['map_cutoff'] + ' (default: %(default)s)')

group.add_argument('-O', '--replace_filtered', dest='replace_filtered',
action='store_true',
default=DEFAULTS['replace_filtered'], help=HELPTEXT['replace_filtered'])

subparser.add_argument('pdb', help='PDB identifier or filename')

Expand Down
160 changes: 132 additions & 28 deletions prody/dynamics/clustenm.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,69 @@ def _targeted_sim(self, coords0, coords1, tmdk=15., d_steps=100, n_max_steps=100
LOGGER.warning('OpenMM exception: ' + be.__str__() + ' so the corresponding conformer will be discarded!')

return np.nan, np.full_like(coords0, np.nan)

def _excited_sim(self, coords0, coords1, tmdk=15., d_steps=100, n_max_steps=10000, ddtol=1e-3, n_conv=5):

try:
from openmm.unit import nanometer, kelvin, angstrom
except ImportError:
raise ImportError('Please install PDBFixer and OpenMM 7.6 in order to use ClustENM.')

coords1_ca = coords1[self._idx_cg, :]
pos1 = coords1 * angstrom
pos1_ca = pos1[self._idx_cg, :]

n_atoms = coords0.shape[0]
atom_indices = np.arange(n_atoms)
for i, atm_idx in enumerate(atom_indices):
vector = pos1[i, :].value_in_unit(nanometer) - pos2[i, :].value_in_unit(nanometer)

# automatic conversion into nanometer will be carried out.
simulation.context.setPositions(coords0 * angstrom)

dist = dist0 = calcRMSD(coords0, coords1)
m_conv = 0
n_steps = 0
try:
simulation.minimizeEnergy(tolerance=self._tolerance*kilojoule_per_mole,
maxIterations=self._maxIterations)

# update parameters
while n_steps < n_max_steps:
simulation.context.setParameter('tmdk', tmdk)
force.updateParametersInContext(simulation.context)

simulation.step(d_steps)
n_steps += d_steps

# evaluate distance to destination
pos = simulation.context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(angstrom)
d = calcRMSD(pos, coords1)
dd = np.abs(dist - d)

if dd < ddtol:
m_conv += 1

if m_conv >= n_conv:
break

dist = d

LOGGER.debug('RMSD: %4.2f -> %4.2f' % (dist0, dist))

simulation.context.setParameter('tmdk', 0.0)
simulation.minimizeEnergy(tolerance=self._tolerance*kilojoule_per_mole,
maxIterations=self._maxIterations)

pos = simulation.context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(angstrom)
pot = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilojoule_per_mole)

return pot, pos

except BaseException as be:
LOGGER.warning('OpenMM exception: ' + be.__str__() + ' so the corresponding conformer will be discarded!')

return np.nan, np.full_like(coords0, np.nan)

def _checkANM(self, anm):

Expand Down Expand Up @@ -485,7 +548,8 @@ def _buildANM(self, cg):

anm = ANM()
anm.buildHessian(cg, cutoff=self._cutoff, gamma=self._gamma,
sparse=self._sparse, kdtree=self._kdtree)
sparse=self._sparse, kdtree=self._kdtree,
nproc=self._nproc)

return anm

Expand Down Expand Up @@ -515,19 +579,56 @@ def _sample(self, conf):
tmp.setCoords(conf)
cg = tmp[self._idx_cg]

n_confs = self._n_confs

anm_cg = self._buildANM(cg)

if not self._checkANM(anm_cg):
return None

anm_cg.calcModes(self._n_modes, turbo=self._turbo)
anm_cg.calcModes(self._n_modes, turbo=self._turbo,
nproc=self._nproc)

anm_ex = self._extendModel(anm_cg, cg, tmp)
ens_ex = sampleModes(anm_ex, atoms=tmp,
n_confs=self._n_confs,
rmsd=self._rmsd[self._cycle])
coordsets = ens_ex.getCoordsets()

if self._fitmap is not None:
LOGGER.info('Filtering for fitting in generation %d ...' % self._cycle)

kept_coordsets = []
if self._fitmap is not None:
kept_coordsets.extend(self._filter(coordsets))
n_confs = n_confs - len(kept_coordsets)

if len(kept_coordsets) == 0:
while len(kept_coordsets) == 0:
anm_ex = self._extendModel(anm_cg, cg, tmp)
ens_ex = sampleModes(anm_ex, atoms=tmp,
n_confs=n_confs,
rmsd=self._rmsd[self._cycle])
coordsets = ens_ex.getCoordsets()

if self._fitmap is not None:
kept_coordsets.extend(self._filter(coordsets))
n_confs = n_confs - len(kept_coordsets)

if self._replace_filtered:
while n_confs > 0:
anm_ex = self._extendModel(anm_cg, cg, tmp)
ens_ex = sampleModes(anm_ex, atoms=tmp,
n_confs=n_confs,
rmsd=self._rmsd[self._cycle])
coordsets = ens_ex.getCoordsets()

if self._fitmap is not None:
kept_coordsets.extend(self._filter(coordsets))
n_confs = n_confs - len(kept_coordsets)

coordsets = np.array(kept_coordsets)

if self._targeted:
if self._parallel:
with Pool(cpu_count()) as p:
Expand All @@ -543,6 +644,9 @@ def _sample(self, conf):

LOGGER.debug('%d/%d sets of coordinates were moved to the target' % (len(poses), len(coordsets)))

elif self._excited:
LOGGER.warn('excited simulations are not implemented yet')

return coordsets

def _rmsds(self, coords):
Expand Down Expand Up @@ -610,28 +714,18 @@ def _filter(self, *args):

ag = self._atoms.copy()
confs = args[0]
wei = args[1]
ccList = np.zeros(len(args[1]))
ccList = np.zeros(len(args[0]))
for i in range(len(confs)-1,-1,-1):
ag.setCoords(confs[i])
sim_map = self._blurrer(ag.toTEMPyStructure(), self._fit_resolution, self._fitmap)
cc = self._scorer.CCC(sim_map, self._fitmap)
ccList[i] = cc
if cc - self._cc_prev < 0:
confs = np.delete(confs, i, 0)
wei = np.delete(wei, i, 0)

if len(wei) == 0:
confs = args[0]
wei = args[1]
LOGGER.info('There were no closer conformers in generation %d so filtering was not performed' % self._cycle)
self._cc.extend(ccList)

if ccList.max() > self._cc_prev:
self._cc_prev = ccList.max()

self._cc.append(ccList.max())

return confs, wei
return confs

def _generate(self, confs, **kwargs):

Expand All @@ -652,20 +746,23 @@ def _generate(self, confs, **kwargs):

confs_cg = confs_ex[:, self._idx_cg]

LOGGER.info('Clustering in generation %d ...' % self._cycle)
label_cg = self._hc(confs_cg)
centers, wei = self._centers(confs_cg, label_cg)
LOGGER.report('Centroids were generated in %.2fs.',
label='_clustenm_gen')

confs_centers = confs_ex[centers]
LOGGER.report('Conformers were sampled in %.2fs.',
label='_clustenm_gen')

if self._fitmap is not None:
LOGGER.info('Filtering for fitting in generation %d ...' % self._cycle)
confs_centers, wei = self._filter(confs_centers, wei)
LOGGER.report('Filtered centroids were generated in %.2fs.',
label='_clustenm_gen')
LOGGER.info('Best CC is %f from %d conformers' % (self._cc_prev, len(wei)))
self._cc_prev = max(self._cc)
LOGGER.info('Best CC is %f from %d conformers' % (self._cc_prev, len(confs_cg)))

if len(confs_cg) > 1:
LOGGER.info('Clustering in generation %d ...' % self._cycle)
label_cg = self._hc(confs_cg)
centers, wei = self._centers(confs_cg, label_cg)
LOGGER.report('Centroids were generated in %.2fs.',
label='_clustenm_gen')
else:
centers, wei = confs_cg, [len(confs_cg)]

confs_centers = confs_ex[centers]

return confs_centers, wei

Expand Down Expand Up @@ -1022,6 +1119,10 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0,
:arg fit_resolution: Resolution for comparing to cryo-EM map for fitting
Default 5 Angstroms
:type fit_resolution: float
:arg replace_filtered: If it is True (default is False), conformer sampling and filtering
will be repeated until the desired number of conformers have been kept.
:type replace_filtered: bool
'''

if self._isBuilt():
Expand All @@ -1034,6 +1135,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0,
self._sparse = kwargs.get('sparse', False)
self._kdtree = kwargs.get('kdtree', False)
self._turbo = kwargs.get('turbo', False)
self._nproc = kwargs.pop('nproc', None)
if kwargs.get('zeros', False):
LOGGER.warn('ClustENM cannot use zero modes so ignoring this kwarg')

Expand All @@ -1043,6 +1145,7 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0,
self._platform = kwargs.pop('platform', None)
self._parallel = kwargs.pop('parallel', False)
self._targeted = kwargs.pop('targeted', False)
self._excited = kwargs.pop('excited', False)
self._tmdk = kwargs.pop('tmdk', 15.)

self._fitmap = kwargs.pop('fitmap', None)
Expand All @@ -1056,7 +1159,8 @@ def run(self, cutoff=15., n_modes=3, gamma=1., n_confs=50, rmsd=1.0,

if self._fitmap is not None:
self._fitmap = self._fitmap.toTEMPyMap()
self._fit_resolution = kwargs.get('fit_resolution', 5)
self._fit_resolution = kwargs.pop('fit_resolution', 5)
self._replace_filtered = kwargs.pop('replace_filtered', False)

if maxclust is None and threshold is None and n_gens > 0:
raise ValueError('Either maxclust or threshold should be set!')
Expand Down

0 comments on commit b5b4b1b

Please sign in to comment.