Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parallel insty #1945

Merged
merged 16 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
273 changes: 185 additions & 88 deletions prody/proteins/interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from prody.trajectory import TrajBase, Trajectory, Frame
from prody.ensemble import Ensemble

import multiprocessing
import multiprocessing as mp
from .fixer import *
from .compare import *
from prody.measure import calcTransformation, calcDistance, calcRMSD, superpose
Expand All @@ -47,7 +47,7 @@
'calcSASA', 'calcVolume','compareInteractions', 'showInteractionsGraph',
'showInteractionsHist', 'calcLigandInteractions', 'listLigandInteractions',
'showProteinInteractions_VMD', 'showLigandInteraction_VMD',
'calcHydrogenBondsTrajectory', 'calcHydrophobicOverlapingAreas',
'calcHydrophobicOverlapingAreas',
'Interactions', 'InteractionsTrajectory', 'LigandInteractionsTrajectory',
'calcSminaBindingAffinity', 'calcSminaPerAtomInteractions', 'calcSminaTermValues',
'showSminaTermValues', 'showPairEnergy', 'checkNonstandardResidues',
Expand Down Expand Up @@ -1248,7 +1248,7 @@ def calcPiStacking(atoms, **kwargs):
str(sele2.getResnames()[0])+str(sele2.getResnums()[0]), '_'.join(map(str,sele2.getIndices())), str(sele2.getChids()[0])]])

# create a process pool that uses all cpus
with multiprocessing.Pool() as pool:
with mp.Pool() as pool:
# call the function for each item in parallel with multiple arguments
for result in pool.starmap(calcPiStacking_once, items):
if result is not None:
Expand Down Expand Up @@ -1700,7 +1700,12 @@ def calcMetalInteractions(atoms, distA=3.0, extraIons=['FE'], excluded_ions=['SO

def calcInteractionsMultipleFrames(atoms, interaction_type, trajectory, **kwargs):
"""Compute selected type interactions for DCD trajectory or multi-model PDB
using default parameters or those from kwargs."""
using default parameters or those from kwargs.

:arg max_proc: maximum number of processes to use
default is half of the number of CPUs
:type max_proc: int
"""

try:
coords = getCoords(atoms)
Expand All @@ -1714,6 +1719,7 @@ def calcInteractionsMultipleFrames(atoms, interaction_type, trajectory, **kwargs
interactions_all = []
start_frame = kwargs.pop('start_frame', 0)
stop_frame = kwargs.pop('stop_frame', -1)
max_proc = kwargs.pop('max_proc', mp.cpu_count()//2)

interactions_dic = {
"HBs": calcHydrogenBonds,
Expand All @@ -1739,22 +1745,81 @@ def calcInteractionsMultipleFrames(atoms, interaction_type, trajectory, **kwargs
traj = trajectory[start_frame:stop_frame+1]

atoms_copy = atoms.copy()
for j0, frame0 in enumerate(traj, start=start_frame):
def analyseFrame(j0, start_frame, frame0, interactions_all):
LOGGER.info('Frame: {0}'.format(j0))
atoms_copy.setCoords(frame0.getCoords())
protein = atoms_copy.select('protein')
interactions = interactions_dic[interaction_type](protein, **kwargs)
interactions_all.append(interactions)

if max_proc == 1:
interactions_all = []
for j0, frame0 in enumerate(traj, start=start_frame):
interactions_all.append([])
analyseFrame(j0, start_frame, frame0, interactions_all)
else:
with mp.Manager() as manager:
interactions_all = manager.list()
for j0, frame0 in enumerate(traj, start=start_frame):
interactions_all.append([])

j0 = start_frame
while j0 < traj.numConfs()+start_frame:

processes = []
for _ in range(max_proc):
frame0 = traj[j0-start_frame]

p = mp.Process(target=analyseFrame, args=(j0, start_frame,
frame0,
interactions_all))
p.start()
processes.append(p)

j0 += 1
if j0 >= traj.numConfs()+start_frame:
break

for p in processes:
p.join()

interactions_all = interactions_all[:]

trajectory._nfi = nfi

else:
if atoms.numCoordsets() > 1:
for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])):
def analyseFrame(i, interactions_all):
LOGGER.info('Model: {0}'.format(i+start_frame))
atoms.setACSIndex(i+start_frame)
protein = atoms.select('protein')
interactions = interactions_dic[interaction_type](protein, **kwargs)
interactions_all.append(interactions)

if max_proc == 1:
interactions_all = []
for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])):
analyseFrame(i, interactions_all)
else:
with mp.Manager() as manager:
interactions_all = manager.list()

i = start_frame
while i < len(atoms.getCoordsets()[start_frame:stop_frame]):
processes = []
for _ in range(max_proc):
p = mp.Process(target=analyseFrame, args=(i, interactions_all))
p.start()
processes.append(p)

i += 1
if i >= len(atoms.getCoordsets()[start_frame:stop_frame]):
break

for p in processes:
p.join()

interactions_all = interactions_all[:]
else:
LOGGER.info('Include trajectory or use multi-model PDB file.')

Expand Down Expand Up @@ -5041,6 +5106,10 @@ def calcProteinInteractionsTrajectory(self, atoms, trajectory=None, filename=Non
:arg stop_frame: index of last frame to read
:type stop_frame: int

:arg max_proc: maximum number of processes to use
default is half of the number of CPUs
:type max_proc: int

Selection:
If we want to select interactions for the particular residue or group of residues:
selection='chain A and resid 1 to 50'
Expand All @@ -5056,7 +5125,7 @@ def calcProteinInteractionsTrajectory(self, atoms, trajectory=None, filename=Non
except TypeError:
raise TypeError('coords must be an object '
'with `getCoords` method')

HBs_all = []
SBs_all = []
RIB_all = []
Expand All @@ -5073,105 +5142,133 @@ def calcProteinInteractionsTrajectory(self, atoms, trajectory=None, filename=Non
HPh_nb = []
DiBs_nb = []

interactions_traj = [HBs_all, SBs_all, RIB_all, PiStack_all, PiCat_all, HPh_all, DiBs_all]
interactions_nb_traj = [HBs_nb, SBs_nb, RIB_nb, PiStack_nb, PiCat_nb, HPh_nb, DiBs_nb]

start_frame = kwargs.pop('start_frame', 0)
stop_frame = kwargs.pop('stop_frame', -1)
max_proc = kwargs.pop('max_proc', mp.cpu_count()//2)

if trajectory is not None:
if isinstance(trajectory, Atomic):
trajectory = Ensemble(trajectory)

nfi = trajectory._nfi
trajectory.reset()
numFrames = trajectory._n_csets

if stop_frame == -1:
traj = trajectory[start_frame:]
if trajectory is None:
if atoms.numCoordsets() > 1:
trajectory = atoms
else:
traj = trajectory[start_frame:stop_frame+1]
LOGGER.info('Include trajectory or use multi-model PDB file.')
return interactions_nb_traj

atoms_copy = atoms.copy()
protein = atoms_copy.protein
if isinstance(trajectory, Atomic):
trajectory = Ensemble(trajectory)

#nfi = trajectory._nfi
#trajectory.reset()
#numFrames = trajectory._n_csets

for j0, frame0 in enumerate(traj, start=start_frame):
LOGGER.info('Frame: {0}'.format(j0))
atoms_copy.setCoords(frame0.getCoords())

hydrogen_bonds = calcHydrogenBonds(protein, **kwargs)
salt_bridges = calcSaltBridges(protein, **kwargs)
RepulsiveIonicBonding = calcRepulsiveIonicBonding(protein, **kwargs)
Pi_stacking = calcPiStacking(protein, **kwargs)
Pi_cation = calcPiCation(protein, **kwargs)
hydrophobic = calcHydrophobic(protein, **kwargs)
Disulfide_Bonds = calcDisulfideBonds(protein, **kwargs)

HBs_all.append(hydrogen_bonds)
SBs_all.append(salt_bridges)
RIB_all.append(RepulsiveIonicBonding)
PiStack_all.append(Pi_stacking)
PiCat_all.append(Pi_cation)
HPh_all.append(hydrophobic)
DiBs_all.append(Disulfide_Bonds)

HBs_nb.append(len(hydrogen_bonds))
SBs_nb.append(len(salt_bridges))
RIB_nb.append(len(RepulsiveIonicBonding))
PiStack_nb.append(len(Pi_stacking))
PiCat_nb.append(len(Pi_cation))
HPh_nb.append(len(hydrophobic))
DiBs_nb.append(len(Disulfide_Bonds))

if stop_frame == -1:
traj = trajectory[start_frame:]
else:
if atoms.numCoordsets() > 1:
for i in range(len(atoms.getCoordsets()[start_frame:stop_frame])):
LOGGER.info('Model: {0}'.format(i+start_frame))
atoms.setACSIndex(i+start_frame)
protein = atoms.select('protein')

hydrogen_bonds = calcHydrogenBonds(atoms.protein, **kwargs)
salt_bridges = calcSaltBridges(atoms.protein, **kwargs)
RepulsiveIonicBonding = calcRepulsiveIonicBonding(atoms.protein, **kwargs)
Pi_stacking = calcPiStacking(atoms.protein, **kwargs)
Pi_cation = calcPiCation(atoms.protein, **kwargs)
hydrophobic = calcHydrophobic(atoms.protein, **kwargs)
Disulfide_Bonds = calcDisulfideBonds(atoms.protein, **kwargs)

HBs_all.append(hydrogen_bonds)
SBs_all.append(salt_bridges)
RIB_all.append(RepulsiveIonicBonding)
PiStack_all.append(Pi_stacking)
PiCat_all.append(Pi_cation)
HPh_all.append(hydrophobic)
DiBs_all.append(Disulfide_Bonds)
traj = trajectory[start_frame:stop_frame+1]

atoms_copy = atoms.copy()
protein = atoms_copy.protein

def analyseFrame(j0, frame0, interactions_all, interactions_nb, j0_list):
LOGGER.info('Frame: {0}'.format(j0))
atoms_copy.setCoords(frame0.getCoords())

HBs_nb.append(len(hydrogen_bonds))
SBs_nb.append(len(salt_bridges))
RIB_nb.append(len(RepulsiveIonicBonding))
PiStack_nb.append(len(Pi_stacking))
PiCat_nb.append(len(Pi_cation))
HPh_nb.append(len(hydrophobic))
DiBs_nb.append(len(Disulfide_Bonds))
else:
LOGGER.info('Include trajectory or use multi-model PDB file.')
hydrogen_bonds = calcHydrogenBonds(protein, **kwargs)
salt_bridges = calcSaltBridges(protein, **kwargs)
RepulsiveIonicBonding = calcRepulsiveIonicBonding(protein, **kwargs)
Pi_stacking = calcPiStacking(protein, **kwargs)
Pi_cation = calcPiCation(protein, **kwargs)
hydrophobic = calcHydrophobic(protein, **kwargs)
Disulfide_Bonds = calcDisulfideBonds(protein, **kwargs)

interactions_all[0].append(hydrogen_bonds)
interactions_all[1].append(salt_bridges)
interactions_all[2].append(RepulsiveIonicBonding)
interactions_all[3].append(Pi_stacking)
interactions_all[4].append(Pi_cation)
interactions_all[5].append(hydrophobic)
interactions_all[6].append(Disulfide_Bonds)

interactions_nb[0].append(len(hydrogen_bonds))
interactions_nb[1].append(len(salt_bridges))
interactions_nb[2].append(len(RepulsiveIonicBonding))
interactions_nb[3].append(len(Pi_stacking))
interactions_nb[4].append(len(Pi_cation))
interactions_nb[5].append(len(hydrophobic))
interactions_nb[6].append(len(Disulfide_Bonds))

j0_list.append(j0)

if max_proc == 1:
interactions_all = []
interactions_all.extend(interactions_traj)
interactions_nb = []
interactions_nb.extend(interactions_nb_traj)
j0_list = []
for j0, frame0 in enumerate(traj, start=start_frame):
analyseFrame(j0, frame0, interactions_all, interactions_nb,
j0_list)
else:
with mp.Manager() as manager:
interactions_all = manager.list()
interactions_all.extend([manager.list() for _ in interactions_traj])

interactions_nb = manager.list()
interactions_nb.extend([manager.list() for _ in interactions_nb_traj])

j0 = start_frame
j0_list = manager.list()
while j0 < traj.numConfs()+start_frame:

processes = []
for _ in range(max_proc):
frame0 = traj[j0-start_frame]

p = mp.Process(target=analyseFrame, args=(j0, frame0,
interactions_all,
interactions_nb,
j0_list))
p.start()
processes.append(p)

j0 += 1
if j0 >= traj.numConfs()+start_frame:
break

for p in processes:
p.join()

interactions_all = [entry[:] for entry in interactions_all]
interactions_nb = [entry[:] for entry in interactions_nb]
j0_list = [entry for entry in j0_list]

ids = np.argsort(j0_list)
interactions_all = [list(np.array(interactions_type, dtype=object)[ids])
for interactions_type in interactions_all]
interactions_nb = [list(np.array(interactions_type, dtype=object)[ids])
for interactions_type in interactions_nb]

self._atoms = atoms
self._traj = trajectory
self._interactions_traj = [HBs_all, SBs_all, RIB_all, PiStack_all, PiCat_all, HPh_all, DiBs_all]
self._interactions_nb_traj = [HBs_nb, SBs_nb, RIB_nb, PiStack_nb, PiCat_nb, HPh_nb, DiBs_nb]
self._hbs_traj = HBs_all
self._sbs_traj = SBs_all
self._rib_traj = RIB_all
self._piStack_traj = PiStack_all
self._piCat_traj = PiCat_all
self._hps_traj = HPh_all
self._dibs_traj = DiBs_all
self._interactions_traj = interactions_all
self._interactions_nb_traj = interactions_nb
self._hbs_traj = interactions_all[0]
self._sbs_traj = interactions_all[1]
self._rib_traj = interactions_all[2]
self._piStack_traj = interactions_all[3]
self._piCat_traj = interactions_all[4]
self._hps_traj = interactions_all[5]
self._dibs_traj = interactions_all[6]

if filename is not None:
import pickle
with open(str(filename)+'.pkl', 'wb') as f:
pickle.dump(self._interactions_traj, f)
LOGGER.info('File with interactions saved.')

return HBs_nb, SBs_nb, RIB_nb, PiStack_nb, PiCat_nb, HPh_nb, DiBs_nb
return interactions_nb


def getInteractions(self, **kwargs):
Expand Down
6 changes: 4 additions & 2 deletions prody/tests/proteins/test_insty.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def testAllInteractionsCalc(self):

if prody.PY3K:
self.INTERACTIONS_ALL = InteractionsTrajectory()
self.data_all = self.INTERACTIONS_ALL.calcProteinInteractionsTrajectory(self.ATOMS)
self.data_all = np.array(self.INTERACTIONS_ALL.calcProteinInteractionsTrajectory(self.ATOMS,
stop_frame=13))

try:
assert_equal(self.data_all, self.ALL_INTERACTIONS2,
Expand All @@ -52,7 +53,8 @@ def testAllInteractionsSave(self):
"""Test for saving and loading all types of interactions."""
if prody.PY3K:
self.INTERACTIONS_ALL = InteractionsTrajectory()
self.data_all = self.INTERACTIONS_ALL.calcProteinInteractionsTrajectory(self.ATOMS)
self.data_all = np.array(self.INTERACTIONS_ALL.calcProteinInteractionsTrajectory(self.ATOMS,
stop_frame=13))

np.save('test_2k39_all.npy', np.array(self.data_all, dtype=object), allow_pickle=True)

Expand Down
Loading