-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhbond_ana.py
executable file
·168 lines (144 loc) · 6.62 KB
/
hbond_ana.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#!/usr/bin/env python
import numpy as np
import sys
import MDAnalysis as mda
import MDAnalysis.analysis.hbonds
from hbond_candi import *
from utils import filter_timeseries, read_resfile
import time
import json
########## Modify these configurations if necessary ########
DIS_CUTOFF = 2.4 # distance cutoff between H and A
ANGLE_CUTOFF = 130.0, # angle cutoff of D--H--A
PRO_SEGID = 'PROA' # segid for protein residues
MEMB_SEGID = 'MEMB' # segid for memb lipids
##### NOTE for Lipid Types #####
# the lists of donors/acceptors of all lipids will be merged
# and used as candidate donors/acceptors to run the analysis
############################################################
def _set_lipid_atoms(lipids, htype='donor'):
'''
Merge the donors/acceptors of all the given lipids
'''
atoms = []
if htype == 'donor':
for l in lipids:
if l in ATOMS_DICT:
atoms += ATOMS_DICT[l]['donor'].split()
elif htype == 'acceptor':
for l in lipids:
if l in ATOMS_DICT:
atoms += ATOMS_DICT[l]['acceptor'].split()
return atoms
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Error: please submit a configuration file. Usage:\n ./hbond_ana.py CONFIG_FILE")
sys.exit(0)
config_file = sys.argv[1]
with open(config_file) as f:
Parameters = json.load(f)
# Set up the parameters
u = mda.Universe(Parameters['PSF_FILE'] , Parameters['DCD_FILE'])
res_info = read_resfile(Parameters['RESIDUE_FILE'])
lifetime = Parameters.get('lifetime')
details_file = Parameters.get('output_file_prefix').strip()
fstart, fstop, fstep = Parameters['start'], Parameters['stop'], Parameters['step']
LIPIDS = Parameters['hbond_lipids']
rids = [r[0] for r in res_info]
t0 = time.perf_counter()
h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(
u,
selection1 = 'segid {} and resid {}'.format(PRO_SEGID, ' '.join(rids) ) ,
selection2 = 'segid {} and resname {}'.format(MEMB_SEGID, ' '.join(LIPIDS) ) ,
selection1_type='both', # consider sel1 as both donor and acceptor
update_selection1=False, # candidates no need to change in each frame
update_selection2=False,
distance = DIS_CUTOFF,
angle = ANGLE_CUTOFF,
filter_first = False, # a coarse distance fileter; not necessary
### further selections for donors/acceptors performed on sel1/sel2
donors= _set_lipid_atoms(LIPIDS, 'donor'),
acceptors= _set_lipid_atoms(LIPIDS, 'acceptor'),
)
t1 = time.perf_counter() # HydrogenBondAnalysis setting-up time
h.run(fstart, fstop, fstep) # run the analysis
t2 = time.perf_counter()
hbonds = h.count_by_type() # shorter list including each type of hbond
recs = h.timesteps_by_type() # longer list plus timestep information
# print(hbonds)
# print(recs)
### NOTE: type of recs
# dtype = [
# ('donor_index', int),
# ('acceptor_index', int), ('donor_resnm', 'U4'), ('donor_resid', int),
# ('donor_heavy_atom', 'U4'), ('donor_atom', 'U4'),('acceptor_resnm', 'U4'),
# ('acceptor_resid', int), ('acceptor_atom', 'U4'), ('time', float)]
### Post-processing for
# 1. perfrom lifetime cutoff selection
# 2. customize the output format
U = len(u.trajectory[fstart:fstop:fstep])
frame_flags = np.zeros((len(hbonds), U ))
for i, hbond in enumerate(hbonds):
for r in recs:
if (r['donor_index'], r['acceptor_index']) == \
(hbond['donor_index'], hbond['acceptor_index']):
# convert timestep to frame index
frame_flags[i][h.timesteps.index(r['time'])] = 1
# perform lifetime cutoff
t3 = time.perf_counter()
if lifetime is not None:
filter_timeseries(frame_flags.transpose() , lifetime)
t4 = time.perf_counter()
print("\nTotal {} frames".format(U))
print("Time usage: \n - initialization(including atom seletion):{:.2f} \
\n - finding out candidate hbonds: {:.2f}s \
\n - lifetime cutoff: {:.2f}s".format(t1-t0, t2-t1, t4-t3))
# store frame indices where hbond occurs for calculating occupancy
new_hbonds = []
for i, hbond in enumerate(hbonds):
frames = frame_flags[i].nonzero()[0]
new_hbonds.append(list(hbond)[:9] + [frames])
### Output the results
for rid, rname in res_info :
head_idx, gly_idx, pho_idx = [], [], []
for hbond in new_hbonds:
if hbond[2] == rname and hbond[3] == int(rid): # amino acid is donor
lipid_name, lipid_atom = hbond[6], hbond[8]
elif hbond[6] == rname and hbond[7] == int(rid): # amino acid is acceptor
lipid_name, lipid_atom = hbond[2], hbond[4]
else:
continue
if lipid_atom in ATOMS_DICT[lipid_name]['pho'].split():
pho_idx += list(hbond[-1])
elif lipid_atom in ATOMS_DICT[lipid_name]['gly'].split():
gly_idx += list(hbond[-1])
elif lipid_atom in ATOMS_DICT[lipid_name]['head'].split():
head_idx += list(hbond[-1])
else:
print("WARNING: found contact from parts other than PHO, GLY and HEAD")
print(str(hbond))
gly_occ = len(set(gly_idx)) / U * 100
head_occ = len(set(head_idx)) / U * 100
pho_occ = len(set(pho_idx)) / U * 100
print("{}-{} Hbond occupancy: PHOSPHATE {:.2f}% GLYCEROL {:.2f}% HEADGROUP {:.2f}% "\
.format(rname, rid, pho_occ, gly_occ, head_occ))
# output details to file
if details_file != "":
with open(details_file+'_hbonds.txt', mode='w') as f:
f.write(','.join([
'RES-ID',
'donor_index', 'acceptor_index', 'donor_resnm',
'donor_resid', 'donor_heavy_atom', 'donor_atom',
'acceptor_resnm','acceptor_resid', 'acceptor_atom',
'frame_index'
])
+ '\n')
for rid, rname in res_info:
for hbond in new_hbonds:
if hbond[2] == rname and hbond[3] == int(rid) or \
hbond[6] == rname and hbond[7] == int(rid):
for f_number in hbond[-1]:
strs = [str(field) for field in hbond[:9]]
strs.insert(0, rname+'-'+rid)
strs.append(str(f_number))
f.write(' '.join(strs) + '\n')