-
Notifications
You must be signed in to change notification settings - Fork 5
/
fit_torsional.py
executable file
·480 lines (407 loc) · 17.7 KB
/
fit_torsional.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
#!/usr/bin/env python3
"""
Receives a Gaussian's ".log" of a scan generated by plot_eff_tors, the generated .dfr,
.txt and atoms defining the dihedral to fit the classical curve to the one from the .log.
Author: Henrique Musseli Cezar
Date: MAY/2019
"""
import argparse
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
try:
import pybel
import openbabel
except:
from openbabel import pybel
from openbabel import openbabel
from numpy import cos
from math import ceil
from scipy import optimize
from scipy.interpolate import CubicSpline
from plot_en_angle_gaussian_scan import parse_en_log_gaussian
from plot_eff_tors import *
from distutils.spawn import find_executable
def species_coord_to_openbabel(species, coord):
mol = openbabel.OBMol()
# add atoms to mol
for i, atomSp in species.items():
a = mol.NewAtom()
a.SetAtomicNum(int(atomSp))
a.SetVector(*coord[i])
# perceive bond information
mol.ConnectTheDots()
mol.PerceiveBondOrders()
return mol
def equal_parameters(dihedrals, mol, tolerance, usevalence):
infoAtom = []
dihAngles = []
for dih in dihedrals:
acoords = [[mol.GetAtomById(x-1).GetX(),mol.GetAtomById(x-1).GetY(),mol.GetAtomById(x-1).GetZ()] for x in dih]
dihAngles.append(get_phi(*acoords))
a1 = mol.GetAtomById(dih[0]-1)
a4 = mol.GetAtomById(dih[3]-1)
if usevalence:
infoAtom.append([a1.GetAtomicNum(), a1.GetValence(), a1.GetHyb(), a4.GetAtomicNum(), a4.GetValence(), a4.GetHyb()])
else:
infoAtom.append([a1.GetAtomicNum(), a1.GetHyb(), a4.GetAtomicNum(), a4.GetHyb()])
equals = []
for i, pair1 in enumerate(infoAtom):
# compare to all others
for j, pair2 in enumerate(infoAtom):
if j <= i: continue
pair3 = pair2[int(len(pair2)/2):]+pair2[:int(len(pair2)/2)]
if (pair1 == pair2) or (pair1 == pair3):
equals.append([i,j])
# compare angles to check if dihedrals are really equal
clean_pairs = []
for pair in equals:
ang1 = round(dihAngles[pair[0]],4)
ang2 = round(dihAngles[pair[1]],4)
if abs(ang1-ang2) <= tolerance:
clean_pairs.append(pair)
# join the pairs if needed
diff_lists = []
for pair in clean_pairs:
fnd = False
for i, val in enumerate(diff_lists):
if (pair[0] in val) or (pair[1] in val):
diff_lists[i] += pair
fnd = True
if not fnd:
diff_lists.append(pair)
return [sorted(list(set(x))) for x in diff_lists]
def torsen_opls(phi, V1, V2, V3, f1, f2, f3):
return 0.5 * (V1*(1.+cos(phi+f1)) + V2*(1.-cos(2.*phi+f2)) + V3*(1.+cos(3.*phi+f3)))
def torsen_amber(phi, V1, V2, V3, f1, f2, f3):
return 0.5 * (V1*(1.+cos(phi-f1)) + V2*(1.+cos(2.*phi-f2)) + V3*(1.+cos(3.*phi-f3)))
# thanks to https://stackoverflow.com/a/34226673
def fit_func(phi, *args):
# first half are vs
vs = args[:int(len(args)/2)]
# second half are fs
fs = args[int(len(args)/2):]
nfunc = int(len(args)/6)
sumf = 0.
for i in range(nfunc):
sumf += torsen_opls(phi,*vs[3*i:3*(i+1)],*fs[3*i:3*(i+1)])
return sumf
def fit_func_equals(phi, *args):
# first half are vs
vs = args[:int((len(args)-1)/2)]
# second half are fs
fs = args[int((len(args)-1)/2):len(args)-1]
# identification of equal parameters
equal = args[-1]
nfunc = int(len(vs)/3)
sumf = 0.
for i in range(nfunc):
calc = False
for lst in equal:
if i in lst:
sumf += torsen_opls(phi,*vs[3*lst[0]:3*(lst[0]+1)],*fs[3*i:3*(i+1)])
calc = True
break
if not calc:
sumf += torsen_opls(phi,*vs[3*i:3*(i+1)],*fs[3*i:3*(i+1)])
return sumf
def shift_angle_rad(tetha):
if tetha < 0.0:
return tetha
elif tetha > np.pi:
return tetha-(2.*np.pi)
else:
return tetha
def write_dfr(dfrfile, dihedrals, params, amber):
with open(dfrfile, "r") as f:
line = f.readline()
while "$dihedral" not in line:
print(line,end='')
line = f.readline()
print(line,end='')
line = f.readline()
dnum = 1
pdied = 0
while "$end dihedral" not in line:
if dnum in dihedrals:
# write the fitted parameters
if amber:
print("%d %d %d %d\t\tAMBER\t%.3f\t%.3f\t%.3f\t0.0\t0.0\t0.0" % (*dihedrals[dnum][:4], *params[3*pdied:3*(pdied+1)]))
else:
print("%d %d %d %d\t\tOPLS\t%.3f\t%.3f\t%.3f\t0.0\t0.0\t0.0" % (*dihedrals[dnum][:4], *params[3*pdied:3*(pdied+1)]))
pdied += 1
else:
print(line,end='')
dnum += 1
line = f.readline()
print(line,end='')
for line in f:
print(line,end='')
def find_nearest_idx(array, value):
array = np.asarray(array)
closest = (np.abs(array - value)).argmin()
# check if neighbor is smaller
if (closest != 0 and closest != len(array)-1):
if array[closest] > array[closest-1]:
idx = closest-1
elif array[closest] > array[closest+1]:
idx = closest+1
else:
idx = closest
else:
idx = closest
return idx
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Receives a Gaussians ".log" of a scan generated by plot_eff_tors, the generated .dfr, .txt and atoms defining the dihedral to fit the classical curve to the one from the .log.')
parser.add_argument("logfile", help="Gaussian's .log file")
parser.add_argument("dfrfile", help=".dfr containing current parameters")
parser.add_argument("txtfile", help=".txt containing the geometry and nonbonded parameters")
parser.add_argument("a1", type=int, help="first atom defining the reference dihedral")
parser.add_argument("a2", type=int, help="second atom defining the reference dihedral")
parser.add_argument("a3", type=int, help="third atom defining the reference dihedral")
parser.add_argument("a4", type=int, help="fourth atom defining the reference dihedral")
parser.add_argument("--amber", help="use AMBER rule to 1-4 interactions and torsional energy", action="store_true")
parser.add_argument("--plot-nben", help="plot the nonbonded energy in the final plot", action="store_true")
parser.add_argument("--plot-initial-guess", help="plot the curve using the parameters from the given dfr", action="store_true")
parser.add_argument("--no-force-min", help="disable the bigger weight given to the minimum points by default", action="store_true")
parser.add_argument("--force-surroundings", help="force the minimum and its surroundings points to be satisfied", action="store_true")
parser.add_argument("--force-similar-params", nargs="+", action="store", type=str, help="receives lists of indexes (based on the .dfr) of dihedrals that should have the same set of parameters. Example: 1,2,3 4,5,6 (spaces indicate a different set of similar parameters)'")
parser.add_argument("--force-different-params", help="force the parameters for each dihedral to be different, i.e., does not use any chemical information when fitting", action="store_true")
parser.add_argument("--fit-to-spline", help="use the cubic spline curve for the fit instead of just the few points", action="store_true")
parser.add_argument("--use-valence", help="also use valence of the atoms when finding similar dihedrals", action="store_true")
parser.add_argument("--weight-minimums", type=float, help="the weight that the minimums should have over the other points (default = 10)", default=10)
parser.add_argument("--tolerance-dihedral", type=float, help="tolarance value for which dihedral angles are considered to be equal (default = 0.1 radians)", default=0.1)
parser.add_argument("--bound-values", type=float, help="upper and lower bound [-val,+val] for the fitted parameters (default = 5)", default=5.)
parser.add_argument("--cut-energy", type=float, help="the percentage of highest energies that should not be considered during the fit (default = 0.3)", default=0.3)
parser.add_argument("--cut-from-total", help="instead of cutting the high torsional energies from fit, cut the high total energies", action="store_true")
args = parser.parse_args()
if args.force_surroundings and args.no_force_min:
print("Warning: If you are using --no-force-min the --force-surroundings is ignored.")
basename = os.path.splitext(os.path.basename(args.logfile))[0]
# parse data from the log file
died, enqm = parse_en_log_gaussian(args.logfile)
died = [shift_angle_rad(x*np.pi/180.) for x in died]
# parse dfr to get the dihedrals involved in the rotation
dihedralsDict, connInfo, fragInfo, fconnInfo = parse_dfr(args.dfrfile, args.a2, args.a3)
# parse txt to get geometry
_, natoms, atomSp, atomsCoord, _ = parse_txt(args.txtfile)
mol = species_coord_to_openbabel(atomSp, atomsCoord)
# get dihedrals which should have the same parameters
if not args.force_similar_params and not args.force_different_params:
equals = equal_parameters([dihedralsDict[x][:4] for x in dihedralsDict], mol, args.tolerance_dihedral, args.use_valence)
elif args.force_similar_params:
equals = [[int(y)-1 for y in x.split(",")] for x in args.force_similar_params]
else:
equals = False
# get reference angle
acoords = [atomsCoord[x] for x in [args.a1, args.a2, args.a3, args.a4]]
ref_ang = get_phi(*acoords)
# get initial dihedral angles for each torsional involved in the rotation
dihAngles = []
for tors in dihedralsDict:
acoords = [atomsCoord[x] for x in dihedralsDict[tors][:4]]
dihAngles.append(get_phi(*acoords)-ref_ang)
# get the classical curve with current parameters
diedClass, _, nben, _ = get_potential_curve(args.txtfile, args.dfrfile, args.a1, args.a2, args.a3, args.a4, died, "", False, args.amber, False, False)
# convert the angles and sort
diedClass = [shift_angle_rad(x) for x in diedClass]
diedClass, nben = (list(t) for t in zip(*sorted(zip(diedClass, nben))))
# points for which the spline are calculated
xc = np.arange(-np.pi,np.pi, 0.02)
diedClass_fit, den_fit, nben_fit, _ = get_potential_curve(args.txtfile, args.dfrfile, args.a1, args.a2, args.a3, args.a4, xc, "", False, args.amber, False, False)
# convert the angles and sort
convDied_fit = [shift_angle_rad(x) for x in diedClass_fit]
diedClass_fit, den_fit = (list(t) for t in zip(*sorted(zip(convDied_fit, den_fit))))
diedClass_fit, nben_fit = (list(t) for t in zip(*sorted(zip(convDied_fit, nben_fit))))
# prepare the data
v0s = []
f0s = []
for i, dih in enumerate(dihedralsDict):
v0s += dihedralsDict[dih][4:7]
f0s += [dihAngles[i], 2.*dihAngles[i], 3.*dihAngles[i]]
# set the bounds
lbound = len(v0s)*[-args.bound_values]
ubound = len(v0s)*[args.bound_values]
# shift the energies to the same reference
min_mq = min(enqm)
enqm = [x-min_mq for x in enqm]
min_class = nben[np.argmin(enqm)] # set as zero the same angle used before for QM
nben = [x-min_class for x in nben]
min_class_fit = den_fit[find_nearest_idx(xc,died[np.argmin(enqm)])]
den_fit = [x-min_class_fit for x in den_fit]
den_fit = np.asarray(den_fit)
min_class_fit = nben_fit[find_nearest_idx(xc,died[np.argmin(enqm)])]
nben_fit = [x-min_class_fit for x in nben_fit]
nben_fit = np.asarray(nben_fit)
died = np.asarray(died)
enqm = np.asarray(enqm)
nben = np.asarray(nben)
# subtract the nonbonded energy to get the "QM torsional"
enfit = enqm - nben
died_spline = died.copy()
en_spline = enqm.copy()
died_enfit_spline = died.copy()
enfit_spline = enfit.copy()
f = CubicSpline(died_spline, en_spline)
ffit = CubicSpline(died_enfit_spline, enfit_spline)
cr_pts = f.derivative().roots()
deriv2 = f.derivative(2)
cr_pts = np.delete(cr_pts, np.where(deriv2(cr_pts) < 0.))
# order and remove the points relative to the transition states
npremove = ceil(args.cut_energy*len(died))
if args.cut_from_total:
lowenremove = -np.sort(-enqm)[npremove-1]
filtbar = np.where(enqm >= lowenremove)
filtxc = np.where(f(xc) >= lowenremove)
else:
lowenremove = -np.sort(-enfit)[npremove-1]
filtbar = np.where(enfit >= lowenremove)
filtxc = np.where(ffit(xc) >= lowenremove)
olddied = died.copy()
oldenfit = enfit.copy()
died = np.delete(died, filtbar)
enfit = np.delete(enfit, filtbar)
xcfit = np.delete(xc, filtxc)
if not args.fit_to_spline:
# give greater weight to minimums (smaller sigma is a grater weight)
weights = np.ones(len(died))
if (not args.no_force_min) and args.force_surroundings:
idx_min = []
for val in cr_pts:
idx_min.append(find_nearest_idx(died,val))
idx_fit = idx_min.copy()
for idx in idx_min:
if idx == 0:
idx_fit.append(1)
elif idx == len(died)-1:
idx_fit.append(len(died)-2)
else:
idx_fit.append(idx-1)
idx_fit.append(idx+1)
weights[idx_fit] = 1./args.weight_minimums
elif not args.no_force_min:
idx_min = []
for val in cr_pts:
idx_min.append(find_nearest_idx(died,val))
weights[idx_min] = 1./args.weight_minimums
if equals:
try:
popt, pcov = optimize.curve_fit(lambda x, *vs: fit_func_equals(x, *vs, *f0s, equals), died, enfit, p0=v0s, bounds=(lbound,ubound), sigma=weights)
except Exception as e:
print("Problem while fitting the curve (%s)" % (str(e)))
print("If the problem is 'x0 is infeasible' use --bound-values to set a higher value")
sys.exit(0)
else:
try:
popt, pcov = optimize.curve_fit(lambda x, *vs: fit_func(x, *vs, *f0s), died, enfit, p0=v0s, bounds=(lbound,ubound), sigma=weights)
except Exception as e:
print("Problem while fitting the curve (%s)" % (str(e)))
print("If the problem is 'x0 is infeasible' use --bound-values to set a higher value")
sys.exit(0)
else:
# give greater weight to minimums (smaller sigma is a grater weight)
weights = np.ones(len(xcfit))
if (not args.no_force_min) and args.force_surroundings:
idx_min = []
idx_min_died = []
for val in cr_pts:
idx_min_died.append(find_nearest_idx(died,val))
for val in cr_pts:
idx_min.append(find_nearest_idx(xcfit,val))
idx_fit = idx_min.copy()
for i, idx in enumerate(idx_min):
if idx == 0:
idx_fit.append(find_nearest_idx(xcfit,died[idx_min_died[i]+1]))
elif idx == (len(xcfit)-1):
idx_fit.append(find_nearest_idx(xcfit,died[idx_min_died[i]-1]))
else:
idx_fit.append(find_nearest_idx(xcfit,died[idx_min_died[i]-1]))
idx_fit.append(find_nearest_idx(xcfit,died[idx_min_died[i]+1]))
weights[idx_fit] = 1./args.weight_minimums
elif not args.no_force_min:
idx_min = []
for val in cr_pts:
idx_min.append(find_nearest_idx(xcfit,val))
weights[idx_min] = 1./args.weight_minimums
if equals:
try:
popt, pcov = optimize.curve_fit(lambda x, *vs: fit_func_equals(x, *vs, *f0s, equals), xcfit, ffit(xcfit), p0=v0s, bounds=(lbound,ubound), sigma=weights)
except Exception as e:
print("Problem while fitting the curve (%s)" % (str(e)))
print("If the problem is 'x0 is infeasible' use --bound-values to set a higher value")
sys.exit(0)
else:
try:
popt, pcov = optimize.curve_fit(lambda x, *vs: fit_func(x, *vs, *f0s), xcfit, ffit(xcfit), p0=v0s, bounds=(lbound,ubound), sigma=weights)
except Exception as e:
print("Problem while fitting the curve (%s)" % (str(e)))
print("If the problem is 'x0 is infeasible' use --bound-values to set a higher value")
sys.exit(0)
if equals:
new_popt = []
for tors in range(int(len(popt)/3)):
fnd = False
for lst in equals:
if tors in lst:
new_popt += [round(x,3) for x in popt[lst[0]*3:3*(lst[0]+1)]]
fnd = True
if not fnd:
new_popt += [round(x,3) for x in popt[tors*3:3*(tors+1)]]
popt = np.asarray(new_popt)
else:
popt = np.asarray([round(x,3) for x in popt])
# plot the curves to compare
fcurv = []
for val in xc:
fcurv.append(fit_func(val,*popt,*f0s))
fcurv = np.asarray(fcurv)
# write the adjusted dfr
write_dfr(args.dfrfile, dihedralsDict, popt, args.amber)
# functions to plot
if args.fit_to_spline:
strgt = ffit(xcfit)
mins = f(cr_pts)
fminfit = ffit(cr_pts)
# convert the angles to degrees
died = [x*180./np.pi for x in died]
olddied = [x*180./np.pi for x in olddied]
xc = [x*180./np.pi for x in xc]
xcfit = [x*180./np.pi for x in xcfit]
cr_pts = [x*180./np.pi for x in cr_pts]
# plotting options
if find_executable('latex') and find_executable('dvipng'):
mpl.rcParams.update({'font.size':14, 'text.usetex':True, 'font.family':'serif', 'ytick.major.pad':4})
else:
mpl.rcParams.update({'font.size':14, 'font.family':'serif', 'ytick.major.pad':4})
# plot the torsional energies
if args.fit_to_spline:
plt.plot(xcfit, strgt, label="Gaussian torsional")
else:
plt.plot(died, enfit, label="Gaussian torsional")
plt.plot(xc, fcurv, label='Fit')
if args.plot_initial_guess:
plt.plot(xc, den_fit, label="Initial guess")
plt.plot(cr_pts, fminfit, 'o', color='tab:green', label="Target points")
plt.xlim([-180,180])
plt.xticks([-180,-120,-60,0,60,120,180])
plt.xlabel(r"$\phi$ ($^\circ$)")
plt.ylabel(r"$U_{torsional}$ (kcal/mol)")
plt.legend()
plt.savefig("fit_torsional_%s.pdf" % (basename), bbox_inches='tight')
plt.gcf().clear()
# plot the total energies
if args.plot_nben:
plt.plot(xc, nben_fit, label='Classical nonbonded energy')
plt.plot(olddied, oldenfit+nben, label='Gaussian total energy')
# plt.plot(xc, f(xc), label='Gaussian total energy spline')
plt.plot(xc, fcurv+nben_fit, label='Classical total energy')
if args.plot_initial_guess:
plt.plot(xc, den_fit+nben_fit, label="Initial guess")
plt.plot(cr_pts, mins, 'o', color='tab:green', label="Target minimums")
plt.xlim([-180,180])
plt.xticks([-180,-120,-60,0,60,120,180])
plt.xlabel(r"$\phi$ ($^\circ$)")
plt.ylabel(r"$U$ (kcal/mol)")
plt.legend()
plt.savefig("fit_total_en_%s.pdf" % (basename), bbox_inches='tight')