1
2
3
4
5
6
7
8 """ Torsion Fingerprints (Deviation) (TFD)
9 According to a paper from Schulz-Gasch et al., JCIM, 52, 1499-1512 (2012).
10
11 """
12 from rdkit import rdBase
13 from rdkit import RDConfig
14 from rdkit import Geometry
15 from rdkit import Chem
16 from rdkit.Chem import rdMolDescriptors
17 import math, os
18
20 """ Helper function to check if all atoms in the list are the same
21
22 Arguments:
23 - inv: atom invariants (used to define equivalence of atoms)
24 - atoms: list of atoms to check
25
26 Return: boolean
27 """
28 match = True
29 for i in range(len(atoms)-1):
30 for j in range(i+1, len(atoms)):
31 if (inv[atoms[i].GetIdx()] != inv[atoms[j].GetIdx()]):
32 match = False
33 return match
34 return match
35
37 """ Helper function to check if all atoms in the list are NOT the same
38
39 Arguments:
40 - inv: atom invariants (used to define equivalence of atoms)
41 - atoms: list of atoms to check
42
43 Return: boolean
44 """
45 match = True
46 for i in range(len(atoms)-1):
47 for j in range(i+1, len(atoms)):
48 if (inv[atoms[i].GetIdx()] == inv[atoms[j].GetIdx()]):
49 match = False
50 return match
51 return match
52
54 """ Helper function to check if two atoms in the list are the same,
55 and one not
56 Note: Works only for three atoms
57
58 Arguments:
59 - inv: atom invariants (used to define equivalence of atoms)
60 - atoms: list of atoms to check
61
62 Return: atom that is different
63 """
64 if len(atoms) != 3:
65 raise ValueError("Number of atoms must be three")
66 a1 = atoms[0].GetIdx()
67 a2 = atoms[1].GetIdx()
68 a3 = atoms[2].GetIdx()
69 if (inv[a1] == inv[a2] and inv[a1] != inv[a3] and inv[a2] != inv[a3]):
70 return atoms[2]
71 elif (inv[a1] != inv[a2] and inv[a1] == inv[a3] and inv[a2] != inv[a3]):
72 return atoms[1]
73 elif (inv[a1] != inv[a2] and inv[a1] != inv[a3] and inv[a2] == inv[a3]):
74 return atoms[0]
75 return None
76
78 """ Helper function to calculate the atom invariants for each atom
79 with a given radius
80
81 Arguments:
82 - mol: the molecule of interest
83 - radius: the radius for the Morgan fingerprint
84
85 Return: list of atom invariants
86 """
87 inv = []
88 for i in range(mol.GetNumAtoms()):
89 info = {}
90 fp = rdMolDescriptors.GetMorganFingerprint(mol, radius, fromAtoms=[i], bitInfo=info)
91 for k in info.keys():
92 if info[k][0][1] == radius:
93 inv.append(k)
94 return inv
95
97 """ Helper function to calculate the number of heavy atom neighbors.
98
99 Arguments:
100 - atom1: the atom of interest
101 - aid2: atom index that should be excluded from neighbors (default: none)
102
103 Return: a list of heavy atom neighbors of the given atom
104 """
105 if aid2 < 0:
106 return [n for n in atom1.GetNeighbors() if n.GetSymbol()!='H']
107 else:
108 return [n for n in atom1.GetNeighbors() if (n.GetSymbol()!='H' and n.GetIdx()!=aid2)]
109
111 """ Helper function to calculate the index of the reference atom for
112 a given atom
113
114 Arguments:
115 - neighbors: list of the neighbors of the atom
116 - inv: atom invariants
117
118 Return: list of atom indices as reference for torsion
119 """
120 if len(neighbors) == 1:
121 return [neighbors[0]]
122 elif _doMatch(inv, neighbors):
123 return neighbors
124 elif _doNotMatch(inv, neighbors):
125
126 return [neighbors[0]]
127 at = _doMatchExcept1(inv, neighbors)
128 if at is None:
129 raise ValueError("Atom neighbors are either all the same or all different")
130 return [at]
131
133 """ Calculate a list of torsions for a given molecule. For each torsion
134 the four atom indices are determined and stored in a set.
135
136 Arguments:
137 - mol: the molecule of interest
138 - maxDev: maximal deviation used for normalization
139 'equal': all torsions are normalized using 180.0 (default)
140 'spec': each torsion is normalized using its specific
141 maximal deviation as given in the paper
142 - symmRadius: radius used for calculating the atom invariants
143 (default: 2)
144
145 Return: two lists of torsions: non-ring and ring torsions
146 """
147 if maxDev not in ['equal', 'spec']:
148 raise ValueError("maxDev must be either equal or spec")
149
150 bonds = []
151 for b in mol.GetBonds():
152 if b.IsInRing(): continue
153 a1 = b.GetBeginAtomIdx()
154 a2 = b.GetEndAtomIdx()
155 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a2)
156 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom(), a1)
157 if nb1 and nb2:
158 bonds.append((b, a1, a2, nb1, nb2))
159
160 if symmRadius > 0:
161 inv = _getAtomInvariantsWithRadius(mol, symmRadius)
162 else:
163 inv = rdMolDescriptors.GetConnectivityInvariants(mol)
164
165 tors_list = []
166 for b, a1, a2, nb1, nb2 in bonds:
167 d1 = _getIndexforTorsion(nb1, inv)
168 d2 = _getIndexforTorsion(nb2, inv)
169 if len(d1) == 1 and len(d2) == 1:
170 tors_list.append(([(d1[0].GetIdx(), a1, a2, d2[0].GetIdx())], 180.0))
171 elif len(d1) == 1:
172 if len(nb2) == 2:
173 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 90.0))
174 else:
175 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 60.0))
176 elif len(d2) == 1:
177 if len(nb1) == 2:
178 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 90.0))
179 else:
180 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 60.0))
181 else:
182 tmp = []
183 for n1 in d1:
184 for n2 in d2:
185 tmp.append((n1.GetIdx(), a1, a2, n2.GetIdx()))
186 if len(nb1) == 2 and len(nb2) == 2:
187 tors_list.append((tmp, 90.0))
188 elif len(nb1) == 3 and len(nb2) == 3:
189 tors_list.append((tmp, 60.0))
190 else:
191 tors_list.append((tmp, 30.0))
192
193 if maxDev == 'equal':
194 tors_list = [(t,180.0) for t,d in tors_list]
195
196 rings = Chem.GetSymmSSSR(mol)
197 tors_list_rings = []
198 for r in rings:
199
200 tmp = []
201 num = len(r)
202 maxdev = 180.0 * math.exp(-0.025*(num-14)*(num-14))
203 for i in range(len(r)):
204 tmp.append((r[i], r[(i+1)%num], r[(i+2)%num], r[(i+3)%num]))
205 tors_list_rings.append((tmp,maxdev))
206 return tors_list, tors_list_rings
207
209 """ Helper function to retrieve the coordinates of the four atoms
210 in a torsion
211
212 Arguments:
213 - atoms: list with the four atoms
214 - conf: conformation of the molecule
215
216 Return: Point3D objects of the four atoms
217 """
218 if len(atoms) != 4:
219 raise ValueError("List must contain exactly four atoms")
220 p1 = conf.GetAtomPosition(atoms[0])
221 p2 = conf.GetAtomPosition(atoms[1])
222 p3 = conf.GetAtomPosition(atoms[2])
223 p4 = conf.GetAtomPosition(atoms[3])
224 return p1, p2, p3, p4
225
227 """ Calculate the torsion angles for a list of non-ring and
228 a list of ring torsions.
229
230 Arguments:
231 - mol: the molecule of interest
232 - tors_list: list of non-ring torsions
233 - tors_list_rings: list of ring torsions
234 - confId: index of the conformation (default: first conformer)
235
236 Return: list of torsion angles
237 """
238 torsions = []
239 conf = mol.GetConformer(confId)
240 for t,maxdev in tors_list:
241 if len(t) == 1:
242 t = t[0]
243 p1, p2, p3, p4 = _getTorsionAtomPositions(t, conf)
244 tors = (Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0
245 if tors < 0: tors += 360.0
246 else:
247
248 tors = 360.0
249 for t2 in t:
250 p1, p2, p3, p4 = _getTorsionAtomPositions(t2, conf)
251 tmp = (Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0
252 if tmp < 0: tmp += 360.0
253 if tmp < tors: tors = tmp
254 torsions.append((tors, maxdev))
255
256 for t,maxdev in tors_list_rings:
257 num = len(t)
258
259 tors = 0
260 for t2 in t:
261 p1, p2, p3, p4 = _getTorsionAtomPositions(t2, conf)
262 tmp = abs((Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0)
263 tors += tmp
264 tors /= num
265 torsions.append((tors, maxdev))
266 return torsions
267
269 """ Helper function to identify the atoms of the most central bond.
270
271 Arguments:
272 - mol: the molecule of interest
273 - distmat: distance matrix of the molecule
274
275 Return: atom indices of the two most central atoms (in order)
276 """
277 from numpy import std
278
279 stds = []
280 for i in range(mol.GetNumAtoms()):
281
282 if len(_getHeavyAtomNeighbors(mol.GetAtomWithIdx(i))) < 2: continue
283 tmp = [d for d in distmat[i]]
284 tmp.pop(i)
285 stds.append((std(tmp), i))
286 stds.sort()
287 aid1 = stds[0][1]
288
289 i = 1
290 while 1:
291 if mol.GetBondBetweenAtoms(aid1, stds[i][1]) is None:
292 i += 1
293 else:
294 aid2 = stds[i][1]
295 break
296 return aid1, aid2
297
299 """ Helper function to calculate the beta for torsion weights
300 according to the formula in the paper.
301 w(dmax/2) = 0.1
302
303 Arguments:
304 - mol: the molecule of interest
305 - distmat: distance matrix of the molecule
306 - aid1: atom index of the most central atom
307
308 Return: value of beta (float)
309 """
310
311 bonds = []
312 for b in mol.GetBonds():
313 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom())
314 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom())
315 if len(nb2) > 1 and len(nb2) > 1:
316 bonds.append(b)
317
318 dmax = 0
319 for b in bonds:
320 bid1 = b.GetBeginAtom().GetIdx()
321 bid2 = b.GetEndAtom().GetIdx()
322 d = max([distmat[aid1][bid1], distmat[aid1][bid2]])
323 if (d > dmax): dmax = d
324 dmax2 = dmax/2.0
325 beta = -math.log(0.1)/(dmax2*dmax2)
326 return beta
327
329 """ Calculate the weights for the torsions in a molecule.
330 By default, the highest weight is given to the bond
331 connecting the two most central atoms.
332 If desired, two alternate atoms can be specified (must
333 be connected by a bond).
334
335 Arguments:
336 - mol: the molecule of interest
337 - aid1: index of the first atom (default: most central)
338 - aid2: index of the second atom (default: second most cenral)
339
340 Return: list of torsion weights (both non-ring and ring)
341 """
342
343 distmat = Chem.GetDistanceMatrix(mol)
344 if aid1 < 0 and aid2 < 0:
345 aid1, aid2 = _findCentralBond(mol, distmat)
346 else:
347 b = mol.GetBondBetweenAtoms(aid1, aid2)
348 if b is None:
349 raise ValueError("Specified atoms must be connected by a bond.")
350
351 beta = _calculateBeta(mol, distmat, aid1)
352
353 bonds = []
354 for b in mol.GetBonds():
355 if b.IsInRing(): continue
356 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom())
357 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom())
358 if len(nb1) > 1 and len(nb2) > 1:
359 bonds.append(b)
360
361 weights = []
362 for b in bonds:
363 bid1 = b.GetBeginAtom().GetIdx()
364 bid2 = b.GetEndAtom().GetIdx()
365 if ((bid1, bid2) == (aid1, aid2)
366 or (bid2, bid1) == (aid1, aid2)):
367 d = 0
368 else:
369
370 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1], distmat[aid2][bid2])+1
371 w = math.exp(-beta*(d*d))
372 weights.append(w)
373
374
375 rings = mol.GetRingInfo()
376 for r in rings.BondRings():
377
378 tmp = []
379 num = len(r)
380 for bidx in r:
381 b = mol.GetBondWithIdx(bidx)
382 bid1 = b.GetBeginAtomIdx()
383 bid2 = b.GetEndAtomIdx()
384
385 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1], distmat[aid2][bid2])+1
386 tmp.append(d)
387
388
389
390
391 w = sum(tmp)/float(num)
392 w = math.exp(-beta*(w*w))
393 weights.append(w*(num/2.0))
394 return weights
395
397 """ Calculate the torsion deviation fingerprint (TFD) given two lists of
398 torsion angles.
399
400 Arguments;
401 - torsions1: torsion angles of conformation 1
402 - torsions2: torsion angles of conformation 2
403 - weights: list of torsion weights (default: None)
404
405 Return: TFD value (float)
406 """
407 if len(torsions1) != len(torsions2):
408 raise ValueError("List of torsions angles must have the same size.")
409
410 deviations = []
411 for t1, t2 in zip(torsions1, torsions2):
412 diff = abs(t1[0]-t2[0])
413 if (360.0-diff) < diff:
414 diff = 360.0 - diff
415 deviations.append(diff/t1[1])
416
417 if weights is not None:
418 if len(weights) != len(torsions1):
419 raise ValueError("List of torsions angles and weights must have the same size.")
420 deviations = [d*w for d,w in zip(deviations, weights)]
421 sum_weights = sum(weights)
422 else:
423 sum_weights = len(deviations)
424 tfd = sum(deviations)
425 if sum_weights != 0:
426 tfd /= sum_weights
427 return tfd
428
429
462
463 -def GetTFDBetweenMolecules(mol1, mol2, confIds1=-1, confIds2=-1, useWeights=True, maxDev='equal', symmRadius=2):
464 """ Wrapper to calculate the TFD between two list of conformers
465 of two molecules.
466 Important: The two molecules must be instances of the same molecule
467
468 Arguments:
469 - mol1: first instance of the molecule of interest
470 - mol2: second instance the molecule of interest
471 - confIds1: list of conformer indices from mol1 (default: first conformer)
472 - confIds2: list of conformer indices from mol2 (default: first conformer)
473 - useWeights: flag for using torsion weights in the TFD calculation
474 - maxDev: maximal deviation used for normalization
475 'equal': all torsions are normalized using 180.0 (default)
476 'spec': each torsion is normalized using its specific
477 maximal deviation as given in the paper
478 - symmRadius: radius used for calculating the atom invariants
479 (default: 2)
480
481 Return: list of TFD values
482 """
483 if (Chem.MolToSmiles(mol1) != Chem.MolToSmiles(mol2)):
484 raise ValueError("The two molecules must be instances of the same molecule!")
485 tl, tlr = CalculateTorsionLists(mol1, maxDev=maxDev, symmRadius=symmRadius)
486
487 if confIds1 < 0:
488 torsions1 = [CalculateTorsionAngles(mol1, tl, tlr)]
489 else:
490 torsions1 = [CalculateTorsionAngles(mol1, tl, tlr, confId=cid) for cid in confIds1]
491
492 if confIds2 < 0:
493 torsions2 = [CalculateTorsionAngles(mol2, tl, tlr)]
494 else:
495 torsions2 = [CalculateTorsionAngles(mol2, tl, tlr, confId=cid) for cid in confIds2]
496 tfd = []
497 if useWeights:
498 weights = CalculateTorsionWeights(mol1)
499 for t1 in torsions1:
500 for t2 in torsions2:
501 tfd.append(CalculateTFD(t1, t2, weights=weights))
502 else:
503 for t1 in torsions1:
504 for t2 in torsions2:
505 tfd.append(CalculateTFD(t1, t2))
506 return tfd
507
508 -def GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2):
509 """ Wrapper to calculate the matrix of TFD values for the
510 conformers of a molecule.
511
512 Arguments:
513 - mol: the molecule of interest
514 - useWeights: flag for using torsion weights in the TFD calculation
515 - maxDev: maximal deviation used for normalization
516 'equal': all torsions are normalized using 180.0 (default)
517 'spec': each torsion is normalized using its specific
518 maximal deviation as given in the paper
519 - symmRadius: radius used for calculating the atom invariants
520 (default: 2)
521
522 Return: matrix of TFD values
523 Note that the returned matrix is symmetrical, i.e. it is the
524 lower half of the matrix, e.g. for 5 conformers:
525 matrix = [ a,
526 b, c,
527 d, e, f,
528 g, h, i, j]
529 """
530 tl, tlr = CalculateTorsionLists(mol, maxDev=maxDev, symmRadius=symmRadius)
531 numconf = mol.GetNumConformers()
532 torsions = [CalculateTorsionAngles(mol, tl, tlr, confId=conf.GetId()) for conf in mol.GetConformers()]
533 tfdmat = []
534 if useWeights:
535 weights = CalculateTorsionWeights(mol)
536 for i in range(0, numconf):
537 for j in range(0, i):
538 tfdmat.append(CalculateTFD(torsions[i], torsions[j], weights=weights))
539 else:
540 for i in range(0, numconf):
541 for j in range(0, i):
542 tfdmat.append(CalculateTFD(torsions[i], torsions[j]))
543 return tfdmat
544