Package rdkit :: Package Chem :: Module TorsionFingerprints
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.TorsionFingerprints

  1  #  Copyright (C) 2014 Sereina Riniker 
  2  # 
  3  #  This file is part of the RDKit. 
  4  #  The contents are covered by the terms of the BSD license 
  5  #  which is included in the file license.txt, found at the root 
  6  #  of the RDKit source tree. 
  7  # 
  8  """ Torsion Fingerprints (Deviation) (TFD) 
  9      According to a paper from Schulz-Gasch et al., JCIM, 52, 1499-1512 (2012). 
 10   
 11  """ 
 12  from rdkit import rdBase 
 13  from rdkit import RDConfig 
 14  from rdkit import Geometry 
 15  from rdkit import Chem 
 16  from rdkit.Chem import rdMolDescriptors 
 17  import math, os 
 18   
19 -def _doMatch(inv, atoms):
20 """ Helper function to check if all atoms in the list are the same 21 22 Arguments: 23 - inv: atom invariants (used to define equivalence of atoms) 24 - atoms: list of atoms to check 25 26 Return: boolean 27 """ 28 match = True 29 for i in range(len(atoms)-1): 30 for j in range(i+1, len(atoms)): 31 if (inv[atoms[i].GetIdx()] != inv[atoms[j].GetIdx()]): 32 match = False 33 return match 34 return match
35
36 -def _doNotMatch(inv, atoms):
37 """ Helper function to check if all atoms in the list are NOT the same 38 39 Arguments: 40 - inv: atom invariants (used to define equivalence of atoms) 41 - atoms: list of atoms to check 42 43 Return: boolean 44 """ 45 match = True 46 for i in range(len(atoms)-1): 47 for j in range(i+1, len(atoms)): 48 if (inv[atoms[i].GetIdx()] == inv[atoms[j].GetIdx()]): 49 match = False 50 return match 51 return match
52
53 -def _doMatchExcept1(inv, atoms):
54 """ Helper function to check if two atoms in the list are the same, 55 and one not 56 Note: Works only for three atoms 57 58 Arguments: 59 - inv: atom invariants (used to define equivalence of atoms) 60 - atoms: list of atoms to check 61 62 Return: atom that is different 63 """ 64 if len(atoms) != 3: 65 raise ValueError("Number of atoms must be three") 66 a1 = atoms[0].GetIdx() 67 a2 = atoms[1].GetIdx() 68 a3 = atoms[2].GetIdx() 69 if (inv[a1] == inv[a2] and inv[a1] != inv[a3] and inv[a2] != inv[a3]): 70 return atoms[2] 71 elif (inv[a1] != inv[a2] and inv[a1] == inv[a3] and inv[a2] != inv[a3]): 72 return atoms[1] 73 elif (inv[a1] != inv[a2] and inv[a1] != inv[a3] and inv[a2] == inv[a3]): 74 return atoms[0] 75 return None
76
77 -def _getAtomInvariantsWithRadius(mol, radius):
78 """ Helper function to calculate the atom invariants for each atom 79 with a given radius 80 81 Arguments: 82 - mol: the molecule of interest 83 - radius: the radius for the Morgan fingerprint 84 85 Return: list of atom invariants 86 """ 87 inv = [] 88 for i in range(mol.GetNumAtoms()): 89 info = {} 90 fp = rdMolDescriptors.GetMorganFingerprint(mol, radius, fromAtoms=[i], bitInfo=info) 91 for k in info.keys(): 92 if info[k][0][1] == radius: 93 inv.append(k) 94 return inv
95
96 -def _getHeavyAtomNeighbors(atom1, aid2=-1):
97 """ Helper function to calculate the number of heavy atom neighbors. 98 99 Arguments: 100 - atom1: the atom of interest 101 - aid2: atom index that should be excluded from neighbors (default: none) 102 103 Return: a list of heavy atom neighbors of the given atom 104 """ 105 if aid2 < 0: 106 return [n for n in atom1.GetNeighbors() if n.GetSymbol()!='H'] 107 else: 108 return [n for n in atom1.GetNeighbors() if (n.GetSymbol()!='H' and n.GetIdx()!=aid2)]
109
110 -def _getIndexforTorsion(neighbors, inv):
111 """ Helper function to calculate the index of the reference atom for 112 a given atom 113 114 Arguments: 115 - neighbors: list of the neighbors of the atom 116 - inv: atom invariants 117 118 Return: list of atom indices as reference for torsion 119 """ 120 if len(neighbors) == 1: # atom has only one neighbor 121 return [neighbors[0]] 122 elif _doMatch(inv, neighbors): # atom has all symmetric neighbors 123 return neighbors 124 elif _doNotMatch(inv, neighbors): # atom has all different neighbors 125 # simply use the first neighbor 126 return [neighbors[0]] 127 at = _doMatchExcept1(inv, neighbors) # two neighbors the same, one different 128 if at is None: 129 raise ValueError("Atom neighbors are either all the same or all different") 130 return [at]
131
132 -def CalculateTorsionLists(mol, maxDev='equal', symmRadius=2):
133 """ Calculate a list of torsions for a given molecule. For each torsion 134 the four atom indices are determined and stored in a set. 135 136 Arguments: 137 - mol: the molecule of interest 138 - maxDev: maximal deviation used for normalization 139 'equal': all torsions are normalized using 180.0 (default) 140 'spec': each torsion is normalized using its specific 141 maximal deviation as given in the paper 142 - symmRadius: radius used for calculating the atom invariants 143 (default: 2) 144 145 Return: two lists of torsions: non-ring and ring torsions 146 """ 147 if maxDev not in ['equal', 'spec']: 148 raise ValueError("maxDev must be either equal or spec") 149 # get non-terminal, non-cyclic bonds 150 bonds = [] 151 for b in mol.GetBonds(): 152 if b.IsInRing(): continue 153 a1 = b.GetBeginAtomIdx() 154 a2 = b.GetEndAtomIdx() 155 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom(), a2) 156 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom(), a1) 157 if nb1 and nb2: # no terminal bonds 158 bonds.append((b, a1, a2, nb1, nb2)) 159 # get atom invariants 160 if symmRadius > 0: 161 inv = _getAtomInvariantsWithRadius(mol, symmRadius) 162 else: 163 inv = rdMolDescriptors.GetConnectivityInvariants(mol) 164 # get the torsions 165 tors_list = [] # to store the atom indices of the torsions 166 for b, a1, a2, nb1, nb2 in bonds: 167 d1 = _getIndexforTorsion(nb1, inv) 168 d2 = _getIndexforTorsion(nb2, inv) 169 if len(d1) == 1 and len(d2) == 1: # case 1, 2, 4, 5, 7, 10, 16, 12, 17, 19 170 tors_list.append(([(d1[0].GetIdx(), a1, a2, d2[0].GetIdx())], 180.0)) 171 elif len(d1) == 1: # case 3, 6, 8, 13, 20 172 if len(nb2) == 2: # two neighbors 173 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 90.0)) 174 else: # three neighbors 175 tors_list.append(([(d1[0].GetIdx(), a1, a2, nb.GetIdx()) for nb in d2], 60.0)) 176 elif len(d2) == 1: # case 3, 6, 8, 13, 20 177 if len(nb1) == 2: 178 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 90.0)) 179 else: # three neighbors 180 tors_list.append(([(nb.GetIdx(), a1, a2, d2[0].GetIdx()) for nb in d1], 60.0)) 181 else: # both symmetric 182 tmp = [] 183 for n1 in d1: 184 for n2 in d2: 185 tmp.append((n1.GetIdx(), a1, a2, n2.GetIdx())) 186 if len(nb1) == 2 and len(nb2) == 2: # case 9 187 tors_list.append((tmp, 90.0)) 188 elif len(nb1) == 3 and len(nb2) == 3: # case 21 189 tors_list.append((tmp, 60.0)) 190 else: # case 15 191 tors_list.append((tmp, 30.0)) 192 # maximal possible deviation for non-cyclic bonds 193 if maxDev == 'equal': 194 tors_list = [(t,180.0) for t,d in tors_list] 195 # rings 196 rings = Chem.GetSymmSSSR(mol) 197 tors_list_rings = [] 198 for r in rings: 199 # get the torsions 200 tmp = [] 201 num = len(r) 202 maxdev = 180.0 * math.exp(-0.025*(num-14)*(num-14)) 203 for i in range(len(r)): 204 tmp.append((r[i], r[(i+1)%num], r[(i+2)%num], r[(i+3)%num])) 205 tors_list_rings.append((tmp,maxdev)) 206 return tors_list, tors_list_rings
207
208 -def _getTorsionAtomPositions(atoms, conf):
209 """ Helper function to retrieve the coordinates of the four atoms 210 in a torsion 211 212 Arguments: 213 - atoms: list with the four atoms 214 - conf: conformation of the molecule 215 216 Return: Point3D objects of the four atoms 217 """ 218 if len(atoms) != 4: 219 raise ValueError("List must contain exactly four atoms") 220 p1 = conf.GetAtomPosition(atoms[0]) 221 p2 = conf.GetAtomPosition(atoms[1]) 222 p3 = conf.GetAtomPosition(atoms[2]) 223 p4 = conf.GetAtomPosition(atoms[3]) 224 return p1, p2, p3, p4
225
226 -def CalculateTorsionAngles(mol, tors_list, tors_list_rings, confId=-1):
227 """ Calculate the torsion angles for a list of non-ring and 228 a list of ring torsions. 229 230 Arguments: 231 - mol: the molecule of interest 232 - tors_list: list of non-ring torsions 233 - tors_list_rings: list of ring torsions 234 - confId: index of the conformation (default: first conformer) 235 236 Return: list of torsion angles 237 """ 238 torsions = [] 239 conf = mol.GetConformer(confId) 240 for t,maxdev in tors_list: 241 if len(t) == 1: 242 t = t[0] 243 p1, p2, p3, p4 = _getTorsionAtomPositions(t, conf) 244 tors = (Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0 245 if tors < 0: tors += 360.0 # angle between 0 and 360 246 else: 247 # loop over torsions and take minimum 248 tors = 360.0 249 for t2 in t: 250 p1, p2, p3, p4 = _getTorsionAtomPositions(t2, conf) 251 tmp = (Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0 252 if tmp < 0: tmp += 360.0 # angle between 0 and 360 253 if tmp < tors: tors = tmp 254 torsions.append((tors, maxdev)) 255 # rings 256 for t,maxdev in tors_list_rings: 257 num = len(t) 258 # loop over torsions and sum them up 259 tors = 0 260 for t2 in t: 261 p1, p2, p3, p4 = _getTorsionAtomPositions(t2, conf) 262 tmp = abs((Geometry.ComputeSignedDihedralAngle(p1, p2, p3, p4)/math.pi)*180.0) 263 tors += tmp 264 tors /= num 265 torsions.append((tors, maxdev)) 266 return torsions
267
268 -def _findCentralBond(mol, distmat):
269 """ Helper function to identify the atoms of the most central bond. 270 271 Arguments: 272 - mol: the molecule of interest 273 - distmat: distance matrix of the molecule 274 275 Return: atom indices of the two most central atoms (in order) 276 """ 277 from numpy import std 278 # get the most central atom = atom with the least STD of shortest distances 279 stds = [] 280 for i in range(mol.GetNumAtoms()): 281 # only consider non-terminal atoms 282 if len(_getHeavyAtomNeighbors(mol.GetAtomWithIdx(i))) < 2: continue 283 tmp = [d for d in distmat[i]] 284 tmp.pop(i) 285 stds.append((std(tmp), i)) 286 stds.sort() 287 aid1 = stds[0][1] 288 # find the second most central bond that is bonded to aid1 289 i = 1 290 while 1: 291 if mol.GetBondBetweenAtoms(aid1, stds[i][1]) is None: 292 i += 1 293 else: 294 aid2 = stds[i][1] 295 break 296 return aid1, aid2 # most central atom comes first
297
298 -def _calculateBeta(mol, distmat, aid1):
299 """ Helper function to calculate the beta for torsion weights 300 according to the formula in the paper. 301 w(dmax/2) = 0.1 302 303 Arguments: 304 - mol: the molecule of interest 305 - distmat: distance matrix of the molecule 306 - aid1: atom index of the most central atom 307 308 Return: value of beta (float) 309 """ 310 # get all non-terminal bonds 311 bonds = [] 312 for b in mol.GetBonds(): 313 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom()) 314 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom()) 315 if len(nb2) > 1 and len(nb2) > 1: 316 bonds.append(b) 317 # get shortest distance 318 dmax = 0 319 for b in bonds: 320 bid1 = b.GetBeginAtom().GetIdx() 321 bid2 = b.GetEndAtom().GetIdx() 322 d = max([distmat[aid1][bid1], distmat[aid1][bid2]]) 323 if (d > dmax): dmax = d 324 dmax2 = dmax/2.0 325 beta = -math.log(0.1)/(dmax2*dmax2) 326 return beta
327
328 -def CalculateTorsionWeights(mol, aid1=-1, aid2=-1):
329 """ Calculate the weights for the torsions in a molecule. 330 By default, the highest weight is given to the bond 331 connecting the two most central atoms. 332 If desired, two alternate atoms can be specified (must 333 be connected by a bond). 334 335 Arguments: 336 - mol: the molecule of interest 337 - aid1: index of the first atom (default: most central) 338 - aid2: index of the second atom (default: second most cenral) 339 340 Return: list of torsion weights (both non-ring and ring) 341 """ 342 # get distance matrix 343 distmat = Chem.GetDistanceMatrix(mol) 344 if aid1 < 0 and aid2 < 0: 345 aid1, aid2 = _findCentralBond(mol, distmat) 346 else: 347 b = mol.GetBondBetweenAtoms(aid1, aid2) 348 if b is None: 349 raise ValueError("Specified atoms must be connected by a bond.") 350 # calculate beta according to the formula in the paper 351 beta = _calculateBeta(mol, distmat, aid1) 352 # get non-terminal, non-cyclic bonds 353 bonds = [] 354 for b in mol.GetBonds(): 355 if b.IsInRing(): continue 356 nb1 = _getHeavyAtomNeighbors(b.GetBeginAtom()) 357 nb2 = _getHeavyAtomNeighbors(b.GetEndAtom()) 358 if len(nb1) > 1 and len(nb2) > 1: 359 bonds.append(b) 360 # get shortest paths and calculate weights 361 weights = [] 362 for b in bonds: 363 bid1 = b.GetBeginAtom().GetIdx() 364 bid2 = b.GetEndAtom().GetIdx() 365 if ((bid1, bid2) == (aid1, aid2) 366 or (bid2, bid1) == (aid1, aid2)): # if it's the most central bond itself 367 d = 0 368 else: 369 # get shortest distance between the 4 atoms and add 1 to get bond distance 370 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1], distmat[aid2][bid2])+1 371 w = math.exp(-beta*(d*d)) 372 weights.append(w) 373 374 ## RINGS 375 rings = mol.GetRingInfo() 376 for r in rings.BondRings(): 377 # get shortest distances 378 tmp = [] 379 num = len(r) 380 for bidx in r: 381 b = mol.GetBondWithIdx(bidx) 382 bid1 = b.GetBeginAtomIdx() 383 bid2 = b.GetEndAtomIdx() 384 # get shortest distance between the 4 atoms and add 1 to get bond distance 385 d = min(distmat[aid1][bid1], distmat[aid1][bid2], distmat[aid2][bid1], distmat[aid2][bid2])+1 386 tmp.append(d) 387 # calculate weights and append to list 388 # Note: the description in the paper is not very clear, the following 389 # formula was found to give the same weights as shown in Fig. 1 390 # For a ring of size N: w = N/2 * exp(-beta*(sum(w of each bond in ring)/N)^2) 391 w = sum(tmp)/float(num) 392 w = math.exp(-beta*(w*w)) 393 weights.append(w*(num/2.0)) 394 return weights
395
396 -def CalculateTFD(torsions1, torsions2, weights=None):
397 """ Calculate the torsion deviation fingerprint (TFD) given two lists of 398 torsion angles. 399 400 Arguments; 401 - torsions1: torsion angles of conformation 1 402 - torsions2: torsion angles of conformation 2 403 - weights: list of torsion weights (default: None) 404 405 Return: TFD value (float) 406 """ 407 if len(torsions1) != len(torsions2): 408 raise ValueError("List of torsions angles must have the same size.") 409 # calculate deviations and normalize (divide by max. possible deviation) 410 deviations = [] 411 for t1, t2 in zip(torsions1, torsions2): 412 diff = abs(t1[0]-t2[0]) 413 if (360.0-diff) < diff: # we do not care about direction 414 diff = 360.0 - diff 415 deviations.append(diff/t1[1]) 416 # do we use weights? 417 if weights is not None: 418 if len(weights) != len(torsions1): 419 raise ValueError("List of torsions angles and weights must have the same size.") 420 deviations = [d*w for d,w in zip(deviations, weights)] 421 sum_weights = sum(weights) 422 else: 423 sum_weights = len(deviations) 424 tfd = sum(deviations) 425 if sum_weights != 0: # avoid division by zero 426 tfd /= sum_weights 427 return tfd
428 429 # some wrapper functions
430 -def GetTFDBetweenConformers(mol, confIds1, confIds2, useWeights=True, maxDev='equal', symmRadius=2):
431 """ Wrapper to calculate the TFD between two list of conformers 432 of a molecule 433 434 Arguments: 435 - mol: the molecule of interest 436 - confIds1: first list of conformer indices 437 - confIds2: second list of conformer indices 438 - useWeights: flag for using torsion weights in the TFD calculation 439 - maxDev: maximal deviation used for normalization 440 'equal': all torsions are normalized using 180.0 (default) 441 'spec': each torsion is normalized using its specific 442 maximal deviation as given in the paper 443 - symmRadius: radius used for calculating the atom invariants 444 (default: 2) 445 446 Return: list of TFD values 447 """ 448 tl, tlr = CalculateTorsionLists(mol, maxDev=maxDev, symmRadius=symmRadius) 449 torsions1 = [CalculateTorsionAngles(mol, tl, tlr, confId=cid) for cid in confIds1] 450 torsions2 = [CalculateTorsionAngles(mol, tl, tlr, confId=cid) for cid in confIds2] 451 tfd = [] 452 if useWeights: 453 weights = CalculateTorsionWeights(mol) 454 for t1 in torsions1: 455 for t2 in torsions2: 456 tfd.append(CalculateTFD(t1, t2, weights=weights)) 457 else: 458 for t1 in torsions1: 459 for t2 in torsions2: 460 tfd.append(CalculateTFD(t1, t2)) 461 return tfd
462
463 -def GetTFDBetweenMolecules(mol1, mol2, confIds1=-1, confIds2=-1, useWeights=True, maxDev='equal', symmRadius=2):
464 """ Wrapper to calculate the TFD between two list of conformers 465 of two molecules. 466 Important: The two molecules must be instances of the same molecule 467 468 Arguments: 469 - mol1: first instance of the molecule of interest 470 - mol2: second instance the molecule of interest 471 - confIds1: list of conformer indices from mol1 (default: first conformer) 472 - confIds2: list of conformer indices from mol2 (default: first conformer) 473 - useWeights: flag for using torsion weights in the TFD calculation 474 - maxDev: maximal deviation used for normalization 475 'equal': all torsions are normalized using 180.0 (default) 476 'spec': each torsion is normalized using its specific 477 maximal deviation as given in the paper 478 - symmRadius: radius used for calculating the atom invariants 479 (default: 2) 480 481 Return: list of TFD values 482 """ 483 if (Chem.MolToSmiles(mol1) != Chem.MolToSmiles(mol2)): 484 raise ValueError("The two molecules must be instances of the same molecule!") 485 tl, tlr = CalculateTorsionLists(mol1, maxDev=maxDev, symmRadius=symmRadius) 486 # first molecule 487 if confIds1 < 0: 488 torsions1 = [CalculateTorsionAngles(mol1, tl, tlr)] 489 else: 490 torsions1 = [CalculateTorsionAngles(mol1, tl, tlr, confId=cid) for cid in confIds1] 491 # second molecule 492 if confIds2 < 0: 493 torsions2 = [CalculateTorsionAngles(mol2, tl, tlr)] 494 else: 495 torsions2 = [CalculateTorsionAngles(mol2, tl, tlr, confId=cid) for cid in confIds2] 496 tfd = [] 497 if useWeights: 498 weights = CalculateTorsionWeights(mol1) 499 for t1 in torsions1: 500 for t2 in torsions2: 501 tfd.append(CalculateTFD(t1, t2, weights=weights)) 502 else: 503 for t1 in torsions1: 504 for t2 in torsions2: 505 tfd.append(CalculateTFD(t1, t2)) 506 return tfd
507
508 -def GetTFDMatrix(mol, useWeights=True, maxDev='equal', symmRadius=2):
509 """ Wrapper to calculate the matrix of TFD values for the 510 conformers of a molecule. 511 512 Arguments: 513 - mol: the molecule of interest 514 - useWeights: flag for using torsion weights in the TFD calculation 515 - maxDev: maximal deviation used for normalization 516 'equal': all torsions are normalized using 180.0 (default) 517 'spec': each torsion is normalized using its specific 518 maximal deviation as given in the paper 519 - symmRadius: radius used for calculating the atom invariants 520 (default: 2) 521 522 Return: matrix of TFD values 523 Note that the returned matrix is symmetrical, i.e. it is the 524 lower half of the matrix, e.g. for 5 conformers: 525 matrix = [ a, 526 b, c, 527 d, e, f, 528 g, h, i, j] 529 """ 530 tl, tlr = CalculateTorsionLists(mol, maxDev=maxDev, symmRadius=symmRadius) 531 numconf = mol.GetNumConformers() 532 torsions = [CalculateTorsionAngles(mol, tl, tlr, confId=conf.GetId()) for conf in mol.GetConformers()] 533 tfdmat = [] 534 if useWeights: 535 weights = CalculateTorsionWeights(mol) 536 for i in range(0, numconf): 537 for j in range(0, i): 538 tfdmat.append(CalculateTFD(torsions[i], torsions[j], weights=weights)) 539 else: 540 for i in range(0, numconf): 541 for j in range(0, i): 542 tfdmat.append(CalculateTFD(torsions[i], torsions[j])) 543 return tfdmat
544