1
2
3
4
5
6
7
8
9
10
11 """ Calculation of topological/topochemical descriptors.
12
13
14
15 """
16 from __future__ import print_function
17 from rdkit import Chem
18 from rdkit.Chem import Graphs
19 from rdkit.Chem import rdchem
20 from rdkit.Chem import rdMolDescriptors
21
22 from rdkit.Chem import pyPeriodicTable as PeriodicTable
23 import numpy
24 import math
25 from rdkit.ML.InfoTheory import entropy
26
27 periodicTable = rdchem.GetPeriodicTable()
28
29 _log2val = math.log(2)
32
34 """ *Internal Use Only*
35
36 this is just a row sum of the matrix... simple, neh?
37
38 """
39 if not onlyOnes:
40 res = sum(mat)
41 else:
42 res = sum(numpy.equal(mat,1))
43 return res
44
46 """ *Internal Use Only*
47
48 """
49 res = mol.GetNumBonds()
50 return res
51
53 """ *Internal Use Only*
54
55 """
56 res = {}
57 for v in arr:
58 res[v] = res.get(v,0)+1
59 return res
60
61
63 """ calculate the Hall-Kier alpha value for a molecule
64
65 From equations (58) of Rev. Comp. Chem. vol 2, 367-422, (1991)
66
67 """
68 alphaSum = 0.0
69 rC = PeriodicTable.nameTable['C'][5]
70 for atom in m.GetAtoms():
71 atNum=atom.GetAtomicNum()
72 if not atNum: continue
73 symb = atom.GetSymbol()
74 alphaV = PeriodicTable.hallKierAlphas.get(symb,None)
75 if alphaV is not None:
76 hyb = atom.GetHybridization()-2
77 if(hyb<len(alphaV)):
78 alpha = alphaV[hyb]
79 if alpha is None:
80 alpha = alphaV[-1]
81 else:
82 alpha = alphaV[-1]
83 else:
84 rA = PeriodicTable.nameTable[symb][5]
85 alpha = rA/rC - 1
86 print(atom.GetIdx(),atom.GetSymbol(),alpha)
87 alphaSum += alpha
88 return alphaSum
89
90
91 -def Ipc(mol, avg = 0, dMat = None, forceDMat = 0):
92 """This returns the information content of the coefficients of the characteristic
93 polynomial of the adjacency matrix of a hydrogen-suppressed graph of a molecule.
94
95 'avg = 1' returns the information content divided by the total population.
96
97 From D. Bonchev & N. Trinajstic, J. Chem. Phys. vol 67, 4517-4533 (1977)
98
99 """
100 if forceDMat or dMat is None:
101 if forceDMat:
102 dMat = Chem.GetDistanceMatrix(mol,0)
103 mol._adjMat = dMat
104 else:
105 try:
106 dMat = mol._adjMat
107 except AttributeError:
108 dMat = Chem.GetDistanceMatrix(mol,0)
109 mol._adjMat = dMat
110
111 adjMat = numpy.equal(dMat,1)
112 cPoly = abs(Graphs.CharacteristicPolynomial(mol, adjMat))
113 if avg:
114 return entropy.InfoEntropy(cPoly)
115 else:
116 return sum(cPoly)*entropy.InfoEntropy(cPoly)
117 Ipc.version="1.0.0"
118
119
120
122 """ Hall-Kier Kappa1 value
123
124 From equations (58) and (59) of Rev. Comp. Chem. vol 2, 367-422, (1991)
125
126 """
127 P1 = mol.GetNumBonds(1)
128 A = mol.GetNumHeavyAtoms()
129 alpha = HallKierAlpha(mol)
130 denom = P1 + alpha
131 if denom:
132 kappa = (A + alpha)*(A + alpha - 1)**2 / denom**2
133 else:
134 kappa = 0.0
135 return kappa
136
137
138
140 """ Hall-Kier Kappa2 value
141
142 From equations (58) and (60) of Rev. Comp. Chem. vol 2, 367-422, (1991)
143
144 """
145 P2 = len(Chem.FindAllPathsOfLengthN(mol,2))
146 A = mol.GetNumHeavyAtoms()
147 alpha = HallKierAlpha(mol)
148 denom = (P2 + alpha)**2
149 if denom:
150 kappa = (A + alpha - 1)*(A + alpha - 2)**2 / denom
151 else:
152 kappa = 0
153 return kappa
154
155
157 """ Hall-Kier Kappa3 value
158
159 From equations (58), (61) and (62) of Rev. Comp. Chem. vol 2, 367-422, (1991)
160
161 """
162 P3 = len(Chem.FindAllPathsOfLengthN(mol,3))
163 A = mol.GetNumHeavyAtoms()
164 alpha = HallKierAlpha(mol)
165 denom = (P3 + alpha)**2
166 if denom:
167 if A % 2 == 1:
168 kappa = (A + alpha - 1)*(A + alpha - 3)**2 / denom
169 else:
170 kappa = (A + alpha - 2)*(A + alpha - 3)**2 / denom
171 else:
172 kappa = 0
173 return kappa
174
175
176 HallKierAlpha = lambda x:rdMolDescriptors.CalcHallKierAlpha(x)
177 HallKierAlpha.version=rdMolDescriptors._CalcHallKierAlpha_version
178 Kappa1 = lambda x:rdMolDescriptors.CalcKappa1(x)
179 Kappa1.version=rdMolDescriptors._CalcKappa1_version
180 Kappa2 = lambda x:rdMolDescriptors.CalcKappa2(x)
181 Kappa2.version=rdMolDescriptors._CalcKappa2_version
182 Kappa3 = lambda x:rdMolDescriptors.CalcKappa3(x)
183 Kappa3.version=rdMolDescriptors._CalcKappa3_version
184
185
187 """ From equations (1),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
188
189 """
190 deltas = [x.GetDegree() for x in mol.GetAtoms()]
191 while 0 in deltas:
192 deltas.remove(0)
193 deltas = numpy.array(deltas,'d')
194 res = sum(numpy.sqrt(1./deltas))
195 return res
196 Chi0.version="1.0.0"
197
198
200 """ From equations (1),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
201
202 """
203 c1s = [x.GetBeginAtom().GetDegree()*x.GetEndAtom().GetDegree() for x in mol.GetBonds()]
204 while 0 in c1s:
205 c1s.remove(0)
206 c1s = numpy.array(c1s,'d')
207 res = sum(numpy.sqrt(1./c1s))
208 return res
209 Chi1.version="1.0.0"
210
212 return periodicTable.GetNOuterElecs(atom.GetAtomicNum())-atom.GetTotalNumHs()
213
215 global periodicTable
216 res = []
217 if hasattr(mol,'_hkDeltas') and mol._hkDeltas is not None:
218 return mol._hkDeltas
219 for atom in mol.GetAtoms():
220 n = atom.GetAtomicNum()
221 if n>1:
222 nV = periodicTable.GetNOuterElecs(n)
223 nHs = atom.GetTotalNumHs()
224 if n <= 10:
225
226 res.append(float(nV-nHs))
227 else:
228
229 res.append(float(nV-nHs)/float(n-nV-1))
230 elif n==1:
231 if not skipHs:
232 res.append(0.0)
233 else:
234 res.append(0.0)
235 mol._hkDeltas = res
236 return res
237
239 """ From equations (5),(9) and (10) of Rev. Comp. Chem. vol 2, 367-422, (1991)
240
241 """
242 deltas = _hkDeltas(mol)
243 while 0 in deltas:
244 deltas.remove(0)
245 mol._hkDeltas=None
246 res = sum(numpy.sqrt(1./numpy.array(deltas)))
247 return res
248
250 """ From equations (5),(11) and (12) of Rev. Comp. Chem. vol 2, 367-422, (1991)
251
252 """
253 deltas = numpy.array(_hkDeltas(mol,skipHs=0))
254 res = 0.0
255 for bond in mol.GetBonds():
256 v = deltas[bond.GetBeginAtomIdx()]*deltas[bond.GetEndAtomIdx()]
257 if v != 0.0:
258 res += numpy.sqrt(1./v)
259 return res
260
262 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
263
264 **NOTE**: because the current path finding code does, by design,
265 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
266 length 3), values of ChiNv with N >= 3 may give results that differ
267 from those provided by the old code in molecules that have rings of
268 size 3.
269
270 """
271 deltas = numpy.array([(1. / numpy.sqrt(hkd) if hkd!=0.0 else 0.0) for hkd in _hkDeltas(mol, skipHs=0)])
272 accum = 0.0
273 for path in Chem.FindAllPathsOfLengthN(mol, order + 1, useBonds=0):
274 accum += numpy.prod(deltas[numpy.array(path)])
275 return accum
276
278 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
279
280 """
281 return _pyChiNv_(mol,2)
282
284 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
285
286 """
287 return _pyChiNv_(mol,3)
288
290 """ From equations (5),(15) and (16) of Rev. Comp. Chem. vol 2, 367-422, (1991)
291
292 **NOTE**: because the current path finding code does, by design,
293 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
294 length 3), values of Chi4v may give results that differ from those
295 provided by the old code in molecules that have 3 rings.
296
297 """
298 return _pyChiNv_(mol,4)
299
301 """ Similar to Hall Kier Chi0v, but uses nVal instead of valence
302 This makes a big difference after we get out of the first row.
303
304 """
305 deltas = [_nVal(x) for x in mol.GetAtoms()]
306 while deltas.count(0):
307 deltas.remove(0)
308 deltas = numpy.array(deltas,'d')
309 res = sum(numpy.sqrt(1./deltas))
310 return res
311
313 """ Similar to Hall Kier Chi1v, but uses nVal instead of valence
314
315 """
316 delts = numpy.array([_nVal(x) for x in mol.GetAtoms()],'d')
317 res = 0.0
318 for bond in mol.GetBonds():
319 v = delts[bond.GetBeginAtomIdx()]*delts[bond.GetEndAtomIdx()]
320 if v != 0.0:
321 res += numpy.sqrt(1./v)
322 return res
323
325 """ Similar to Hall Kier ChiNv, but uses nVal instead of valence
326 This makes a big difference after we get out of the first row.
327
328 **NOTE**: because the current path finding code does, by design,
329 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
330 length 3), values of ChiNn with N >= 3 may give results that differ
331 from those provided by the old code in molecules that have rings of
332 size 3.
333
334 """
335 nval = [_nVal(x) for x in mol.GetAtoms()]
336 deltas = numpy.array([(1. / numpy.sqrt(x) if x else 0.0) for x in nval])
337 accum = 0.0
338 for path in Chem.FindAllPathsOfLengthN(mol,order+1,useBonds=0):
339 accum += numpy.prod(deltas[numpy.array(path)])
340 return accum
341
343 """ Similar to Hall Kier Chi2v, but uses nVal instead of valence
344 This makes a big difference after we get out of the first row.
345
346 """
347 return _pyChiNn_(mol,2)
348
350 """ Similar to Hall Kier Chi3v, but uses nVal instead of valence
351 This makes a big difference after we get out of the first row.
352
353 """
354 return _pyChiNn_(mol,3)
355
357 """ Similar to Hall Kier Chi4v, but uses nVal instead of valence
358 This makes a big difference after we get out of the first row.
359
360
361 **NOTE**: because the current path finding code does, by design,
362 detect rings as paths (e.g. in C1CC1 there is *1* atom path of
363 length 3), values of Chi4n may give results that differ from those
364 provided by the old code in molecules that have 3 rings.
365
366 """
367 return _pyChiNn_(mol,4)
368
369 Chi0v = lambda x:rdMolDescriptors.CalcChi0v(x)
370 Chi0v.version=rdMolDescriptors._CalcChi0v_version
371 Chi1v = lambda x:rdMolDescriptors.CalcChi1v(x)
372 Chi1v.version=rdMolDescriptors._CalcChi1v_version
373 Chi2v = lambda x:rdMolDescriptors.CalcChi2v(x)
374 Chi2v.version=rdMolDescriptors._CalcChi2v_version
375 Chi3v = lambda x:rdMolDescriptors.CalcChi3v(x)
376 Chi3v.version=rdMolDescriptors._CalcChi3v_version
377 Chi4v = lambda x:rdMolDescriptors.CalcChi4v(x)
378 Chi4v.version=rdMolDescriptors._CalcChi4v_version
379 ChiNv_ = lambda x,y:rdMolDescriptors.CalcChiNv(x,y)
380 ChiNv_.version=rdMolDescriptors._CalcChiNv_version
381
382 Chi0n = lambda x:rdMolDescriptors.CalcChi0n(x)
383 Chi0n.version=rdMolDescriptors._CalcChi0n_version
384 Chi1n = lambda x:rdMolDescriptors.CalcChi1n(x)
385 Chi1n.version=rdMolDescriptors._CalcChi1n_version
386 Chi2n = lambda x:rdMolDescriptors.CalcChi2n(x)
387 Chi2n.version=rdMolDescriptors._CalcChi2n_version
388 Chi3n = lambda x:rdMolDescriptors.CalcChi3n(x)
389 Chi3n.version=rdMolDescriptors._CalcChi3n_version
390 Chi4n = lambda x:rdMolDescriptors.CalcChi4n(x)
391 Chi4n.version=rdMolDescriptors._CalcChi4n_version
392 ChiNn_ = lambda x,y:rdMolDescriptors.CalcChiNn(x,y)
393 ChiNn_.version=rdMolDescriptors._CalcChiNn_version
394
395 -def BalabanJ(mol,dMat=None,forceDMat=0):
396 """ Calculate Balaban's J value for a molecule
397
398 **Arguments**
399
400 - mol: a molecule
401
402 - dMat: (optional) a distance/adjacency matrix for the molecule, if this
403 is not provide, one will be calculated
404
405 - forceDMat: (optional) if this is set, the distance/adjacency matrix
406 will be recalculated regardless of whether or not _dMat_ is provided
407 or the molecule already has one
408
409 **Returns**
410
411 - a float containing the J value
412
413 We follow the notation of Balaban's paper:
414 Chem. Phys. Lett. vol 89, 399-404, (1982)
415
416 """
417
418 if forceDMat or dMat is None:
419 if forceDMat:
420
421 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1)
422 mol._balabanMat = dMat
423 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
424 mol._adjMat = adjMat
425 else:
426 try:
427
428 dMat = mol._balabanMat
429 except AttributeError:
430
431 dMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=0,prefix="Balaban")
432
433 mol._balabanMat = dMat
434 try:
435 adjMat = mol._adjMat
436 except AttributeError:
437 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
438 mol._adjMat = adjMat
439 else:
440 adjMat = Chem.GetAdjacencyMatrix(mol,useBO=0,emptyVal=0,force=0,prefix="NoBO")
441
442 s = _VertexDegrees(dMat)
443 q = _NumAdjacencies(mol,dMat)
444 n = mol.GetNumAtoms()
445 mu = q - n + 1
446
447 sum = 0.
448 nS = len(s)
449 for i in range(nS):
450 si = s[i]
451 for j in range(i,nS):
452 if adjMat[i,j] == 1:
453 sum += 1./numpy.sqrt(si*s[j])
454
455 if mu+1 != 0:
456 J = float(q) / float(mu + 1) * sum
457 else:
458 J = 0
459
460 return J
461 BalabanJ.version="1.0.0"
462
463
464
465
466
467
468
469
471 """
472 Used by BertzCT
473
474 vdList: the number of neighbors each atom has
475 bdMat: "balaban" distance matrix
476
477 """
478 if forceBDMat:
479 bdMat = Chem.GetDistanceMatrix(mol,useBO=1,useAtomWts=0,force=1,
480 prefix="Balaban")
481 mol._balabanMat = bdMat
482
483 atomIdx = 0
484 keysSeen = []
485 symList = [0]*numAtoms
486 for i in range(numAtoms):
487 tmpList = bdMat[i].tolist()
488 tmpList.sort()
489 theKey = tuple(['%.4f'%x for x in tmpList[:cutoff]])
490 try:
491 idx = keysSeen.index(theKey)
492 except ValueError:
493 idx = len(keysSeen)
494 keysSeen.append(theKey)
495 symList[i] = idx+1
496 return tuple(symList)
497
499 """
500 Used by BertzCT
501 """
502 if atom1Id < atom2Id:
503 theKey = (atom1Id,atom2Id)
504 else:
505 theKey = (atom2Id,atom1Id)
506 tmp = bondDic[theKey]
507 if tmp == Chem.BondType.AROMATIC:
508 tmp = 1.5
509 else:
510 tmp = float(tmp)
511
512 return tmp
513
514
516 """
517 Used by BertzCT
518 """
519 connectionList = list(connectionDict.values())
520 totConnections = sum(connectionList)
521 connectionIE = totConnections*(entropy.InfoEntropy(numpy.array(connectionList)) +
522 math.log(totConnections)/_log2val)
523 atomTypeList = list(atomTypeDict.values())
524 atomTypeIE = numAtoms*entropy.InfoEntropy(numpy.array(atomTypeList))
525 return atomTypeIE + connectionIE
526
528 """ _Internal Use Only_
529 Used by BertzCT
530
531 """
532 bondDict = {}
533 nList = [None]*numAtoms
534 vdList = [0]*numAtoms
535 for aBond in mol.GetBonds():
536 atom1=aBond.GetBeginAtomIdx()
537 atom2=aBond.GetEndAtomIdx()
538 if atom1>atom2: atom2,atom1=atom1,atom2
539 if not aBond.GetIsAromatic():
540 bondDict[(atom1,atom2)] = aBond.GetBondType()
541 else:
542
543 bondDict[(atom1,atom2)] = Chem.BondType.AROMATIC
544 if nList[atom1] is None:
545 nList[atom1] = [atom2]
546 elif atom2 not in nList[atom1]:
547 nList[atom1].append(atom2)
548 if nList[atom2] is None:
549 nList[atom2] = [atom1]
550 elif atom1 not in nList[atom2]:
551 nList[atom2].append(atom1)
552
553 for i,element in enumerate(nList):
554 try:
555 element.sort()
556 vdList[i] = len(element)
557 except:
558 vdList[i] = 0
559 return bondDict, nList, vdList
560
561 -def BertzCT(mol, cutoff = 100, dMat = None, forceDMat = 1):
562 """ A topological index meant to quantify "complexity" of molecules.
563
564 Consists of a sum of two terms, one representing the complexity
565 of the bonding, the other representing the complexity of the
566 distribution of heteroatoms.
567
568 From S. H. Bertz, J. Am. Chem. Soc., vol 103, 3599-3601 (1981)
569
570 "cutoff" is an integer value used to limit the computational
571 expense. A cutoff value tells the program to consider vertices
572 topologically identical if their distance vectors (sets of
573 distances to all other vertices) are equal out to the "cutoff"th
574 nearest-neighbor.
575
576 **NOTE** The original implementation had the following comment:
577 > this implementation treats aromatic rings as the
578 > corresponding Kekule structure with alternating bonds,
579 > for purposes of counting "connections".
580 Upon further thought, this is the WRONG thing to do. It
581 results in the possibility of a molecule giving two different
582 CT values depending on the kekulization. For example, in the
583 old implementation, these two SMILES:
584 CC2=CN=C1C3=C(C(C)=C(C=N3)C)C=CC1=C2C
585 CC3=CN=C2C1=NC=C(C)C(C)=C1C=CC2=C3C
586 which correspond to differentk kekule forms, yield different
587 values.
588 The new implementation uses consistent (aromatic) bond orders
589 for aromatic bonds.
590
591 THIS MEANS THAT THIS IMPLEMENTATION IS NOT BACKWARDS COMPATIBLE.
592
593 Any molecule containing aromatic rings will yield different
594 values with this implementation. The new behavior is the correct
595 one, so we're going to live with the breakage.
596
597 **NOTE** this barfs if the molecule contains a second (or
598 nth) fragment that is one atom.
599
600 """
601 atomTypeDict = {}
602 connectionDict = {}
603 numAtoms = mol.GetNumAtoms()
604 if forceDMat or dMat is None:
605 if forceDMat:
606
607 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
608 mol._adjMat = dMat
609 else:
610 try:
611 dMat = mol._adjMat
612 except AttributeError:
613 dMat = Chem.GetDistanceMatrix(mol,useBO=0,useAtomWts=0,force=1)
614 mol._adjMat = dMat
615
616 if numAtoms < 2:
617 return 0
618
619 bondDict, neighborList, vdList = _CreateBondDictEtc(mol, numAtoms)
620 symmetryClasses = _AssignSymmetryClasses(mol, vdList, dMat, forceDMat, numAtoms, cutoff)
621
622 for atomIdx in range(numAtoms):
623 hingeAtomNumber = mol.GetAtomWithIdx(atomIdx).GetAtomicNum()
624 atomTypeDict[hingeAtomNumber] = atomTypeDict.get(hingeAtomNumber,0)+1
625
626 hingeAtomClass = symmetryClasses[atomIdx]
627 numNeighbors = vdList[atomIdx]
628 for i in range(numNeighbors):
629 neighbor_iIdx = neighborList[atomIdx][i]
630 NiClass = symmetryClasses[neighbor_iIdx]
631 bond_i_order = _LookUpBondOrder(atomIdx, neighbor_iIdx, bondDict)
632
633 if (bond_i_order > 1) and (neighbor_iIdx > atomIdx):
634 numConnections = bond_i_order*(bond_i_order - 1)/2
635 connectionKey = (min(hingeAtomClass, NiClass), max(hingeAtomClass, NiClass))
636 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
637
638 for j in range(i+1, numNeighbors):
639 neighbor_jIdx = neighborList[atomIdx][j]
640 NjClass = symmetryClasses[neighbor_jIdx]
641 bond_j_order = _LookUpBondOrder(atomIdx, neighbor_jIdx, bondDict)
642 numConnections = bond_i_order*bond_j_order
643 connectionKey = (min(NiClass, NjClass), hingeAtomClass, max(NiClass, NjClass))
644 connectionDict[connectionKey] = connectionDict.get(connectionKey,0)+numConnections
645
646 if not connectionDict:
647 connectionDict = {'a':1}
648
649 return _CalculateEntropies(connectionDict, atomTypeDict, numAtoms)
650 BertzCT.version="2.0.0"
651
652
653
654
655
656
657
658
659
660
661