Package rdkit :: Package Chem :: Module MolSurf
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.MolSurf

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2001-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Exposes functionality for MOE-like approximate molecular surface area 
 12  descriptors. 
 13   
 14    The MOE-like VSA descriptors are also calculated here 
 15   
 16  """ 
 17  from __future__ import print_function 
 18  from rdkit import Chem 
 19  from rdkit.Chem.PeriodicTable import numTable 
 20  from rdkit.Chem import Crippen 
 21  from rdkit.Chem import rdPartialCharges,rdMolDescriptors 
 22  import numpy 
 23  import bisect 
 24  radCol = 5 
 25  bondScaleFacts = [ .1,0,.2,.3] # aromatic,single,double,triple 
 26   
27 -def _LabuteHelper(mol,includeHs=1,force=0):
28 """ *Internal Use Only* 29 helper function for LabuteASA calculation 30 returns an array of atomic contributions to the ASA 31 32 **Note:** Changes here affect the version numbers of all ASA descriptors 33 34 """ 35 if not force: 36 try: 37 res = mol._labuteContribs 38 except AttributeError: 39 pass 40 else: 41 if res: 42 return res 43 tpl = rdMolDescriptors._CalcLabuteASAContribs(mol,includeHs) 44 ats,hs=tpl 45 Vi=[hs]+list(ats) 46 mol._labuteContribs=Vi 47 return Vi
48 -def _pyLabuteHelper(mol,includeHs=1,force=0):
49 """ *Internal Use Only* 50 helper function for LabuteASA calculation 51 returns an array of atomic contributions to the ASA 52 53 **Note:** Changes here affect the version numbers of all ASA descriptors 54 55 """ 56 import math 57 if not force: 58 try: 59 res = mol._labuteContribs 60 except AttributeError: 61 pass 62 else: 63 if res.all(): 64 return res 65 66 nAts = mol.GetNumAtoms() 67 Vi = numpy.zeros(nAts+1,'d') 68 rads = numpy.zeros(nAts+1,'d') 69 70 # 0 contains the H information 71 rads[0] = numTable[1][radCol] 72 for i in xrange(nAts): 73 rads[i+1] = numTable[mol.GetAtomWithIdx(i).GetAtomicNum()][radCol] 74 75 # start with explicit bonds 76 for bond in mol.GetBonds(): 77 idx1 = bond.GetBeginAtomIdx()+1 78 idx2 = bond.GetEndAtomIdx()+1 79 Ri = rads[idx1] 80 Rj = rads[idx2] 81 82 if not bond.GetIsAromatic(): 83 bij = Ri+Rj - bondScaleFacts[bond.GetBondType()] 84 else: 85 bij = Ri+Rj - bondScaleFacts[0] 86 dij = min( max( abs(Ri-Rj), bij), Ri+Rj) 87 Vi[idx1] += Rj*Rj - (Ri-dij)**2 / dij 88 Vi[idx2] += Ri*Ri - (Rj-dij)**2 / dij 89 90 # add in hydrogens 91 if includeHs: 92 j = 0 93 Rj = rads[j] 94 for i in xrange(1,nAts+1): 95 Ri = rads[i] 96 bij = Ri+Rj 97 dij = min( max( abs(Ri-Rj), bij), Ri+Rj) 98 Vi[i] += Rj*Rj - (Ri-dij)**2 / dij 99 Vi[j] += Ri*Ri - (Rj-dij)**2 / dij 100 101 for i in xrange(nAts+1): 102 Ri = rads[i] 103 Vi[i] = 4*math.pi * Ri**2 - math.pi * Ri * Vi[i] 104 105 mol._labuteContribs=Vi 106 return Vi
107 108 #def SMR_VSA(mol,bins=[0.11,0.26,0.35,0.39,0.44,0.485,0.56]): 109 # original default bins from assuming Labute values are logs 110 # mrBins=[1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63] 111 mrBins=[1.29, 1.82, 2.24, 2.45, 2.75, 3.05, 3.63,3.8,4.0]
112 -def pySMR_VSA_(mol,bins=None,force=1):
113 """ *Internal Use Only* 114 """ 115 if not force: 116 try: 117 res = mol._smrVSA 118 except AttributeError: 119 pass 120 else: 121 if res.all(): 122 return res 123 124 if bins is None: bins = mrBins 125 Crippen._Init() 126 propContribs = Crippen._GetAtomContribs(mol,force=force) 127 volContribs = _LabuteHelper(mol) 128 129 ans = numpy.zeros(len(bins)+1,'d') 130 for i in range(len(propContribs)): 131 prop = propContribs[i] 132 vol = volContribs[i+1] 133 if prop is not None: 134 bin = bisect.bisect_right(bins,prop[1]) 135 ans[bin] += vol 136 137 mol._smrVSA=ans 138 return ans
139 SMR_VSA_ = rdMolDescriptors.SMR_VSA_ 140 141 # 142 # Original bins (from Labute paper) are: 143 # [-0.4,-0.2,0,0.1,0.15,0.2,0.25,0.3,0.4] 144 # 145 logpBins=[-0.4,-0.2,0,0.1,0.15,0.2,0.25,0.3,0.4,0.5,0.6]
146 -def pySlogP_VSA_(mol,bins=None,force=1):
147 """ *Internal Use Only* 148 """ 149 if not force: 150 try: 151 res = mol._slogpVSA 152 except AttributeError: 153 pass 154 else: 155 if res.all(): 156 return res 157 158 if bins is None: bins = logpBins 159 Crippen._Init() 160 propContribs = Crippen._GetAtomContribs(mol,force=force) 161 volContribs = _LabuteHelper(mol) 162 163 ans = numpy.zeros(len(bins)+1,'d') 164 for i in range(len(propContribs)): 165 prop = propContribs[i] 166 vol = volContribs[i+1] 167 if prop is not None: 168 bin = bisect.bisect_right(bins,prop[0]) 169 ans[bin] += vol 170 171 mol._slogpVSA=ans 172 return ans
173 SlogP_VSA_ = rdMolDescriptors.SlogP_VSA_ 174 175 chgBins=[-.3,-.25,-.20,-.15,-.10,-.05,0,.05,.10,.15,.20,.25,.30]
176 -def pyPEOE_VSA_(mol,bins=None,force=1):
177 """ *Internal Use Only* 178 """ 179 if not force: 180 try: 181 res = mol._peoeVSA 182 except AttributeError: 183 pass 184 else: 185 if res.all(): 186 return res 187 if bins is None: bins = chgBins 188 Crippen._Init() 189 #print('\ts:',repr(mol.GetMol())) 190 #print('\t\t:',len(mol.GetAtoms())) 191 rdPartialCharges.ComputeGasteigerCharges(mol) 192 193 #propContribs = [float(x.GetProp('_GasteigerCharge')) for x in mol.GetAtoms()] 194 propContribs=[] 195 for at in mol.GetAtoms(): 196 p = at.GetProp('_GasteigerCharge') 197 try: 198 v = float(p) 199 except ValueError: 200 v = 0.0 201 propContribs.append(v) 202 #print '\tp',propContribs 203 volContribs = _LabuteHelper(mol) 204 #print '\tv',volContribs 205 206 ans = numpy.zeros(len(bins)+1,'d') 207 for i in range(len(propContribs)): 208 prop = propContribs[i] 209 vol = volContribs[i+1] 210 if prop is not None: 211 bin = bisect.bisect_right(bins,prop) 212 ans[bin] += vol 213 214 mol._peoeVSA=ans 215 return ans
216 PEOE_VSA_=rdMolDescriptors.PEOE_VSA_ 217 218 #------------------------------------------------- 219 # install the various VSA descriptors in the namespace
220 -def _InstallDescriptors():
221 for i in range(len(mrBins)): 222 fn = lambda x,y=i:SMR_VSA_(x,force=0)[y] 223 if i > 0: 224 fn.__doc__="MOE MR VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,mrBins[i-1],mrBins[i]) 225 else: 226 fn.__doc__="MOE MR VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,mrBins[i]) 227 name="SMR_VSA%d"%(i+1) 228 fn.version="1.0.1" 229 globals()[name]=fn 230 i+=1 231 fn = lambda x,y=i:SMR_VSA_(x,force=0)[y] 232 fn.__doc__="MOE MR VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,mrBins[i-1]) 233 fn.version="1.0.1" 234 name="SMR_VSA%d"%(i+1) 235 globals()[name]=fn 236 237 for i in range(len(logpBins)): 238 fn = lambda x,y=i:SlogP_VSA_(x,force=0)[y] 239 if i > 0: 240 fn.__doc__="MOE logP VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,logpBins[i-1], 241 logpBins[i]) 242 else: 243 fn.__doc__="MOE logP VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,logpBins[i]) 244 name="SlogP_VSA%d"%(i+1) 245 fn.version="1.0.1" 246 globals()[name]=fn 247 i+=1 248 fn = lambda x,y=i:SlogP_VSA_(x,force=0)[y] 249 fn.__doc__="MOE logP VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,logpBins[i-1]) 250 fn.version="1.0.1" 251 name="SlogP_VSA%d"%(i+1) 252 globals()[name]=fn 253 254 for i in range(len(chgBins)): 255 fn = lambda x,y=i:PEOE_VSA_(x,force=0)[y] 256 if i > 0: 257 fn.__doc__="MOE Charge VSA Descriptor %d (% 4.2f <= x < % 4.2f)"%(i+1,chgBins[i-1], 258 chgBins[i]) 259 else: 260 fn.__doc__="MOE Charge VSA Descriptor %d (-inf < x < % 4.2f)"%(i+1,chgBins[i]) 261 name="PEOE_VSA%d"%(i+1) 262 fn.version="1.0.1" 263 globals()[name]=fn 264 i+=1 265 fn = lambda x,y=i:PEOE_VSA_(x,force=0)[y] 266 fn.version="1.0.1" 267 fn.__doc__="MOE Charge VSA Descriptor %d (% 4.2f <= x < inf)"%(i+1,chgBins[i-1]) 268 name="PEOE_VSA%d"%(i+1) 269 globals()[name]=fn 270 fn = None
271 # Change log for the MOE-type descriptors: 272 # version 1.0.1: optimizations, values unaffected 273 _InstallDescriptors() 274
275 -def pyLabuteASA(mol,includeHs=1):
276 """ calculates Labute's Approximate Surface Area (ASA from MOE) 277 278 Definition from P. Labute's article in the Journal of the Chemical Computing Group 279 and J. Mol. Graph. Mod. _18_ 464-477 (2000) 280 281 """ 282 Vi = _LabuteHelper(mol,includeHs=includeHs) 283 return sum(Vi)
284 pyLabuteASA.version="1.0.1" 285 # Change log for LabuteASA: 286 # version 1.0.1: optimizations, values unaffected 287 LabuteASA=lambda *x,**y:rdMolDescriptors.CalcLabuteASA(*x,**y) 288 LabuteASA.version=rdMolDescriptors._CalcLabuteASA_version 289 290
291 -def _pyTPSAContribs(mol,verbose=False):
292 """ DEPRECATED: this has been reimplmented in C++ 293 calculates atomic contributions to a molecules TPSA 294 295 Algorithm described in: 296 P. Ertl, B. Rohde, P. Selzer 297 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 298 Contributions and Its Application to the Prediction of Drug Transport 299 Properties, J.Med.Chem. 43, 3714-3717, 2000 300 301 Implementation based on the Daylight contrib program tpsa.c 302 303 NOTE: The JMC paper describing the TPSA algorithm includes 304 contributions from sulfur and phosphorus, however according to 305 Peter Ertl (personal communication, 2010) the correlation of TPSA 306 with various ADME properties is better if only contributions from 307 oxygen and nitrogen are used. This matches the daylight contrib 308 implementation. 309 310 """ 311 res = [0]*mol.GetNumAtoms() 312 for i in range(mol.GetNumAtoms()): 313 atom = mol.GetAtomWithIdx(i) 314 atNum = atom.GetAtomicNum() 315 if atNum in [7,8]: 316 #nHs = atom.GetImplicitValence()-atom.GetHvyValence() 317 nHs = atom.GetTotalNumHs() 318 chg = atom.GetFormalCharge() 319 isArom = atom.GetIsAromatic() 320 in3Ring = atom.IsInRingSize(3) 321 322 bonds = atom.GetBonds() 323 numNeighbors = atom.GetDegree() 324 nSing = 0 325 nDoub = 0 326 nTrip = 0 327 nArom = 0 328 for bond in bonds: 329 otherAt = bond.GetOtherAtom(atom) 330 if otherAt.GetAtomicNum()!=1: 331 if bond.GetIsAromatic(): 332 nArom += 1 333 else: 334 ord = bond.GetBondType() 335 if ord == Chem.BondType.SINGLE: 336 nSing += 1 337 elif ord == Chem.BondType.DOUBLE: 338 nDoub += 1 339 elif ord == Chem.BondType.TRIPLE: 340 nTrip += 1 341 else: 342 numNeighbors-=1 343 nHs += 1 344 tmp = -1 345 if atNum == 7: 346 if numNeighbors == 1: 347 if nHs==0 and nTrip==1 and chg==0: tmp = 23.79 348 elif nHs==1 and nDoub==1 and chg==0: tmp = 23.85 349 elif nHs==2 and nSing==1 and chg==0: tmp = 26.02 350 elif nHs==2 and nDoub==1 and chg==1: tmp = 25.59 351 elif nHs==3 and nSing==1 and chg==1: tmp = 27.64 352 elif numNeighbors == 2: 353 if nHs==0 and nSing==1 and nDoub==1 and chg==0: tmp = 12.36 354 elif nHs==0 and nTrip==1 and nDoub==1 and chg==0: tmp = 13.60 355 elif nHs==1 and nSing==2 and chg==0: 356 if not in3Ring: tmp = 12.03 357 else: tmp=21.94 358 elif nHs==0 and nTrip==1 and nSing==1 and chg==1: tmp = 4.36 359 elif nHs==1 and nDoub==1 and nSing==1 and chg==1: tmp = 13.97 360 elif nHs==2 and nSing==2 and chg==1: tmp = 16.61 361 elif nHs==0 and nArom==2 and chg==0: tmp = 12.89 362 elif nHs==1 and nArom==2 and chg==0: tmp = 15.79 363 elif nHs==1 and nArom==2 and chg==1: tmp = 14.14 364 elif numNeighbors == 3: 365 if nHs==0 and nSing==3 and chg==0: 366 if not in3Ring: tmp = 3.24 367 else: tmp = 3.01 368 elif nHs==0 and nSing==1 and nDoub==2 and chg==0: tmp = 11.68 369 elif nHs==0 and nSing==2 and nDoub==1 and chg==1: tmp = 3.01 370 elif nHs==1 and nSing==3 and chg==1: tmp = 4.44 371 elif nHs==0 and nArom==3 and chg==0: tmp = 4.41 372 elif nHs==0 and nSing==1 and nArom==2 and chg==0: tmp = 4.93 373 elif nHs==0 and nDoub==1 and nArom==2 and chg==0: tmp = 8.39 374 elif nHs==0 and nArom==3 and chg==1: tmp = 4.10 375 elif nHs==0 and nSing==1 and nArom==2 and chg==1: tmp = 3.88 376 elif numNeighbors == 4: 377 if nHs==0 and nSing==4 and chg==1: tmp = 0.00 378 if tmp < 0.0: 379 tmp = 30.5 - numNeighbors * 8.2 + nHs * 1.5 380 if tmp < 0.0: tmp = 0.0 381 elif atNum==8: 382 #print(nHs,nSing,chg) 383 if numNeighbors == 1: 384 if nHs==0 and nDoub==1 and chg==0: tmp = 17.07 385 elif nHs==1 and nSing==1 and chg==0: tmp = 20.23 386 elif nHs==0 and nSing==1 and chg==-1: tmp = 23.06 387 elif numNeighbors == 2: 388 if nHs==0 and nSing==2 and chg==0: 389 if not in3Ring: tmp = 9.23 390 else: tmp = 12.53 391 elif nHs==0 and nArom==2 and chg==0: tmp = 13.14 392 393 if tmp < 0.0: 394 tmp = 28.5 - numNeighbors * 8.6 + nHs * 1.5 395 if tmp < 0.0: tmp = 0.0 396 if verbose: 397 print('\t',atom.GetIdx(),atom.GetSymbol(),atNum,nHs,nSing,nDoub,nTrip,nArom,chg,tmp) 398 399 res[atom.GetIdx()] = tmp 400 return res
401
402 -def _pyTPSA(mol,verbose=False):
403 """ DEPRECATED: this has been reimplmented in C++ 404 calculates the polar surface area of a molecule based upon fragments 405 406 Algorithm in: 407 P. Ertl, B. Rohde, P. Selzer 408 Fast Calculation of Molecular Polar Surface Area as a Sum of Fragment-based 409 Contributions and Its Application to the Prediction of Drug Transport 410 Properties, J.Med.Chem. 43, 3714-3717, 2000 411 412 Implementation based on the Daylight contrib program tpsa.c 413 """ 414 contribs = _pyTPSAContribs(mol,verbose=verbose) 415 res = 0.0 416 for contrib in contribs: 417 res += contrib 418 return res
419 _pyTPSA.version="1.0.1" 420 421 TPSA=lambda *x,**y:rdMolDescriptors.CalcTPSA(*x,**y) 422 TPSA.version=rdMolDescriptors._CalcTPSA_version 423 424 425 if __name__ == '__main__': 426 smis = ['C','CC','CCC','CCCC','CO','CCO','COC'] 427 smis = ['C(=O)O','c1ccccc1'] 428 for smi in smis: 429 m = Chem.MolFromSmiles(smi) 430 #print(smi, LabuteASA(m)) 431 print('-----------\n',smi) 432 #print('M:',['% 4.2f'%x for x in SMR_VSA_(m)]) 433 #print('L:',['% 4.2f'%x for x in SlogP_VSA_(m)]) 434 print('P:',['% 4.2f'%x for x in PEOE_VSA_(m)]) 435 print('P:',['% 4.2f'%x for x in PEOE_VSA_(m)]) 436 print() 437