Package rdkit :: Package Chem :: Module Crippen
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.Crippen

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2002-2008 greg Landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ Atom-based calculation of LogP and MR using Crippen's approach 
 12   
 13   
 14      Reference: 
 15        S. A. Wildman and G. M. Crippen *JCICS* _39_ 868-873 (1999) 
 16   
 17   
 18  """ 
 19  from __future__ import print_function 
 20  import os 
 21  from rdkit import RDConfig 
 22  from rdkit import Chem 
 23  from rdkit.Chem import rdMolDescriptors 
 24  import numpy 
 25   
 26  _smartsPatterns = {} 
 27  _patternOrder = [] 
 28  # this is the file containing the atom contributions 
 29  defaultPatternFileName = os.path.join(RDConfig.RDDataDir,'Crippen.txt') 
 30   
31 -def _ReadPatts(fileName):
32 """ *Internal Use Only* 33 34 parses the pattern list from the data file 35 36 """ 37 patts = {} 38 order = [] 39 with open(fileName,'r') as f: 40 lines = f.readlines() 41 for line in lines: 42 if line[0] != '#': 43 splitLine = line.split('\t') 44 if len(splitLine)>=4 and splitLine[0] != '': 45 sma = splitLine[1] 46 if sma!='SMARTS': 47 sma.replace('"','') 48 try: 49 p = Chem.MolFromSmarts(sma) 50 except: 51 pass 52 else: 53 if p: 54 if len(splitLine[0])>1 and splitLine[0][1] not in 'S0123456789': 55 cha = splitLine[0][:2] 56 else: 57 cha = splitLine[0][0] 58 logP = float(splitLine[2]) 59 if splitLine[3] != '': 60 mr = float(splitLine[3]) 61 else: 62 mr = 0.0 63 if cha not in order: 64 order.append(cha) 65 l = patts.get(cha,[]) 66 l.append((sma,p,logP,mr)) 67 patts[cha] = l 68 else: 69 print('Problems parsing smarts: %s'%(sma)) 70 return order,patts
71 72 _GetAtomContribs=rdMolDescriptors._CalcCrippenContribs
73 -def _pyGetAtomContribs(mol,patts=None,order=None,verbose=0,force=0):
74 """ *Internal Use Only* 75 76 calculates atomic contributions to the LogP and MR values 77 78 if the argument *force* is not set, we'll use the molecules stored 79 _crippenContribs value when possible instead of re-calculating. 80 81 **Note:** Changes here affect the version numbers of MolLogP and MolMR 82 as well as the VSA descriptors in Chem.MolSurf 83 84 """ 85 if not force and hasattr(mol,'_crippenContribs'): 86 return mol._crippenContribs 87 88 if patts is None: 89 patts = _smartsPatterns 90 order = _patternOrder 91 92 nAtoms = mol.GetNumAtoms() 93 atomContribs = [(0.,0.)]*nAtoms 94 doneAtoms=[0]*nAtoms 95 nAtomsFound=0 96 done = False 97 for cha in order: 98 pattVect = patts[cha] 99 for sma,patt,logp,mr in pattVect: 100 #print('try:',entry[0]) 101 for match in mol.GetSubstructMatches(patt,False,False): 102 firstIdx = match[0] 103 if not doneAtoms[firstIdx]: 104 doneAtoms[firstIdx]=1 105 atomContribs[firstIdx] = (logp,mr) 106 if verbose: 107 print('\tAtom %d: %s %4.4f %4.4f'%(match[0],sma,logp,mr)) 108 nAtomsFound+=1 109 if nAtomsFound>=nAtoms: 110 done=True 111 break 112 if done: break 113 mol._crippenContribs = atomContribs 114 return atomContribs
115
116 -def _Init():
117 global _smartsPatterns,_patternOrder 118 if _smartsPatterns == {}: 119 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName)
120
121 -def _pyMolLogP(inMol,patts=None,order=None,verbose=0,addHs=1):
122 """ DEPRECATED 123 """ 124 if addHs < 0: 125 mol = Chem.AddHs(inMol,1) 126 elif addHs > 0: 127 mol = Chem.AddHs(inMol,0) 128 else: 129 mol = inMol 130 131 if patts is None: 132 global _smartsPatterns,_patternOrder 133 if _smartsPatterns == {}: 134 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 135 patts = _smartsPatterns 136 order = _patternOrder 137 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 138 return numpy.sum(atomContribs,0)[0]
139 _pyMolLogP.version="1.1.0" 140
141 -def _pyMolMR(inMol,patts=None,order=None,verbose=0,addHs=1):
142 """ DEPRECATED 143 """ 144 if addHs < 0: 145 mol = Chem.AddHs(inMol,1) 146 elif addHs > 0: 147 mol = Chem.AddHs(inMol,0) 148 else: 149 mol = inMol 150 151 if patts is None: 152 global _smartsPatterns,_patternOrder 153 if _smartsPatterns == {}: 154 _patternOrder,_smartsPatterns = _ReadPatts(defaultPatternFileName) 155 patts = _smartsPatterns 156 order = _patternOrder 157 158 atomContribs = _pyGetAtomContribs(mol,patts,order,verbose=verbose) 159 return numpy.sum(atomContribs,0)[1]
160 _pyMolMR.version="1.1.0" 161 162 MolLogP=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[0] 163 MolLogP.version=rdMolDescriptors._CalcCrippenDescriptors_version 164 MolLogP.__doc__=""" Wildman-Crippen LogP value 165 166 Uses an atom-based scheme based on the values in the paper: 167 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 168 169 **Arguments** 170 171 - inMol: a molecule 172 173 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 174 If true, hydrogens will be added to the molecule and used in the calculation. 175 176 """ 177 178 MolMR=lambda *x,**y:rdMolDescriptors.CalcCrippenDescriptors(*x,**y)[1] 179 MolMR.version=rdMolDescriptors._CalcCrippenDescriptors_version 180 MolMR.__doc__=""" Wildman-Crippen MR value 181 182 Uses an atom-based scheme based on the values in the paper: 183 S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) 184 185 **Arguments** 186 187 - inMol: a molecule 188 189 - addHs: (optional) toggles adding of Hs to the molecule for the calculation. 190 If true, hydrogens will be added to the molecule and used in the calculation. 191 192 """ 193 194 if __name__=='__main__': 195 import sys 196 197 if len(sys.argv): 198 ms = [] 199 verbose=0 200 if '-v' in sys.argv: 201 verbose=1 202 sys.argv.remove('-v') 203 for smi in sys.argv[1:]: 204 ms.append((smi,Chem.MolFromSmiles(smi))) 205 206 for smi,m in ms: 207 print('Mol: %s'%(smi)) 208 logp = MolLogP(m,verbose=verbose) 209 print('----') 210 mr = MolMR(m,verbose=verbose) 211 print('Res:',logp,mr) 212 newM = Chem.AddHs(m) 213 logp = MolLogP(newM,addHs=0) 214 mr = MolMR(newM,addHs=0) 215 print('\t',logp,mr) 216 print('-*-*-*-*-*-*-*-*') 217