Package rdkit :: Package Chem :: Package ChemUtils :: Module TemplateExpand
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.ChemUtils.TemplateExpand

  1  # $Id: TemplateExpand.py 1053 2008-07-30 12:03:29Z landrgr1 $ 
  2  # 
  3  #  Created by Greg Landrum August, 2006 
  4  # 
  5  # 
  6  from __future__ import print_function 
  7  from rdkit import RDLogger as logging 
  8  logger = logging.logger() 
  9  logger.setLevel(logging.INFO) 
 10  from rdkit import Chem 
 11  from rdkit.Chem import Crippen 
 12  from rdkit.Chem import AllChem 
 13  from rdkit.Chem.ChemUtils.AlignDepict import AlignDepict 
 14   
 15  import sys 
 16  _version="0.8.0" 
 17  _greet="This is TemplateExpand version %s"%_version 
 18   
 19  _usage=""" 
 20  Usage: TemplateExpand [options] template <sidechains> 
 21   
 22   Unless otherwise indicated, the template and sidechains are assumed to be 
 23     Smiles 
 24   
 25   Each sidechain entry should be: 
 26     [-r] SMARTS filename 
 27       The SMARTS pattern is used to recognize the attachment point, 
 28       if the -r argument is not provided, then atoms matching the pattern 
 29       will be removed from the sidechains. 
 30     or 
 31     -n filename 
 32       where the attachment atom is the first atom in each molecule 
 33     The filename provides the list of potential sidechains. 
 34   
 35   options: 
 36     -o filename.sdf:      provides the name of the output file, otherwise 
 37                           stdout is used 
 38   
 39     --sdf :               expect the sidechains to be in SD files 
 40   
 41     --moltemplate:        the template(s) are in a mol/SD file, new depiction(s) 
 42                           will not be generated unless the --redraw argument is also 
 43                           provided 
 44   
 45     --smilesFileTemplate: extract the template(s) from a SMILES file instead of  
 46                           expecting SMILES on the command line. 
 47   
 48     --redraw:             generate a new depiction for the molecular template(s) 
 49   
 50     --useall: 
 51       or 
 52     --useallmatches:      generate a product for each possible match of the attachment 
 53                           pattern to each sidechain. If this is not provided, the first 
 54                           match (not canonically defined) will be used. 
 55   
 56     --force:              by default, the program prompts the user if the library is  
 57                           going to contain more than 1000 compounds. This argument  
 58                           disables the prompt. 
 59      
 60     --templateSmarts="smarts":  provides a space-delimited list containing the SMARTS  
 61                                 patterns to be used to recognize attachment points in 
 62                                                   the template 
 63                
 64     --autoNames:          when set this toggle causes the resulting compounds to be named 
 65                           based on there sequence id in the file, e.g.  
 66                           "TemplateEnum: Mol_1", "TemplateEnum: Mol_2", etc. 
 67                           otherwise the names of the template and building blocks (from 
 68                           the input files) will be combined to form a name for each 
 69                           product molecule. 
 70   
 71     --3D :                Generate 3d coordinates for the product molecules instead of 2d coordinates, 
 72                           requires the --moltemplate option 
 73   
 74     --tether :            refine the 3d conformations using a tethered minimization 
 75   
 76   
 77  """ 
78 -def Usage():
79 print(_usage, file=sys.stderr) 80 sys.exit(-1)
81 82 #pylint: disable=C0111,C0103,C0322,C0324,C0323 83 nDumped=0
84 -def _exploder(mol,depth,sidechains,core,chainIndices,autoNames=True,templateName='', 85 resetCounter=True,do3D=False,useTethers=False):
86 global nDumped 87 if resetCounter: 88 nDumped=0 89 ourChains = sidechains[depth] 90 patt = '[%d*]'%(depth+1) 91 patt = Chem.MolFromSmiles(patt) 92 for i,(chainIdx,chain) in enumerate(ourChains): 93 tchain = chainIndices[:] 94 tchain.append((i,chainIdx)) 95 rs = Chem.ReplaceSubstructs(mol,patt,chain,replaceAll=True) 96 if rs: 97 r = rs[0] 98 if depth<len(sidechains)-1: 99 for entry in _exploder(r,depth+1,sidechains,core,tchain, 100 autoNames=autoNames,templateName=templateName, 101 resetCounter=0,do3D=do3D,useTethers=useTethers): 102 yield entry 103 else: 104 try: 105 Chem.SanitizeMol(r) 106 except ValueError: 107 import traceback 108 traceback.print_exc() 109 continue 110 if not do3D: 111 if r.HasSubstructMatch(core): 112 try: 113 AlignDepict(r,core) 114 except: 115 import traceback 116 traceback.print_exc() 117 print(Chem.MolToSmiles(r), file=sys.stderr) 118 else: 119 print('>>> no match', file=sys.stderr) 120 AllChem.Compute2DCoords(r) 121 else: 122 r = Chem.AddHs(r) 123 AllChem.ConstrainedEmbed(r,core,useTethers) 124 Chem.Kekulize(r) 125 if autoNames: 126 tName = "TemplateEnum: Mol_%d"%(nDumped+1) 127 else: 128 tName = templateName 129 for bbI,bb in enumerate(tchain): 130 bbMol = sidechains[bbI][bb[0]][1] 131 if bbMol.HasProp('_Name'): 132 bbNm = bbMol.GetProp('_Name') 133 else: 134 bbNm = str(bb[1]) 135 tName += '_' + bbNm 136 137 r.SetProp("_Name",tName) 138 r.SetProp('seq_num',str(nDumped+1)) 139 r.SetProp('reagent_indices','_'.join([str(x[1]) for x in tchain])) 140 for bbI,bb in enumerate(tchain): 141 bbMol = sidechains[bbI][bb[0]][1] 142 if bbMol.HasProp('_Name'): 143 bbNm = bbMol.GetProp('_Name') 144 else: 145 bbNm = str(bb[1]) 146 r.SetProp('building_block_%d'%(bbI+1),bbNm) 147 for propN in bbMol.GetPropNames(): 148 r.SetProp('building_block_%d_%s'%(bbI+1,propN),bbMol.GetProp(propN)) 149 nDumped += 1 150 if not nDumped%100: 151 logger.info('Done %d molecules'%nDumped) 152 yield r
153
154 -def Explode(template,sidechains,outF,autoNames=True,do3D=False,useTethers=False):
155 chainIndices=[] 156 core = Chem.DeleteSubstructs(template,Chem.MolFromSmiles('[*]')) 157 try: 158 templateName = template.GetProp('_Name') 159 except KeyError: 160 templateName="template" 161 for mol in _exploder(template,0,sidechains,core,chainIndices,autoNames=autoNames, 162 templateName=templateName,do3D=do3D,useTethers=useTethers): 163 outF.write(Chem.MolToMolBlock(mol)) 164 for pN in mol.GetPropNames(): 165 print('> <%s>\n%s\n'%(pN,mol.GetProp(pN)), file=outF) 166 print('$$$$', file=outF)
167
168 -def MoveDummyNeighborsToBeginning(mol,useAll=False):
169 dummyPatt=Chem.MolFromSmiles('[*]') 170 matches = mol.GetSubstructMatches(dummyPatt) 171 res = [] 172 for match in matches: 173 matchIdx = match[0] 174 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=matchIdx) 175 entry = Chem.MolFromSmiles(smi) 176 # entry now has [*] as atom 0 and the neighbor 177 # as atom 1. Cleave the [*]: 178 entry = Chem.DeleteSubstructs(entry,dummyPatt) 179 for propN in mol.GetPropNames(): 180 entry.SetProp(propN,mol.GetProp(propN)); 181 182 # now we have a molecule with the atom to be joined 183 # in position zero; Keep that: 184 res.append(entry) 185 if not useAll: 186 break 187 return res
188
189 -def ConstructSidechains(suppl,sma=None,replace=True,useAll=False):
190 if sma: 191 try: 192 patt = Chem.MolFromSmarts(sma) 193 except: 194 logger.error('could not construct pattern from smarts: %s'%sma, 195 exc_info=True) 196 return None 197 else: 198 patt = None 199 200 if replace: 201 replacement = Chem.MolFromSmiles('[*]') 202 203 res = [] 204 for idx,mol in enumerate(suppl): 205 if not mol: 206 continue 207 if patt: 208 if not mol.HasSubstructMatch(patt): 209 logger.warning('The substructure pattern did not match sidechain %d. This may result in errors.'%(idx+1)) 210 if replace: 211 tmp = list(Chem.ReplaceSubstructs(mol,patt,replacement)) 212 if not useAll: tmp = [tmp[0]] 213 for i,entry in enumerate(tmp): 214 entry = MoveDummyNeighborsToBeginning(entry) 215 if not entry: 216 continue 217 entry = entry[0] 218 219 for propN in mol.GetPropNames(): 220 entry.SetProp(propN,mol.GetProp(propN)); 221 # now we have a molecule with the atom to be joined 222 # in position zero; Keep that: 223 tmp[i] = (idx+1,entry) 224 else: 225 # no replacement, use the pattern to reorder 226 # atoms only: 227 matches = mol.GetSubstructMatches(patt) 228 if matches: 229 tmp = [] 230 for match in matches: 231 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=match[0]) 232 entry = Chem.MolFromSmiles(smi) 233 for propN in mol.GetPropNames(): 234 entry.SetProp(propN,mol.GetProp(propN)); 235 236 # now we have a molecule with the atom to be joined 237 # in position zero; Keep that: 238 tmp.append((idx+1,entry)) 239 else: 240 tmp = None 241 else: 242 tmp = [(idx+1,mol)] 243 if tmp: 244 res.extend(tmp) 245 return res
246 247 if __name__=='__main__': 248 import getopt 249 print(_greet, file=sys.stderr) 250 251 try: 252 args,extras = getopt.getopt(sys.argv[1:],'o:h',[ 253 'sdf', 254 'moltemplate', 255 'molTemplate', 256 'smilesFileTemplate', 257 'templateSmarts=', 258 'redraw', 259 'force', 260 'useall', 261 'useallmatches', 262 'autoNames', 263 '3D','3d', 264 'tethers', 265 'tether', 266 ]) 267 except: 268 import traceback 269 traceback.print_exc() 270 Usage() 271 272 if len(extras)<3: 273 Usage() 274 275 tooLong=1000 276 sdLigands=False 277 molTemplate=False 278 redrawTemplate=False 279 outF=None 280 forceIt=False 281 useAll=False 282 templateSmarts=[] 283 smilesFileTemplate=False 284 autoNames=False 285 do3D=False 286 useTethers=False 287 for arg,val in args: 288 if arg=='-o': 289 outF=val 290 elif arg=='--sdf': 291 sdLigands=True 292 elif arg in ('--moltemplate','--molTemplate'): 293 molTemplate=True 294 elif arg=='--smilesFileTemplate': 295 smilesFileTemplate=True 296 elif arg=='--templateSmarts': 297 templateSmarts = val 298 elif arg=='--redraw': 299 redrawTemplate=True 300 elif arg=='--force': 301 forceIt=True 302 elif arg=='--autoNames': 303 autoNames=True 304 elif arg in ('--useall','--useallmatches'): 305 useAll=True 306 elif arg in ('--3D','--3d'): 307 do3D=True 308 elif arg in ('--tethers','--tether'): 309 useTethers=True 310 elif arg=='-h': 311 Usage() 312 sys.exit(0) 313 314 if do3D: 315 if not molTemplate: 316 raise ValueError('the --3D option is only useable in combination with --moltemplate') 317 if redrawTemplate: 318 logger.warning('--redrawTemplate does not make sense in combination with --molTemplate. removing it') 319 redrawTemplate=False 320 321 322 if templateSmarts: 323 splitL = templateSmarts.split(' ') #pylint: disable=E1103 324 templateSmarts = [] 325 for i,sma in enumerate(splitL): 326 patt = Chem.MolFromSmarts(sma) 327 if not patt: 328 raise ValueError('could not convert smarts "%s" to a query'%sma) 329 if i>=4: 330 i+=1 331 replace = Chem.MolFromSmiles('[%d*]'%(i+1)) 332 templateSmarts.append((patt,replace)) 333 334 if molTemplate: 335 removeHs = not do3D 336 try: 337 s = Chem.SDMolSupplier(extras[0],removeHs=removeHs) 338 templates = [x for x in s] 339 except: 340 logger.error('Could not construct templates from input file: %s'%extras[0], 341 exc_info=True) 342 sys.exit(1) 343 if redrawTemplate: 344 for template in templates: 345 AllChem.Compute2DCoords(template) 346 else: 347 if not smilesFileTemplate: 348 try: 349 templates = [Chem.MolFromSmiles(extras[0])] 350 except: 351 logger.error('Could not construct template from smiles: %s'%extras[0], 352 exc_info=True) 353 sys.exit(1) 354 else: 355 try: 356 s = Chem.SmilesMolSupplier(extras[0],titleLine=False) 357 templates = [x for x in s] 358 except: 359 logger.error('Could not construct templates from input file: %s'%extras[0], 360 exc_info=True) 361 sys.exit(1) 362 for template in templates: 363 AllChem.Compute2DCoords(template) 364 if templateSmarts: 365 finalTs = [] 366 for i,template in enumerate(templates): 367 for j,(patt,replace) in enumerate(templateSmarts): 368 if not template.HasSubstructMatch(patt): 369 logger.error('template %d did not match sidechain pattern %d, skipping it'%(i+1,j+1)) 370 template =None 371 break 372 template = Chem.ReplaceSubstructs(template,patt,replace)[0] 373 if template: 374 Chem.SanitizeMol(template) 375 finalTs.append(template) 376 templates = finalTs 377 378 sidechains = [] 379 pos = 1 380 while pos<len(extras): 381 if extras[pos]=='-r': 382 replaceIt=False 383 pos += 1 384 else: 385 replaceIt=True 386 if extras[pos]=='-n': 387 sma = None 388 else: 389 sma = extras[pos] 390 pos += 1 391 try: 392 dat = extras[pos] 393 except IndexError: 394 logger.error('missing a sidechain filename') 395 sys.exit(-1) 396 pos += 1 397 if sdLigands: 398 try: 399 suppl = Chem.SDMolSupplier(dat) 400 except: 401 logger.error('could not construct supplier from SD file: %s'%dat, 402 exc_info=True) 403 suppl = [] 404 else: 405 tmpF = file(dat,'r') 406 inL = tmpF.readline() 407 if len(inL.split(' '))<2: 408 nmCol=-1 409 else: 410 nmCol=1 411 try: 412 suppl = Chem.SmilesMolSupplier(dat,nameColumn=nmCol) 413 except: 414 logger.error('could not construct supplier from smiles file: %s'%dat, 415 exc_info=True) 416 suppl = [] 417 suppl = [x for x in suppl] 418 chains = ConstructSidechains(suppl,sma=sma,replace=replaceIt,useAll=useAll) 419 if chains: 420 sidechains.append(chains) 421 count = 1 422 for chain in sidechains: 423 count *= len(chain) 424 count *= len(templates) 425 if not sidechains or not count: 426 print("No molecules to be generated.", file=sys.stderr) 427 sys.exit(0) 428 429 if not forceIt and count>tooLong: 430 print("This will generate %d molecules."%count, file=sys.stderr) 431 print("Continue anyway? [no] ", file=sys.stderr, end='') 432 sys.stderr.flush() 433 ans = sys.stdin.readline().strip() 434 if ans not in ('y','yes','Y','YES'): 435 sys.exit(0) 436 437 if outF and outF!="-": 438 try: 439 outF = file(outF,'w+') 440 except IOError: 441 logger.error('could not open file %s for writing'%(outF), 442 exc_info=True) 443 else: 444 outF = sys.stdout 445 446 for template in templates: 447 Explode(template,sidechains,outF,autoNames=autoNames,do3D=do3D, 448 useTethers=useTethers) 449