1
2
3
4
5
6 from __future__ import print_function
7 from rdkit import RDLogger as logging
8 logger = logging.logger()
9 logger.setLevel(logging.INFO)
10 from rdkit import Chem
11 from rdkit.Chem import Crippen
12 from rdkit.Chem import AllChem
13 from rdkit.Chem.ChemUtils.AlignDepict import AlignDepict
14
15 import sys
16 _version="0.8.0"
17 _greet="This is TemplateExpand version %s"%_version
18
19 _usage="""
20 Usage: TemplateExpand [options] template <sidechains>
21
22 Unless otherwise indicated, the template and sidechains are assumed to be
23 Smiles
24
25 Each sidechain entry should be:
26 [-r] SMARTS filename
27 The SMARTS pattern is used to recognize the attachment point,
28 if the -r argument is not provided, then atoms matching the pattern
29 will be removed from the sidechains.
30 or
31 -n filename
32 where the attachment atom is the first atom in each molecule
33 The filename provides the list of potential sidechains.
34
35 options:
36 -o filename.sdf: provides the name of the output file, otherwise
37 stdout is used
38
39 --sdf : expect the sidechains to be in SD files
40
41 --moltemplate: the template(s) are in a mol/SD file, new depiction(s)
42 will not be generated unless the --redraw argument is also
43 provided
44
45 --smilesFileTemplate: extract the template(s) from a SMILES file instead of
46 expecting SMILES on the command line.
47
48 --redraw: generate a new depiction for the molecular template(s)
49
50 --useall:
51 or
52 --useallmatches: generate a product for each possible match of the attachment
53 pattern to each sidechain. If this is not provided, the first
54 match (not canonically defined) will be used.
55
56 --force: by default, the program prompts the user if the library is
57 going to contain more than 1000 compounds. This argument
58 disables the prompt.
59
60 --templateSmarts="smarts": provides a space-delimited list containing the SMARTS
61 patterns to be used to recognize attachment points in
62 the template
63
64 --autoNames: when set this toggle causes the resulting compounds to be named
65 based on there sequence id in the file, e.g.
66 "TemplateEnum: Mol_1", "TemplateEnum: Mol_2", etc.
67 otherwise the names of the template and building blocks (from
68 the input files) will be combined to form a name for each
69 product molecule.
70
71 --3D : Generate 3d coordinates for the product molecules instead of 2d coordinates,
72 requires the --moltemplate option
73
74 --tether : refine the 3d conformations using a tethered minimization
75
76
77 """
79 print(_usage, file=sys.stderr)
80 sys.exit(-1)
81
82
83 nDumped=0
84 -def _exploder(mol,depth,sidechains,core,chainIndices,autoNames=True,templateName='',
85 resetCounter=True,do3D=False,useTethers=False):
86 global nDumped
87 if resetCounter:
88 nDumped=0
89 ourChains = sidechains[depth]
90 patt = '[%d*]'%(depth+1)
91 patt = Chem.MolFromSmiles(patt)
92 for i,(chainIdx,chain) in enumerate(ourChains):
93 tchain = chainIndices[:]
94 tchain.append((i,chainIdx))
95 rs = Chem.ReplaceSubstructs(mol,patt,chain,replaceAll=True)
96 if rs:
97 r = rs[0]
98 if depth<len(sidechains)-1:
99 for entry in _exploder(r,depth+1,sidechains,core,tchain,
100 autoNames=autoNames,templateName=templateName,
101 resetCounter=0,do3D=do3D,useTethers=useTethers):
102 yield entry
103 else:
104 try:
105 Chem.SanitizeMol(r)
106 except ValueError:
107 import traceback
108 traceback.print_exc()
109 continue
110 if not do3D:
111 if r.HasSubstructMatch(core):
112 try:
113 AlignDepict(r,core)
114 except:
115 import traceback
116 traceback.print_exc()
117 print(Chem.MolToSmiles(r), file=sys.stderr)
118 else:
119 print('>>> no match', file=sys.stderr)
120 AllChem.Compute2DCoords(r)
121 else:
122 r = Chem.AddHs(r)
123 AllChem.ConstrainedEmbed(r,core,useTethers)
124 Chem.Kekulize(r)
125 if autoNames:
126 tName = "TemplateEnum: Mol_%d"%(nDumped+1)
127 else:
128 tName = templateName
129 for bbI,bb in enumerate(tchain):
130 bbMol = sidechains[bbI][bb[0]][1]
131 if bbMol.HasProp('_Name'):
132 bbNm = bbMol.GetProp('_Name')
133 else:
134 bbNm = str(bb[1])
135 tName += '_' + bbNm
136
137 r.SetProp("_Name",tName)
138 r.SetProp('seq_num',str(nDumped+1))
139 r.SetProp('reagent_indices','_'.join([str(x[1]) for x in tchain]))
140 for bbI,bb in enumerate(tchain):
141 bbMol = sidechains[bbI][bb[0]][1]
142 if bbMol.HasProp('_Name'):
143 bbNm = bbMol.GetProp('_Name')
144 else:
145 bbNm = str(bb[1])
146 r.SetProp('building_block_%d'%(bbI+1),bbNm)
147 for propN in bbMol.GetPropNames():
148 r.SetProp('building_block_%d_%s'%(bbI+1,propN),bbMol.GetProp(propN))
149 nDumped += 1
150 if not nDumped%100:
151 logger.info('Done %d molecules'%nDumped)
152 yield r
153
154 -def Explode(template,sidechains,outF,autoNames=True,do3D=False,useTethers=False):
155 chainIndices=[]
156 core = Chem.DeleteSubstructs(template,Chem.MolFromSmiles('[*]'))
157 try:
158 templateName = template.GetProp('_Name')
159 except KeyError:
160 templateName="template"
161 for mol in _exploder(template,0,sidechains,core,chainIndices,autoNames=autoNames,
162 templateName=templateName,do3D=do3D,useTethers=useTethers):
163 outF.write(Chem.MolToMolBlock(mol))
164 for pN in mol.GetPropNames():
165 print('> <%s>\n%s\n'%(pN,mol.GetProp(pN)), file=outF)
166 print('$$$$', file=outF)
167
169 dummyPatt=Chem.MolFromSmiles('[*]')
170 matches = mol.GetSubstructMatches(dummyPatt)
171 res = []
172 for match in matches:
173 matchIdx = match[0]
174 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=matchIdx)
175 entry = Chem.MolFromSmiles(smi)
176
177
178 entry = Chem.DeleteSubstructs(entry,dummyPatt)
179 for propN in mol.GetPropNames():
180 entry.SetProp(propN,mol.GetProp(propN));
181
182
183
184 res.append(entry)
185 if not useAll:
186 break
187 return res
188
190 if sma:
191 try:
192 patt = Chem.MolFromSmarts(sma)
193 except:
194 logger.error('could not construct pattern from smarts: %s'%sma,
195 exc_info=True)
196 return None
197 else:
198 patt = None
199
200 if replace:
201 replacement = Chem.MolFromSmiles('[*]')
202
203 res = []
204 for idx,mol in enumerate(suppl):
205 if not mol:
206 continue
207 if patt:
208 if not mol.HasSubstructMatch(patt):
209 logger.warning('The substructure pattern did not match sidechain %d. This may result in errors.'%(idx+1))
210 if replace:
211 tmp = list(Chem.ReplaceSubstructs(mol,patt,replacement))
212 if not useAll: tmp = [tmp[0]]
213 for i,entry in enumerate(tmp):
214 entry = MoveDummyNeighborsToBeginning(entry)
215 if not entry:
216 continue
217 entry = entry[0]
218
219 for propN in mol.GetPropNames():
220 entry.SetProp(propN,mol.GetProp(propN));
221
222
223 tmp[i] = (idx+1,entry)
224 else:
225
226
227 matches = mol.GetSubstructMatches(patt)
228 if matches:
229 tmp = []
230 for match in matches:
231 smi = Chem.MolToSmiles(mol,True,rootedAtAtom=match[0])
232 entry = Chem.MolFromSmiles(smi)
233 for propN in mol.GetPropNames():
234 entry.SetProp(propN,mol.GetProp(propN));
235
236
237
238 tmp.append((idx+1,entry))
239 else:
240 tmp = None
241 else:
242 tmp = [(idx+1,mol)]
243 if tmp:
244 res.extend(tmp)
245 return res
246
247 if __name__=='__main__':
248 import getopt
249 print(_greet, file=sys.stderr)
250
251 try:
252 args,extras = getopt.getopt(sys.argv[1:],'o:h',[
253 'sdf',
254 'moltemplate',
255 'molTemplate',
256 'smilesFileTemplate',
257 'templateSmarts=',
258 'redraw',
259 'force',
260 'useall',
261 'useallmatches',
262 'autoNames',
263 '3D','3d',
264 'tethers',
265 'tether',
266 ])
267 except:
268 import traceback
269 traceback.print_exc()
270 Usage()
271
272 if len(extras)<3:
273 Usage()
274
275 tooLong=1000
276 sdLigands=False
277 molTemplate=False
278 redrawTemplate=False
279 outF=None
280 forceIt=False
281 useAll=False
282 templateSmarts=[]
283 smilesFileTemplate=False
284 autoNames=False
285 do3D=False
286 useTethers=False
287 for arg,val in args:
288 if arg=='-o':
289 outF=val
290 elif arg=='--sdf':
291 sdLigands=True
292 elif arg in ('--moltemplate','--molTemplate'):
293 molTemplate=True
294 elif arg=='--smilesFileTemplate':
295 smilesFileTemplate=True
296 elif arg=='--templateSmarts':
297 templateSmarts = val
298 elif arg=='--redraw':
299 redrawTemplate=True
300 elif arg=='--force':
301 forceIt=True
302 elif arg=='--autoNames':
303 autoNames=True
304 elif arg in ('--useall','--useallmatches'):
305 useAll=True
306 elif arg in ('--3D','--3d'):
307 do3D=True
308 elif arg in ('--tethers','--tether'):
309 useTethers=True
310 elif arg=='-h':
311 Usage()
312 sys.exit(0)
313
314 if do3D:
315 if not molTemplate:
316 raise ValueError('the --3D option is only useable in combination with --moltemplate')
317 if redrawTemplate:
318 logger.warning('--redrawTemplate does not make sense in combination with --molTemplate. removing it')
319 redrawTemplate=False
320
321
322 if templateSmarts:
323 splitL = templateSmarts.split(' ')
324 templateSmarts = []
325 for i,sma in enumerate(splitL):
326 patt = Chem.MolFromSmarts(sma)
327 if not patt:
328 raise ValueError('could not convert smarts "%s" to a query'%sma)
329 if i>=4:
330 i+=1
331 replace = Chem.MolFromSmiles('[%d*]'%(i+1))
332 templateSmarts.append((patt,replace))
333
334 if molTemplate:
335 removeHs = not do3D
336 try:
337 s = Chem.SDMolSupplier(extras[0],removeHs=removeHs)
338 templates = [x for x in s]
339 except:
340 logger.error('Could not construct templates from input file: %s'%extras[0],
341 exc_info=True)
342 sys.exit(1)
343 if redrawTemplate:
344 for template in templates:
345 AllChem.Compute2DCoords(template)
346 else:
347 if not smilesFileTemplate:
348 try:
349 templates = [Chem.MolFromSmiles(extras[0])]
350 except:
351 logger.error('Could not construct template from smiles: %s'%extras[0],
352 exc_info=True)
353 sys.exit(1)
354 else:
355 try:
356 s = Chem.SmilesMolSupplier(extras[0],titleLine=False)
357 templates = [x for x in s]
358 except:
359 logger.error('Could not construct templates from input file: %s'%extras[0],
360 exc_info=True)
361 sys.exit(1)
362 for template in templates:
363 AllChem.Compute2DCoords(template)
364 if templateSmarts:
365 finalTs = []
366 for i,template in enumerate(templates):
367 for j,(patt,replace) in enumerate(templateSmarts):
368 if not template.HasSubstructMatch(patt):
369 logger.error('template %d did not match sidechain pattern %d, skipping it'%(i+1,j+1))
370 template =None
371 break
372 template = Chem.ReplaceSubstructs(template,patt,replace)[0]
373 if template:
374 Chem.SanitizeMol(template)
375 finalTs.append(template)
376 templates = finalTs
377
378 sidechains = []
379 pos = 1
380 while pos<len(extras):
381 if extras[pos]=='-r':
382 replaceIt=False
383 pos += 1
384 else:
385 replaceIt=True
386 if extras[pos]=='-n':
387 sma = None
388 else:
389 sma = extras[pos]
390 pos += 1
391 try:
392 dat = extras[pos]
393 except IndexError:
394 logger.error('missing a sidechain filename')
395 sys.exit(-1)
396 pos += 1
397 if sdLigands:
398 try:
399 suppl = Chem.SDMolSupplier(dat)
400 except:
401 logger.error('could not construct supplier from SD file: %s'%dat,
402 exc_info=True)
403 suppl = []
404 else:
405 tmpF = file(dat,'r')
406 inL = tmpF.readline()
407 if len(inL.split(' '))<2:
408 nmCol=-1
409 else:
410 nmCol=1
411 try:
412 suppl = Chem.SmilesMolSupplier(dat,nameColumn=nmCol)
413 except:
414 logger.error('could not construct supplier from smiles file: %s'%dat,
415 exc_info=True)
416 suppl = []
417 suppl = [x for x in suppl]
418 chains = ConstructSidechains(suppl,sma=sma,replace=replaceIt,useAll=useAll)
419 if chains:
420 sidechains.append(chains)
421 count = 1
422 for chain in sidechains:
423 count *= len(chain)
424 count *= len(templates)
425 if not sidechains or not count:
426 print("No molecules to be generated.", file=sys.stderr)
427 sys.exit(0)
428
429 if not forceIt and count>tooLong:
430 print("This will generate %d molecules."%count, file=sys.stderr)
431 print("Continue anyway? [no] ", file=sys.stderr, end='')
432 sys.stderr.flush()
433 ans = sys.stdin.readline().strip()
434 if ans not in ('y','yes','Y','YES'):
435 sys.exit(0)
436
437 if outF and outF!="-":
438 try:
439 outF = file(outF,'w+')
440 except IOError:
441 logger.error('could not open file %s for writing'%(outF),
442 exc_info=True)
443 else:
444 outF = sys.stdout
445
446 for template in templates:
447 Explode(template,sidechains,outF,autoNames=autoNames,do3D=do3D,
448 useTethers=useTethers)
449