1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 """ Implementation of the RECAP algorithm from Lewell et al. JCICS *38* 511-522 (1998)
33
34 The published algorithm is implemented more or less without
35 modification. The results are returned as a hierarchy of nodes instead
36 of just as a set of fragments. The hope is that this will allow a bit
37 more flexibility in working with the results.
38
39 For example:
40 >>> m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncc(OC)cc1')
41 >>> res = Recap.RecapDecompose(m)
42 >>> res
43 <Chem.Recap.RecapHierarchyNode object at 0x00CDB5D0>
44 >>> res.children.keys()
45 ['[*]C1CC1', '[*]c1ccccc1-c1ncc(OC)cc1', '[*]c1ccc(OC)cn1', '[*]c1ccccc1OC1CC1']
46 >>> res.GetAllChildren().keys()
47 ['[*]c1ccccc1[*]', '[*]C1CC1', '[*]c1ccccc1-c1ncc(OC)cc1', '[*]c1ccc(OC)cn1', '[*]c1ccccc1OC1CC1']
48
49
50 To get the standard set of RECAP results, use GetLeaves():
51 >>> leaves=res.GetLeaves()
52 >>> leaves.keys()
53 ['[*]c1ccccc1[*]', '[*]c1ccc(OC)cn1', '[*]C1CC1']
54 >>> leaf = leaves['[*]C1CC1']
55 >>> leaf.mol
56 <Chem.rdchem.Mol object at 0x00CBE0F0>
57
58
59 """
60 import sys
61 import weakref
62 from rdkit import Chem
63 from rdkit.Chem import rdChemReactions as Reactions
64 from rdkit.six import iterkeys, iteritems, next
65
66
67 reactionDefs = (
68 "[#7;+0;D2,D3:1]!@C(!@=O)!@[#7;+0;D2,D3:2]>>[*][#7:1].[#7:2][*]",
69
70 "[C;!$(C([#7])[#7]):1](=!@[O:2])!@[#7;+0;!D1:3]>>[*][C:1]=[O:2].[*][#7:3]",
71
72 "[C:1](=!@[O:2])!@[O;+0:3]>>[*][C:1]=[O:2].[O:3][*]",
73
74 "[N;!D1;+0;!$(N-C=[#7,#8,#15,#16])](-!@[*:1])-!@[*:2]>>[*][*:1].[*:2][*]",
75
76
77
78 "[#7;R;D3;+0:1]-!@[*:2]>>[*][#7:1].[*:2][*]",
79
80 "[#6:1]-!@[O;+0]-!@[#6:2]>>[#6:1][*].[*][#6:2]",
81
82 "[C:1]=!@[C:2]>>[C:1][*].[*][C:2]",
83
84 "[n;+0:1]-!@[C:2]>>[n:1][*].[C:2][*]",
85
86 "[O:3]=[C:4]-@[N;+0:1]-!@[C:2]>>[O:3]=[C:4]-[N:1][*].[C:2][*]",
87
88 "[c:1]-!@[c:2]>>[c:1][*].[*][c:2]",
89
90 "[n;+0:1]-!@[c:2]>>[n:1][*].[*][c:2]",
91
92 "[#7;+0;D2,D3:1]-!@[S:2](=[O:3])=[O:4]>>[#7:1][*].[*][S:2](=[O:3])=[O:4]",
93 )
94
95 reactions = tuple([Reactions.ReactionFromSmarts(x) for x in reactionDefs])
96
97
99 """ This class is used to hold the Recap hiearchy
100 """
101 mol=None
102 children=None
103 parents=None
104 smiles = None
109
111 " returns a dictionary, keyed by SMILES, of children "
112 res = {}
113 for smi,child in iteritems(self.children):
114 res[smi] = child
115 child._gacRecurse(res,terminalOnly=False)
116 return res
117
119 " returns a dictionary, keyed by SMILES, of leaf (terminal) nodes "
120 res = {}
121 for smi,child in iteritems(self.children):
122 if not len(child.children):
123 res[smi] = child
124 else:
125 child._gacRecurse(res,terminalOnly=True)
126 return res
127
129 """ returns all the nodes in the hierarchy tree that contain this
130 node as a child
131 """
132 if not self.parents:
133 res = [self]
134 else:
135 res = []
136 for p in self.parents.values():
137 for uP in p.getUltimateParents():
138 if uP not in res:
139 res.append(uP)
140 return res
141
143 for smi,child in iteritems(self.children):
144 if not terminalOnly or not len(child.children):
145 res[smi] = child
146 child._gacRecurse(res,terminalOnly=terminalOnly)
147
152
153
154 -def RecapDecompose(mol,allNodes=None,minFragmentSize=0,onlyUseReactions=None):
155 """ returns the recap decomposition for a molecule """
156 mSmi = Chem.MolToSmiles(mol,1)
157
158 if allNodes is None:
159 allNodes={}
160 if mSmi in allNodes:
161 return allNodes[mSmi]
162
163 res = RecapHierarchyNode(mol)
164 res.smiles =mSmi
165 activePool={mSmi:res}
166 allNodes[mSmi]=res
167 while activePool:
168 nSmi = next(iterkeys(activePool))
169 node = activePool.pop(nSmi)
170 if not node.mol: continue
171 for rxnIdx,reaction in enumerate(reactions):
172 if onlyUseReactions and rxnIdx not in onlyUseReactions:
173 continue
174
175
176 ps = reaction.RunReactants((node.mol,))
177
178 if ps:
179 for prodSeq in ps:
180 seqOk=True
181
182
183 tSeq = [(prod.GetNumAtoms(onlyExplicit=True),idx) for idx,prod in enumerate(prodSeq)]
184 tSeq.sort()
185 ts=[(x,prodSeq[y]) for x,y in tSeq]
186 prodSeq=ts
187 for nats,prod in prodSeq:
188 try:
189 Chem.SanitizeMol(prod)
190 except:
191 continue
192 pSmi = Chem.MolToSmiles(prod,1)
193 if minFragmentSize>0:
194 nDummies = pSmi.count('*')
195 if nats-nDummies<minFragmentSize:
196 seqOk=False
197 break
198
199
200 elif pSmi.replace('[*]','').replace('()','') in ('','C','CC','CCC'):
201 seqOk=False
202 break
203 prod.pSmi = pSmi
204 if seqOk:
205 for nats,prod in prodSeq:
206 pSmi = prod.pSmi
207
208 if not pSmi in allNodes:
209 pNode = RecapHierarchyNode(prod)
210 pNode.smiles=pSmi
211 pNode.parents[nSmi]=weakref.proxy(node)
212 node.children[pSmi]=pNode
213 activePool[pSmi] = pNode
214 allNodes[pSmi]=pNode
215 else:
216 pNode=allNodes[pSmi]
217 pNode.parents[nSmi]=weakref.proxy(node)
218 node.children[pSmi]=pNode
219
220 return res
221
222
223
224
225 if __name__=='__main__':
226 import unittest
229 m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncc(OC)cc1')
230 res = RecapDecompose(m)
231 self.assertTrue(res)
232 self.assertTrue(len(res.children.keys())==4)
233 self.assertTrue(len(res.GetAllChildren().keys())==5)
234 self.assertTrue(len(res.GetLeaves().keys())==3)
241 allNodes={}
242 m = Chem.MolFromSmiles('c1ccccc1-c1ncccc1')
243 res = RecapDecompose(m,allNodes=allNodes)
244 self.assertTrue(res)
245 self.assertTrue(len(res.children.keys())==2)
246 self.assertTrue(len(allNodes.keys())==3)
247
248 m = Chem.MolFromSmiles('COc1ccccc1-c1ncccc1')
249 res = RecapDecompose(m,allNodes=allNodes)
250 self.assertTrue(res)
251 self.assertTrue(len(res.children.keys())==2)
252
253 self.assertTrue(len(allNodes.keys())==5)
254 self.assertTrue('[*]c1ccccc1OC' in allNodes)
255 self.assertTrue('[*]c1ccccc1' in allNodes)
256
257 m = Chem.MolFromSmiles('C1CC1Oc1ccccc1-c1ncccc1')
258 res = RecapDecompose(m,allNodes=allNodes)
259 self.assertTrue(res)
260 self.assertTrue(len(res.children.keys())==4)
261 self.assertTrue(len(allNodes.keys())==10)
262
264 m = Chem.MolFromSmiles('c1ccccc1OC(Oc1ccccc1)Oc1ccccc1')
265 res = RecapDecompose(m)
266 self.assertTrue(res)
267 self.assertTrue(len(res.GetLeaves())==2)
268 ks = res.GetLeaves().keys()
269 self.assertFalse('[*]C([*])[*]' in ks)
270 self.assertTrue('[*]c1ccccc1' in ks)
271 self.assertTrue('[*]C([*])Oc1ccccc1' in ks)
272
274 m = Chem.MolFromSmiles('C1CCCCN1CCCC')
275 res = RecapDecompose(m)
276 self.assertTrue(res)
277 self.assertTrue(len(res.GetLeaves())==2)
278 ks = res.GetLeaves().keys()
279 self.assertTrue('[*]N1CCCCC1' in ks)
280 self.assertTrue('[*]CCCC' in ks)
281
283 m = Chem.MolFromSmiles('CCCOCCC')
284 res = RecapDecompose(m)
285 self.assertTrue(res)
286 self.assertTrue(res.children=={})
287 res = RecapDecompose(m,minFragmentSize=3)
288 self.assertTrue(res)
289 self.assertTrue(len(res.GetLeaves())==1)
290 ks = res.GetLeaves().keys()
291 self.assertTrue('[*]CCC' in ks)
292
293 m = Chem.MolFromSmiles('CCCOCC')
294 res = RecapDecompose(m,minFragmentSize=3)
295 self.assertTrue(res)
296 self.assertTrue(res.children=={})
297
298 m = Chem.MolFromSmiles('CCCOCCOC')
299 res = RecapDecompose(m,minFragmentSize=2)
300 self.assertTrue(res)
301 self.assertTrue(len(res.GetLeaves())==2)
302 ks = res.GetLeaves().keys()
303 self.assertTrue('[*]CCC' in ks)
304 ks = res.GetLeaves().keys()
305 self.assertTrue('[*]CCOC' in ks)
306
308 m = Chem.MolFromSmiles('C1CC1C(=O)NC1OC1')
309 res = RecapDecompose(m,onlyUseReactions=[1])
310 self.assertTrue(res)
311 self.assertTrue(len(res.GetLeaves())==2)
312 ks = res.GetLeaves().keys()
313 self.assertTrue('[*]C(=O)C1CC1' in ks)
314 self.assertTrue('[*]NC1CO1' in ks)
315
316 m = Chem.MolFromSmiles('C1CC1C(=O)N(C)C1OC1')
317 res = RecapDecompose(m,onlyUseReactions=[1])
318 self.assertTrue(res)
319 self.assertTrue(len(res.GetLeaves())==2)
320 ks = res.GetLeaves().keys()
321 self.assertTrue('[*]C(=O)C1CC1' in ks)
322 self.assertTrue('[*]N(C)C1CO1' in ks)
323
324 m = Chem.MolFromSmiles('C1CC1C(=O)n1cccc1')
325 res = RecapDecompose(m,onlyUseReactions=[1])
326 self.assertTrue(res)
327 self.assertTrue(len(res.GetLeaves())==2)
328 ks = res.GetLeaves().keys()
329 self.assertTrue('[*]C(=O)C1CC1' in ks)
330 self.assertTrue('[*]n1cccc1' in ks)
331
332 m = Chem.MolFromSmiles('C1CC1C(=O)CC1OC1')
333 res = RecapDecompose(m,onlyUseReactions=[1])
334 self.assertTrue(res)
335 self.assertTrue(len(res.GetLeaves())==0)
336
337 m = Chem.MolFromSmiles('C1CCC(=O)NC1')
338 res = RecapDecompose(m,onlyUseReactions=[1])
339 self.assertTrue(res)
340 self.assertTrue(len(res.GetLeaves())==0)
341
342 m = Chem.MolFromSmiles('CC(=O)NC')
343 res = RecapDecompose(m,onlyUseReactions=[1])
344 self.assertTrue(res)
345 self.assertTrue(len(res.GetLeaves())==2)
346 ks = res.GetLeaves().keys()
347
348 m = Chem.MolFromSmiles('CC(=O)N')
349 res = RecapDecompose(m,onlyUseReactions=[1])
350 self.assertTrue(res)
351 self.assertTrue(len(res.GetLeaves())==0)
352
353 m = Chem.MolFromSmiles('C(=O)NCCNC(=O)CC')
354 res = RecapDecompose(m,onlyUseReactions=[1])
355 self.assertTrue(res)
356 self.assertTrue(len(res.children)==4)
357 self.assertTrue(len(res.GetLeaves())==3)
358
360 m = Chem.MolFromSmiles('C1CC1C(=O)OC1OC1')
361 res = RecapDecompose(m,onlyUseReactions=[2])
362 self.assertTrue(res)
363 self.assertTrue(len(res.GetLeaves())==2)
364 ks = res.GetLeaves().keys()
365 self.assertTrue('[*]C(=O)C1CC1' in ks)
366 self.assertTrue('[*]OC1CO1' in ks)
367
368 m = Chem.MolFromSmiles('C1CC1C(=O)CC1OC1')
369 res = RecapDecompose(m,onlyUseReactions=[2])
370 self.assertTrue(res)
371 self.assertTrue(len(res.GetLeaves())==0)
372
373 m = Chem.MolFromSmiles('C1CCC(=O)OC1')
374 res = RecapDecompose(m,onlyUseReactions=[2])
375 self.assertTrue(res)
376 self.assertTrue(len(res.GetLeaves())==0)
377
379 m = Chem.MolFromSmiles('C1CC1NC(=O)NC1OC1')
380 res = RecapDecompose(m,onlyUseReactions=[0])
381 self.assertTrue(res)
382 self.assertTrue(len(res.GetLeaves())==2)
383 ks = res.GetLeaves().keys()
384 self.assertTrue('[*]NC1CC1' in ks)
385 self.assertTrue('[*]NC1CO1' in ks)
386
387 m = Chem.MolFromSmiles('C1CC1NC(=O)N(C)C1OC1')
388 res = RecapDecompose(m,onlyUseReactions=[0])
389 self.assertTrue(res)
390 self.assertTrue(len(res.GetLeaves())==2)
391 ks = res.GetLeaves().keys()
392 self.assertTrue('[*]NC1CC1' in ks)
393 self.assertTrue('[*]N(C)C1CO1' in ks)
394
395 m = Chem.MolFromSmiles('C1CCNC(=O)NC1C')
396 res = RecapDecompose(m,onlyUseReactions=[0])
397 self.assertTrue(res)
398 self.assertTrue(len(res.GetLeaves())==0)
399
400 m = Chem.MolFromSmiles('c1cccn1C(=O)NC1OC1')
401 res = RecapDecompose(m,onlyUseReactions=[0])
402 self.assertTrue(res)
403 self.assertTrue(len(res.GetLeaves())==2)
404 ks = res.GetLeaves().keys()
405 self.assertTrue('[*]n1cccc1' in ks)
406 self.assertTrue('[*]NC1CO1' in ks)
407
408 m = Chem.MolFromSmiles('c1cccn1C(=O)n1c(C)ccc1')
409 res = RecapDecompose(m,onlyUseReactions=[0])
410 self.assertTrue(res)
411 self.assertTrue(len(res.GetLeaves())==2)
412 ks = res.GetLeaves().keys()
413 self.assertTrue('[*]n1cccc1C' in ks)
414
416 m = Chem.MolFromSmiles('C1CC1N(C1NC1)C1OC1')
417 res = RecapDecompose(m)
418 self.assertTrue(res)
419 self.assertTrue(len(res.GetLeaves())==3)
420 ks = res.GetLeaves().keys()
421 self.assertTrue('[*]C1CC1' in ks)
422 self.assertTrue('[*]C1CO1' in ks)
423 self.assertTrue('[*]C1CN1' in ks)
424
425 m = Chem.MolFromSmiles('c1ccccc1N(C1NC1)C1OC1')
426 res = RecapDecompose(m)
427 self.assertTrue(res)
428 self.assertTrue(len(res.GetLeaves())==3)
429 ks = res.GetLeaves().keys()
430 self.assertTrue('[*]c1ccccc1' in ks)
431 self.assertTrue('[*]C1CO1' in ks)
432 self.assertTrue('[*]C1CN1' in ks)
433
434 m = Chem.MolFromSmiles('c1ccccc1N(c1ncccc1)C1OC1')
435 res = RecapDecompose(m)
436 self.assertTrue(res)
437 self.assertTrue(len(res.GetLeaves())==3)
438 ks = res.GetLeaves().keys()
439 self.assertTrue('[*]c1ccccc1' in ks)
440 self.assertTrue('[*]c1ccccn1' in ks)
441 self.assertTrue('[*]C1CO1' in ks)
442
443 m = Chem.MolFromSmiles('c1ccccc1N(c1ncccc1)c1ccco1')
444 res = RecapDecompose(m)
445 self.assertTrue(res)
446 self.assertTrue(len(res.GetLeaves())==3)
447 ks = res.GetLeaves().keys()
448 self.assertTrue('[*]c1ccccc1' in ks)
449 self.assertTrue('[*]c1ccccn1' in ks)
450 self.assertTrue('[*]c1ccco1' in ks)
451
452 m = Chem.MolFromSmiles('C1CCCCN1C1CC1')
453 res = RecapDecompose(m)
454 self.assertTrue(res)
455 self.assertTrue(len(res.GetLeaves())==2)
456 ks = res.GetLeaves().keys()
457 self.assertTrue('[*]N1CCCCC1' in ks)
458 self.assertTrue('[*]C1CC1' in ks)
459
460 m = Chem.MolFromSmiles('C1CCC2N1CC2')
461 res = RecapDecompose(m)
462 self.assertTrue(res)
463 self.assertTrue(len(res.GetLeaves())==0)
464
466 m = Chem.MolFromSmiles('C1CC1OC1OC1')
467 res = RecapDecompose(m)
468 self.assertTrue(res)
469 self.assertTrue(len(res.GetLeaves())==2)
470 ks = res.GetLeaves().keys()
471 self.assertTrue('[*]C1CC1' in ks)
472 self.assertTrue('[*]C1CO1' in ks)
473
474 m = Chem.MolFromSmiles('C1CCCCO1')
475 res = RecapDecompose(m)
476 self.assertTrue(res)
477 self.assertTrue(len(res.GetLeaves())==0)
478
479 m = Chem.MolFromSmiles('c1ccccc1OC1OC1')
480 res = RecapDecompose(m)
481 self.assertTrue(res)
482 self.assertTrue(len(res.GetLeaves())==2)
483 ks = res.GetLeaves().keys()
484 self.assertTrue('[*]c1ccccc1' in ks)
485 self.assertTrue('[*]C1CO1' in ks)
486
487 m = Chem.MolFromSmiles('c1ccccc1Oc1ncccc1')
488 res = RecapDecompose(m)
489 self.assertTrue(res)
490 self.assertTrue(len(res.GetLeaves())==2)
491 ks = res.GetLeaves().keys()
492 self.assertTrue('[*]c1ccccc1' in ks)
493 self.assertTrue('[*]c1ccccn1' in ks)
494
496 m = Chem.MolFromSmiles('ClC=CBr')
497 res = RecapDecompose(m)
498 self.assertTrue(res)
499 self.assertTrue(len(res.GetLeaves())==2)
500 ks = res.GetLeaves().keys()
501 self.assertTrue('[*]CCl' in ks)
502 self.assertTrue('[*]CBr' in ks)
503
504 m = Chem.MolFromSmiles('C1CC=CC1')
505 res = RecapDecompose(m)
506 self.assertTrue(res)
507 self.assertTrue(len(res.GetLeaves())==0)
508
510 m = Chem.MolFromSmiles('c1cccn1CCCC')
511 res = RecapDecompose(m)
512 self.assertTrue(res)
513 self.assertTrue(len(res.GetLeaves())==2)
514 ks = res.GetLeaves().keys()
515 self.assertTrue('[*]n1cccc1' in ks)
516 self.assertTrue('[*]CCCC' in ks)
517
518 m = Chem.MolFromSmiles('c1ccc2n1CCCC2')
519 res = RecapDecompose(m)
520 self.assertTrue(res)
521 self.assertTrue(len(res.GetLeaves())==0)
522
524 m = Chem.MolFromSmiles('C1CC(=O)N1CCCC')
525 res = RecapDecompose(m,onlyUseReactions=[8])
526 self.assertTrue(res)
527 self.assertTrue(len(res.GetLeaves())==2)
528 ks = res.GetLeaves().keys()
529 self.assertTrue('[*]N1CCC1=O' in ks)
530 self.assertTrue('[*]CCCC' in ks)
531
532 m = Chem.MolFromSmiles('O=C1CC2N1CCCC2')
533 res = RecapDecompose(m)
534 self.assertTrue(res)
535 self.assertTrue(len(res.GetLeaves())==0)
536
538 m = Chem.MolFromSmiles('c1ccccc1c1ncccc1')
539 res = RecapDecompose(m)
540 self.assertTrue(res)
541 self.assertTrue(len(res.GetLeaves())==2)
542 ks = res.GetLeaves().keys()
543 self.assertTrue('[*]c1ccccc1' in ks)
544 self.assertTrue('[*]c1ccccn1' in ks)
545
546 m = Chem.MolFromSmiles('c1ccccc1C1CC1')
547 res = RecapDecompose(m)
548 self.assertTrue(res)
549 self.assertTrue(len(res.GetLeaves())==0)
550
552 m = Chem.MolFromSmiles('c1cccn1c1ccccc1')
553 res = RecapDecompose(m)
554 self.assertTrue(res)
555 self.assertTrue(len(res.GetLeaves())==2)
556 ks = res.GetLeaves().keys()
557 self.assertTrue('[*]n1cccc1' in ks)
558 self.assertTrue('[*]c1ccccc1' in ks)
559
561 m = Chem.MolFromSmiles('CCCNS(=O)(=O)CC')
562 res = RecapDecompose(m)
563 self.assertTrue(res)
564 self.assertTrue(len(res.GetLeaves())==2)
565 ks = res.GetLeaves().keys()
566 self.assertTrue('[*]NCCC' in ks)
567 self.assertTrue('[*]S(=O)(=O)CC' in ks)
568
569 m = Chem.MolFromSmiles('c1cccn1S(=O)(=O)CC')
570 res = RecapDecompose(m)
571 self.assertTrue(res)
572 self.assertTrue(len(res.GetLeaves())==2)
573 ks = res.GetLeaves().keys()
574 self.assertTrue('[*]n1cccc1' in ks)
575 self.assertTrue('[*]S(=O)(=O)CC' in ks)
576
577 m = Chem.MolFromSmiles('C1CNS(=O)(=O)CC1')
578 res = RecapDecompose(m)
579 self.assertTrue(res)
580 self.assertTrue(len(res.GetLeaves())==0)
581
583 m = Chem.MolFromSmiles('c1ccccc1n1cccc1')
584 res = RecapDecompose(m)
585 self.assertTrue(res)
586 self.assertTrue(len(res.GetLeaves())==2)
587 m = Chem.MolFromSmiles('c1ccccc1[n+]1ccccc1')
588 res = RecapDecompose(m)
589 self.assertTrue(res)
590 self.assertTrue(len(res.GetLeaves())==0)
591
592 m = Chem.MolFromSmiles('C1CC1NC(=O)CC')
593 res = RecapDecompose(m)
594 self.assertTrue(res)
595 self.assertTrue(len(res.GetLeaves())==2)
596 m = Chem.MolFromSmiles('C1CC1[NH+]C(=O)CC')
597 res = RecapDecompose(m)
598 self.assertTrue(res)
599 self.assertTrue(len(res.GetLeaves())==0)
600
601 m = Chem.MolFromSmiles('C1CC1NC(=O)NC1CCC1')
602 res = RecapDecompose(m)
603 self.assertTrue(res)
604 self.assertTrue(len(res.GetLeaves())==2)
605 m = Chem.MolFromSmiles('C1CC1[NH+]C(=O)[NH+]C1CCC1')
606 res = RecapDecompose(m)
607 self.assertTrue(res)
608 self.assertTrue(len(res.GetLeaves())==0)
609
610 unittest.main()
611