1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008)
35
36 """
37 from __future__ import print_function
38 import sys,re,random
39 from rdkit import Chem
40 from rdkit.Chem import rdChemReactions as Reactions
41 from rdkit.six import iteritems, iterkeys, next
42 from rdkit.six.moves import range
43
44
45 environs = {
46 'L1':'[C;D3]([#0,#6,#7,#8])(=O)',
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66 'L3':'[O;D2]-;!@[#0,#6,#1]',
67 'L4':'[C;!D1;!$(C=*)]-;!@[#6]',
68
69 'L5':'[N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]',
70 'L6':'[C;D3;!R](=O)-;!@[#0,#6,#7,#8]',
71 'L7a':'[C;D2,D3]-[#6]',
72 'L7b':'[C;D2,D3]-[#6]',
73 '#L8':'[C;!R;!D1]-;!@[#6]',
74 'L8':'[C;!R;!D1;!$(C!-*)]',
75 'L9':'[n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]',
76 'L10':'[N;R;$(N(@C(=O))@[C,N,O,S])]',
77 'L11':'[S;D2](-;!@[#0,#6])',
78 'L12':'[S;D4]([#6,#0])(=O)(=O)',
79 'L13':'[C;$(C(-;@[C,N,O,S])-;@[N,O,S])]',
80 'L14':'[c;$(c(:[c,n,o,s]):[n,o,s])]',
81 'L14b':'[c;$(c(:[c,n,o,s]):[n,o,s])]',
82 'L15':'[C;$(C(-;@C)-;@C)]',
83 'L16':'[c;$(c(:c):c)]',
84 'L16b':'[c;$(c(:c):c)]',
85 }
86 reactionDefs = (
87
88 [
89 ('1','3','-'),
90 ('1','5','-'),
91 ('1','10','-'),
92 ],
93
94
95 [
96 ('3','4','-'),
97 ('3','13','-'),
98 ('3','14','-'),
99 ('3','15','-'),
100 ('3','16','-'),
101 ],
102
103
104 [
105 ('4','5','-'),
106 ('4','11','-'),
107 ],
108
109
110 [
111 ('5','12','-'),
112 ('5','14','-'),
113 ('5','16','-'),
114 ('5','13','-'),
115 ('5','15','-'),
116 ],
117
118
119 [
120 ('6','13','-'),
121 ('6','14','-'),
122 ('6','15','-'),
123 ('6','16','-'),
124 ],
125
126
127 [
128 ('7a','7b','='),
129 ],
130
131
132 [
133 ('8','9','-'),
134 ('8','10','-'),
135 ('8','13','-'),
136 ('8','14','-'),
137 ('8','15','-'),
138 ('8','16','-'),
139 ],
140
141
142 [
143 ('9','13','-'),
144 ('9','14','-'),
145 ('9','15','-'),
146 ('9','16','-'),
147 ],
148
149
150 [
151 ('10','13','-'),
152 ('10','14','-'),
153 ('10','15','-'),
154 ('10','16','-'),
155 ],
156
157
158 [
159 ('11','13','-'),
160 ('11','14','-'),
161 ('11','15','-'),
162 ('11','16','-'),
163 ],
164
165
166
167
168
169 [
170 ('13','14','-'),
171 ('13','15','-'),
172 ('13','16','-'),
173 ],
174
175
176 [
177 ('14','14','-'),
178 ('14','15','-'),
179 ('14','16','-'),
180 ],
181
182
183 [
184 ('15','16','-'),
185 ],
186
187
188 [
189 ('16','16','-'),
190 ],
191 )
192 import copy
193 smartsGps=copy.deepcopy(reactionDefs)
194 for gp in smartsGps:
195 for j,defn in enumerate(gp):
196 g1,g2,bnd = defn
197 r1=environs['L'+g1]
198 r2=environs['L'+g2]
199 g1 = re.sub('[a-z,A-Z]','',g1)
200 g2 = re.sub('[a-z,A-Z]','',g2)
201 sma='[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]'%(r1,bnd,r2,g1,g2)
202 gp[j] =sma
203
204 for gp in smartsGps:
205 for defn in gp:
206 try:
207 t=Reactions.ReactionFromSmarts(defn)
208 t.Initialize()
209 except:
210 print(defn)
211 raise
212
213 environMatchers={}
214 for env,sma in iteritems(environs):
215 environMatchers[env]=Chem.MolFromSmarts(sma)
216
217 bondMatchers=[]
218 for i,compats in enumerate(reactionDefs):
219 tmp=[]
220 for i1,i2,bType in compats:
221 e1 = environs['L%s'%i1]
222 e2 = environs['L%s'%i2]
223 patt = '[$(%s)]%s;!@[$(%s)]'%(e1,bType,e2)
224 patt = Chem.MolFromSmarts(patt)
225 tmp.append((i1,i2,bType,patt))
226 bondMatchers.append(tmp)
227
228 reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps])
229 reverseReactions = []
230 for i,rxnSet in enumerate(smartsGps):
231 for j,sma in enumerate(rxnSet):
232 rs,ps = sma.split('>>')
233 sma = '%s>>%s'%(ps,rs)
234 rxn = Reactions.ReactionFromSmarts(sma)
235 labels = re.findall(r'\[([0-9]+?)\*\]',ps)
236 rxn._matchers=[Chem.MolFromSmiles('[%s*]'%x) for x in labels]
237 reverseReactions.append(rxn)
238
240 """ returns the bonds in a molecule that BRICS would cleave
241
242 >>> from rdkit import Chem
243 >>> m = Chem.MolFromSmiles('CCCOCC')
244 >>> res = list(FindBRICSBonds(m))
245 >>> res
246 [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))]
247
248 a more complicated case:
249 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
250 >>> res = list(FindBRICSBonds(m))
251 >>> res
252 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]
253
254 we can also randomize the order of the results:
255 >>> random.seed(23)
256 >>> res = list(FindBRICSBonds(m,randomizeOrder=True))
257 >>> sorted(res)
258 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))]
259
260 Note that this is a generator function :
261 >>> res = FindBRICSBonds(m)
262 >>> res
263 <generator object ...>
264 >>> next(res)
265 ((3, 2), ('3', '4'))
266
267 >>> m = Chem.MolFromSmiles('CC=CC')
268 >>> res = list(FindBRICSBonds(m))
269 >>> sorted(res)
270 [((1, 2), ('7', '7'))]
271
272 make sure we don't match ring bonds:
273 >>> m = Chem.MolFromSmiles('O=C1NCCC1')
274 >>> list(FindBRICSBonds(m))
275 []
276
277 another nice one, make sure environment 8 doesn't match something connected
278 to a ring atom:
279 >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1')
280 >>> list(FindBRICSBonds(m))
281 []
282
283 """
284 letter = re.compile('[a-z,A-Z]')
285 indices = list(range(len(bondMatchers)))
286 bondsDone=set()
287 if randomizeOrder: random.shuffle(indices,random=random.random)
288
289 envMatches={}
290 for env,patt in iteritems(environMatchers):
291 envMatches[env]=mol.HasSubstructMatch(patt)
292 for gpIdx in indices:
293 if randomizeOrder:
294 compats =bondMatchers[gpIdx][:]
295 random.shuffle(compats,random=random.random)
296 else:
297 compats = bondMatchers[gpIdx]
298 for i1,i2,bType,patt in compats:
299 if not envMatches['L'+i1] or not envMatches['L'+i2]: continue
300 matches = mol.GetSubstructMatches(patt)
301 i1 = letter.sub('',i1)
302 i2 = letter.sub('',i2)
303 for match in matches:
304 if match not in bondsDone and (match[1],match[0]) not in bondsDone:
305 bondsDone.add(match)
306 yield(((match[0],match[1]),(i1,i2)))
307
309 """ breaks the BRICS bonds in a molecule and returns the results
310
311 >>> from rdkit import Chem
312 >>> m = Chem.MolFromSmiles('CCCOCC')
313 >>> m2=BreakBRICSBonds(m)
314 >>> Chem.MolToSmiles(m2,True)
315 '[3*]O[3*].[4*]CC.[4*]CCC'
316
317 a more complicated case:
318 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
319 >>> m2=BreakBRICSBonds(m)
320 >>> Chem.MolToSmiles(m2,True)
321 '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'
322
323
324 can also specify a limited set of bonds to work with:
325 >>> m = Chem.MolFromSmiles('CCCOCC')
326 >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
327 >>> Chem.MolToSmiles(m2,True)
328 '[3*]OCC.[4*]CCC'
329
330 this can be used as an alternate approach for doing a BRICS decomposition by
331 following BreakBRICSBonds with a call to Chem.GetMolFrags:
332 >>> m = Chem.MolFromSmiles('CCCOCC')
333 >>> m2=BreakBRICSBonds(m)
334 >>> frags = Chem.GetMolFrags(m2,asMols=True)
335 >>> [Chem.MolToSmiles(x,True) for x in frags]
336 ['[4*]CCC', '[3*]O[3*]', '[4*]CC']
337
338 """
339 if not bonds:
340
341 res = Chem.FragmentOnBRICSBonds(mol)
342 if sanitize:
343 Chem.SanitizeMol(res)
344 return res
345 eMol = Chem.EditableMol(mol)
346 nAts = mol.GetNumAtoms()
347
348 dummyPositions=[]
349 for indices,dummyTypes in bonds:
350 ia,ib = indices
351 obond = mol.GetBondBetweenAtoms(ia,ib)
352 bondType=obond.GetBondType()
353 eMol.RemoveBond(ia,ib)
354
355 da,db = dummyTypes
356 atoma = Chem.Atom(0)
357 atoma.SetIsotope(int(da))
358 atoma.SetNoImplicit(True)
359 idxa = nAts
360 nAts+=1
361 eMol.AddAtom(atoma)
362 eMol.AddBond(ia,idxa,bondType)
363
364 atomb = Chem.Atom(0)
365 atomb.SetIsotope(int(db))
366 atomb.SetNoImplicit(True)
367 idxb = nAts
368 nAts+=1
369 eMol.AddAtom(atomb)
370 eMol.AddBond(ib,idxb,bondType)
371 if mol.GetNumConformers():
372 dummyPositions.append((idxa,ib))
373 dummyPositions.append((idxb,ia))
374 res = eMol.GetMol()
375 if sanitize:
376 Chem.SanitizeMol(res)
377 if mol.GetNumConformers():
378 for conf in mol.GetConformers():
379 resConf = res.GetConformer(conf.GetId())
380 for ia,pa in dummyPositions:
381 resConf.SetAtomPosition(ia,conf.GetAtomPosition(pa))
382 return res
383
384 -def BRICSDecompose(mol,allNodes=None,minFragmentSize=1,onlyUseReactions=None,
385 silent=True,keepNonLeafNodes=False,singlePass=False,returnMols=False):
386 """ returns the BRICS decomposition for a molecule
387
388 >>> from rdkit import Chem
389 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
390 >>> res = list(BRICSDecompose(m))
391 >>> sorted(res)
392 ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']
393
394 >>> res = list(BRICSDecompose(m,returnMols=True))
395 >>> res[0]
396 <rdkit.Chem.rdchem.Mol object ...>
397 >>> smis = [Chem.MolToSmiles(x,True) for x in res]
398 >>> sorted(smis)
399 ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']
400
401 nexavar, an example from the paper (corrected):
402 >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
403 >>> res = list(BRICSDecompose(m))
404 >>> sorted(res)
405 ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']
406
407 it's also possible to keep pieces that haven't been fully decomposed:
408 >>> m = Chem.MolFromSmiles('CCCOCC')
409 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
410 >>> sorted(res)
411 ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']
412
413 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
414 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
415 >>> sorted(res)
416 ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']
417
418 or to only do a single pass of decomposition:
419 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
420 >>> res = list(BRICSDecompose(m,singlePass=True))
421 >>> sorted(res)
422 ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']
423
424 setting a minimum size for the fragments:
425 >>> m = Chem.MolFromSmiles('CCCOCC')
426 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
427 >>> sorted(res)
428 ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
429 >>> m = Chem.MolFromSmiles('CCCOCC')
430 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
431 >>> sorted(res)
432 ['CCCOCC', '[3*]OCC', '[4*]CCC']
433 >>> res = list(BRICSDecompose(m,minFragmentSize=2))
434 >>> sorted(res)
435 ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
436
437
438 """
439 global reactions
440 mSmi = Chem.MolToSmiles(mol,1)
441
442 if allNodes is None:
443 allNodes=set()
444
445 if mSmi in allNodes:
446 return set()
447
448 activePool={mSmi:mol}
449 allNodes.add(mSmi)
450 foundMols={mSmi:mol}
451 for gpIdx,reactionGp in enumerate(reactions):
452 newPool = {}
453 while activePool:
454 matched=False
455 nSmi = next(iterkeys(activePool))
456 mol = activePool.pop(nSmi)
457 for rxnIdx,reaction in enumerate(reactionGp):
458 if onlyUseReactions and (gpIdx,rxnIdx) not in onlyUseReactions:
459 continue
460 if not silent:
461 print('--------')
462 print(smartsGps[gpIdx][rxnIdx])
463 ps = reaction.RunReactants((mol,))
464 if ps:
465 if not silent: print(nSmi,'->',len(ps),'products')
466 for prodSeq in ps:
467 seqOk=True
468
469 tSeq = [(prod.GetNumAtoms(onlyExplicit=True),idx) for idx,prod in enumerate(prodSeq)]
470 tSeq.sort()
471 for nats,idx in tSeq:
472 prod = prodSeq[idx]
473 try:
474 Chem.SanitizeMol(prod)
475 except:
476 continue
477 pSmi = Chem.MolToSmiles(prod,1)
478 if minFragmentSize>0:
479 nDummies = pSmi.count('*')
480 if nats-nDummies<minFragmentSize:
481 seqOk=False
482 break
483 prod.pSmi = pSmi
484 ts = [(x,prodSeq[y]) for x,y in tSeq]
485 prodSeq=ts
486 if seqOk:
487 matched=True
488 for nats,prod in prodSeq:
489 pSmi = prod.pSmi
490
491 if pSmi not in allNodes:
492 if not singlePass:
493 activePool[pSmi] = prod
494 allNodes.add(pSmi)
495 foundMols[pSmi]=prod
496 if singlePass or keepNonLeafNodes or not matched:
497 newPool[nSmi]=mol
498 activePool = newPool
499 if not (singlePass or keepNonLeafNodes):
500 if not returnMols:
501 res = set(activePool.keys())
502 else:
503 res = activePool.values()
504 else:
505 if not returnMols:
506 res = allNodes
507 else:
508 res = foundMols.values()
509 return res
510
511
512 import random
513 dummyPattern=Chem.MolFromSmiles('[*]')
514 -def BRICSBuild(fragments,onlyCompleteMols=True,seeds=None,uniquify=True,
515 scrambleReagents=True,maxDepth=3):
516 seen = set()
517 if not seeds:
518 seeds = list(fragments)
519 if scrambleReagents:
520 seeds = list(seeds)
521 random.shuffle(seeds,random=random.random)
522 if scrambleReagents:
523 tempReactions = list(reverseReactions)
524 random.shuffle(tempReactions,random=random.random)
525 else:
526 tempReactions=reverseReactions
527 for seed in seeds:
528 seedIsR1=False
529 seedIsR2=False
530 nextSteps=[]
531 for rxn in tempReactions:
532 if seed.HasSubstructMatch(rxn._matchers[0]):
533 seedIsR1=True
534 if seed.HasSubstructMatch(rxn._matchers[1]):
535 seedIsR2=True
536 for fragment in fragments:
537 ps = None
538 if fragment.HasSubstructMatch(rxn._matchers[0]):
539 if seedIsR2:
540 ps = rxn.RunReactants((fragment,seed))
541 if fragment.HasSubstructMatch(rxn._matchers[1]):
542 if seedIsR1:
543 ps = rxn.RunReactants((seed,fragment))
544 if ps:
545 for p in ps:
546 if uniquify:
547 pSmi =Chem.MolToSmiles(p[0],True)
548 if pSmi in seen:
549 continue
550 else:
551 seen.add(pSmi)
552 if p[0].HasSubstructMatch(dummyPattern):
553 nextSteps.append(p[0])
554 if not onlyCompleteMols:
555 yield p[0]
556 else:
557 yield p[0]
558 if nextSteps and maxDepth>0:
559 for p in BRICSBuild(fragments,onlyCompleteMols=onlyCompleteMols,
560 seeds=nextSteps,uniquify=uniquify,
561 maxDepth=maxDepth-1):
562 if uniquify:
563 pSmi =Chem.MolToSmiles(p,True)
564 if pSmi in seen:
565 continue
566 else:
567 seen.add(pSmi)
568 yield p
569
570
571
572
573
574
575
576
577
578
579
581 import doctest,sys
582 return doctest.testmod(sys.modules["__main__"],
583 optionflags=doctest.ELLIPSIS+doctest.NORMALIZE_WHITESPACE)
584
585 if __name__=='__main__':
586 import unittest
589 m = Chem.MolFromSmiles('CC(=O)OC')
590 res = BRICSDecompose(m)
591 self.assertTrue(res)
592 self.assertTrue(len(res)==2)
593
594 m = Chem.MolFromSmiles('CC(=O)N1CCC1=O')
595 res = BRICSDecompose(m)
596 self.assertTrue(res)
597 self.assertTrue(len(res)==2,res)
598
599 m = Chem.MolFromSmiles('c1ccccc1N(C)C')
600 res = BRICSDecompose(m)
601 self.assertTrue(res)
602 self.assertTrue(len(res)==2,res)
603
604 m = Chem.MolFromSmiles('c1cccnc1N(C)C')
605 res = BRICSDecompose(m)
606 self.assertTrue(res)
607 self.assertTrue(len(res)==2,res)
608
609 m = Chem.MolFromSmiles('o1ccnc1N(C)C')
610 res = BRICSDecompose(m)
611 self.assertTrue(res)
612 self.assertTrue(len(res)==2)
613
614 m = Chem.MolFromSmiles('c1ccccc1OC')
615 res = BRICSDecompose(m)
616 self.assertTrue(res)
617 self.assertTrue(len(res)==2)
618
619 m = Chem.MolFromSmiles('o1ccnc1OC')
620 res = BRICSDecompose(m)
621 self.assertTrue(res)
622 self.assertTrue(len(res)==2)
623
624 m = Chem.MolFromSmiles('O1CCNC1OC')
625 res = BRICSDecompose(m)
626 self.assertTrue(res)
627 self.assertTrue(len(res)==2)
628
629 m = Chem.MolFromSmiles('CCCSCC')
630 res = BRICSDecompose(m)
631 self.assertTrue(res)
632 self.assertTrue(len(res)==3,res)
633 self.assertTrue('[11*]S[11*]' in res,res)
634
635 m = Chem.MolFromSmiles('CCNC(=O)C1CC1')
636 res = BRICSDecompose(m)
637 self.assertTrue(res)
638 self.assertTrue(len(res)==4,res)
639 self.assertTrue('[5*]N[5*]' in res,res)
640
642
643 m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
644 res = BRICSDecompose(m)
645 self.assertTrue(res)
646 self.assertTrue(len(res)==9,res)
647
649 m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1')
650 res = BRICSDecompose(m)
651 self.assertTrue(res)
652 self.assertTrue(len(res)==5,res)
653 self.assertTrue('[5*]N[5*]' in res,res)
654 self.assertTrue('[16*]c1ccccc1' in res,res)
655 self.assertTrue('[8*]C(F)(F)F' in res,res)
656
657
659 allNodes = set()
660 m = Chem.MolFromSmiles('c1ccccc1OCCC')
661 res = BRICSDecompose(m,allNodes=allNodes)
662 self.assertTrue(res)
663 leaves=res
664 self.assertTrue(len(leaves)==3,leaves)
665 self.assertTrue(len(allNodes)==6,allNodes)
666 res = BRICSDecompose(m,allNodes=allNodes)
667 self.assertFalse(res)
668 self.assertTrue(len(allNodes)==6,allNodes)
669
670 m = Chem.MolFromSmiles('c1ccccc1OCCCC')
671 res = BRICSDecompose(m,allNodes=allNodes)
672 self.assertTrue(res)
673 leaves.update(res)
674 self.assertTrue(len(allNodes)==9,allNodes)
675 self.assertTrue(len(leaves)==4,leaves)
676
677
678 m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC')
679 res = BRICSDecompose(m,allNodes=allNodes)
680 self.assertTrue(res)
681 leaves.update(res)
682 self.assertTrue(len(leaves)==8,leaves)
683 self.assertTrue(len(allNodes)==18,allNodes)
684
686 allNodes = set()
687 frags = [
688 '[14*]c1ncncn1',
689 '[16*]c1ccccc1',
690 '[14*]c1ncccc1',
691 ]
692 frags = [Chem.MolFromSmiles(x) for x in frags]
693 res = BRICSBuild(frags)
694 self.assertTrue(res)
695 res= list(res)
696 self.assertTrue(len(res)==6)
697 smis = [Chem.MolToSmiles(x,True) for x in res]
698 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
699 self.assertTrue('c1ccc(-c2ccccn2)cc1' in smis)
700
702 allNodes = set()
703 frags = [
704 '[3*]O[3*]',
705 '[16*]c1ccccc1',
706 ]
707 frags = [Chem.MolFromSmiles(x) for x in frags]
708 res = BRICSBuild(frags)
709 self.assertTrue(res)
710 res=list(res)
711 smis = [Chem.MolToSmiles(x,True) for x in res]
712 self.assertTrue(len(smis)==2,smis)
713 self.assertTrue('c1ccc(Oc2ccccc2)cc1' in smis)
714 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
715
716
717
719 allNodes = set()
720 frags = [
721 '[16*]c1ccccc1',
722 '[3*]OC',
723 '[9*]n1cccc1',
724 ]
725 frags = [Chem.MolFromSmiles(x) for x in frags]
726 res = BRICSBuild(frags)
727 self.assertTrue(res)
728 res= list(res)
729 self.assertTrue(len(res)==3)
730 smis = [Chem.MolToSmiles(x,True) for x in res]
731 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
732 self.assertTrue('COc1ccccc1' in smis)
733 self.assertTrue('c1ccc(-n2cccc2)cc1' in smis,smis)
734
736 allNodes = set()
737 frags = [
738 '[16*]c1ccccc1',
739 '[3*]OC',
740 '[3*]OCC(=O)[6*]',
741 ]
742 frags = [Chem.MolFromSmiles(x) for x in frags]
743 res = BRICSBuild(frags)
744 self.assertTrue(res)
745 res= list(res)
746 smis = [Chem.MolToSmiles(x,True) for x in res]
747 self.assertTrue(len(res)==3)
748 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
749 self.assertTrue('COc1ccccc1' in smis)
750 self.assertTrue('O=C(COc1ccccc1)c1ccccc1' in smis)
751
753 random.seed(23)
754 base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1")
755 catalog = BRICSDecompose(base)
756 self.assertTrue(len(catalog)==5,catalog)
757 catalog = [Chem.MolFromSmiles(x) for x in catalog]
758 ms = list(BRICSBuild(catalog,maxDepth=4))
759 for m in ms:
760 Chem.SanitizeMol(m)
761 ms = [Chem.MolToSmiles(x) for x in ms]
762 self.assertEqual(len(ms),36)
763
764 ts = ['n1cnc(C2CNC2)nc1','n1cnc(-c2ncncn2)nc1','C(OC1CNC1)C(C1CC1)OC1CNC1',
765 'n1cnc(OC(COC2CNC2)C2CC2)nc1','n1cnc(OCC(OC2CNC2)C2CNC2)nc1']
766 ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x),True) for x in ts]
767 for t in ts:
768 self.assertTrue(t in ms,(t,ms))
769
771 m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1')
772 res=BRICSDecompose(m)
773 self.assertEqual(len(res),7)
774 self.assertTrue('[3*]O[3*]' in res)
775 self.assertFalse('[14*]c1ncnc(NCCCC)n1' in res)
776 res = BRICSDecompose(m,singlePass=True)
777 self.assertEqual(len(res),13)
778 self.assertTrue('[3*]OCC' in res)
779 self.assertTrue('[14*]c1ncnc(NCCCC)n1' in res)
780
782 m = Chem.MolFromSmiles('C1CCCCN1c1ccccc1')
783 res=BRICSDecompose(m)
784 self.assertEqual(len(res),2,res)
785
787
788 molblock="""
789 RDKit 3D
790
791 13 14 0 0 0 0 0 0 0 0999 V2000
792 -1.2004 0.5900 0.6110 C 0 0 0 0 0 0 0 0 0 0 0 0
793 -2.2328 1.3173 0.0343 C 0 0 0 0 0 0 0 0 0 0 0 0
794 -3.4299 0.6533 -0.1500 C 0 0 0 0 0 0 0 0 0 0 0 0
795 -3.3633 -0.7217 -0.3299 C 0 0 0 0 0 0 0 0 0 0 0 0
796 -2.1552 -1.3791 -0.2207 C 0 0 0 0 0 0 0 0 0 0 0 0
797 -1.1425 -0.7969 0.5335 C 0 0 0 0 0 0 0 0 0 0 0 0
798 0.1458 -1.4244 0.4108 O 0 0 0 0 0 0 0 0 0 0 0 0
799 1.2976 -0.7398 -0.1026 C 0 0 0 0 0 0 0 0 0 0 0 0
800 2.4889 -0.7939 0.5501 N 0 0 0 0 0 0 0 0 0 0 0 0
801 3.4615 0.1460 0.3535 C 0 0 0 0 0 0 0 0 0 0 0 0
802 3.0116 1.4034 -0.0296 C 0 0 0 0 0 0 0 0 0 0 0 0
803 1.9786 1.4264 -0.9435 C 0 0 0 0 0 0 0 0 0 0 0 0
804 1.1399 0.3193 -0.9885 C 0 0 0 0 0 0 0 0 0 0 0 0
805 1 2 2 0
806 2 3 1 0
807 3 4 2 0
808 4 5 1 0
809 5 6 2 0
810 6 7 1 0
811 7 8 1 0
812 8 9 2 0
813 9 10 1 0
814 10 11 2 0
815 11 12 1 0
816 12 13 2 0
817 6 1 1 0
818 13 8 1 0
819 M END
820 """
821 m = Chem.MolFromMolBlock(molblock)
822 pieces = BreakBRICSBonds(m)
823
824 frags = Chem.GetMolFrags(pieces,asMols=True)
825 self.assertEqual(len(frags),3)
826 self.assertEqual(frags[0].GetNumAtoms(),7)
827 self.assertEqual(frags[1].GetNumAtoms(),3)
828 self.assertEqual(frags[2].GetNumAtoms(),7)
829
830 c1 = m.GetConformer()
831 c2 = frags[0].GetConformer()
832 for i in range(6):
833 p1 = c1.GetAtomPosition(i)
834 p2 = c2.GetAtomPosition(i)
835 self.assertEqual((p1-p2).Length(),0.0)
836 p1 = c1.GetAtomPosition(6)
837 p2 = c2.GetAtomPosition(6)
838 self.assertEqual((p1-p2).Length(),0.0)
839
840 c2 = frags[2].GetConformer()
841 for i in range(6):
842 p1 = c1.GetAtomPosition(i+7)
843 p2 = c2.GetAtomPosition(i)
844 self.assertEqual((p1-p2).Length(),0.0)
845 p1 = c1.GetAtomPosition(6)
846 p2 = c2.GetAtomPosition(6)
847 self.assertEqual((p1-p2).Length(),0.0)
848
849 c2 = frags[1].GetConformer()
850 for i in range(1):
851 p1 = c1.GetAtomPosition(i+6)
852 p2 = c2.GetAtomPosition(i)
853 self.assertEqual((p1-p2).Length(),0.0)
854 p1 = c1.GetAtomPosition(5)
855 p2 = c2.GetAtomPosition(1)
856 self.assertEqual((p1-p2).Length(),0.0)
857 p1 = c1.GetAtomPosition(6)
858 p2 = c2.GetAtomPosition(0)
859 self.assertEqual((p1-p2).Length(),0.0)
860
861
862
863 molblock="""
864 RDKit 2D
865
866 13 14 0 0 0 0 0 0 0 0999 V2000
867 -1.2990 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
868 -2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
869 -3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
870 -3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
871 -2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
872 -1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
873 -0.0000 1.3846 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
874 1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
875 1.2990 -0.8654 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0
876 2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
877 3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
878 3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
879 2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
880 1 2 2 0
881 2 3 1 0
882 3 4 2 0
883 4 5 1 0
884 5 6 2 0
885 6 7 1 0
886 7 8 1 0
887 8 9 2 0
888 9 10 1 0
889 10 11 2 0
890 11 12 1 0
891 12 13 2 0
892 6 1 1 0
893 13 8 1 0
894 M END
895 """
896 m2 = Chem.MolFromMolBlock(molblock)
897 m.AddConformer(m2.GetConformer(),assignId=True)
898 self.assertEqual(m.GetNumConformers(),2)
899
900 pieces = BreakBRICSBonds(m)
901 frags = Chem.GetMolFrags(pieces,asMols=True)
902 self.assertEqual(len(frags),3)
903 self.assertEqual(frags[0].GetNumAtoms(),7)
904 self.assertEqual(frags[1].GetNumAtoms(),3)
905 self.assertEqual(frags[2].GetNumAtoms(),7)
906 self.assertEqual(frags[0].GetNumConformers(),2)
907 self.assertEqual(frags[1].GetNumConformers(),2)
908 self.assertEqual(frags[2].GetNumConformers(),2)
909
910 c1 = m.GetConformer(0)
911 c2 = frags[0].GetConformer(0)
912 for i in range(6):
913 p1 = c1.GetAtomPosition(i)
914 p2 = c2.GetAtomPosition(i)
915 self.assertEqual((p1-p2).Length(),0.0)
916 p1 = c1.GetAtomPosition(6)
917 p2 = c2.GetAtomPosition(6)
918 self.assertEqual((p1-p2).Length(),0.0)
919
920 c2 = frags[2].GetConformer(0)
921 for i in range(6):
922 p1 = c1.GetAtomPosition(i+7)
923 p2 = c2.GetAtomPosition(i)
924 self.assertEqual((p1-p2).Length(),0.0)
925 p1 = c1.GetAtomPosition(6)
926 p2 = c2.GetAtomPosition(6)
927 self.assertEqual((p1-p2).Length(),0.0)
928
929 c2 = frags[1].GetConformer(0)
930 for i in range(1):
931 p1 = c1.GetAtomPosition(i+6)
932 p2 = c2.GetAtomPosition(i)
933 self.assertEqual((p1-p2).Length(),0.0)
934 p1 = c1.GetAtomPosition(5)
935 p2 = c2.GetAtomPosition(1)
936 self.assertEqual((p1-p2).Length(),0.0)
937 p1 = c1.GetAtomPosition(6)
938 p2 = c2.GetAtomPosition(0)
939 self.assertEqual((p1-p2).Length(),0.0)
940
941 c1 = m.GetConformer(1)
942 c2 = frags[0].GetConformer(1)
943 for i in range(6):
944 p1 = c1.GetAtomPosition(i)
945 p2 = c2.GetAtomPosition(i)
946 self.assertEqual((p1-p2).Length(),0.0)
947 p1 = c1.GetAtomPosition(6)
948 p2 = c2.GetAtomPosition(6)
949 self.assertEqual((p1-p2).Length(),0.0)
950
951 c2 = frags[2].GetConformer(1)
952 for i in range(6):
953 p1 = c1.GetAtomPosition(i+7)
954 p2 = c2.GetAtomPosition(i)
955 self.assertEqual((p1-p2).Length(),0.0)
956 p1 = c1.GetAtomPosition(6)
957 p2 = c2.GetAtomPosition(6)
958 self.assertEqual((p1-p2).Length(),0.0)
959
960 c2 = frags[1].GetConformer(1)
961 for i in range(1):
962 p1 = c1.GetAtomPosition(i+6)
963 p2 = c2.GetAtomPosition(i)
964 self.assertEqual((p1-p2).Length(),0.0)
965 p1 = c1.GetAtomPosition(5)
966 p2 = c2.GetAtomPosition(1)
967 self.assertEqual((p1-p2).Length(),0.0)
968 p1 = c1.GetAtomPosition(6)
969 p2 = c2.GetAtomPosition(0)
970 self.assertEqual((p1-p2).Length(),0.0)
971
973 m = Chem.MolFromSmiles('CCS(=O)(=O)NCC')
974 res=list(FindBRICSBonds(m))
975 self.assertEqual(len(res),2,res)
976 atIds = [x[0] for x in res]
977 atIds.sort()
978 self.assertEqual(atIds,[(5,2), (6,5)])
979
980
981
982
983
984 failed,tried = _test()
985 if failed:
986 sys.exit(failed)
987
988 unittest.main()
989