Package rdkit :: Package Chem :: Module BRICS
[hide private]
[frames] | no frames]

Source Code for Module rdkit.Chem.BRICS

  1  # $Id$ 
  2  # 
  3  #  Copyright (c) 2009, Novartis Institutes for BioMedical Research Inc. 
  4  #  All rights reserved. 
  5  #  
  6  # Redistribution and use in source and binary forms, with or without 
  7  # modification, are permitted provided that the following conditions are 
  8  # met:  
  9  # 
 10  #     * Redistributions of source code must retain the above copyright  
 11  #       notice, this list of conditions and the following disclaimer. 
 12  #     * Redistributions in binary form must reproduce the above 
 13  #       copyright notice, this list of conditions and the following  
 14  #       disclaimer in the documentation and/or other materials provided  
 15  #       with the distribution. 
 16  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 17  #       nor the names of its contributors may be used to endorse or promote  
 18  #       products derived from this software without specific prior written permission. 
 19  # 
 20  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 21  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 22  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 23  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 24  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 25  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 26  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 27  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 28  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 29  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 30  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 31  # 
 32  # Created by Greg Landrum, Nov 2008 
 33   
 34  """ Implementation of the BRICS algorithm from Degen et al. ChemMedChem *3* 1503-7 (2008) 
 35   
 36  """ 
 37  from __future__ import print_function 
 38  import sys,re,random 
 39  from rdkit import Chem 
 40  from rdkit.Chem import rdChemReactions as Reactions 
 41  from rdkit.six import iteritems, iterkeys, next 
 42  from rdkit.six.moves import range 
 43   
 44  # These are the definitions that will be applied to fragment molecules: 
 45  environs = { 
 46    'L1':'[C;D3]([#0,#6,#7,#8])(=O)', 
 47    # 
 48    # After some discussion, the L2 definitions ("N.pl3" in the original 
 49    # paper) have been removed and incorporated into a (almost) general 
 50    # purpose amine definition in L5 ("N.sp3" in the paper). 
 51    # 
 52    # The problem is one of consistency. 
 53    #    Based on the original definitions you should get the following 
 54    #    fragmentations: 
 55    #      C1CCCCC1NC(=O)C -> C1CCCCC1N[2*].[1*]C(=O)C 
 56    #      c1ccccc1NC(=O)C -> c1ccccc1[16*].[2*]N[2*].[1*]C(=O)C 
 57    #    This difference just didn't make sense to us. By switching to 
 58    #    the unified definition we end up with: 
 59    #      C1CCCCC1NC(=O)C -> C1CCCCC1[15*].[5*]N[5*].[1*]C(=O)C 
 60    #      c1ccccc1NC(=O)C -> c1ccccc1[16*].[5*]N[5*].[1*]C(=O)C 
 61    # 
 62    #'L2':'[N;!R;!D1;!$(N=*)]-;!@[#0,#6]', 
 63    # this one turned out to be too tricky to define above, so we set it off 
 64    # in its own definition: 
 65    #'L2a':'[N;D3;R;$(N(@[C;!$(C=*)])@[C;!$(C=*)])]', 
 66    'L3':'[O;D2]-;!@[#0,#6,#1]', 
 67    'L4':'[C;!D1;!$(C=*)]-;!@[#6]', 
 68    #'L5':'[N;!D1;!$(N*!-*);!$(N=*);!$(N-[!C;!#0])]-[#0,C]', 
 69    'L5':'[N;!D1;!$(N=*);!$(N-[!#6;!#16;!#0;!#1]);!$([N;R]@[C;R]=O)]', 
 70    'L6':'[C;D3;!R](=O)-;!@[#0,#6,#7,#8]', 
 71    'L7a':'[C;D2,D3]-[#6]', 
 72    'L7b':'[C;D2,D3]-[#6]', 
 73    '#L8':'[C;!R;!D1]-;!@[#6]', 
 74    'L8':'[C;!R;!D1;!$(C!-*)]', 
 75    'L9':'[n;+0;$(n(:[c,n,o,s]):[c,n,o,s])]', 
 76    'L10':'[N;R;$(N(@C(=O))@[C,N,O,S])]', 
 77    'L11':'[S;D2](-;!@[#0,#6])', 
 78    'L12':'[S;D4]([#6,#0])(=O)(=O)', 
 79    'L13':'[C;$(C(-;@[C,N,O,S])-;@[N,O,S])]', 
 80    'L14':'[c;$(c(:[c,n,o,s]):[n,o,s])]', 
 81    'L14b':'[c;$(c(:[c,n,o,s]):[n,o,s])]', 
 82    'L15':'[C;$(C(-;@C)-;@C)]', 
 83    'L16':'[c;$(c(:c):c)]', 
 84    'L16b':'[c;$(c(:c):c)]', 
 85    } 
 86  reactionDefs = ( 
 87    # L1 
 88    [ 
 89      ('1','3','-'), 
 90      ('1','5','-'), 
 91      ('1','10','-'), 
 92    ], 
 93   
 94     # L3  
 95     [ 
 96      ('3','4','-'), 
 97      ('3','13','-'), 
 98      ('3','14','-'), 
 99      ('3','15','-'), 
100      ('3','16','-'), 
101     ], 
102     
103     # L4 
104     [ 
105      ('4','5','-'), 
106      ('4','11','-'), 
107     ], 
108   
109     # L5 
110     [ 
111      ('5','12','-'), 
112      ('5','14','-'), 
113      ('5','16','-'), 
114      ('5','13','-'), 
115      ('5','15','-'), 
116     ], 
117     
118      # L6 
119      [ 
120      ('6','13','-'), 
121      ('6','14','-'), 
122      ('6','15','-'), 
123      ('6','16','-'), 
124      ], 
125     
126      # L7 
127      [ 
128      ('7a','7b','='), 
129      ], 
130   
131      # L8 
132      [ 
133      ('8','9','-'), 
134      ('8','10','-'), 
135      ('8','13','-'), 
136      ('8','14','-'), 
137      ('8','15','-'), 
138      ('8','16','-'), 
139      ], 
140     
141      # L9 
142      [ 
143      ('9','13','-'),# not in original paper 
144      ('9','14','-'),# not in original paper 
145      ('9','15','-'), 
146      ('9','16','-'), 
147      ], 
148     
149      # L10 
150      [ 
151      ('10','13','-'), 
152      ('10','14','-'), 
153      ('10','15','-'), 
154      ('10','16','-'), 
155      ], 
156     
157      # L11 
158      [ 
159      ('11','13','-'), 
160      ('11','14','-'), 
161      ('11','15','-'), 
162      ('11','16','-'), 
163      ], 
164     
165      # L12 
166      # none left 
167   
168      # L13 
169      [ 
170      ('13','14','-'), 
171      ('13','15','-'), 
172      ('13','16','-'), 
173      ], 
174     
175      # L14 
176      [ 
177      ('14','14','-'),# not in original paper 
178      ('14','15','-'), 
179      ('14','16','-'), 
180      ], 
181     
182      # L15 
183      [ 
184      ('15','16','-'), 
185      ], 
186   
187      # L16 
188      [ 
189      ('16','16','-'), # not in original paper 
190      ], 
191    ) 
192  import copy 
193  smartsGps=copy.deepcopy(reactionDefs) 
194  for gp in smartsGps: 
195    for j,defn in enumerate(gp): 
196      g1,g2,bnd = defn 
197      r1=environs['L'+g1] 
198      r2=environs['L'+g2] 
199      g1 = re.sub('[a-z,A-Z]','',g1) 
200      g2 = re.sub('[a-z,A-Z]','',g2) 
201      sma='[$(%s):1]%s;!@[$(%s):2]>>[%s*]-[*:1].[%s*]-[*:2]'%(r1,bnd,r2,g1,g2) 
202      gp[j] =sma 
203   
204  for gp in smartsGps: 
205    for defn in gp: 
206      try: 
207        t=Reactions.ReactionFromSmarts(defn) 
208        t.Initialize() 
209      except: 
210        print(defn) 
211        raise 
212   
213  environMatchers={} 
214  for env,sma in iteritems(environs): 
215    environMatchers[env]=Chem.MolFromSmarts(sma) 
216     
217  bondMatchers=[] 
218  for i,compats in enumerate(reactionDefs): 
219    tmp=[] 
220    for i1,i2,bType in compats: 
221        e1 = environs['L%s'%i1] 
222        e2 = environs['L%s'%i2] 
223        patt = '[$(%s)]%s;!@[$(%s)]'%(e1,bType,e2) 
224        patt = Chem.MolFromSmarts(patt) 
225        tmp.append((i1,i2,bType,patt)) 
226    bondMatchers.append(tmp) 
227       
228  reactions = tuple([[Reactions.ReactionFromSmarts(y) for y in x] for x in smartsGps]) 
229  reverseReactions = [] 
230  for i,rxnSet in enumerate(smartsGps): 
231    for j,sma in enumerate(rxnSet): 
232      rs,ps = sma.split('>>') 
233      sma = '%s>>%s'%(ps,rs) 
234      rxn = Reactions.ReactionFromSmarts(sma) 
235      labels = re.findall(r'\[([0-9]+?)\*\]',ps) 
236      rxn._matchers=[Chem.MolFromSmiles('[%s*]'%x) for x in labels] 
237      reverseReactions.append(rxn) 
238   
239 -def FindBRICSBonds(mol,randomizeOrder=False,silent=True):
240 """ returns the bonds in a molecule that BRICS would cleave 241 242 >>> from rdkit import Chem 243 >>> m = Chem.MolFromSmiles('CCCOCC') 244 >>> res = list(FindBRICSBonds(m)) 245 >>> res 246 [((3, 2), ('3', '4')), ((3, 4), ('3', '4'))] 247 248 a more complicated case: 249 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1') 250 >>> res = list(FindBRICSBonds(m)) 251 >>> res 252 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))] 253 254 we can also randomize the order of the results: 255 >>> random.seed(23) 256 >>> res = list(FindBRICSBonds(m,randomizeOrder=True)) 257 >>> sorted(res) 258 [((3, 2), ('3', '4')), ((3, 4), ('3', '4')), ((6, 8), ('6', '16'))] 259 260 Note that this is a generator function : 261 >>> res = FindBRICSBonds(m) 262 >>> res 263 <generator object ...> 264 >>> next(res) 265 ((3, 2), ('3', '4')) 266 267 >>> m = Chem.MolFromSmiles('CC=CC') 268 >>> res = list(FindBRICSBonds(m)) 269 >>> sorted(res) 270 [((1, 2), ('7', '7'))] 271 272 make sure we don't match ring bonds: 273 >>> m = Chem.MolFromSmiles('O=C1NCCC1') 274 >>> list(FindBRICSBonds(m)) 275 [] 276 277 another nice one, make sure environment 8 doesn't match something connected 278 to a ring atom: 279 >>> m = Chem.MolFromSmiles('CC1(C)CCCCC1') 280 >>> list(FindBRICSBonds(m)) 281 [] 282 283 """ 284 letter = re.compile('[a-z,A-Z]') 285 indices = list(range(len(bondMatchers))) 286 bondsDone=set() 287 if randomizeOrder: random.shuffle(indices,random=random.random) 288 289 envMatches={} 290 for env,patt in iteritems(environMatchers): 291 envMatches[env]=mol.HasSubstructMatch(patt) 292 for gpIdx in indices: 293 if randomizeOrder: 294 compats =bondMatchers[gpIdx][:] 295 random.shuffle(compats,random=random.random) 296 else: 297 compats = bondMatchers[gpIdx] 298 for i1,i2,bType,patt in compats: 299 if not envMatches['L'+i1] or not envMatches['L'+i2]: continue 300 matches = mol.GetSubstructMatches(patt) 301 i1 = letter.sub('',i1) 302 i2 = letter.sub('',i2) 303 for match in matches: 304 if match not in bondsDone and (match[1],match[0]) not in bondsDone: 305 bondsDone.add(match) 306 yield(((match[0],match[1]),(i1,i2)))
307
308 -def BreakBRICSBonds(mol,bonds=None,sanitize=True,silent=True):
309 """ breaks the BRICS bonds in a molecule and returns the results 310 311 >>> from rdkit import Chem 312 >>> m = Chem.MolFromSmiles('CCCOCC') 313 >>> m2=BreakBRICSBonds(m) 314 >>> Chem.MolToSmiles(m2,True) 315 '[3*]O[3*].[4*]CC.[4*]CCC' 316 317 a more complicated case: 318 >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1') 319 >>> m2=BreakBRICSBonds(m) 320 >>> Chem.MolToSmiles(m2,True) 321 '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O' 322 323 324 can also specify a limited set of bonds to work with: 325 >>> m = Chem.MolFromSmiles('CCCOCC') 326 >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))]) 327 >>> Chem.MolToSmiles(m2,True) 328 '[3*]OCC.[4*]CCC' 329 330 this can be used as an alternate approach for doing a BRICS decomposition by 331 following BreakBRICSBonds with a call to Chem.GetMolFrags: 332 >>> m = Chem.MolFromSmiles('CCCOCC') 333 >>> m2=BreakBRICSBonds(m) 334 >>> frags = Chem.GetMolFrags(m2,asMols=True) 335 >>> [Chem.MolToSmiles(x,True) for x in frags] 336 ['[4*]CCC', '[3*]O[3*]', '[4*]CC'] 337 338 """ 339 if not bonds: 340 #bonds = FindBRICSBonds(mol) 341 res = Chem.FragmentOnBRICSBonds(mol) 342 if sanitize: 343 Chem.SanitizeMol(res) 344 return res 345 eMol = Chem.EditableMol(mol) 346 nAts = mol.GetNumAtoms() 347 348 dummyPositions=[] 349 for indices,dummyTypes in bonds: 350 ia,ib = indices 351 obond = mol.GetBondBetweenAtoms(ia,ib) 352 bondType=obond.GetBondType() 353 eMol.RemoveBond(ia,ib) 354 355 da,db = dummyTypes 356 atoma = Chem.Atom(0) 357 atoma.SetIsotope(int(da)) 358 atoma.SetNoImplicit(True) 359 idxa = nAts 360 nAts+=1 361 eMol.AddAtom(atoma) 362 eMol.AddBond(ia,idxa,bondType) 363 364 atomb = Chem.Atom(0) 365 atomb.SetIsotope(int(db)) 366 atomb.SetNoImplicit(True) 367 idxb = nAts 368 nAts+=1 369 eMol.AddAtom(atomb) 370 eMol.AddBond(ib,idxb,bondType) 371 if mol.GetNumConformers(): 372 dummyPositions.append((idxa,ib)) 373 dummyPositions.append((idxb,ia)) 374 res = eMol.GetMol() 375 if sanitize: 376 Chem.SanitizeMol(res) 377 if mol.GetNumConformers(): 378 for conf in mol.GetConformers(): 379 resConf = res.GetConformer(conf.GetId()) 380 for ia,pa in dummyPositions: 381 resConf.SetAtomPosition(ia,conf.GetAtomPosition(pa)) 382 return res
383
384 -def BRICSDecompose(mol,allNodes=None,minFragmentSize=1,onlyUseReactions=None, 385 silent=True,keepNonLeafNodes=False,singlePass=False,returnMols=False):
386 """ returns the BRICS decomposition for a molecule 387 388 >>> from rdkit import Chem 389 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1') 390 >>> res = list(BRICSDecompose(m)) 391 >>> sorted(res) 392 ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]'] 393 394 >>> res = list(BRICSDecompose(m,returnMols=True)) 395 >>> res[0] 396 <rdkit.Chem.rdchem.Mol object ...> 397 >>> smis = [Chem.MolToSmiles(x,True) for x in res] 398 >>> sorted(smis) 399 ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]'] 400 401 nexavar, an example from the paper (corrected): 402 >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1') 403 >>> res = list(BRICSDecompose(m)) 404 >>> sorted(res) 405 ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F'] 406 407 it's also possible to keep pieces that haven't been fully decomposed: 408 >>> m = Chem.MolFromSmiles('CCCOCC') 409 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True)) 410 >>> sorted(res) 411 ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC'] 412 413 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1') 414 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True)) 415 >>> sorted(res) 416 ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC'] 417 418 or to only do a single pass of decomposition: 419 >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1') 420 >>> res = list(BRICSDecompose(m,singlePass=True)) 421 >>> sorted(res) 422 ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC'] 423 424 setting a minimum size for the fragments: 425 >>> m = Chem.MolFromSmiles('CCCOCC') 426 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2)) 427 >>> sorted(res) 428 ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC'] 429 >>> m = Chem.MolFromSmiles('CCCOCC') 430 >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3)) 431 >>> sorted(res) 432 ['CCCOCC', '[3*]OCC', '[4*]CCC'] 433 >>> res = list(BRICSDecompose(m,minFragmentSize=2)) 434 >>> sorted(res) 435 ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC'] 436 437 438 """ 439 global reactions 440 mSmi = Chem.MolToSmiles(mol,1) 441 442 if allNodes is None: 443 allNodes=set() 444 445 if mSmi in allNodes: 446 return set() 447 448 activePool={mSmi:mol} 449 allNodes.add(mSmi) 450 foundMols={mSmi:mol} 451 for gpIdx,reactionGp in enumerate(reactions): 452 newPool = {} 453 while activePool: 454 matched=False 455 nSmi = next(iterkeys(activePool)) 456 mol = activePool.pop(nSmi) 457 for rxnIdx,reaction in enumerate(reactionGp): 458 if onlyUseReactions and (gpIdx,rxnIdx) not in onlyUseReactions: 459 continue 460 if not silent: 461 print('--------') 462 print(smartsGps[gpIdx][rxnIdx]) 463 ps = reaction.RunReactants((mol,)) 464 if ps: 465 if not silent: print(nSmi,'->',len(ps),'products') 466 for prodSeq in ps: 467 seqOk=True 468 # we want to disqualify small fragments, so sort the product sequence by size 469 tSeq = [(prod.GetNumAtoms(onlyExplicit=True),idx) for idx,prod in enumerate(prodSeq)] 470 tSeq.sort() 471 for nats,idx in tSeq: 472 prod = prodSeq[idx] 473 try: 474 Chem.SanitizeMol(prod) 475 except: 476 continue 477 pSmi = Chem.MolToSmiles(prod,1) 478 if minFragmentSize>0: 479 nDummies = pSmi.count('*') 480 if nats-nDummies<minFragmentSize: 481 seqOk=False 482 break 483 prod.pSmi = pSmi 484 ts = [(x,prodSeq[y]) for x,y in tSeq] 485 prodSeq=ts 486 if seqOk: 487 matched=True 488 for nats,prod in prodSeq: 489 pSmi = prod.pSmi 490 #print('\t',nats,pSmi) 491 if pSmi not in allNodes: 492 if not singlePass: 493 activePool[pSmi] = prod 494 allNodes.add(pSmi) 495 foundMols[pSmi]=prod 496 if singlePass or keepNonLeafNodes or not matched: 497 newPool[nSmi]=mol 498 activePool = newPool 499 if not (singlePass or keepNonLeafNodes): 500 if not returnMols: 501 res = set(activePool.keys()) 502 else: 503 res = activePool.values() 504 else: 505 if not returnMols: 506 res = allNodes 507 else: 508 res = foundMols.values() 509 return res
510 511 512 import random 513 dummyPattern=Chem.MolFromSmiles('[*]')
514 -def BRICSBuild(fragments,onlyCompleteMols=True,seeds=None,uniquify=True, 515 scrambleReagents=True,maxDepth=3):
516 seen = set() 517 if not seeds: 518 seeds = list(fragments) 519 if scrambleReagents: 520 seeds = list(seeds) 521 random.shuffle(seeds,random=random.random) 522 if scrambleReagents: 523 tempReactions = list(reverseReactions) 524 random.shuffle(tempReactions,random=random.random) 525 else: 526 tempReactions=reverseReactions 527 for seed in seeds: 528 seedIsR1=False 529 seedIsR2=False 530 nextSteps=[] 531 for rxn in tempReactions: 532 if seed.HasSubstructMatch(rxn._matchers[0]): 533 seedIsR1=True 534 if seed.HasSubstructMatch(rxn._matchers[1]): 535 seedIsR2=True 536 for fragment in fragments: 537 ps = None 538 if fragment.HasSubstructMatch(rxn._matchers[0]): 539 if seedIsR2: 540 ps = rxn.RunReactants((fragment,seed)) 541 if fragment.HasSubstructMatch(rxn._matchers[1]): 542 if seedIsR1: 543 ps = rxn.RunReactants((seed,fragment)) 544 if ps: 545 for p in ps: 546 if uniquify: 547 pSmi =Chem.MolToSmiles(p[0],True) 548 if pSmi in seen: 549 continue 550 else: 551 seen.add(pSmi) 552 if p[0].HasSubstructMatch(dummyPattern): 553 nextSteps.append(p[0]) 554 if not onlyCompleteMols: 555 yield p[0] 556 else: 557 yield p[0] 558 if nextSteps and maxDepth>0: 559 for p in BRICSBuild(fragments,onlyCompleteMols=onlyCompleteMols, 560 seeds=nextSteps,uniquify=uniquify, 561 maxDepth=maxDepth-1): 562 if uniquify: 563 pSmi =Chem.MolToSmiles(p,True) 564 if pSmi in seen: 565 continue 566 else: 567 seen.add(pSmi) 568 yield p
569 570 571 572 # ------- ------- ------- ------- ------- ------- ------- ------- 573 # Begin testing code 574 575 576 #------------------------------------ 577 # 578 # doctest boilerplate 579 #
580 -def _test():
581 import doctest,sys 582 return doctest.testmod(sys.modules["__main__"], 583 optionflags=doctest.ELLIPSIS+doctest.NORMALIZE_WHITESPACE)
584 585 if __name__=='__main__': 586 import unittest
587 - class TestCase(unittest.TestCase):
588 - def test1(self):
589 m = Chem.MolFromSmiles('CC(=O)OC') 590 res = BRICSDecompose(m) 591 self.assertTrue(res) 592 self.assertTrue(len(res)==2) 593 594 m = Chem.MolFromSmiles('CC(=O)N1CCC1=O') 595 res = BRICSDecompose(m) 596 self.assertTrue(res) 597 self.assertTrue(len(res)==2,res) 598 599 m = Chem.MolFromSmiles('c1ccccc1N(C)C') 600 res = BRICSDecompose(m) 601 self.assertTrue(res) 602 self.assertTrue(len(res)==2,res) 603 604 m = Chem.MolFromSmiles('c1cccnc1N(C)C') 605 res = BRICSDecompose(m) 606 self.assertTrue(res) 607 self.assertTrue(len(res)==2,res) 608 609 m = Chem.MolFromSmiles('o1ccnc1N(C)C') 610 res = BRICSDecompose(m) 611 self.assertTrue(res) 612 self.assertTrue(len(res)==2) 613 614 m = Chem.MolFromSmiles('c1ccccc1OC') 615 res = BRICSDecompose(m) 616 self.assertTrue(res) 617 self.assertTrue(len(res)==2) 618 619 m = Chem.MolFromSmiles('o1ccnc1OC') 620 res = BRICSDecompose(m) 621 self.assertTrue(res) 622 self.assertTrue(len(res)==2) 623 624 m = Chem.MolFromSmiles('O1CCNC1OC') 625 res = BRICSDecompose(m) 626 self.assertTrue(res) 627 self.assertTrue(len(res)==2) 628 629 m = Chem.MolFromSmiles('CCCSCC') 630 res = BRICSDecompose(m) 631 self.assertTrue(res) 632 self.assertTrue(len(res)==3,res) 633 self.assertTrue('[11*]S[11*]' in res,res) 634 635 m = Chem.MolFromSmiles('CCNC(=O)C1CC1') 636 res = BRICSDecompose(m) 637 self.assertTrue(res) 638 self.assertTrue(len(res)==4,res) 639 self.assertTrue('[5*]N[5*]' in res,res)
640
641 - def test2(self):
642 # example from the paper, nexavar: 643 m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1') 644 res = BRICSDecompose(m) 645 self.assertTrue(res) 646 self.assertTrue(len(res)==9,res)
647
648 - def test3(self):
649 m = Chem.MolFromSmiles('FC(F)(F)C1=C(Cl)C=CC(NC(=O)NC2=CC=CC=C2)=C1') 650 res = BRICSDecompose(m) 651 self.assertTrue(res) 652 self.assertTrue(len(res)==5,res) 653 self.assertTrue('[5*]N[5*]' in res,res) 654 self.assertTrue('[16*]c1ccccc1' in res,res) 655 self.assertTrue('[8*]C(F)(F)F' in res,res)
656 657
658 - def test4(self):
659 allNodes = set() 660 m = Chem.MolFromSmiles('c1ccccc1OCCC') 661 res = BRICSDecompose(m,allNodes=allNodes) 662 self.assertTrue(res) 663 leaves=res 664 self.assertTrue(len(leaves)==3,leaves) 665 self.assertTrue(len(allNodes)==6,allNodes) 666 res = BRICSDecompose(m,allNodes=allNodes) 667 self.assertFalse(res) 668 self.assertTrue(len(allNodes)==6,allNodes) 669 670 m = Chem.MolFromSmiles('c1ccccc1OCCCC') 671 res = BRICSDecompose(m,allNodes=allNodes) 672 self.assertTrue(res) 673 leaves.update(res) 674 self.assertTrue(len(allNodes)==9,allNodes) 675 self.assertTrue(len(leaves)==4,leaves) 676 677 678 m = Chem.MolFromSmiles('c1cc(C(=O)NCC)ccc1OCCC') 679 res = BRICSDecompose(m,allNodes=allNodes) 680 self.assertTrue(res) 681 leaves.update(res) 682 self.assertTrue(len(leaves)==8,leaves) 683 self.assertTrue(len(allNodes)==18,allNodes)
684
685 - def test5(self):
686 allNodes = set() 687 frags = [ 688 '[14*]c1ncncn1', 689 '[16*]c1ccccc1', 690 '[14*]c1ncccc1', 691 ] 692 frags = [Chem.MolFromSmiles(x) for x in frags] 693 res = BRICSBuild(frags) 694 self.assertTrue(res) 695 res= list(res) 696 self.assertTrue(len(res)==6) 697 smis = [Chem.MolToSmiles(x,True) for x in res] 698 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) 699 self.assertTrue('c1ccc(-c2ccccn2)cc1' in smis)
700
701 - def test5a(self):
702 allNodes = set() 703 frags = [ 704 '[3*]O[3*]', 705 '[16*]c1ccccc1', 706 ] 707 frags = [Chem.MolFromSmiles(x) for x in frags] 708 res = BRICSBuild(frags) 709 self.assertTrue(res) 710 res=list(res) 711 smis = [Chem.MolToSmiles(x,True) for x in res] 712 self.assertTrue(len(smis)==2,smis) 713 self.assertTrue('c1ccc(Oc2ccccc2)cc1' in smis) 714 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis)
715 716 717
718 - def test6(self):
719 allNodes = set() 720 frags = [ 721 '[16*]c1ccccc1', 722 '[3*]OC', 723 '[9*]n1cccc1', 724 ] 725 frags = [Chem.MolFromSmiles(x) for x in frags] 726 res = BRICSBuild(frags) 727 self.assertTrue(res) 728 res= list(res) 729 self.assertTrue(len(res)==3) 730 smis = [Chem.MolToSmiles(x,True) for x in res] 731 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) 732 self.assertTrue('COc1ccccc1' in smis) 733 self.assertTrue('c1ccc(-n2cccc2)cc1' in smis,smis)
734
735 - def test7(self):
736 allNodes = set() 737 frags = [ 738 '[16*]c1ccccc1', 739 '[3*]OC', 740 '[3*]OCC(=O)[6*]', 741 ] 742 frags = [Chem.MolFromSmiles(x) for x in frags] 743 res = BRICSBuild(frags) 744 self.assertTrue(res) 745 res= list(res) 746 smis = [Chem.MolToSmiles(x,True) for x in res] 747 self.assertTrue(len(res)==3) 748 self.assertTrue('c1ccc(-c2ccccc2)cc1' in smis) 749 self.assertTrue('COc1ccccc1' in smis) 750 self.assertTrue('O=C(COc1ccccc1)c1ccccc1' in smis)
751
752 - def test8(self):
753 random.seed(23) 754 base = Chem.MolFromSmiles("n1cncnc1OCC(C1CC1)OC1CNC1") 755 catalog = BRICSDecompose(base) 756 self.assertTrue(len(catalog)==5,catalog) 757 catalog = [Chem.MolFromSmiles(x) for x in catalog] 758 ms = list(BRICSBuild(catalog,maxDepth=4)) 759 for m in ms: 760 Chem.SanitizeMol(m) 761 ms = [Chem.MolToSmiles(x) for x in ms] 762 self.assertEqual(len(ms),36) 763 764 ts = ['n1cnc(C2CNC2)nc1','n1cnc(-c2ncncn2)nc1','C(OC1CNC1)C(C1CC1)OC1CNC1', 765 'n1cnc(OC(COC2CNC2)C2CC2)nc1','n1cnc(OCC(OC2CNC2)C2CNC2)nc1'] 766 ts = [Chem.MolToSmiles(Chem.MolFromSmiles(x),True) for x in ts] 767 for t in ts: 768 self.assertTrue(t in ms,(t,ms))
769
770 - def test9(self):
771 m = Chem.MolFromSmiles('CCOc1ccccc1c1ncc(c2nc(NCCCC)ncn2)cc1') 772 res=BRICSDecompose(m) 773 self.assertEqual(len(res),7) 774 self.assertTrue('[3*]O[3*]' in res) 775 self.assertFalse('[14*]c1ncnc(NCCCC)n1' in res) 776 res = BRICSDecompose(m,singlePass=True) 777 self.assertEqual(len(res),13) 778 self.assertTrue('[3*]OCC' in res) 779 self.assertTrue('[14*]c1ncnc(NCCCC)n1' in res)
780
781 - def test10(self):
782 m = Chem.MolFromSmiles('C1CCCCN1c1ccccc1') 783 res=BRICSDecompose(m) 784 self.assertEqual(len(res),2,res)
785
786 - def test11(self):
787 # test coordinate preservation: 788 molblock=""" 789 RDKit 3D 790 791 13 14 0 0 0 0 0 0 0 0999 V2000 792 -1.2004 0.5900 0.6110 C 0 0 0 0 0 0 0 0 0 0 0 0 793 -2.2328 1.3173 0.0343 C 0 0 0 0 0 0 0 0 0 0 0 0 794 -3.4299 0.6533 -0.1500 C 0 0 0 0 0 0 0 0 0 0 0 0 795 -3.3633 -0.7217 -0.3299 C 0 0 0 0 0 0 0 0 0 0 0 0 796 -2.1552 -1.3791 -0.2207 C 0 0 0 0 0 0 0 0 0 0 0 0 797 -1.1425 -0.7969 0.5335 C 0 0 0 0 0 0 0 0 0 0 0 0 798 0.1458 -1.4244 0.4108 O 0 0 0 0 0 0 0 0 0 0 0 0 799 1.2976 -0.7398 -0.1026 C 0 0 0 0 0 0 0 0 0 0 0 0 800 2.4889 -0.7939 0.5501 N 0 0 0 0 0 0 0 0 0 0 0 0 801 3.4615 0.1460 0.3535 C 0 0 0 0 0 0 0 0 0 0 0 0 802 3.0116 1.4034 -0.0296 C 0 0 0 0 0 0 0 0 0 0 0 0 803 1.9786 1.4264 -0.9435 C 0 0 0 0 0 0 0 0 0 0 0 0 804 1.1399 0.3193 -0.9885 C 0 0 0 0 0 0 0 0 0 0 0 0 805 1 2 2 0 806 2 3 1 0 807 3 4 2 0 808 4 5 1 0 809 5 6 2 0 810 6 7 1 0 811 7 8 1 0 812 8 9 2 0 813 9 10 1 0 814 10 11 2 0 815 11 12 1 0 816 12 13 2 0 817 6 1 1 0 818 13 8 1 0 819 M END 820 """ 821 m = Chem.MolFromMolBlock(molblock) 822 pieces = BreakBRICSBonds(m) 823 824 frags = Chem.GetMolFrags(pieces,asMols=True) 825 self.assertEqual(len(frags),3) 826 self.assertEqual(frags[0].GetNumAtoms(),7) 827 self.assertEqual(frags[1].GetNumAtoms(),3) 828 self.assertEqual(frags[2].GetNumAtoms(),7) 829 830 c1 = m.GetConformer() 831 c2 = frags[0].GetConformer() 832 for i in range(6): 833 p1 = c1.GetAtomPosition(i) 834 p2 = c2.GetAtomPosition(i) 835 self.assertEqual((p1-p2).Length(),0.0) 836 p1 = c1.GetAtomPosition(6) 837 p2 = c2.GetAtomPosition(6) 838 self.assertEqual((p1-p2).Length(),0.0) 839 840 c2 = frags[2].GetConformer() 841 for i in range(6): 842 p1 = c1.GetAtomPosition(i+7) 843 p2 = c2.GetAtomPosition(i) 844 self.assertEqual((p1-p2).Length(),0.0) 845 p1 = c1.GetAtomPosition(6) 846 p2 = c2.GetAtomPosition(6) 847 self.assertEqual((p1-p2).Length(),0.0) 848 849 c2 = frags[1].GetConformer() 850 for i in range(1): 851 p1 = c1.GetAtomPosition(i+6) 852 p2 = c2.GetAtomPosition(i) 853 self.assertEqual((p1-p2).Length(),0.0) 854 p1 = c1.GetAtomPosition(5) 855 p2 = c2.GetAtomPosition(1) 856 self.assertEqual((p1-p2).Length(),0.0) 857 p1 = c1.GetAtomPosition(6) 858 p2 = c2.GetAtomPosition(0) 859 self.assertEqual((p1-p2).Length(),0.0) 860 861 862 # make sure multiple conformations (include 2D) also work: 863 molblock=""" 864 RDKit 2D 865 866 13 14 0 0 0 0 0 0 0 0999 V2000 867 -1.2990 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 868 -2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 869 -3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 870 -3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 871 -2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 872 -1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 873 -0.0000 1.3846 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 874 1.2990 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 875 1.2990 -0.8654 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 876 2.5981 -1.6154 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 877 3.8971 -0.8654 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 878 3.8971 0.6346 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 879 2.5981 1.3846 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 880 1 2 2 0 881 2 3 1 0 882 3 4 2 0 883 4 5 1 0 884 5 6 2 0 885 6 7 1 0 886 7 8 1 0 887 8 9 2 0 888 9 10 1 0 889 10 11 2 0 890 11 12 1 0 891 12 13 2 0 892 6 1 1 0 893 13 8 1 0 894 M END 895 """ 896 m2 = Chem.MolFromMolBlock(molblock) 897 m.AddConformer(m2.GetConformer(),assignId=True) 898 self.assertEqual(m.GetNumConformers(),2) 899 900 pieces = BreakBRICSBonds(m) 901 frags = Chem.GetMolFrags(pieces,asMols=True) 902 self.assertEqual(len(frags),3) 903 self.assertEqual(frags[0].GetNumAtoms(),7) 904 self.assertEqual(frags[1].GetNumAtoms(),3) 905 self.assertEqual(frags[2].GetNumAtoms(),7) 906 self.assertEqual(frags[0].GetNumConformers(),2) 907 self.assertEqual(frags[1].GetNumConformers(),2) 908 self.assertEqual(frags[2].GetNumConformers(),2) 909 910 c1 = m.GetConformer(0) 911 c2 = frags[0].GetConformer(0) 912 for i in range(6): 913 p1 = c1.GetAtomPosition(i) 914 p2 = c2.GetAtomPosition(i) 915 self.assertEqual((p1-p2).Length(),0.0) 916 p1 = c1.GetAtomPosition(6) 917 p2 = c2.GetAtomPosition(6) 918 self.assertEqual((p1-p2).Length(),0.0) 919 920 c2 = frags[2].GetConformer(0) 921 for i in range(6): 922 p1 = c1.GetAtomPosition(i+7) 923 p2 = c2.GetAtomPosition(i) 924 self.assertEqual((p1-p2).Length(),0.0) 925 p1 = c1.GetAtomPosition(6) 926 p2 = c2.GetAtomPosition(6) 927 self.assertEqual((p1-p2).Length(),0.0) 928 929 c2 = frags[1].GetConformer(0) 930 for i in range(1): 931 p1 = c1.GetAtomPosition(i+6) 932 p2 = c2.GetAtomPosition(i) 933 self.assertEqual((p1-p2).Length(),0.0) 934 p1 = c1.GetAtomPosition(5) 935 p2 = c2.GetAtomPosition(1) 936 self.assertEqual((p1-p2).Length(),0.0) 937 p1 = c1.GetAtomPosition(6) 938 p2 = c2.GetAtomPosition(0) 939 self.assertEqual((p1-p2).Length(),0.0) 940 941 c1 = m.GetConformer(1) 942 c2 = frags[0].GetConformer(1) 943 for i in range(6): 944 p1 = c1.GetAtomPosition(i) 945 p2 = c2.GetAtomPosition(i) 946 self.assertEqual((p1-p2).Length(),0.0) 947 p1 = c1.GetAtomPosition(6) 948 p2 = c2.GetAtomPosition(6) 949 self.assertEqual((p1-p2).Length(),0.0) 950 951 c2 = frags[2].GetConformer(1) 952 for i in range(6): 953 p1 = c1.GetAtomPosition(i+7) 954 p2 = c2.GetAtomPosition(i) 955 self.assertEqual((p1-p2).Length(),0.0) 956 p1 = c1.GetAtomPosition(6) 957 p2 = c2.GetAtomPosition(6) 958 self.assertEqual((p1-p2).Length(),0.0) 959 960 c2 = frags[1].GetConformer(1) 961 for i in range(1): 962 p1 = c1.GetAtomPosition(i+6) 963 p2 = c2.GetAtomPosition(i) 964 self.assertEqual((p1-p2).Length(),0.0) 965 p1 = c1.GetAtomPosition(5) 966 p2 = c2.GetAtomPosition(1) 967 self.assertEqual((p1-p2).Length(),0.0) 968 p1 = c1.GetAtomPosition(6) 969 p2 = c2.GetAtomPosition(0) 970 self.assertEqual((p1-p2).Length(),0.0)
971
972 - def test12(self):
973 m = Chem.MolFromSmiles('CCS(=O)(=O)NCC') 974 res=list(FindBRICSBonds(m)) 975 self.assertEqual(len(res),2,res) 976 atIds = [x[0] for x in res] 977 atIds.sort() 978 self.assertEqual(atIds,[(5,2), (6,5)])
979 980 981 982 983 984 failed,tried = _test() 985 if failed: 986 sys.exit(failed) 987 988 unittest.main() 989