Package rdkit :: Package Chem :: Package SimpleEnum :: Module Enumerator
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.SimpleEnum.Enumerator

  1  # 
  2  #  Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc. 
  3  #  All rights reserved. 
  4  #  
  5  # Redistribution and use in source and binary forms, with or without 
  6  # modification, are permitted provided that the following conditions are 
  7  # met:  
  8  # 
  9  #     * Redistributions of source code must retain the above copyright  
 10  #       notice, this list of conditions and the following disclaimer. 
 11  #     * Redistributions in binary form must reproduce the above 
 12  #       copyright notice, this list of conditions and the following  
 13  #       disclaimer in the documentation and/or other materials provided  
 14  #       with the distribution. 
 15  #     * Neither the name of Novartis Institutes for BioMedical Research Inc.  
 16  #       nor the names of its contributors may be used to endorse or promote  
 17  #       products derived from this software without specific prior written permission. 
 18  # 
 19  # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 20  # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 21  # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 22  # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 23  # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 24  # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 25  # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 26  # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 27  # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 28  # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 29  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 30  # 
 31  # Created by Greg Landrum, May 2009 
 32  from __future__ import print_function 
 33  from rdkit import RDConfig 
 34  from rdkit import Chem 
 35  from rdkit.Chem import AllChem 
 36  from rdkit.Chem import FunctionalGroups 
 37  import os 
 38   
def PreprocessReaction(reaction,
                       funcGroupFilename=os.path.join(RDConfig.RDDataDir,'Functional_Group_Hierarchy.txt'),
                       propName='molFileValue'):
    """Initialize and validate a reaction, then add functional-group queries to it.

    Arguments:
      - reaction: the reaction to preprocess; it is modified in place
      - funcGroupFilename: file with the functional group (query) definitions,
        parsed with Chem.ParseMolQueryDefFile()
      - propName: name of the atom property holding the functional group labels

    Returns a 5-tuple:
      (nWarn, nError, nReactants, nProducts, reactantLabels)
    If validation reports errors, no queries are added to the reaction and
    reactantLabels is an empty tuple.

    Raises IOError if the functional group definitions cannot be parsed from
    funcGroupFilename.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> nWarn
    0
    >>> nError
    0
    >>> nReacts
    2
    >>> nProds
    1
    >>> reactantLabels
    (((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))

    If there are functional group labels in the input reaction (via atoms with molFileValue properties),
    the corresponding atoms will have queries added to them so that they only match such things. We can
    see this here:
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> r1 = rxn.GetReactantTemplate(0)
    >>> m1 = Chem.MolFromSmiles('CCBr')
    >>> m2 = Chem.MolFromSmiles('c1ccccc1Br')

    These both match because the reaction file itself just has R1-Br:
    >>> m1.HasSubstructMatch(r1)
    True
    >>> m2.HasSubstructMatch(r1)
    True

    After preprocessing, we only match the aromatic Br:
    >>> d = PreprocessReaction(rxn)
    >>> m1.HasSubstructMatch(r1)
    False
    >>> m2.HasSubstructMatch(r1)
    True

    We also support or queries in the values field (separated by commas):
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','azide_reaction.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reactantLabels = PreprocessReaction(rxn)[-1]
    >>> reactantLabels
    (((1, 'azide'),), ((1, 'carboxylicacid,acidchloride'),))
    >>> m1 = Chem.MolFromSmiles('CC(=O)O')
    >>> m2 = Chem.MolFromSmiles('CC(=O)Cl')
    >>> m3 = Chem.MolFromSmiles('CC(=O)N')
    >>> r2 = rxn.GetReactantTemplate(1)
    >>> m1.HasSubstructMatch(r2)
    True
    >>> m2.HasSubstructMatch(r2)
    True
    >>> m3.HasSubstructMatch(r2)
    False

    Unrecognized functional group types cause an exception to be raised:
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException

    One unrecognized group type in a comma-separated list makes the whole thing fail:
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value2.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value3.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      File "/usr/prog/python/2.6.6_gnu/lib/python2.6/doctest.py", line 1253, in __run
        compileflags, 1) in test.globs
      File "<doctest __main__.PreprocessReaction[36]>", line 1, in <module>
        nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
      File "Enumerator.py", line 105, in PreprocessReaction
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName='molFileValue', getLabels=True)
    RuntimeError: KeyErrorException

    """
    reaction._setImplicitPropertiesFlag(True)
    reaction.Initialize()
    nReactants = reaction.GetNumReactantTemplates()
    nProducts = reaction.GetNumProductTemplates()
    nWarn, nError = reaction.Validate()

    # Start with an empty label set so the return statement is well-defined
    # even when validation failed and the query-adding step is skipped.
    reactantLabels = ()
    if not nError:
        try:
            queryDict = Chem.ParseMolQueryDefFile(funcGroupFilename)
        except Exception:
            # Report any parse/read failure as an IOError naming the file, but
            # let KeyboardInterrupt/SystemExit propagate untouched.
            raise IOError('cannot open', funcGroupFilename)
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName, getLabels=True)

    return nWarn, nError, nReactants, nProducts, reactantLabels
146
def EnumerateReaction(reaction,
                      bbLists,
                      uniqueProductsOnly=False,
                      funcGroupFilename=os.path.join(RDConfig.RDDataDir,'Functional_Group_Hierarchy.txt'),
                      propName='molFileValue'):
    """Enumerate the products of a reaction over lists of building blocks.

    Arguments:
      - reaction: the reaction to use; it is preprocessed (and so modified)
        via PreprocessReaction()
      - bbLists: one sequence of building-block molecules per reactant
      - uniqueProductsOnly: if set, entries whose product SMILES have already
        been seen are dropped (this needs memory for the seen SMILES)
      - funcGroupFilename, propName: passed on to PreprocessReaction()

    Returns an iterable of product entries.

    Raises ValueError if the reaction does not validate or if the number of
    building-block lists differs from the number of reactants.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reacts1=['Brc1ccccc1','Brc1ncccc1','Brc1cnccc1']
    >>> reacts1=[Chem.MolFromSmiles(x) for x in reacts1]
    >>> reacts2=['CCB(O)O','CCCB(O)O']
    >>> reacts2=[Chem.MolFromSmiles(x) for x in reacts2]

    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2))
    >>> prods = list(prods)

    This is a bit nasty because of the symmetry of the boronic acid:
    >>> len(prods)
    12

    >>> smis = list(set([Chem.MolToSmiles(x[0]) for x in prods]))
    >>> smis.sort()
    >>> len(smis)
    6
    >>> print(smis)
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

    The nastiness can be avoided at the cost of some memory by asking for only unique products:
    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2),uniqueProductsOnly=True)
    >>> prods = list(prods)
    >>> len(prods)
    6
    >>> print(sorted([Chem.MolToSmiles(x[0]) for x in prods]))
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']


    """
    # Forward the caller's functional-group settings; calling
    # PreprocessReaction(reaction) alone would silently ignore them.
    nWarn, nError, nReacts, nProds, reactantLabels = PreprocessReaction(
        reaction, funcGroupFilename=funcGroupFilename, propName=propName)
    if nError:
        raise ValueError('bad reaction')
    if len(bbLists) != nReacts:
        raise ValueError('%d reactants in reaction, %d bb lists supplied'%(nReacts,len(bbLists)))

    def _uniqueOnly(lst):
        """Yield the non-empty entries of lst whose SMILES key is new."""
        seen = set()  # set membership keeps the duplicate check O(1) per entry
        for entry in lst:
            if entry:
                # Sorting makes the key independent of the product order
                # within a single entry.
                smi = '.'.join(sorted([Chem.MolToSmiles(x,True) for x in entry]))
                if smi not in seen:
                    seen.add(smi)
                    yield entry

    ps = AllChem.EnumerateLibraryFromReaction(reaction,bbLists)
    if not uniqueProductsOnly:
        return ps
    else:
        return _uniqueOnly(ps)


#------------------------------------
#
#  doctest boilerplate
#
205 -def _test():
206 import doctest,sys 207 return doctest.testmod(sys.modules["__main__"])


if __name__ == '__main__':
    # Script entry point: run the doctests and use the number of failures
    # as the process exit status.
    import sys
    nFailed, nTried = _test()
    sys.exit(nFailed)