1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 from __future__ import print_function
33 from rdkit import RDConfig
34 from rdkit import Chem
35 from rdkit.Chem import AllChem
36 from rdkit.Chem import FunctionalGroups
37 import os
38
def PreprocessReaction(reaction,
                       funcGroupFilename=os.path.join(RDConfig.RDDataDir,
                                                      'Functional_Group_Hierarchy.txt'),
                       propName='molFileValue'):
    """Initialize and validate a reaction, then add functional-group queries to it.

    The reaction is modified in place: its implicit-properties flag is set,
    it is initialized and validated and, if validation reports no errors,
    the query definitions read from funcGroupFilename are added to the
    reaction via AddRecursiveQueriesToReaction(), keyed on the atom property
    named by propName.

    Arguments:
      - reaction: the reaction to preprocess (modified in place)
      - funcGroupFilename: file parsed with Chem.ParseMolQueryDefFile() to
        obtain the functional-group query definitions
      - propName: name of the atom property holding the functional-group
        label(s)

    Returns a 5-tuple:
      (nWarn, nError, nReactants, nProducts, reactantLabels)
    nWarn and nError come from reaction.Validate(). reactantLabels is the
    value returned by AddRecursiveQueriesToReaction(..., getLabels=True); it
    is an empty tuple when validation reported errors, because no queries
    are added in that case.

    Raises IOError if the functional-group file cannot be parsed.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    >>> nWarn
    0
    >>> nError
    0
    >>> nReacts
    2
    >>> nProds
    1
    >>> reactantLabels
    (((0, 'halogen.bromine.aromatic'),), ((1, 'boronicacid'),))

    If there are functional group labels in the input reaction (via atoms with molFileValue properties),
    the corresponding atoms will have queries added to them so that they only match such things. We can
    see this here:
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> r1 = rxn.GetReactantTemplate(0)
    >>> m1 = Chem.MolFromSmiles('CCBr')
    >>> m2 = Chem.MolFromSmiles('c1ccccc1Br')

    These both match because the reaction file itself just has R1-Br:
    >>> m1.HasSubstructMatch(r1)
    True
    >>> m2.HasSubstructMatch(r1)
    True

    After preprocessing, we only match the aromatic Br:
    >>> d = PreprocessReaction(rxn)
    >>> m1.HasSubstructMatch(r1)
    False
    >>> m2.HasSubstructMatch(r1)
    True

    We also support or queries in the values field (separated by commas):
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','azide_reaction.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reactantLabels = PreprocessReaction(rxn)[-1]
    >>> reactantLabels
    (((1, 'azide'),), ((1, 'carboxylicacid,acidchloride'),))
    >>> m1 = Chem.MolFromSmiles('CC(=O)O')
    >>> m2 = Chem.MolFromSmiles('CC(=O)Cl')
    >>> m3 = Chem.MolFromSmiles('CC(=O)N')
    >>> r2 = rxn.GetReactantTemplate(1)
    >>> m1.HasSubstructMatch(r2)
    True
    >>> m2.HasSubstructMatch(r2)
    True
    >>> m3.HasSubstructMatch(r2)
    False

    Unrecognized functional group types cause an error:
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      ...
    RuntimeError: KeyErrorException

    One unrecognized group type in a comma-separated list makes the whole thing fail:
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value2.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      ...
    RuntimeError: KeyErrorException
    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','bad_value3.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> nWarn,nError,nReacts,nProds,reactantLabels = PreprocessReaction(rxn)
    Traceback (most recent call last):
      ...
    RuntimeError: KeyErrorException

    """
    reaction._setImplicitPropertiesFlag(True)
    reaction.Initialize()
    nReactants = reaction.GetNumReactantTemplates()
    nProducts = reaction.GetNumProductTemplates()
    nWarn, nError = reaction.Validate()

    # Always bind the labels so that the return statement below is valid even
    # when validation fails and no queries are added to the reaction.
    reactantLabels = ()
    if not nError:
        try:
            queryDict = Chem.ParseMolQueryDefFile(funcGroupFilename)
        except Exception:
            # Only the file parsing is guarded: errors raised while adding the
            # queries (e.g. unknown group names) must reach the caller as-is.
            raise IOError('cannot open', funcGroupFilename)
        reactantLabels = reaction.AddRecursiveQueriesToReaction(queryDict, propName,
                                                                getLabels=True)

    return nWarn, nError, nReactants, nProducts, reactantLabels
146
def EnumerateReaction(reaction, bbLists, uniqueProductsOnly=False,
                      funcGroupFilename=os.path.join(RDConfig.RDDataDir,
                                                     'Functional_Group_Hierarchy.txt'),
                      propName='molFileValue'):
    """Enumerate the products of a reaction over lists of building blocks.

    The reaction is first run through PreprocessReaction() using
    funcGroupFilename and propName, and is then passed together with bbLists
    to AllChem.EnumerateLibraryFromReaction().

    Arguments:
      - reaction: the reaction to use (preprocessed in place)
      - bbLists: a sequence with one collection of building blocks per
        reactant template of the reaction
      - uniqueProductsOnly: if true, product sets whose SMILES have already
        been produced are skipped
      - funcGroupFilename: forwarded to PreprocessReaction()
      - propName: forwarded to PreprocessReaction()

    Returns the result of AllChem.EnumerateLibraryFromReaction() or, when
    uniqueProductsOnly is set, a generator over its unique, non-empty entries.

    Raises ValueError if the reaction does not validate or if the number of
    building-block lists does not match the number of reactants.

    >>> testFile = os.path.join(RDConfig.RDCodeDir,'Chem','SimpleEnum','test_data','boronic1.rxn')
    >>> rxn = AllChem.ReactionFromRxnFile(testFile)
    >>> reacts1=['Brc1ccccc1','Brc1ncccc1','Brc1cnccc1']
    >>> reacts1=[Chem.MolFromSmiles(x) for x in reacts1]
    >>> reacts2=['CCB(O)O','CCCB(O)O']
    >>> reacts2=[Chem.MolFromSmiles(x) for x in reacts2]

    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2))
    >>> prods = list(prods)

    This is a bit nasty because of the symmetry of the boronic acid:
    >>> len(prods)
    12

    >>> smis = list(set([Chem.MolToSmiles(x[0]) for x in prods]))
    >>> smis.sort()
    >>> len(smis)
    6
    >>> print(smis)
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']

    The nastiness can be avoided at the cost of some memory by asking for only unique products:
    >>> prods = EnumerateReaction(rxn,(reacts1,reacts2),uniqueProductsOnly=True)
    >>> prods = list(prods)
    >>> len(prods)
    6
    >>> print(sorted([Chem.MolToSmiles(x[0]) for x in prods]))
    ['CCCc1ccccc1', 'CCCc1ccccn1', 'CCCc1cccnc1', 'CCc1ccccc1', 'CCc1ccccn1', 'CCc1cccnc1']


    """
    # Forward the caller's settings so that a custom functional-group file or
    # property name is actually honored during preprocessing.
    _, nError, nReacts, _, _ = PreprocessReaction(reaction, funcGroupFilename, propName)
    if nError:
        raise ValueError('bad reaction')
    if len(bbLists) != nReacts:
        raise ValueError('%d reactants in reaction, %d bb lists supplied' % (nReacts, len(bbLists)))

    def _uniqueOnly(lst):
        """Lazily yield the non-empty entries of lst whose SMILES key is new."""
        seen = set()
        for entry in lst:
            if entry:
                # Sorting makes the key independent of the order of the
                # products within an entry.
                smi = '.'.join(sorted([Chem.MolToSmiles(x, True) for x in entry]))
                if smi not in seen:
                    seen.add(smi)
                    yield entry

    ps = AllChem.EnumerateLibraryFromReaction(reaction, bbLists)
    if not uniqueProductsOnly:
        return ps
    return _uniqueOnly(ps)
197
198
199
200
201
202
203
204
206 import doctest,sys
207 return doctest.testmod(sys.modules["__main__"])
208
209
if __name__ == '__main__':
    import sys

    # Run the doctests when executed as a script; the number of failing
    # examples becomes the process exit status (0 means success).
    numFailed, numTried = _test()
    sys.exit(numFailed)
214