1
2
3
4
5
""" The "parser" for compound descriptors.

I almost hesitate to document this, because it's not the prettiest
thing the world has ever seen... but it does work (for at least some
definitions of the word).

Rather than getting into the whole mess of writing a parser for the
compound descriptor expressions, I'm just using string substitutions
and python's wonderful ability to *eval* code.

It would probably be a good idea at some point to replace this with a
real parser, if only for the flexibility and intelligent error
messages that would become possible.

The general idea is that we're going to deal with expressions where
atomic descriptors have some kind of method applied to them which
reduces them to a single number for the entire composition.  Compound
descriptors (those applicable to the compound as a whole) are not
operated on by anything in particular (except for standard math stuff).

Here's the general flow of things:

  1) Composition descriptor references ($a, $b, etc.) are replaced with
     lookups of the corresponding descriptor names in the compound
     property dictionary, using string substitution.
     (*_SubForCompoundDescriptors*)

  2) Atomic descriptor references ($1, $2, etc) are replaced with lookups
     into the atomic dict with "DEADBEEF" in place of the atom name.
     (*_SubForAtomicVars*)

  3) Calls to Calculator Functions are augmented with a reference to
     the composition and atomic dictionary
     (*_SubMethodArgs*)

**NOTE:**

  anytime we don't know the answer for a descriptor, rather than
  throwing a (completely incomprehensible) exception, we just return
  -666.  So bad descriptor values should stand out like sore thumbs.

"""
47 from __future__ import print_function
48 __DEBUG=0
49 from rdkit import RDConfig
50
51
52 from math import *
53
54
55
56
57
# Names of the calculator methods that may be applied to atomic descriptors.
# This list is what gets handed to _SubMethodArgs(), which searches an
# expression for each name in turn (in this order) and rewrites the call.
knownMethods = ['SUM', 'MIN', 'MAX', 'MEAN', 'AVG', 'DEV', 'HAS']
59
def HAS(strArg, composList, atomDict):
    """ *Calculator Method*

      does a string search

      **Arguments**

        - strArg: the arguments in string form.  This is split on commas:
          the first piece is an expression (with "DEADBEEF" standing in for
          the atom name) giving the string to search in, the second piece is
          an expression giving the string to search for.  Any further pieces
          are ignored.

        - composList: the composition vector, a sequence of
          (atom, amount) pairs.  The amounts are not used here.

        - atomDict: the atomic dictionary.  It is not referenced directly,
          but it has to be a local name because the evaluated expressions
          refer to it.

      **Returns**

        1 if the search string is found for any atom in the composition,
        0 if it is found for none of them, and -666 if fewer than two
        comma-separated arguments were provided.

      **Notes**

        - both pieces are passed to *eval*, so expressions must come from a
          trusted source.

    """
    # str.split replaces the old string.split() call: the string module was
    # never imported here and string.split() does not exist in Python 3.
    splitArgs = strArg.split(',')
    if len(splitArgs) < 2:
        return -666

    for atom, _ in composList:
        # the evals have to stay directly in this function body so that the
        # expressions can see the local name atomDict
        where = eval(splitArgs[0].replace('DEADBEEF', atom))
        what = eval(splitArgs[1])
        if where.find(what) != -1:
            return 1
    return 0
89
def SUM(strArg, composList, atomDict):
    """ *Calculator Method*

      calculates the sum of a descriptor across a composition

      **Arguments**

        - strArg: the arguments in string form: an expression in which
          "DEADBEEF" stands in for the atom name

        - composList: the composition vector, a sequence of
          (atom, amount) pairs

        - atomDict: the atomic dictionary.  It is not referenced directly,
          but it has to be a local name because the evaluated expression
          refers to it.

      **Returns**

        a float: the sum over the composition of the expression's value for
        each atom multiplied by that atom's amount (0.0 for an empty
        composition)

      **Notes**

        - the expression is passed to *eval*, so it must come from a
          trusted source.

    """
    total = 0.0
    for atom, num in composList:
        # eval stays in the function body (not in a comprehension) so the
        # expression can see the local name atomDict
        total += eval(strArg.replace('DEADBEEF', atom)) * num
    return total
113
def MEAN(strArg, composList, atomDict):
    """ *Calculator Method*

      calculates the average of a descriptor across a composition

      **Arguments**

        - strArg: the arguments in string form: an expression in which
          "DEADBEEF" stands in for the atom name

        - composList: the composition vector, a sequence of
          (atom, amount) pairs

        - atomDict: the atomic dictionary.  It is not referenced directly,
          but it has to be a local name because the evaluated expression
          refers to it.

      **Returns**

        a float: the amount-weighted mean of the expression's value

      **Notes**

        - an empty composition (or amounts summing to zero) results in a
          ZeroDivisionError from the final division.

        - the expression is passed to *eval*, so it must come from a
          trusted source.

    """
    weightedTotal = 0.0
    totalAmount = 0
    for atom, num in composList:
        # eval stays in the function body (not in a comprehension) so the
        # expression can see the local name atomDict
        weightedTotal += eval(strArg.replace('DEADBEEF', atom)) * num
        totalAmount += num
    return weightedTotal / totalAmount


# AVG is simply another name for MEAN
AVG = MEAN
140
def DEV(strArg, composList, atomDict):
    """ *Calculator Method*

      calculates the average deviation of a descriptor across a composition

      **Arguments**

        - strArg: the arguments in string form: an expression in which
          "DEADBEEF" stands in for the atom name

        - composList: the composition vector, a sequence of
          (atom, amount) pairs

        - atomDict: the atomic dictionary.  It is not referenced directly,
          but it has to be a local name because the evaluated expression
          refers to it.

      **Returns**

        a float: the amount-weighted mean of the absolute deviations of the
        expression's value from its amount-weighted mean

      **Notes**

        - the expression is evaluated exactly once per atom; the values are
          kept and reused for both the mean and the deviation.

        - an empty composition (or amounts summing to zero) results in a
          ZeroDivisionError.

        - the expression is passed to *eval*, so it must come from a
          trusted source.

    """
    # single evaluation pass; eval stays in the function body (not in a
    # comprehension) so the expression can see the local name atomDict
    valuesAndAmounts = []
    for atom, num in composList:
        valuesAndAmounts.append((eval(strArg.replace('DEADBEEF', atom)), num))

    # amount-weighted mean, accumulated the same way MEAN() does it
    weightedTotal = 0.0
    totalAmount = 0
    for value, num in valuesAndAmounts:
        weightedTotal += value * num
        totalAmount += num
    avg = weightedTotal / totalAmount

    # amount-weighted mean absolute deviation from that mean
    deviationTotal = 0.0
    for value, num in valuesAndAmounts:
        deviationTotal += abs(value - avg) * num
    return deviationTotal / totalAmount
167
def MIN(strArg, composList, atomDict):
    """ *Calculator Method*

      calculates the minimum value of a descriptor across a composition

      **Arguments**

        - strArg: the arguments in string form: an expression in which
          "DEADBEEF" stands in for the atom name

        - composList: the composition vector, a sequence of
          (atom, amount) pairs.  The amounts are not used here.

        - atomDict: the atomic dictionary.  It is not referenced directly,
          but it has to be a local name because the evaluated expression
          refers to it.

      **Returns**

        the smallest value the expression takes over the atoms in the
        composition

      **Notes**

        - an empty composition results in a ValueError from *min*.

        - the expression is passed to *eval*, so it must come from a
          trusted source.

    """
    values = []
    for atom, _ in composList:
        # eval stays in the function body (not in a comprehension) so the
        # expression can see the local name atomDict
        values.append(eval(strArg.replace('DEADBEEF', atom)))
    return min(values)
191
def MAX(strArg, composList, atomDict):
    """ *Calculator Method*

      calculates the maximum value of a descriptor across a composition

      **Arguments**

        - strArg: the arguments in string form: an expression in which
          "DEADBEEF" stands in for the atom name

        - composList: the composition vector, a sequence of
          (atom, amount) pairs.  The amounts are not used here.

        - atomDict: the atomic dictionary.  It is not referenced directly,
          but it has to be a local name because the evaluated expression
          refers to it.

      **Returns**

        the largest value the expression takes over the atoms in the
        composition

      **Notes**

        - an empty composition results in a ValueError from *max*.

        - the expression is passed to *eval*, so it must come from a
          trusted source.

    """
    values = []
    for atom, _ in composList:
        # eval stays in the function body (not in a comprehension) so the
        # expression can see the local name atomDict
        values.append(eval(strArg.replace('DEADBEEF', atom)))
    return max(values)
215
216
217
218
219
220
221
223 """ replace atomic variables with the appropriate dictionary lookup
224
225 *Not intended for client use*
226
227 """
228 for i in range(len(varList)):
229 cExpr = cExpr.replace('$%d'%(i+1),
230 '%s["DEADBEEF"]["%s"]'%(dictName,varList[i]))
231 return cExpr
232
234 """ replace compound variables with the appropriate list index
235
236 *Not intended for client use*
237
238 """
239 for i in range(len(varList)):
240 cExpr = cExpr.replace('$%s'%chr(ord('a')+i),
241 '%s["%s"]'%(dictName,varList[i]))
242 return cExpr
243
245 """ alters the arguments of calls to calculator methods
246
247 *Not intended for client use*
248
249 This is kind of putrid (and the code ain't so pretty either)
250 The general idea is that the various special methods for atomic
251 descriptors need two extra arguments (the composition and the atomic
252 dict). Rather than make the user type those in, we just find
253 invocations of these methods and fill out the function calls using
254 string replacements.
255 """
256 res = cExpr
257 for method in knownMethods:
258 p = 0
259 while p != -1 and p < len(res):
260 p = res.find(method,p)
261 if p != -1:
262 p = p + len(method) + 1
263 start = p
264 parenCount = 1
265 while parenCount and p < len(res):
266 if res[p] == ')':
267 parenCount = parenCount - 1
268 elif res[p] == '(':
269 parenCount = parenCount + 1
270 p = p + 1
271 if p <= len(res):
272 res = res[0:start]+"'%s',compos,atomDict"%(res[start:p-1])+res[p-1:]
273 return res
274
def CalcSingleCompoundDescriptor(compos, argVect, atomDict, propDict):
    """ calculates the value of the descriptor for a single compound

      **ARGUMENTS:**

        - compos: a vector/tuple containing the composition
           information... in the form:
           '[("Fe",1.),("Pt",2.),("Rh",0.02)]'

        - argVect: a vector/tuple with three elements:

             1) AtomicDescriptorNames:  a list/tuple of the names of the
               atomic descriptors being used. These determine the
               meaning of $1, $2, etc. in the expression

             2) CompoundDescriptorNames:  a list/tuple of the names of the
               compound descriptors being used. These determine the
               meaning of $a, $b, etc. in the expression

             3) Expr: a string containing the expression to be used to
               evaluate the final result.

        - atomDict:
             a dictionary of atomic descriptors.  Each atomic entry is
             another dictionary containing the individual descriptors
             and their values

        - propDict:
             the descriptors for the composition, indexed by compound
             descriptor name.

      **RETURNS:**

        the value of the descriptor, -666 if a problem was encountered

      **RAISES:**

        RuntimeError if a problem was encountered while the module-level
        debug flag is set (after diagnostics have been printed)

      **NOTE:**

        - because it takes rather a lot of work to get everything set
            up to calculate a descriptor, if you are calculating the
            same descriptor for multiple compounds, you probably want to
            be calling _CalcMultipleCompoundsDescriptor()_.

        - the expression is run through *eval*, so it must come from a
            trusted source.  The parameter names compos, atomDict and
            propDict are part of the contract with the generated
            expression text and must not be renamed.

    """
    # pre-bound so that the debug output below can always print them, even
    # when the failure happened before they were assigned
    formula = None
    evalTarget = None
    try:
        atomVarNames = argVect[0]
        compositionVarNames = argVect[1]
        formula = argVect[2]
        formula = _SubForCompoundDescriptors(formula, compositionVarNames, 'propDict')
        formula = _SubForAtomicVars(formula, atomVarNames, 'atomDict')
        evalTarget = _SubMethodArgs(formula, knownMethods)
    except Exception:
        if not __DEBUG:
            return -666
        import traceback
        print('Sub Failure!')
        traceback.print_exc()
        print(evalTarget)
        print(propDict)
        raise RuntimeError('Failure 1')

    try:
        # evaluated right here so that the expression can see the local
        # names compos, atomDict and propDict
        v = eval(evalTarget)
    except Exception:
        if not __DEBUG:
            return -666
        import sys
        import traceback
        # grab the traceback text before doing anything else that might
        # itself fail
        tbText = traceback.format_exc()
        try:
            keysRepr = repr(atomDict.keys())
        except Exception:
            keysRepr = None
        # writing the log is best effort: a failure here should not hide
        # the RuntimeError raised below
        try:
            with open(RDConfig.RDCodeDir+'/ml/descriptors/log.txt', 'a+') as outF:
                outF.write('#------------------------------\n')
                outF.write('formula: %s\n' % repr(formula))
                outF.write('target: %s\n' % repr(evalTarget))
                outF.write('propDict: %s\n' % (repr(propDict)))
                if keysRepr is not None:
                    outF.write('keys: %s\n' % (keysRepr))
                else:
                    outF.write('no atomDict\n')
        except (IOError, OSError):
            print('could not write the descriptor debug log')
        print('ick!')
        print('formula:', formula)
        print('target:', evalTarget)
        print('propDict:', propDict)
        if keysRepr is not None:
            print('keys:', keysRepr)
        else:
            print('no atomDict')
        sys.stderr.write(tbText)
        raise RuntimeError('Failure 2')
    return v
360
def CalcMultipleCompoundsDescriptor(composVect, argVect, atomDict, propDictList):
    """ calculates the value of the descriptor for a list of compounds

      **ARGUMENTS:**

        - composVect: a vector of vector/tuple containing the composition
           information.
           See _CalcSingleCompoundDescriptor()_ for an explanation of the elements.

        - argVect: a vector/tuple with three elements:

             1) AtomicDescriptorNames:  a list/tuple of the names of the
               atomic descriptors being used. These determine the
               meaning of $1, $2, etc. in the expression

             2) CompoundDescriptorNames:  a list/tuple of the names of the
               compound descriptors being used. These determine the
               meaning of $a, $b, etc. in the expression

             3) Expr: a string containing the expression to be used to
               evaluate the final result.

        - atomDict:
             a dictionary of atomic descriptors.  Each atomic entry is
             another dictionary containing the individual descriptors
             and their values

        - propDictList:
             a vector with, for each composition, its descriptors indexed
             by compound descriptor name.  Entry i goes with composVect[i].

      **RETURNS:**

        a vector containing the values of the descriptor for each
        compound.  Any given entry will be -666 if problems were
        encountered

      **NOTE:**

        - the expression is only prepared once and is then evaluated for
            each compound in turn.

        - the expression is run through *eval*, so it must come from a
            trusted source.  The names compos, atomDict and propDict are
            part of the contract with the generated expression text and
            must not be renamed.

    """
    res = [-666] * len(composVect)
    try:
        atomVarNames = argVect[0]
        compositionVarNames = argVect[1]
        formula = argVect[2]
        formula = _SubForCompoundDescriptors(formula, compositionVarNames, 'propDict')
        formula = _SubForAtomicVars(formula, atomVarNames, 'atomDict')
        evalTarget = _SubMethodArgs(formula, knownMethods)
    except Exception:
        # the expression could not be prepared: every compound gets -666
        return res

    for i, compos in enumerate(composVect):
        # indexed on purpose: a propDictList that is too short still raises
        # an IndexError here instead of being silently truncated
        propDict = propDictList[i]
        try:
            # evaluated right here so that the expression can see the local
            # names compos, atomDict and propDict
            res[i] = eval(evalTarget)
        except Exception:
            res[i] = -666
    return res
417
418
419
420
# small demo: evaluates a handful of sample expressions, once through the
# single-compound entry point and once through the multiple-compound one
if __name__ == '__main__':
    # the atomic and the compound descriptor names (the same in this demo)
    piece1 = [['d1', 'd2'], ['d1', 'd2']]
    aDict = {'Fe': {'d1': 1., 'd2': 2.}, 'Pt': {'d1': 10., 'd2': 20.}}
    pDict = {'d1': 100., 'd2': 200.}
    compos = [('Fe', 1), ('Pt', 1)]

    # the last entry ("foo") is not a valid expression
    cExprs = ["SUM($1)", "SUM($1)+SUM($2)", "SUM($1)+SUM($1)", "MEAN($1)", "DEV($2)", "MAX($1)",
              "MIN($1)/MAX($1)", "MIN($2)", "SUM($1)/$a", "sqrt($a+$b)", "SUM((3.*$1)/($2))", "foo"]

    for cExpr in cExprs:
        argVect = piece1 + [cExpr]
        print(cExpr)
        print(CalcSingleCompoundDescriptor(compos, argVect, aDict, pDict))
        print(CalcMultipleCompoundsDescriptor([compos, compos], argVect, aDict, [pDict, pDict]))
435