1
2
3
4
5
6 import copy
7
class pySparseIntVect(object):
    """A fixed-length sparse vector of integers backed by a dict.

    This class is pretty much obsolete (it's in C++ now).

    Only explicitly assigned entries are stored (``container`` maps
    index -> value); every other index below ``size`` reads as 0.
    Iterating a vector yields ``(index, value)`` pairs in ascending index
    order.
    """
    size = 0
    # Class-level fallback only: __init__ and InitFromSequence always bind a
    # fresh per-instance dict, so instances never share this one.
    container = {}

    def __init__(self, size):
        """Create an empty vector with ``size`` addressable positions."""
        self.size = size
        self.container = {}

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _resolve_index(self, which):
        """Validate ``which`` and map a negative index onto its position.

        Raises IndexError when ``abs(which) >= self.size``.
        """
        if abs(which) >= self.size:
            raise IndexError(which)
        if which < 0:
            # Count back from the end: -1 is the last position (size - 1).
            which += self.size
        return which

    def _check_operand(self, other):
        """Ensure ``other`` can be combined with this vector.

        Raises TypeError if ``other`` is not a pySparseIntVect and
        ValueError if the two vectors have different sizes.
        """
        if not isinstance(other, pySparseIntVect):
            raise TypeError('operand must be a pySparseIntVect')
        if self.size != other.size:
            raise ValueError('vectors must have the same size')

    def _copy(self):
        """Return a new vector with the same size and stored entries."""
        res = pySparseIntVect(self.size)
        # Stored values are plain ints, so a shallow dict copy is sufficient.
        res.container = dict(self.container)
        return res

    # ------------------------------------------------------------------
    # construction from sequences
    # ------------------------------------------------------------------
    def UpdateFromSequence(self, seq):
        """Increment the count at every index listed in ``seq``.

        >>> c1=pySparseIntVect(10)
        >>> c1.UpdateFromSequence((0,1,1,5))
        >>> [x for x in c1]
        [(0, 1), (1, 2), (5, 1)]
        >>> c1.UpdateFromSequence((0,3))
        >>> [x for x in c1]
        [(0, 2), (1, 2), (3, 1), (5, 1)]

        """
        for v in seq:
            self[v] += 1

    def InitFromSequence(self, seq):
        """Discard all stored entries, then count the indices in ``seq``.

        >>> c1=pySparseIntVect(10)
        >>> c1.InitFromSequence((0,1,1,5))
        >>> [x for x in c1]
        [(0, 1), (1, 2), (5, 1)]

        """
        self.container = {}
        self.UpdateFromSequence(seq)

    # ------------------------------------------------------------------
    # reductions
    # ------------------------------------------------------------------
    def Sum(self, useAbs=False):
        """Return the sum of the stored values.

        With ``useAbs`` true, absolute values are summed instead.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c1.Sum()
        10

        >>> c1[2] = -2
        >>> c1.Sum()
        6
        >>> c1.Sum(useAbs=True)
        10
        """
        values = self.container.values()
        if useAbs:
            return sum(abs(v) for v in values)
        return sum(values)

    def GetTotalVal(self, useAbs=False):
        """Alias for :meth:`Sum`."""
        return self.Sum(useAbs=useAbs)

    # ------------------------------------------------------------------
    # comparison
    # ------------------------------------------------------------------
    def __eq__(self, other):
        """Return True when both vectors have the same size and entries.

        Raises TypeError if ``other`` is not a pySparseIntVect.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 3
        >>> c2[2] = 2
        >>> c1 == c2
        False
        >>> c1 == c1
        True
        """
        if not isinstance(other, pySparseIntVect):
            raise TypeError('operand must be a pySparseIntVect')
        return self.size == other.size and self.container == other.container

    # ------------------------------------------------------------------
    # in-place operators
    # ------------------------------------------------------------------
    def __iand__(self, other):
        """Keep only indices stored (non-zero) in both; take the minimum.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 &= c2
        >>> [x for x in c1]
        [(0, 2), (2, -2)]

        """
        self._check_operand(other)
        theirs = other.container
        merged = {}
        for idx, v in self.container.items():
            ov = theirs.get(idx, 0)
            if ov:
                merged[idx] = min(v, ov)
        self.container = merged
        return self

    def __ior__(self, other):
        """Union of the stored indices; shared indices take the maximum.

        An entry stored in only one of the operands is carried over
        unchanged.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 |= c2
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5), (5, 6)]

        """
        self._check_operand(other)
        merged = dict(other.container)
        for idx, v in self.container.items():
            # Only compare when both sides store the index; comparing a
            # self-only negative value against an implicit 0 would replace it
            # with a stored zero.
            merged[idx] = max(v, merged[idx]) if idx in merged else v
        self.container = merged
        return self

    def __iadd__(self, other):
        """Element-wise in-place addition; entries that sum to 0 are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 += c2
        >>> [x for x in c1]
        [(0, 5), (4, 5), (5, 6)]

        """
        self._check_operand(other)
        theirs = other.container
        seen = set(self.container)
        # Iterate over a snapshot of the keys: entries may be deleted below.
        for idx in list(self.container):
            total = self.container[idx] + theirs.get(idx, 0)
            if total:
                self.container[idx] = total
            else:
                del self.container[idx]
        for idx, v in list(theirs.items()):
            if idx not in seen:
                self.container[idx] = v
        return self

    def __isub__(self, other):
        """Element-wise in-place subtraction; zero results are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = 2
        >>> c2[5] = 6
        >>> c1 -= c2
        >>> [x for x in c1]
        [(0, 1), (4, 5), (5, -6)]

        """
        self._check_operand(other)
        theirs = other.container
        seen = set(self.container)
        # Iterate over a snapshot of the keys: entries may be deleted below.
        for idx in list(self.container):
            diff = self.container[idx] - theirs.get(idx, 0)
            if diff:
                self.container[idx] = diff
            else:
                del self.container[idx]
        for idx, v in list(theirs.items()):
            if idx not in seen:
                self.container[idx] = -v
        return self

    def __imul__(self, other):
        """Element-wise in-place product; zero results are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c1 *= c2
        >>> [x for x in c1]
        [(0, 6)]

        """
        self._check_operand(other)
        theirs = other.container
        # Iterate over a snapshot of the keys: entries may be deleted below.
        for idx in list(self.container):
            prod = self.container[idx] * theirs.get(idx, 0)
            if prod:
                self.container[idx] = prod
            else:
                del self.container[idx]
        return self

    # ------------------------------------------------------------------
    # binary operators (each works on a copy, leaving the operands alone)
    # ------------------------------------------------------------------
    def __add__(self, other):
        """Return a new vector holding ``self + other``.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c3 = c2+c1
        >>> [x for x in c3]
        [(0, 5), (4, 5), (5, 6)]

        """
        res = self._copy()
        res += other
        return res

    def __sub__(self, other):
        """Return a new vector holding ``self - other``.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = 2
        >>> c2[5] = 6
        >>> c3 = c1-c2
        >>> [x for x in c3]
        [(0, 1), (4, 5), (5, -6)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res -= other
        return res

    def __mul__(self, other):
        """Return a new vector holding the element-wise product.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c3 = c1*c2
        >>> [x for x in c3]
        [(0, 6)]
        >>> [x for x in c1]
        [(0, 3), (4, 5)]

        """
        res = self._copy()
        res *= other
        return res

    def __and__(self, other):
        """Return a new vector holding ``self & other`` (see ``__iand__``).

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c3 = c1 & c2
        >>> [x for x in c3]
        [(0, 2), (2, -2)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res &= other
        return res

    def __or__(self, other):
        """Return a new vector holding ``self | other`` (see ``__ior__``).

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c3 = c1 | c2
        >>> [x for x in c3]
        [(0, 3), (2, 2), (4, 5), (5, 6)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res |= other
        return res

    # ------------------------------------------------------------------
    # element access and iteration
    # ------------------------------------------------------------------
    def __getitem__(self, which):
        """Return the value at index ``which`` (0 when nothing is stored).

        Negative indices count back from the end.  Raises IndexError when
        ``abs(which) >= size``.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c1[0]
        3
        >>> c1[1]
        0
        >>> c1[9] = 7
        >>> c1[-1]
        7

        """
        return self.container.get(self._resolve_index(which), 0)

    def __setitem__(self, which, val):
        """Store ``int(val)`` at index ``which``.

        Negative indices count back from the end.  Raises IndexError when
        ``abs(which) >= size``.
        """
        which = self._resolve_index(which)
        self.container[which] = int(val)

    def __iter__(self):
        """Iterate over the stored ``(index, value)`` pairs by ascending index.

        >>> c=pySparseIntVect(10)
        >>> c[0] = 3
        >>> c[4] = 5
        >>> c[7] = -1
        >>> for idx,v in c:
        ...   print('%d %d' % (idx, v))
        0 3
        4 5
        7 -1

        """
        # Sort explicitly so the order does not depend on the interpreter's
        # dict ordering.
        return iter(sorted(self.container.items()))
378
379
380 from rdkit import DataStructs
# Re-export DataStructs.DiceSimilarity under this module's namespace so that
# callers (and the doctests below) can refer to it as plain `DiceSimilarity`.
DiceSimilarity=DataStructs.DiceSimilarity
383 """ Implements the DICE similarity metric.
384
385 >>> v1 = DataStructs.IntSparseIntVect(10)
386 >>> v2 = DataStructs.IntSparseIntVect(10)
387 >>> v1.UpdateFromSequence((1,2,3))
388 >>> v2.UpdateFromSequence((1,2,3))
389 >>> DiceSimilarity(v1,v2)
390 1.0
391
392 >>> v2 = DataStructs.IntSparseIntVect(10)
393 >>> v2.UpdateFromSequence((5,6))
394 >>> DiceSimilarity(v1,v2)
395 0.0
396
397 >>> v1 = DataStructs.IntSparseIntVect(10)
398 >>> v2 = DataStructs.IntSparseIntVect(10)
399 >>> v1.UpdateFromSequence((1,2,3,4))
400 >>> v2.UpdateFromSequence((1,3,5,7))
401 >>> DiceSimilarity(v1,v2)
402 0.5
403
404 >>> v1 = DataStructs.IntSparseIntVect(10)
405 >>> v2 = DataStructs.IntSparseIntVect(10)
406 >>> v1.UpdateFromSequence((1,2,3,4,5,6))
407 >>> v2.UpdateFromSequence((1,3))
408 >>> DiceSimilarity(v1,v2)
409 0.5
410
411 """
412 denom = 1.0*(v1.GetTotalVal(useAbs=useAbs)+v2.GetTotalVal(useAbs=useAbs))
413 if not denom:
414 res = 0.0
415 else:
416 if bounds and (min(len(v1),len(v2))/denom) < bounds:
417 numer = 0.0
418 else:
419 numer=0.0
420 v3=v1&v2
421 numer=v3.GetTotalVal(useAbs=useAbs)
422 res = 2.*numer/denom
423
424 return res
425
427 res = 0.0
428 for k,v in bv1.GetNonzeroElements().iteritems():
429 res += v*bv2[k]
430 return res
431
432
433
434
435
def _test():
    """Run the doctests of the module executing as ``__main__``.

    Returns whatever ``doctest.testmod`` returns, which unpacks as
    ``(failed, attempted)``.
    """
    import doctest
    import sys
    return doctest.testmod(sys.modules["__main__"])


if __name__ == '__main__':
    import sys
    # Only the failure count matters here: it becomes the process exit status,
    # so a clean run exits with 0.
    failed, _tried = _test()
    sys.exit(failed)
444