Package rdkit :: Package DataStructs :: Module SparseIntVect
[hide private]
[frames] | [no frames]

Source Code for Module rdkit.DataStructs.SparseIntVect

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2007 Greg Landrum 
  4  #   All Rights Reserved 
  5  # 
  6  import copy 
  7   
class pySparseIntVect(object):
    """ Pure-Python sparse vector of integers.

    this class is pretty much obsolete (it's in C++ now)

    Only nonzero elements are kept (in the dict ``container``, keyed by
    index); every other index in ``range(size)`` reads as 0.  Iterating
    over a vector yields ``(index, value)`` pairs in increasing index order.
    """
    size = 0
    # class-level fallback only: __init__ gives every instance its own dict
    container = {}

    def __init__(self, size):
        """ Creates an empty vector with ``size`` addressable elements. """
        self.size = size
        self.container = {}

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    def _resolveIndex(self, which):
        """ Maps a (possibly negative) index onto ``range(size)``.

        Negative indices count from the end, as for Python sequences.
        Raises IndexError when the index is out of range.
        """
        if not -self.size <= which < self.size:
            raise IndexError(which)
        if which < 0:
            which += self.size
        return which

    def _checkCompatible(self, other):
        """ Raises TypeError if ``other`` is not a pySparseIntVect and
        ValueError if its size differs from ours. """
        if not isinstance(other, pySparseIntVect):
            raise TypeError('expected a pySparseIntVect')
        if self.size != other.size:
            raise ValueError('vector sizes do not match')

    def _store(self, idx, val):
        """ Stores ``val`` at ``idx``; zeros are removed instead of stored so
        that equal vectors always have equal containers. """
        if val:
            self.container[idx] = val
        else:
            self.container.pop(idx, None)

    def _copy(self):
        """ Returns an independent copy of this vector. """
        res = pySparseIntVect(self.size)
        # keys and values are plain ints, so a shallow dict copy is enough
        res.container = dict(self.container)
        return res

    # ------------------------------------------------------------------
    # bulk updates and totals
    # ------------------------------------------------------------------
    def UpdateFromSequence(self, seq):
        """ Increments the count of every index that appears in ``seq``.

        >>> c1=pySparseIntVect(10)
        >>> c1.UpdateFromSequence((0,1,1,5))
        >>> [x for x in c1]
        [(0, 1), (1, 2), (5, 1)]
        >>> c1.UpdateFromSequence((0,3))
        >>> [x for x in c1]
        [(0, 2), (1, 2), (3, 1), (5, 1)]

        """
        for v in seq:
            self[v] += 1

    def InitFromSequence(self, seq):
        """ Clears the vector, then counts the indices in ``seq``.

        >>> c1=pySparseIntVect(10)
        >>> c1.InitFromSequence((0,1,1,5))
        >>> [x for x in c1]
        [(0, 1), (1, 2), (5, 1)]

        """
        self.container = {}
        self.UpdateFromSequence(seq)

    def Sum(self, useAbs=False):
        """ Returns the sum of all elements (of their absolute values if
        ``useAbs`` is true).

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c1.Sum()
        10

        >>> c1[2] = -2
        >>> c1.Sum()
        6
        >>> c1.Sum(useAbs=True)
        10
        """
        if useAbs:
            return sum(abs(v) for v in self.container.values())
        return sum(self.container.values())

    def GetTotalVal(self, useAbs=False):
        """ Alias for Sum(). """
        return self.Sum(useAbs=useAbs)

    # ------------------------------------------------------------------
    # comparison
    # ------------------------------------------------------------------
    def __eq__(self, other):
        """ Two vectors are equal when their sizes and elements match.

        Raises TypeError when ``other`` is not a pySparseIntVect.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 3
        >>> c2[2] = 2
        >>> c1 == c2
        False
        >>> c1 == c1
        True
        """
        if not isinstance(other, pySparseIntVect):
            raise TypeError('expected a pySparseIntVect')
        return self.size == other.size and self.container == other.container

    def __ne__(self, other):
        """ Inverse of __eq__ (Python 2 does not derive != from ==). """
        return not self.__eq__(other)

    # ------------------------------------------------------------------
    # in-place operators
    # ------------------------------------------------------------------
    def __iand__(self, other):
        """ Keeps the indices set in both vectors, with the smaller value.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 &= c2
        >>> [x for x in c1]
        [(0, 2), (2, -2)]

        """
        self._checkCompatible(other)
        newC = {}
        for idx, v in self.container.items():
            ov = other.container.get(idx, 0)
            if ov:
                newC[idx] = min(v, ov)
        self.container = newC
        return self

    def __ior__(self, other):
        """ Keeps the indices set in either vector; where both are set the
        larger value wins, otherwise the single value is copied unchanged.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 |= c2
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5), (5, 6)]

        """
        self._checkCompatible(other)
        newC = dict(self.container)
        for idx, ov in other.container.items():
            # only compare when both sides have the index, so a lone
            # negative value is treated the same whichever side holds it
            if idx not in newC or newC[idx] < ov:
                newC[idx] = ov
        self.container = newC
        return self

    def __iadd__(self, other):
        """ Element-wise addition; elements that become 0 are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c1 += c2
        >>> [x for x in c1]
        [(0, 5), (4, 5), (5, 6)]

        """
        self._checkCompatible(other)
        # snapshot the items so that c += c does not mutate the dict that
        # is being iterated
        for idx, ov in list(other.container.items()):
            self._store(idx, self.container.get(idx, 0) + ov)
        return self

    def __isub__(self, other):
        """ Element-wise subtraction; elements that become 0 are dropped.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = 2
        >>> c2[5] = 6
        >>> c1 -= c2
        >>> [x for x in c1]
        [(0, 1), (4, 5), (5, -6)]

        """
        self._checkCompatible(other)
        for idx, ov in list(other.container.items()):
            self._store(idx, self.container.get(idx, 0) - ov)
        return self

    def __imul__(self, other):
        """ Element-wise multiplication; only indices set in both survive.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c1 *= c2
        >>> [x for x in c1]
        [(0, 6)]

        """
        self._checkCompatible(other)
        # iterate over a snapshot of the keys: entries are removed in the loop
        for idx in list(self.container):
            self._store(idx, self.container[idx] * other.container.get(idx, 0))
        return self

    # ------------------------------------------------------------------
    # binary operators (copy, then reuse the in-place versions)
    # ------------------------------------------------------------------
    def __add__(self, other):
        """
        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c3 = c2+c1
        >>> [x for x in c3]
        [(0, 5), (4, 5), (5, 6)]

        """
        res = self._copy()
        res += other
        return res

    def __sub__(self, other):
        """
        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = 2
        >>> c2[5] = 6
        >>> c3 = c1-c2
        >>> [x for x in c3]
        [(0, 1), (4, 5), (5, -6)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res -= other
        return res

    def __mul__(self, other):
        """
        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[5] = 6
        >>> c3 = c1*c2
        >>> [x for x in c3]
        [(0, 6)]
        >>> [x for x in c1]
        [(0, 3), (4, 5)]

        """
        res = self._copy()
        res *= other
        return res

    def __and__(self, other):
        """
        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c3 = c1 & c2
        >>> [x for x in c3]
        [(0, 2), (2, -2)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res &= other
        return res

    def __or__(self, other):
        """
        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[2] = 2
        >>> c1[4] = 5
        >>> c2=pySparseIntVect(10)
        >>> c2[0] = 2
        >>> c2[2] = -2
        >>> c2[5] = 6
        >>> c3 = c1 | c2
        >>> [x for x in c3]
        [(0, 3), (2, 2), (4, 5), (5, 6)]
        >>> [x for x in c1]
        [(0, 3), (2, 2), (4, 5)]

        """
        res = self._copy()
        res |= other
        return res

    # ------------------------------------------------------------------
    # container protocol
    # ------------------------------------------------------------------
    def __len__(self):
        """ Returns the size of the vector, not the number of set elements. """
        return self.size

    def __getitem__(self, which):
        """ Returns the value at index ``which`` (0 if it is not set).

        Negative indices count from the end; IndexError is raised for
        out-of-range indices.

        >>> c1=pySparseIntVect(10)
        >>> c1[0] = 3
        >>> c1[4] = 5
        >>> c1[0]
        3
        >>> c1[1]
        0
        >>> c1[-6]
        5

        """
        return self.container.get(self._resolveIndex(which), 0)

    def __setitem__(self, which, val):
        """ Sets index ``which`` to ``int(val)``; a zero removes the entry.

        Negative indices count from the end; IndexError is raised for
        out-of-range indices.
        """
        which = self._resolveIndex(which)
        self._store(which, int(val))

    def __iter__(self):
        """ Iterates over ``(index, value)`` pairs in increasing index order.

        >>> c=pySparseIntVect(10)
        >>> c[0] = 3
        >>> c[4] = 5
        >>> c[7] = -1
        >>> for idx,v in c:
        ...   print('%d %d' % (idx, v))
        0 3
        4 5
        7 -1

        """
        # sort explicitly: dict order is arbitrary (Python 2) or insertion
        # order (Python 3), neither of which is index order
        return iter(sorted(self.container.items()))
# Make DataStructs.DiceSimilarity available as a module-level name here.
from rdkit import DataStructs
DiceSimilarity=DataStructs.DiceSimilarity
def pyDiceSimilarity(v1, v2, bounds=None, useAbs=False):
    """ Implements the DICE similarity metric.

    The result is 2*total(v1&v2) / (total(v1)+total(v2)), or 0.0 when the
    denominator is zero.

    Arguments:
      - v1, v2: vectors providing GetTotalVal(useAbs=...) and the & operator
      - bounds: optional threshold; when the largest value the similarity
        could reach, 2*min(total(v1),total(v2))/(total(v1)+total(v2)), is
        already below it, 0.0 is returned without forming v1&v2.  That
        upper bound assumes non-negative element values.
      - useAbs: passed through to GetTotalVal()

    >>> v1 = DataStructs.IntSparseIntVect(10)
    >>> v2 = DataStructs.IntSparseIntVect(10)
    >>> v1.UpdateFromSequence((1,2,3))
    >>> v2.UpdateFromSequence((1,2,3))
    >>> pyDiceSimilarity(v1,v2)
    1.0

    >>> v2 = DataStructs.IntSparseIntVect(10)
    >>> v2.UpdateFromSequence((5,6))
    >>> pyDiceSimilarity(v1,v2)
    0.0

    >>> v1 = DataStructs.IntSparseIntVect(10)
    >>> v2 = DataStructs.IntSparseIntVect(10)
    >>> v1.UpdateFromSequence((1,2,3,4))
    >>> v2.UpdateFromSequence((1,3,5,7))
    >>> pyDiceSimilarity(v1,v2)
    0.5

    >>> v1 = DataStructs.IntSparseIntVect(10)
    >>> v2 = DataStructs.IntSparseIntVect(10)
    >>> v1.UpdateFromSequence((1,2,3,4,5,6))
    >>> v2.UpdateFromSequence((1,3))
    >>> pyDiceSimilarity(v1,v2)
    0.5

    """
    total1 = v1.GetTotalVal(useAbs=useAbs)
    total2 = v2.GetTotalVal(useAbs=useAbs)
    denom = 1.0 * (total1 + total2)
    if not denom:
        return 0.0

    # The overlap can never exceed the smaller of the two totals, so this
    # is the best similarity attainable.  (The screen used to compare the
    # vector *lengths*, which wrongly zeroed the result for dense vectors.)
    if bounds and 2.0 * min(total1, total2) / denom < bounds:
        return 0.0

    numer = (v1 & v2).GetTotalVal(useAbs=useAbs)
    return 2. * numer / denom
425
def DotProduct(bv1, bv2):
    """ Returns the dot product of two sparse vectors as a float.

    Only the nonzero elements of bv1 are visited; each one is multiplied by
    the value found at the same index in bv2.

    Arguments:
      - bv1: object whose GetNonzeroElements() returns a mapping from
        index to value
      - bv2: object supporting lookup by index (bv2[idx])
    """
    res = 0.0
    # items() rather than the Python-2-only iteritems(), so the mapping
    # can be walked under both Python 2 and Python 3
    for idx, val in bv1.GetNonzeroElements().items():
        res += val * bv2[idx]
    return res
431 432 #------------------------------------ 433 # 434 # doctest boilerplate 435 #
436 -def _test():
437 import doctest,sys 438 return doctest.testmod(sys.modules["__main__"])
# When executed as a script: run the doctests and use the number of
# failures as the process exit status.
if __name__ == '__main__':
    import sys
    nFailed, nTried = _test()
    sys.exit(nFailed)