Package rdkit :: Package DataStructs :: Module VectCollection
[hide private]
[frames | no frames]

Source Code for Module rdkit.DataStructs.VectCollection

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2005-2006 greg landrum and Rational Discovery LLC 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  from __future__ import print_function 
 12  import copy,struct,sys 
 13  from rdkit.six.moves import cPickle 
 14  from rdkit.six import iterkeys 
 15  from rdkit import six 
 16  from rdkit import DataStructs 
 17   
class VectCollection(object):
    """A keyed collection of bit vectors that can be queried as their OR.

    Child vectors are stored under caller-supplied keys.  Bit queries
    (``len``, indexing, ``GetBit``, ``GetOnBits``) are answered from a
    lazily rebuilt vector that is the bitwise OR of all children.

    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((1,3,5))
    >>> vc.AddVect(1,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((6,8))
    >>> vc.AddVect(2,bv1)
    >>> len(vc)
    10
    >>> vc.GetNumBits()
    10
    >>> vc[0]
    0
    >>> vc[1]
    1
    >>> vc[9]
    0
    >>> vc[6]
    1
    >>> vc.GetBit(6)
    1
    >>> list(vc.GetOnBits())
    [1, 3, 5, 6, 8]

    keys must be unique, so adding a duplicate replaces the
    previous values:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> len(vc)
    10
    >>> vc[1]
    0
    >>> vc[9]
    1
    >>> vc[6]
    1

    we can also query the children:
    >>> vc.NumChildren()
    2
    >>> cs = vc.GetChildren()
    >>> id,fp = cs[0]
    >>> id
    1
    >>> list(fp.GetOnBits())
    [7, 9]
    >>> id,fp = cs[1]
    >>> id
    2
    >>> list(fp.GetOnBits())
    [6, 8]

    attach/detach operations:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> vc.NumChildren()
    2
    >>> list(vc.GetOnBits())
    [5, 6, 8]


    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.NumChildren()
    3
    >>> list(vc.GetOnBits())
    [5, 6, 7, 8, 9]
    >>> vc.DetachVectsMatchingBit(6)
    >>> vc.NumChildren()
    1
    >>> list(vc.GetOnBits())
    [7, 9]


    to copy VectCollections, use the copy module:
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((5,6))
    >>> vc.AddVect(3,bv1)
    >>> list(vc.GetOnBits())
    [5, 6, 7, 9]
    >>> vc2 = copy.copy(vc)
    >>> vc.DetachVectsNotMatchingBit(6)
    >>> list(vc.GetOnBits())
    [5, 6]
    >>> list(vc2.GetOnBits())
    [5, 6, 7, 9]

    The Uniquify() method can be used to remove duplicate vectors:
    >>> vc = VectCollection()
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((7,9))
    >>> vc.AddVect(1,bv1)
    >>> vc.AddVect(2,bv1)
    >>> bv1 = DataStructs.ExplicitBitVect(10)
    >>> bv1.SetBitsFromList((2,3,5))
    >>> vc.AddVect(3,bv1)
    >>> vc.NumChildren()
    3
    >>> vc.Uniquify()
    >>> vc.NumChildren()
    2

    """

    def __init__(self):
        """Create an empty collection."""
        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        # Set whenever the children change; the OR vector is rebuilt lazily.
        self.__needReset = True

    def GetOrVect(self):
        """Return the OR of all child vectors (None when there are none)."""
        if self.__needReset:
            self.Reset()
        return self.__orVect

    orVect = property(GetOrVect)

    def AddVect(self, id, vect):
        """Store vect under key id, replacing any vector already stored there."""
        self.__vects[id] = vect
        self.__needReset = True

    def Reset(self):
        """Rebuild the cached OR vector and bit count if they are stale.

        For an empty collection the OR vector is None and the bit count is 0,
        so that len() stays valid (a negative __len__ makes len() raise).
        """
        if not self.__needReset:
            return
        self.__orVect = None
        self.__numBits = 0
        children = list(self.__vects.values())
        if children:
            # Copy the first child so OR-ing the others in does not modify it.
            self.__orVect = copy.copy(children[0])
            self.__numBits = self.__orVect.GetNumBits()
            for child in children[1:]:
                self.__orVect |= child
        self.__needReset = False

    def NumChildren(self):
        """Return the number of child vectors."""
        return len(self.__vects)

    def GetChildren(self):
        """Return the children as a tuple of (key, vector) pairs."""
        return tuple(self.__vects.items())

    def GetBit(self, id):
        """Return bit id of the OR vector."""
        if self.__needReset:
            self.Reset()
        return self[id]

    def GetNumBits(self):
        """Return the number of bits in the OR vector (0 when empty)."""
        return len(self)

    def GetOnBits(self):
        """Return the on bits of the OR vector."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetOnBits()

    def DetachVectsNotMatchingBit(self, bit):
        """Remove every child vector that does not have bit set."""
        self.__vects = {key: vect for key, vect in self.__vects.items()
                        if vect.GetBit(bit)}
        self.__needReset = True

    def DetachVectsMatchingBit(self, bit):
        """Remove every child vector that has bit set."""
        self.__vects = {key: vect for key, vect in self.__vects.items()
                        if not vect.GetBit(bit)}
        self.__needReset = True

    def Uniquify(self, verbose=False):
        """Remove child vectors whose on bits duplicate an earlier child's.

        Children are visited in dictionary order and the first vector with a
        given set of on bits is the one that is kept.  If verbose is true the
        before/after child counts are printed.
        """
        seen = set()
        unique = {}
        for key, vect in self.__vects.items():
            onBits = tuple(vect.GetOnBits())
            if onBits in seen:
                continue
            seen.add(onBits)
            unique[key] = vect
        if verbose:
            print('uniquify:', len(self.__vects), '->', len(unique))
        self.__vects = unique
        self.__needReset = True

    def __len__(self):
        """Return the number of bits in the OR vector (0 when empty)."""
        if self.__needReset:
            self.Reset()
        return self.__numBits

    def __getitem__(self, id):
        """Return bit id of the OR vector."""
        if self.__needReset:
            self.Reset()
        return self.__orVect.GetBit(id)

    #
    # set up our support for pickling:
    #
    def __getstate__(self):
        """Serialize the children to a byte string.

        Layout (little-endian unsigned 32-bit ints): the child count, then for
        each child its key, the length of its ToBinary() payload and the
        payload itself.
        """
        chunks = [struct.pack('<I', len(self.__vects))]
        for key, vect in self.__vects.items():
            payload = vect.ToBinary()
            size = len(payload)
            chunks.append(struct.pack('<II', key, size))
            chunks.append(struct.pack('%ds' % size, payload))
        return b''.join(chunks)

    def __setstate__(self, pkl):
        """Rebuild the collection from the byte string made by __getstate__."""
        if six.PY3 and isinstance(pkl, str):
            pkl = bytes(pkl, encoding='Latin1')

        self.__vects = {}
        self.__orVect = None
        self.__numBits = -1
        self.__needReset = True
        szI = struct.calcsize('<I')
        offset = 0
        nToRead = struct.unpack_from('<I', pkl, offset)[0]
        offset += szI
        for _ in range(nToRead):
            key, size = struct.unpack_from('<II', pkl, offset)
            offset += 2 * szI
            # unpack() raises struct.error if the payload is truncated
            payload = struct.unpack('%ds' % size, pkl[offset:offset + size])[0]
            offset += size
            self.AddVect(key, DataStructs.ExplicitBitVect(payload))
267 268 269 270 #------------------------------------ 271 # 272 # doctest boilerplate 273 #
def _test():
    """Run the doctests of the ``__main__`` module.

    Returns whatever ``doctest.testmod`` returns for that module.
    """
    import sys
    import doctest
    main_module = sys.modules["__main__"]
    return doctest.testmod(main_module)
if __name__ == '__main__':
    import sys
    # _test() hands back a (failed, attempted) pair; the number of failed
    # doctests is used as the process exit status.
    failed, tried = _test()
    sys.exit(failed)