Package rdkit :: Package Chem :: Module FastSDMolSupplier
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.FastSDMolSupplier

 1  # $Id$ 
 2  # 
 3  #  Copyright (C) 2007 Greg Landrum 
 4  #   @@ All Rights Reserved @@ 
 5  #  This file is part of the RDKit. 
 6  #  The contents are covered by the terms of the BSD license 
 7  #  which is included in the file license.txt, found at the root 
 8  #  of the RDKit source tree. 
 9  # 
10  import sys 
11  from rdkit import Chem 
12   
class FastSDMolSupplier(object):
    """ A wrapper around an SDMolSupplier that precomputes and stores
    molecular indices (via text processing) to allow quick length
    calculations and random access.

    NOTE that this class needs to have the entire SD data in memory,
    so it's probably not particularly useful with large files.
    """
    # Class-level defaults; instances overwrite these in __init__()/init().
    suppl = None
    data = None
    sanitize = True
    removeHs = True

    def __init__(self, fileN=None, data=None, sanitize=True, removeHs=True):
        """ Construct the supplier from a file name or from SD text.

        Arguments:
          - fileN: name of an SD file to read; when provided, its contents
            take precedence over `data`
          - data: SD text to use when no file name is given
          - sanitize, removeHs: stored on the instance and forwarded to the
            wrapped supplier's SetData() call in init()

        If neither argument yields any text the wrapped supplier is not
        created here; call init() later with the data.
        """
        if fileN:
            # Use a context manager so the file handle is always closed.
            with open(fileN, 'r') as inF:
                data = inF.read()
        self.sanitize = sanitize
        self.removeHs = removeHs
        if data:
            # Normalise Windows line endings so that the default record
            # terminator ('$$$$\n') is found by init().
            data = data.replace('\r\n', '\n')
            self.init(data)

    @staticmethod
    def _findRecordStarts(data, recogTxt):
        """ Return the offsets in `data` at which records start.

        Offset 0 is always the first candidate; every further candidate is
        the position just past an occurrence of `recogTxt`.  The last
        candidate is dropped: it marks the end of the final terminated
        record rather than the start of a new one.
        """
        starts = [0]
        cursor = 0
        step = len(recogTxt)
        while True:
            hit = data.find(recogTxt, cursor + 1)
            if hit < 0:
                break
            cursor = hit + step
            starts.append(cursor)
        starts.pop()
        return starts

    def init(self, data, recogTxt='$$$$\n'):
        """ (Re)initialise the supplier from SD text.

        Arguments:
          - data: the SD text
          - recogTxt: the text that terminates each record

        Raises ValueError if `data` is empty.
        """
        if not data:
            raise ValueError('no data')
        # FIX: it'd be nice to not be caching data locally like this, but
        # it's the easiest way to handle pickle support.
        self.data = data
        self.suppl = Chem.SDMolSupplier()
        self.suppl.SetData(data, sanitize=self.sanitize,
                           removeHs=self.removeHs)

        # Hand the precomputed record offsets to the wrapped supplier.
        self._pos = self._findRecordStarts(data, recogTxt)
        self.suppl._SetStreamIndices(self._pos)
        self._idx = 0

    def GetItemText(self, idx):
        """ Return the raw text of record `idx`.

        Negative indices count from the end.  The returned text runs from
        the start of the record to the start of the next one, or to the
        end of the data for the last record.

        Raises IndexError if `idx` is out of range.
        """
        nItems = len(self._pos)
        if idx < 0:
            idx += nItems
        if not 0 <= idx < nItems:
            raise IndexError('list index out of range')
        startOfItem = self._pos[idx]
        if idx + 1 < nItems:
            endOfItem = self._pos[idx + 1]
        else:
            # Slice to the very end; an end offset of -1 would silently
            # drop the final character of the data.
            endOfItem = len(self.data)
        return self.data[startOfItem:endOfItem]

    def reset(self):
        """ Rewind the wrapped supplier and the local iteration counter. """
        self.suppl.reset()
        self._idx = 0

    # ----------------------------------------------------------------
    # support random access and an iterator interface:
    def __iter__(self):
        """ Restart iteration from the first record and return self. """
        # Go through reset() so that _idx is rewound together with suppl.
        self.reset()
        return self

    def next(self):
        """ Return the next item from the wrapped supplier. """
        self._idx += 1
        # The builtin next() works with both the Python 2 (.next) and
        # Python 3 (.__next__) iterator protocols.
        return next(self.suppl)

    # Python 3 spelling of the iterator protocol.
    __next__ = next

    def __len__(self):
        """ Return the length reported by the wrapped supplier. """
        return len(self.suppl)

    def __getitem__(self, idx):
        """ Return item `idx` from the wrapped supplier. """
        return self.suppl[idx]
81