1
2
3
4
5
6
7
8
9
10 import sys
11 from rdkit import Chem
12
14 """ A wrapper around an SDMolSupplier that precomputes and stores
15 molecular indices (via text processing) to allow quick length
16 calculations and random access.
17
18 NOTE that this class needs to have the entire SD data in memory,
19 so it's probably not particularly useful with large files.
20 """
21 suppl=None
22 data=None
23 sanitize=True
24 - def __init__(self,fileN=None,data=None,sanitize=True,removeHs=True):
32
def init(self, data, recogTxt='$$$$\n'):
    """Load SD text into a supplier and index where each record starts.

    The text is stored on the instance, handed to a fresh
    ``Chem.SDMolSupplier`` and then scanned for ``recogTxt`` so that the
    offset following every terminator is known.  Those offsets are passed
    to the supplier via ``_SetStreamIndices``.

    Args:
        data: the complete SD text; must be non-empty.
        recogTxt: the text that terminates each record.

    Raises:
        ValueError: if ``data`` is empty (or otherwise falsy).
    """
    if not data:
        raise ValueError('no data')

    self.data = data
    self.suppl = Chem.SDMolSupplier()
    self.suppl.SetData(data, sanitize=self.sanitize, removeHs=self.removeHs)

    # The first record starts at offset 0; each later one starts right
    # after a terminator.
    self._pos = [0]
    sepLen = len(recogTxt)
    p = 0
    while True:
        # ``find`` reports "no more terminators" with -1, so only that
        # condition ends the scan; genuine errors (e.g. a str/bytes
        # mismatch) propagate instead of being silently swallowed.
        # The search resumes at p+1, so a terminator located exactly at
        # offset p is not seen.
        p = data.find(recogTxt, p + 1)
        if p < 0:
            break
        p += sepLen
        self._pos.append(p)
    # Drop the last offset collected: when the text ends with a terminator
    # it points at the end of the data rather than at a record.
    # NOTE(review): text following the final terminator is therefore never
    # indexed -- confirm this matches what the supplier expects.
    self._pos.pop(-1)
    self.suppl._SetStreamIndices(self._pos)
    self._idx = 0
55
def GetItemText(self, idx):
    """Return the raw text of the record at position ``idx``.

    The record spans from its own start offset in ``self._pos`` up to the
    start offset of the following record, or to the end of ``self.data``
    for the last record.

    Args:
        idx: record index; negative values count from the end.

    Returns:
        The slice of ``self.data`` holding the requested record.

    Raises:
        IndexError: if ``idx`` is outside the indexed range.
    """
    nItems = len(self._pos)
    if idx < 0:
        idx += nItems
    if not 0 <= idx < nItems:
        raise IndexError('index out of range')

    startOfItem = self._pos[idx]
    if idx + 1 < nItems:
        endOfItem = self._pos[idx + 1]
    else:
        # Last record: run to the true end of the data.  Slicing with -1
        # here would cut off the final character.
        endOfItem = len(self.data)
    return self.data[startOfItem:endOfItem]
63
67
68
69
76
78 return len(self.suppl)
80 return self.suppl[idx]
81