Package rdkit :: Package VLib :: Package NodeLib :: Module DbPickleSupplier
[hide private]
[frames | no frames]

Source Code for Module rdkit.VLib.NodeLib.DbPickleSupplier

  1  #  $Id$ 
  2  # 
  3  #  Copyright (C) 2004 Rational Discovery LLC 
  4  #     All Rights Reserved 
  5  # 
  6  from __future__ import print_function 
  7  from rdkit import RDConfig 
  8  import sys,os.path 
  9  from rdkit.VLib.Supply import SupplyNode 
 10  from rdkit.six.moves import cPickle 
 11   
 12  if RDConfig.usePgSQL: 
 13    from pyPgSQL import PgSQL as sql 
14 - class _lazyDataSeq:
15 """ 16 These classes are used to speed up (a lot) the process of 17 pulling pickled objects from PostgreSQL databases. Instead of 18 having to use all of PgSQL's typechecking, we'll make a lot of 19 assumptions about what's coming out of the Db and its layout. 20 The results can lead to drastic improvements in perfomance. 21 22 """
23 - def __init__(self,cursor,cmd,pickleCol=1,depickle=1,klass=None):
24 self.cursor = cursor 25 self.cmd = cmd 26 self._first=0 27 self._pickleCol=pickleCol 28 self._depickle=depickle 29 self._klass=klass
30 - def _validate(self):
31 curs = self.cursor 32 if not curs or \ 33 curs.closed or \ 34 curs.conn is None or \ 35 (curs.res.resultType != sql.RESULT_DQL and curs.closed is None): 36 raise ValueError('bad cursor') 37 if curs.res.nfields and curs.res.nfields < 2: 38 raise ValueError( 39 'invalid number of results returned (%d), must be at least 2'%curs.res.nfields) 40 desc1 = curs.description[self._pickleCol] 41 ftv = desc1[self._pickleCol].value 42 if ftv != sql.BINARY: 43 raise TypeError('pickle column (%d) of bad type'%self._pickleCol)
44
45 - def __iter__(self):
46 try: 47 self.cursor.execute(self.cmd) 48 except: 49 import traceback 50 traceback.print_exc() 51 print('COMMAND:',self.cmd) 52 raise 53 self._first=1 54 self._validate() 55 return self
56 - def next(self):
57 curs = self.cursor 58 if not curs or \ 59 curs.closed or \ 60 curs.conn is None or \ 61 curs.res is None or \ 62 (curs.res.resultType != sql.RESULT_DQL and curs.closed is None): 63 raise StopIteration 64 if not self._first: 65 res = curs.conn.conn.query('fetch 1 from "%s"'%self.cursor.name) 66 67 if res.ntuples == 0: 68 raise StopIteration 69 else: 70 if res.nfields < 2: 71 raise ValueError('bad result: %s'%str(res)) 72 t = [res.getvalue(0,x) for x in range(res.nfields)] 73 val = t[self._pickleCol] 74 else: 75 t = curs.fetchone() 76 val = str(t[self._pickleCol]) 77 self._first = 0 78 if self._depickle: 79 if not self._klass: 80 fp = cPickle.loads(val) 81 else: 82 fp = self._klass(val) 83 fields = list(t) 84 del fields[self._pickleCol] 85 fp._fieldsFromDb = fields 86 else: 87 fp = list(t) 88 return fp
89
90 - class _dataSeq(_lazyDataSeq):
91 - def __init__(self,cursor,cmd,pickleCol=1,depickle=1):
92 self.cursor=cursor 93 self.cmd = cmd 94 self.res = None 95 self.rowCount = -1 96 self.idx = 0 97 self._pickleCol=pickleCol 98 self._depickle = depickle
99 - def __iter__(self):
100 self.cursor.execute(self.cmd) 101 self._first = self.cursor.fetchone() 102 self._validate() 103 self.res = self.cursor.conn.conn.query('fetch all from "%s"'%self.cursor.name) 104 self.rowCount = self.res.ntuples+1 105 self.idx=0 106 if self.res.nfields < 2: 107 raise ValueError('bad query result'%str(res)) 108 109 return self
110 - def next(self):
111 if self.idx >= self.rowCount: 112 raise StopIteration 113 114 fp = self[self.idx] 115 self.idx += 1 116 117 return fp
118
119 - def __len__(self):
120 return self.rowCount
121 - def __getitem__(self,idx):
122 if self.res is None: 123 self.cursor.execute(self.cmd) 124 self._first = self.cursor.fetchone() 125 self._validate() 126 self.res = self.cursor.conn.conn.query('fetch all from "%s"'%self.cursor.name) 127 self.rowCount = self.res.ntuples+1 128 self.idx=0 129 if self.res.nfields < 2: 130 raise ValueError('bad query result'%str(res)) 131 132 if idx < 0: 133 idx = self.rowCount+idx 134 if idx<0 or (idx >= 0 and idx >= self.rowCount): 135 raise IndexError 136 if idx==0: 137 val = str(self._first[self._pickleCol]) 138 t = list(self._first) 139 else: 140 val = self.res.getvalue(self.idx-1,self._pickleCol) 141 t = [self.res.getvalue(self.idx-1,x) for x in range(self.res.nfields)] 142 if self._depickle: 143 try: 144 fp = cPickle.loads(val) 145 except: 146 import logging 147 del t[self._pickleCol] 148 logging.exception('Depickling failure in row: %s'%str(t)) 149 raise 150 del t[self._pickleCol] 151 fp._fieldsFromDb = t 152 else: 153 fp = t 154 return fp
155 else: 156 _dataSeq=None 157 158
class DbPickleSupplyNode(SupplyNode):
  """ Supplies pickled objects from a db result set:

  The objects are pulled from the rows produced by running a command on a
  PgSQL cursor; this requires RDConfig.usePgSQL to be set.

  Sample Usage:
    >>> from rdkit.Dbase.DbConnection import DbConnect

  """

  def __init__(self, cursor, cmd, binaryCol, **kwargs):
    """
    Arguments:
      - cursor: the PgSQL cursor used to run the query
      - cmd: the SQL command to be executed
      - binaryCol: index of the result column holding the pickled data
      - kwargs: passed on to SupplyNode.__init__

    Raises NotImplementedError when PgSQL support is not available.

    """
    SupplyNode.__init__(self, **kwargs)
    if _dataSeq is None:
      # the sequence classes only exist when RDConfig.usePgSQL is set
      raise NotImplementedError('DbPickleSupplyNode requires PgSQL support')
    self._dbResults = _dataSeq(cursor, cmd, pickleCol=binaryCol)
    # created on demand so that building the node does not hit the database
    self._supplier = None

  def reset(self):
    """ Resets the node; the query is run again on the next call to next(). """
    SupplyNode.reset(self)
    self._supplier = None

  def next(self):
    """ Returns the next object from the result set.

    StopIteration from the underlying sequence is allowed to propagate.

    """
    if self._supplier is None:
      self._supplier = iter(self._dbResults)
    return self._supplier.next()

  # Python 3 spelling of the iterator protocol
  __next__ = next
179
def GetNode(dbName, tableName, binaryCol=1):
  """ Builds a DbPickleSupplyNode supplying the contents of a table.

  Arguments:
    - dbName: name of the database to connect to
    - tableName: name of the table to pull rows from
    - binaryCol: index of the column holding the pickled data

  NOTE(review): the previous body returned the undefined name
  DbMolSupplyNode; selecting every column of the table mirrors the
  argument-free conn.GetData() call it made - confirm this is the intent.

  """
  from rdkit.Dbase.DbConnection import DbConnect
  conn = DbConnect(dbName, tableName)
  # table names cannot be passed as query parameters, so tableName must
  # come from a trusted source
  cmd = 'select * from %s' % tableName
  return DbPickleSupplyNode(conn.GetCursor(), cmd, binaryCol)
184 185 #------------------------------------ 186 # 187 # doctest boilerplate 188 #
def _test():
  """ Runs doctest over the module registered as "__main__".

  Returns whatever doctest.testmod() returns.

  """
  import doctest
  import sys
  main_module = sys.modules["__main__"]
  return doctest.testmod(main_module)
if __name__ == '__main__':
  # run the doctests and use the number of failures as the exit status
  import sys
  results = _test()
  failed, tried = results
  sys.exit(failed)