1
2
3
4
5
6
7
8
9
10 import sqlalchemy
11
12 from rdkit import Chem
13 from rdkit.Chem import AllChem
14 from rdkit.Chem import Lipinski,Descriptors,Crippen
15 from rdkit.Dbase.DbConnection import DbConnect
16 from rdkit.Dbase import DbModule
17 import os
18
19 from sqlalchemy.ext.declarative import declarative_base
20 from sqlalchemy import Table,Column,MetaData
21 from sqlalchemy import Integer,Text,String,ForeignKey,Binary,DateTime,Float
22 from sqlalchemy.orm import relation,mapper,sessionmaker,backref
23 from sqlalchemy import create_engine
24
# Declarative base for the ORM classes in this module; the schema-registration
# code below calls decBase.metadata.create_all(engine) to create the tables.
decBase = declarative_base()
26
31
33 engine = create_engine(dbUrl,echo=echo)
34 decBase.metadata.create_all(engine)
35 maker = sessionmaker(bind=engine)
36 return maker
37
# Alias: ConnectToSchema is bound to the same callable as RegisterSchema.
ConnectToSchema=RegisterSchema
39
41 engine = create_engine(dbUrl,echo=echo)
42 meta
43 decBase.metadata.create_all(engine)
44 maker = sessionmaker(bind=engine)
45 return maker
46
47
48
# NOTE: `logging` here is rdkit.RDLogger, not the standard-library logging
# module; the module-level logger is created through it and set to INFO.
import rdkit.RDLogger as logging
logger = logging.logger()
logger.setLevel(logging.INFO)
52
53 -def ProcessMol(session,mol,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
54 redraw=False,keepHs=False,
55 skipProps=False,addComputedProps=False,
56 skipSmiles=False):
93
def _commitOrRetry(session, pending, showTraceback=False):
    """Commit `session`; on failure retry the pending compounds one at a time.

    If the bulk commit raises, the session is rolled back and every object in
    `pending` is re-added and committed individually.  Objects that still
    fail on their own are rolled back and skipped, so a single bad row does
    not prevent the others from being stored.

    Arguments:
      - session: the session object to commit
      - pending: the compound objects created since the last commit
      - showTraceback: if set, print the traceback of the bulk-commit failure
    """
    try:
        session.commit()
    except Exception:
        # `Exception` rather than a bare except, so that KeyboardInterrupt and
        # SystemExit are not silently swallowed while loading.
        if showTraceback:
            import traceback
            traceback.print_exc()
        session.rollback()
        for cmpd in pending:
            try:
                session.add(cmpd)
                session.commit()
            except Exception:
                session.rollback()


def LoadDb(suppl,dbName,nameProp='_Name',nameCol='compound_id',silent=False,
           redraw=False,errorsTo=None,keepHs=False,defaultVal='N/A',skipProps=False,
           regName='molecules',skipSmiles=False,maxRowsCached=-1,
           uniqNames=False,addComputedProps=False,lazySupplier=False,
           numForPropScan=10,startAnew=True):
    """Load the molecules from a supplier into an SQLite database file.

    Columns are attached dynamically to the module's Compound class before the
    schema is registered: one for the compound name, optionally one for
    SMILES, one String column per property name found while scanning the first
    molecules, and optionally the computed-descriptor columns.

    Arguments:
      - suppl: iterable of molecules; must support len() unless
        `lazySupplier` is set.  Entries that evaluate false are treated as
        failed records.
      - dbName: filename of the SQLite database (used as 'sqlite:///<dbName>')
      - nameProp, redraw, keepHs: passed through to ProcessMol
      - nameCol: name of the compound-name column (lower-cased)
      - silent: if set, no progress messages are logged
      - errorsTo: optional file-like object; the text of failed records is
        written to it when the supplier provides GetItemText()
      - defaultVal: default value for the name and property columns
      - skipProps: if set, molecule properties are not scanned for columns
      - regName, maxRowsCached: accepted for interface compatibility; not
        used by this function
      - skipSmiles: if set, no (unique) smiles column is created
      - uniqNames: if set, the name column gets a unique constraint
      - addComputedProps: if set, DonorCount, AcceptorCount,
        RotatableBondCount, AMW and MolLogP columns are added
      - lazySupplier: set for suppliers that do not support len()
      - numForPropScan: number of valid molecules scanned for property names
      - startAnew: if set, an existing file named `dbName` is removed first

    Changes relative to the previous implementation:
      - with `lazySupplier`, molecules consumed by the property scan are
        buffered and replayed, so a one-shot iterator no longer loses them;
      - the periodic commit (every 100 molecules) no longer depends on
        `silent`; only the progress message does;
      - commit failures are caught with `except Exception`, not bare except.
    """
    import itertools

    if not lazySupplier:
        nMols = len(suppl)
    else:
        nMols = -1
    if not silent:
        logger.info("Generating molecular database in file %s"%dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules"%nMols)

    globalProps = {}
    if startAnew and os.path.exists(dbName):
        os.unlink(dbName)

    sIter = iter(suppl)
    # Molecules pulled from a lazy supplier during the property scan; they are
    # replayed in front of the remaining stream in the main loop below.
    scanned = []
    lowerNameCol = nameCol.lower()
    setattr(Compound,lowerNameCol,
            Column(lowerNameCol,String,default=defaultVal,unique=uniqNames))
    if not skipSmiles:
        Compound.smiles = Column(Text,unique=True)
    if not skipProps:
        while numForPropScan>0:
            try:
                m = next(sIter)
            except StopIteration:
                break
            if lazySupplier:
                scanned.append(m)
            if not m:
                # failed records do not count towards numForPropScan
                continue
            for pn in m.GetPropNames():
                if pn.lower()==lowerNameCol:
                    continue
                if pn not in globalProps:
                    globalProps[pn] = 1
                    setattr(Compound,pn.lower(),
                            Column(pn.lower(),String,default=defaultVal))
            numForPropScan -= 1
    if addComputedProps:
        Compound.DonorCount = Column(Integer)
        Compound.AcceptorCount = Column(Integer)
        Compound.RotatableBondCount = Column(Integer)
        Compound.AMW = Column(Float)
        Compound.MolLogP = Column(Float)
    session = RegisterSchema('sqlite:///%s'%(dbName))()

    if lazySupplier:
        # Continue from where the scan stopped, after replaying what it read.
        # For a re-iterable supplier this visits every molecule exactly once,
        # just as restarting the iteration would.
        molSource = itertools.chain(scanned,sIter)
    else:
        molSource = suppl

    nDone = 0
    cache = []
    for m in molSource:
        nDone += 1
        if not m:
            if errorsTo:
                if hasattr(suppl,'GetItemText'):
                    d = suppl.GetItemText(nDone-1)
                    errorsTo.write(d)
                else:
                    logger.warning('full error file support not complete')
            continue

        cmpd = ProcessMol(session,m,globalProps,nDone,nameProp=nameProp,
                          nameCol=nameCol,redraw=redraw,
                          keepHs=keepHs,skipProps=skipProps,
                          addComputedProps=addComputedProps,skipSmiles=skipSmiles)
        if cmpd is not None:
            cache.append(cmpd)

        if not nDone%100:
            if not silent:
                logger.info(' done %d'%nDone)
            _commitOrRetry(session,cache)
            cache = []

    # Flush whatever is left since the last periodic commit.
    _commitOrRetry(session,cache,showTraceback=True)
if __name__=='__main__':
    # Command-line use: <script> <input SD file> <output SQLite file>
    # Loads the SD file into the database, then reports how many Compound
    # rows can be read back from it.
    import sys
    supplier = Chem.SDMolSupplier(sys.argv[1])
    dbPath = sys.argv[2]
    LoadDb(supplier,dbPath,addComputedProps=False)
    makeSession = RegisterSchema('sqlite:///%s'%(dbPath))
    session = makeSession()
    stored = session.query(Compound).all()
    print('>>>>', len(stored))
193