Package rdkit :: Package Chem :: Package MolDb :: Module Loader_sa
[hide private]
[frames | no frames]

Source Code for Module rdkit.Chem.MolDb.Loader_sa

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2007-2009 Greg Landrum 
  4  #   @@ All Rights Reserved @@ 
  5  #  This file is part of the RDKit. 
  6  #  The contents are covered by the terms of the BSD license 
  7  #  which is included in the file license.txt, found at the root 
  8  #  of the RDKit source tree. 
  9  # 
 10  import sqlalchemy 
 11   
 12  from rdkit import Chem 
 13  from rdkit.Chem import AllChem 
 14  from rdkit.Chem import Lipinski,Descriptors,Crippen 
 15  from rdkit.Dbase.DbConnection import DbConnect 
 16  from rdkit.Dbase import DbModule 
 17  import os 
 18   
 19  from sqlalchemy.ext.declarative import declarative_base 
 20  from sqlalchemy import Table,Column,MetaData 
 21  from sqlalchemy import Integer,Text,String,ForeignKey,Binary,DateTime,Float 
 22  from sqlalchemy.orm import relation,mapper,sessionmaker,backref 
 23  from sqlalchemy import create_engine 
 24   
 25  decBase = declarative_base() 
 26   
class Compound(decBase):
    """Declarative ORM class mapped to the ``molecules`` table.

    Only the two fixed columns are declared here.  LoadDb attaches further
    columns (name, smiles, per-property and computed-descriptor columns) to
    this class at run time, before the schema is created.
    """
    __tablename__ = 'molecules'

    # integer primary key of the row
    guid = Column(Integer, primary_key=True)
    # binary column; ProcessMol stores the result of mol.ToBinary() here
    molpkl = Column(Binary)
31
def RegisterSchema(dbUrl, echo=False):
    """Create every table registered on ``decBase`` and return a session factory.

    Arguments:
      - dbUrl: database URL handed to ``create_engine``
      - echo: forwarded unchanged to ``create_engine`` as its ``echo`` option

    Returns the ``sessionmaker`` bound to the freshly created engine; call
    the returned object to obtain a session.
    """
    dbEngine = create_engine(dbUrl, echo=echo)
    decBase.metadata.create_all(dbEngine)
    return sessionmaker(bind=dbEngine)


# public alias: both names refer to the very same function object
ConnectToSchema = RegisterSchema

def _ConnectToSchema(dbUrl, echo=False):
    """Private variant of RegisterSchema: create the tables, return a session factory.

    Arguments:
      - dbUrl: database URL handed to ``create_engine``
      - echo: forwarded unchanged to ``create_engine`` as its ``echo`` option

    Returns the ``sessionmaker`` bound to the new engine.

    The previous body contained a stray bare ``meta`` expression; that name
    is not defined anywhere in the module, so every call raised NameError.
    It has been removed.
    """
    engine = create_engine(dbUrl, echo=echo)
    decBase.metadata.create_all(engine)
    maker = sessionmaker(bind=engine)
    return maker


# Module-level logger built on RDKit's RDLogger wrapper.  The level is set
# to INFO, the level LoadDb uses for its progress messages.
from rdkit import RDLogger as logging
logger = logging.logger()
logger.setLevel(logging.INFO)

def ProcessMol(session, mol, globalProps, nDone, nameProp='_Name', nameCol='compound_id',
               redraw=False, keepHs=False,
               skipProps=False, addComputedProps=False,
               skipSmiles=False):
    """Build a Compound row for one molecule and add it to the session.

    Arguments:
      - session: session object; the new Compound is passed to ``session.add``
      - mol: the molecule to store
      - globalProps: container of property names that have a column on
        Compound; only properties found in it are copied
      - nDone: running molecule count, used for the fallback name ``Mol_<nDone>``
      - nameProp: molecule property holding the compound name
      - nameCol: name of the column receiving the compound name (the mapped
        attribute is the lower-cased form, matching how LoadDb creates it)
      - redraw: if true, 2D coordinates are recomputed for the molecule
      - keepHs: if true, ``Chem.SanitizeMol`` is called on the molecule
      - skipProps: if true, neither computed descriptors nor molecule
        properties are stored
      - addComputedProps: if true (and skipProps is false), donor/acceptor
        counts, rotatable bond count, AMW and MolLogP are stored
      - skipSmiles: if true, no SMILES is generated

    Returns the populated Compound (it is NOT committed here).

    Raises ValueError if ``mol`` is false-y.
    """
    if not mol:
        raise ValueError('no molecule')
    # NOTE(review): sanitization only happens when keepHs is set -- confirm
    # that this coupling is intended.
    if keepHs:
        Chem.SanitizeMol(mol)

    try:
        nm = mol.GetProp(nameProp)
    except KeyError:
        nm = None
    if not nm:
        # missing or empty name: fall back to a counter-based one
        nm = 'Mol_%d' % nDone

    cmpd = Compound()
    session.add(cmpd)

    if redraw:
        # fixed: this used to reference the undefined name ``m``
        AllChem.Compute2DCoords(mol)

    if not skipSmiles:
        cmpd.smiles = Chem.MolToSmiles(mol, True)
    cmpd.molpkl = mol.ToBinary()

    # LoadDb registers the name column under nameCol.lower(); use the same
    # attribute here so that a mixed-case nameCol still reaches the database.
    nameAttr = nameCol.lower()
    setattr(cmpd, nameAttr, nm)

    if not skipProps:
        if addComputedProps:
            cmpd.DonorCount = Lipinski.NumHDonors(mol)
            cmpd.AcceptorCount = Lipinski.NumHAcceptors(mol)
            cmpd.RotatableBondCount = Lipinski.NumRotatableBonds(mol)
            cmpd.AMW = Descriptors.MolWt(mol)
            cmpd.MolLogP = Crippen.MolLogP(mol)
        for pn in mol.GetPropNames():
            if pn.lower() == nameAttr:
                continue
            # only fetch the value for properties that actually have a column
            if pn in globalProps:
                setattr(cmpd, pn.lower(), mol.GetProp(pn).strip())
    return cmpd
93
def _CommitWithRowFallback(session, pending, report=False):
    """Commit the session; on failure roll back and retry ``pending`` row by row.

    Rows that still fail individually are rolled back and dropped.  If
    ``report`` is true the traceback of the failed bulk commit is printed.
    """
    try:
        session.commit()
    except Exception:
        if report:
            import traceback
            traceback.print_exc()
        session.rollback()
        for cmpd in pending:
            try:
                session.add(cmpd)
                session.commit()
            except Exception:
                session.rollback()


def LoadDb(suppl, dbName, nameProp='_Name', nameCol='compound_id', silent=False,
           redraw=False, errorsTo=None, keepHs=False, defaultVal='N/A', skipProps=False,
           regName='molecules', skipSmiles=False, maxRowsCached=-1,
           uniqNames=False, addComputedProps=False, lazySupplier=False,
           numForPropScan=10, startAnew=True):
    """Load the molecules from a supplier into an SQLite database.

    Arguments:
      - suppl: iterable of molecules; must support ``len()`` unless
        lazySupplier is true.  If it has a ``GetItemText`` method, that is
        used to write the text of unreadable entries to errorsTo.
      - dbName: file name of the SQLite database (opened via
        ``sqlite:///<dbName>``)
      - nameProp, nameCol, redraw, keepHs, skipProps, addComputedProps,
        skipSmiles: forwarded to ProcessMol; nameCol (lower-cased) is also
        the name of the name column
      - silent: suppress the informational log messages
      - errorsTo: optional file-like object receiving the text of
        molecules that could not be read
      - defaultVal: default value of the name and property columns
      - uniqNames: whether the name column is declared unique
      - lazySupplier: if true, ``len(suppl)`` is never called
      - numForPropScan: number of readable molecules inspected up front to
        discover which property columns to create
      - startAnew: if true, an existing database file is deleted first
      - regName, maxRowsCached: accepted for interface compatibility; not
        used by this function

    Side effects: columns are attached to the module-level Compound class,
    so they persist for later calls in the same process.

    Changes relative to the previous implementation:
      - molecules read during the property scan are kept and processed
        first, so single-pass suppliers no longer lose them;
      - the commit every 100 molecules no longer depends on ``silent``
        (only the log message does);
      - ``except Exception`` replaces bare ``except`` so that
        KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    import itertools

    if not lazySupplier:
        nMols = len(suppl)
    else:
        nMols = -1
    if not silent:
        logger.info("Generating molecular database in file %s" % dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules" % nMols)

    globalProps = {}
    if startAnew:
        if os.path.exists(dbName):
            os.unlink(dbName)

    sIter = iter(suppl)
    nameAttr = nameCol.lower()
    setattr(Compound, nameAttr,
            Column(nameAttr, String, default=defaultVal, unique=uniqNames))
    if not skipSmiles:
        Compound.smiles = Column(Text, unique=True)

    # Everything pulled from sIter during the scan (unreadable entries
    # included, so positions stay aligned with GetItemText indices) is
    # remembered and replayed ahead of the rest of the iterator below.
    scanned = []
    if not skipProps:
        while numForPropScan > 0:
            try:
                m = next(sIter)
            except StopIteration:
                break
            scanned.append(m)
            if not m:
                continue
            for pn in m.GetPropNames():
                if pn.lower() == nameAttr:
                    continue
                if pn not in globalProps:
                    globalProps[pn] = 1
                    setattr(Compound, pn.lower(),
                            Column(pn.lower(), String, default=defaultVal))
            numForPropScan -= 1
    if addComputedProps:
        Compound.DonorCount = Column(Integer)
        Compound.AcceptorCount = Column(Integer)
        Compound.RotatableBondCount = Column(Integer)
        Compound.AMW = Column(Float)
        Compound.MolLogP = Column(Float)

    # the schema must be registered only after all columns were attached
    session = RegisterSchema('sqlite:///%s' % (dbName))()

    nDone = 0
    cache = []
    for m in itertools.chain(scanned, sIter):
        nDone += 1
        if not m:
            if errorsTo:
                if hasattr(suppl, 'GetItemText'):
                    d = suppl.GetItemText(nDone - 1)
                    errorsTo.write(d)
                else:
                    logger.warning('full error file support not complete')
            continue

        cmpd = ProcessMol(session, m, globalProps, nDone, nameProp=nameProp,
                          nameCol=nameCol, redraw=redraw,
                          keepHs=keepHs, skipProps=skipProps,
                          addComputedProps=addComputedProps, skipSmiles=skipSmiles)
        if cmpd is not None:
            cache.append(cmpd)

        if not nDone % 100:
            if not silent:
                logger.info(' done %d' % nDone)
            # flush in batches so one bad row only forces a row-by-row
            # retry of its own batch
            _CommitWithRowFallback(session, cache)
            cache = []

    # commit whatever is left since the last batch boundary
    _CommitWithRowFallback(session, cache, report=True)

if __name__ == '__main__':
    # Command-line use: <script> <input SD file> <output SQLite file>
    import sys
    supplier = Chem.SDMolSupplier(sys.argv[1])
    dbFile = sys.argv[2]
    LoadDb(supplier, dbFile, addComputedProps=False)
    # open a fresh session on the database just written and report how
    # many Compound rows it contains
    checkSession = RegisterSchema('sqlite:///%s' % (dbFile))()
    nStored = len(checkSession.query(Compound).all())
    print('>>>>', nStored)