Package rdkit :: Package ML :: Module CompositeRun
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.CompositeRun

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2002-2006  Greg Landrum and Rational Discovery LLC
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ contains a class to store parameters for and results from 
 12  Composite building 
 13   
 14  """ 
 15  from rdkit import RDConfig 
 16  from rdkit.Dbase.DbConnection import DbConnect 
 17  from rdkit import DataStructs 
 18  from rdkit.Dbase import DbModule 
 19  ##from rdkit.ML.SVM import SVMClassificationModel as SVM 
 20   
def SetDefaults(runDetails):
    """ fills in a details object with the default run parameters

      **Arguments**

        - runDetails: the object to be initialized (normally a
          _CompositeRun.CompositeRun_ instance).  It is modified in place;
          every attribute listed below is (re)assigned.

      **Returns**

        the same _runDetails_ object that was passed in.

    """
    # The table is built inside the function so that each call gets its own
    # fresh list objects for the mutable defaults (qBounds, activityBounds).
    defaultSettings = (
        # general run control
        ('nRuns', 1),
        ('nModels', 10),
        ('outName', ''),
        ('badName', ''),
        ('splitRun', 0),
        ('splitFrac', 0.7),
        ('lockRandom', 0),
        ('randomActivities', 0),
        ('shuffleActivities', 0),
        ('replacementSelection', 0),

        #
        # Tree Parameters
        #
        ('useTrees', 1),
        ('pruneIt', 0),
        ('lessGreedy', 0),
        ('limitDepth', -1),
        ('recycleVars', 0),
        ('randomDescriptors', 0),  # toggles growing of random forests

        #
        # KNN Parameters
        #
        ('useKNN', 0),
        ('knnDistFunc', ''),
        ('knnNeighs', 0),

        #
        # SigTree Parameters
        #
        ('useSigTrees', 0),
        ('useCMIM', 0),
        ('allowCollections', False),

        #
        # Naive Bayes Classifier Parameters
        #
        ('useNaiveBayes', 0),
        ('mEstimateVal', -1.0),
        ('useSigBayes', 0),

        ## #
        ## # SVM Parameters
        ## #
        ## runDetails.useSVM = 0
        ## runDetails.svmKernel = SVM.radialKernel
        ## runDetails.svmType = SVM.cSVCType
        ## runDetails.svmGamma = None
        ## runDetails.svmCost = None
        ## runDetails.svmWeights = None
        ## runDetails.svmDataType = 'float'
        ## runDetails.svmDegree = 3
        ## runDetails.svmCoeff = 0.0
        ## runDetails.svmEps = 0.001
        ## runDetails.svmNu = 0.5
        ## runDetails.svmCache = 40
        ## runDetails.svmShrink = 1
        ## runDetails.svmDataType='float'

        # database access, bounds, filtering and bookkeeping
        ('bayesModel', 0),
        ('dbName', ''),
        ('dbUser', RDConfig.defaultDBUser),
        ('dbPassword', RDConfig.defaultDBPassword),
        ('dbWhat', '*'),
        ('dbWhere', ''),
        ('dbJoin', ''),
        ('qTableName', ''),
        ('qBounds', []),
        ('qBoundCount', ''),
        ('activityBounds', []),
        ('activityBoundsVals', ''),
        ('detailedRes', 0),
        ('noScreen', 0),
        ('threshold', 0.0),
        ('filterFrac', 0.0),
        ('filterVal', 0.0),
        ('modelFilterVal', 0.0),
        ('modelFilterFrac', 0.0),
        ('internalHoldoutFrac', 0.3),
        ('pickleDataFileName', ''),
        ('startAt', None),
        ('persistTblName', ''),
        ('randomSeed', (23, 42)),
        ('note', ''),
    )

    # apply the defaults in the order they are listed above
    for attrName, attrValue in defaultSettings:
        setattr(runDetails, attrName, attrValue)

    return runDetails


class CompositeRun:
    """ class to store parameters for and results from Composite building

     This class has a default set of fields which are added to the database.

     By default these fields are stored in a tuple, so they are immutable.  This
     is probably what you want.

    """
    # (column name, SQL column type) pairs.  _CreateTable() uses them to
    # create or extend the results table, and Store() writes every field
    # for which this instance has an attribute of the same name.
    #
    # NOTE(review): the column below is "bayesModels" (plural) while
    # SetDefaults() in this module assigns "bayesModel" (singular).  Store()
    # silently skips fields without a matching attribute, so this column may
    # never be populated -- confirm which spelling is intended before
    # renaming (renaming changes the database schema).
    fields = (
        ("rundate", "varchar(32)"),
        ("dbName", "varchar(200)"),
        ("dbWhat", "varchar(200)"),
        ("dbWhere", "varchar(200)"),
        ("dbJoin", "varchar(200)"),
        ("tableName", "varchar(80)"),
        ("note", "varchar(120)"),
        ("shuffled", "smallint"),
        ("randomized", "smallint"),
        ("overall_error", "float"),
        ("holdout_error", "float"),
        ("overall_fraction_dropped", "float"),
        ("holdout_fraction_dropped", "float"),
        ("overall_correct_conf", "float"),
        ("overall_incorrect_conf", "float"),
        ("holdout_correct_conf", "float"),
        ("holdout_incorrect_conf", "float"),
        ("overall_result_matrix", "varchar(256)"),
        ("holdout_result_matrix", "varchar(256)"),
        ("threshold", "float"),
        ("splitFrac", "float"),
        ("filterFrac", "float"),
        ("filterVal", "float"),
        ("modelFilterVal", "float"),
        ("modelFilterFrac", "float"),
        ("nModels", "int"),
        ("limitDepth", "int"),
        ("bayesModels", "int"),
        ("qBoundCount", "varchar(3000)"),
        ("activityBoundsVals", "varchar(200)"),
        ("cmd", "varchar(500)"),
        ("model", DbModule.binaryTypeName),
    )

    def _CreateTable(self, cn, tblName):
        """ *Internal Use only*

          Makes sure that the table _tblName_ exists and has a column for
          every entry in _self.fields_:

            - if the table is not among _cn.GetTableNames()_ it is created
              with all of the fields

            - otherwise each field missing from _cn.GetColumnNames()_ is
              added with an "alter table" statement

          Table and column names are compared case-insensitively.

          **Arguments**

            - cn: an open database connection object (as built by _DbConnect_)

            - tblName: name of the table to create or extend

        """
        tableNames = [x.strip().upper() for x in cn.GetTableNames()]
        if tblName.upper() not in tableNames:
            curs = cn.GetCursor()
            colDefs = ','.join(['%s %s' % (name, typ) for name, typ in self.fields])
            curs.execute('create table %s (%s)' % (tblName, colDefs))
            cn.Commit()
        else:
            # NOTE(review): GetColumnNames() is called without a table name, so
            # it presumably inspects the connection's own table rather than
            # tblName -- confirm callers always pass matching names (Store()
            # in this class does).
            heads = [x.upper() for x in cn.GetColumnNames()]
            curs = cn.GetCursor()
            for name, typ in self.fields:
                if name.upper() not in heads:
                    curs.execute('alter table %s add %s %s' % (tblName, name, typ))
            cn.Commit()

    def Store(self, db='models.gdb', table='results',
              user='sysdba', password='masterkey'):
        """ adds the result to a database

          Only the entries of _self.fields_ for which this object actually
          has an attribute are written; the others are left out of the
          insert statement.

          **Arguments**

            - db: name of the database to use

            - table: name of the table to use

            - user&password: connection information

        """
        cn = DbConnect(db, table, user, password)
        curs = cn.GetCursor()
        # create the table, or add any columns it is missing, before inserting
        self._CreateTable(cn, table)

        cols = []
        vals = []
        for name, typ in self.fields:
            try:
                v = getattr(self, name)
            except AttributeError:
                # this field was never set on the object: skip the column
                pass
            else:
                cols.append(name)
                vals.append(v)

        # Column names come from self.fields and cannot be bound as
        # parameters; the values themselves are passed via placeholders.
        qs = ','.join([DbModule.placeHolder] * len(vals))
        cmd = 'insert into %s (%s) values (%s)' % (table, ','.join(cols), qs)
        curs.execute(cmd, tuple(vals))
        cn.Commit()

    def GetDataSet(self, **kwargs):
        """ Returns a MLDataSet pulled from a database using our stored
          values.

          **Arguments**

            - kwargs: any extra keyword arguments are passed straight through
              to _DataUtils.DBToData_

          **Returns**

            whatever _DataUtils.DBToData_ returns for the stored dbName,
            tableName, dbUser, dbPassword, dbWhat, dbWhere and dbJoin

        """
        # imported here rather than at module level, as in the original code
        from rdkit.ML.Data import DataUtils
        data = DataUtils.DBToData(self.dbName, self.tableName,
                                  user=self.dbUser, password=self.dbPassword,
                                  what=self.dbWhat, where=self.dbWhere,
                                  join=self.dbJoin, **kwargs)
        return data

    def GetDataSetInfo(self, **kwargs):
        """ Returns column information for the data set described by our
          stored values, without pulling the data itself.

          **Arguments**

            - kwargs: accepted for symmetry with _GetDataSet_; currently unused

          **Returns**

            the result of _DbConnect.GetColumnNamesAndTypes_ called with the
            stored dbJoin, dbWhat and dbWhere (presumably the name and type
            of each selected column)

        """
        # Use the stored credentials, as GetDataSet() does, instead of always
        # falling back on DbConnect's defaults.  If either one was never set
        # on this object, keep the old behaviour of connecting without them.
        try:
            credentials = (self.dbUser, self.dbPassword)
        except AttributeError:
            credentials = ()
        conn = DbConnect(self.dbName, self.tableName, *credentials)
        res = conn.GetColumnNamesAndTypes(join=self.dbJoin, what=self.dbWhat,
                                          where=self.dbWhere)
        return res
