Package rdkit :: Package ML :: Package Composite :: Module BayesComposite
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.Composite.BayesComposite

  1  # $Id$ 
  2  # 
  3  #  Copyright (C) 2000-2008  greg Landrum and Rational Discovery LLC 
  4  #   All Rights Reserved 
  5  # 
  6  """ code for dealing with Bayesian composite models 
  7   
  8  For a model to be useable here, it should support the following API: 
  9   
 10    - _ClassifyExample(example)_, returns a classification 
 11   
 12  Other compatibility notes: 
 13   
 14   1) To use _Composite.Grow_ there must be some kind of builder 
 15      functionality which returns a 2-tuple containing (model,percent accuracy). 
 16   
 17   2) The models should be pickleable 
 18   
 19   3) It is very helpful if the models support the __cmp__ method so that 
 20      the membership tests used to make sure models are unique work. 
 21   
 22   
 23   
 24  """ 
 25  from __future__ import print_function 
 26  import numpy 
 27  from rdkit.ML.Composite import Composite 
 28   
 29   
class BayesComposite(Composite.Composite):
    """a composite model using Bayesian statistics in the Decision Proxy


    **Notes**

      - typical usage:

         1) grow the composite with AddModel until happy with it

         2) call AverageErrors to calculate the average error values

         3) call SortModels to put things in order by either error or count

         4) call Train to update the Bayesian stats.

      - _Train_ must be called before _ClassifyExample_: until then
        _condProbs_ is None and cannot be indexed.

    """

    def __init__(self):
        Composite.Composite.__init__(self)
        # Both are filled in by Train(); None marks an untrained composite.
        self.resultProbs = None
        self.condProbs = None

    def Train(self, data, verbose=0):
        """ updates the Bayesian statistics of the composite from a data set

          **Arguments**

            - data: a sequence of examples.  It is iterated over twice, so it
              must not be a one-shot iterator.  The last element of each
              example is used as its result.

            - verbose: if nonzero, the resulting probabilities are printed

          **Notes**

            - sets _self.resultProbs_ to an array of length nResults holding
              the fraction of examples with each result

            - sets _self.condProbs_ to a list with one nResults x nResults
              array per model; entry [vote, result] is the fraction of the
              examples on which the model cast _vote_ that had true result
              _result_.  Rows for votes a model never cast are left as zeros.

        """
        # FIX: this is wrong because it doesn't take the counts of each model
        #  into account
        nModels = len(self)
        nResults = self.nPossibleVals[-1]
        # The builtin float is used as the dtype: the numpy.float alias was
        # removed in NumPy 1.24 and referred to the same type anyway.
        self.resultProbs = numpy.zeros(nResults, float)
        self.condProbs = [numpy.zeros((nResults, nResults), float)
                          for _ in range(nModels)]

        # FIX: this is a quick hack which may slow things down a lot
        for example in data:
            act = self.QuantizeActivity(example)[-1]
            self.resultProbs[int(act)] += 1

        for example in data:
            if self._mapOrder is not None:
                example = self._RemapInput(example)
            if self.GetActivityQuantBounds():
                example = self.QuantizeActivity(example)
            if self.quantBounds is not None and 1 in self.quantizationRequirements:
                quantExample = self.QuantizeExample(example, self.quantBounds)
            else:
                quantExample = []

            trueRes = int(example[-1])
            votes = self.CollectVotes(example, quantExample)

            for i in range(nModels):
                self.condProbs[i][votes[i], trueRes] += 1

        # Turn the counts into per-vote distributions.  A row whose total is
        # zero (the model never cast that vote) is skipped: dividing it would
        # fill it with NaN, which then poisons every sum in ClassifyExample.
        for mat in self.condProbs:
            for row in mat:
                rowTotal = row.sum()
                if rowTotal > 0:
                    row /= rowTotal  # row is a view, so this updates mat

        # Same guard for the overall result distribution (empty data set).
        nExamples = self.resultProbs.sum()
        if nExamples > 0:
            self.resultProbs /= nExamples

        if verbose:
            print('**** Bayesian Results')
            print('Result probabilities')
            print('\t', self.resultProbs)
            print('Model by model breakdown of conditional probs')
            for mat in self.condProbs:
                for row in mat:
                    print('\t', row)
                print()

    def ClassifyExample(self, example, threshold=0, verbose=0, appendExample=0):
        """ classifies the given example using the entire composite

          **Arguments**

            - example: the data to be classified

            - threshold: if this is a number greater than zero, then a
              classification will only be returned if the confidence is
              above _threshold_.  Anything lower is returned as -1.

            - verbose: if nonzero, the votes, the confidence and the last
              element of the example are printed

            - appendExample: passed through to _CollectVotes_

          **Returns**

            a (result,confidence) tuple

          **Notes**

            - the individual model votes are stored in _self.modelVotes_

            - _Train_ must have been called first so that _self.condProbs_
              is populated

        """
        if self._mapOrder is not None:
            example = self._RemapInput(example)
        if self.GetActivityQuantBounds():
            example = self.QuantizeActivity(example)
        if self.quantBounds is not None and 1 in self.quantizationRequirements:
            quantExample = self.QuantizeExample(example, self.quantBounds)
        else:
            quantExample = []
        self.modelVotes = self.CollectVotes(example, quantExample,
                                            appendExample=appendExample)

        nPossibleRes = self.nPossibleVals[-1]
        votes = [0.] * nPossibleRes
        # Each model adds the row of conditional probabilities selected by
        # its own prediction to the running totals.
        for i in range(len(self)):
            predict = self.modelVotes[i]
            for j in range(nPossibleRes):
                votes[j] += self.condProbs[i][predict, j]

        res = numpy.argmax(votes)
        # Each trained row sums to at most 1, so dividing by the number of
        # models keeps the confidence in [0, 1].
        conf = votes[res] / len(self)
        if verbose:
            print(votes, conf, example[-1])
        if conf > threshold:
            return res, conf
        else:
            return -1, conf
147
def CompositeToBayesComposite(obj):
    """ converts a Composite to a BayesComposite, in place

      The class of _obj_ is switched to BayesComposite and its
      _resultProbs_ and _condProbs_ attributes are set to None.

      Only objects whose class is exactly _Composite.Composite_ are converted;
      if _obj_ is already a BayesComposite, or is anything else, nothing is
      done.

    """
    currentClass = obj.__class__
    if currentClass == BayesComposite:
        return
    if currentClass == Composite.Composite:
        obj.__class__ = BayesComposite
        obj.resultProbs = obj.condProbs = None
161
def BayesCompositeToComposite(obj):
    """ converts a BayesComposite to a Composite.Composite, in place

      The class of _obj_ is switched to _Composite.Composite_ and its
      _resultProbs_ and _condProbs_ attributes are reset to None.

      Only objects whose class is exactly BayesComposite are converted; if
      _obj_ is already a _Composite.Composite_, or is anything else, nothing
      is done.

    """
    currentClass = obj.__class__
    if currentClass == Composite.Composite:
        return
    if currentClass == BayesComposite:
        obj.__class__ = Composite.Composite
        obj.resultProbs = obj.condProbs = None
172