Package rdkit :: Package ML :: Package Composite :: Module AdjustComposite
[hide private]
[frames | no frames]

Source Code for Module rdkit.ML.Composite.AdjustComposite

 1  # $Id$ 
 2  # 
 3  #  Copyright (C) 2003  greg Landrum and Rational Discovery LLC 
 4  #   All Rights Reserved 
 5  # 
 6  """ functionality to allow adjusting composite model contents 
 7   
 8  """ 
 9  from __future__ import print_function 
10  import numpy 
11  import copy 
12   
def _AccumulateVoteScores(model, dataSet, ptWeight, scores, quantize):
  """ adds _ptWeight_ to the score of each sub-model voting correctly on a point

  **Arguments**

    - model: the composite being scored; it must provide _ClassifyExample()_,
      _GetVoteDetails()_ and (when _quantize_ is true) _QuantizeActivity()_

    - dataSet: a sequence of points, the last element of each is the activity

    - ptWeight: the amount credited to a sub-model for each correct vote

    - scores: the per-model score array, modified in place

    - quantize: if true, the activity is read from _model.QuantizeActivity(pt)_
      instead of directly from the point

  """
  nModels = len(scores)
  for pt in dataSet:
    # the composite's overall prediction is not used here; presumably the call
    # is what populates the per-model votes read back below
    model.ClassifyExample(pt)
    if quantize:
      ans = model.QuantizeActivity(pt)[-1]
    else:
      ans = pt[-1]
    votes = model.GetVoteDetails()
    for i in range(nModels):
      if votes[i] == ans:
        scores[i] += ptWeight


def BalanceComposite(model, set1, set2, weight, targetSize, names1=None, names2=None):
  """ adjusts the contents of the composite model so as to maximize
    the weighted classification accuracy across the two data sets.

    The resulting composite model, with _targetSize_ models, is returned.

    **Arguments**

      - model: the composite to be adjusted.  Its _modelList_ is read but not
        modified; _SetInputOrder()_ is called on it if names are provided.

      - set1, set2: the two data sets; the last element of each point is
        taken to be its activity

      - weight: the fraction of the total score assigned to _set2_
        (_set1_ gets 1-_weight_)

      - targetSize: the number of models to keep

      - names1, names2: (optional) column names passed to
        _model.SetInputOrder()_ before the corresponding set is scored

    **Returns**

      a shallow copy of _model_ whose _modelList_, _errList_, _countList_ and
      _quantizationRequirements_ have been rebuilt from the _targetSize_
      highest-scoring models.  The error stored for each model is
      1 - (its normalized score).

    **Notes**:

      - if _names1_ and _names2_ are not provided, _set1_ and _set2_ should
        have the same ordering of columns and _model_ should have already
        have had _SetInputOrder()_ called.

      - the indices of the selected models are printed to stdout.

      - an empty _set1_ or _set2_ causes a ZeroDivisionError, and a
        _targetSize_ larger than _len(model)_ causes an IndexError.

  """
  #
  # adjust the weights to be proportional to the size of the two data sets
  # The normalization we do here assures that a perfect model contributes
  # a score of S1+S2 to the final
  #
  S1 = len(set1)
  S2 = len(set2)
  weight1 = float(S1 + S2) * (1 - weight) / S1
  weight2 = float(S1 + S2) * weight / S2

  # start with a copy so that we get all the additional schnick-schnack
  res = copy.copy(model)
  res.modelList = []
  res.errList = []
  res.countList = []
  res.quantizationRequirements = []

  startSize = len(model)
  # the builtin float is used here because the numpy.float alias was removed
  # from NumPy (1.24)
  scores = numpy.zeros(startSize, float)
  actQuantBounds = model.GetActivityQuantBounds()

  if names1 is not None:
    model.SetInputOrder(names1)
  _AccumulateVoteScores(model, set1, weight1, scores, actQuantBounds)

  if names2 is not None:
    model.SetInputOrder(names2)
  _AccumulateVoteScores(model, set2, weight2, scores, actQuantBounds)

  # normalize the scores
  nPts = S1 + S2
  scores /= nPts

  # sort them (best first):
  bestOrder = list(numpy.argsort(scores))
  bestOrder.reverse()
  print('\tTAKE:', bestOrder[:targetSize])

  # and now take the best set:
  for i in range(targetSize):
    idx = bestOrder[i]
    mdl = model.modelList[idx]
    res.modelList.append(mdl)
    res.errList.append(1. - scores[idx])
    res.countList.append(1)
    # FIX: this should probably be more general:
    res.quantizationRequirements.append(0)
  return res
86