Package rdkit :: Package ML :: Package Cluster :: Module Resemblance
[hide private]
[frames] | no frames]

Source Code for Module rdkit.ML.Cluster.Resemblance

  1  # $Id$ 
  2  # 
  3  # Copyright (C) 2001-2006  greg Landrum 
  4  # 
  5  #   @@ All Rights Reserved @@ 
  6  #  This file is part of the RDKit. 
  7  #  The contents are covered by the terms of the BSD license 
  8  #  which is included in the file license.txt, found at the root 
  9  #  of the RDKit source tree. 
 10  # 
 11  """ code for dealing with resemblance (metric) matrices 
 12   
 13      Here's how the matrices are stored: 
 14   
 15       '[(0,1),(0,2),(1,2),(0,3),(1,3),(2,3)...]  (row,col), col>row' 
 16   
 17       or, alternatively the matrix can be drawn, with indices as: 
 18   
 19         || - || 0 || 1 || 3  
 20         || - || - || 2 || 4  
 21         || - || - || - || 5  
 22         || - || - || - || -  
 23   
 24       the index of a given (row,col) pair is: 
 25         '(col*(col-1))/2 + row' 
 26   
 27  """ 
 28  from __future__ import print_function 
 29  import numpy 
 30   
 31   
def EuclideanDistance(inData):
    """returns the euclidean metricMat between the points in _inData_

      **Arguments**

       - inData: a numpy array (or nested sequence) of data points, one
         point per entry along the first axis

      **Returns**

        a numpy array of floats with the metric matrix.  Distances are
        stored pair by pair in the order (0,1),(0,2),(1,2),(0,3),...
        i.e. the (row,col) pair with col>row is at index
        col*(col-1)/2 + row.  See the module documentation for the format.

    """
    # Coerce once so that plain lists of lists / lists of arrays work too.
    points = numpy.asarray(inData, dtype=float)
    nObjs = len(points)
    # Integer division: under Python 3 "/" yields a float, which numpy.zeros
    # refuses as a length.  The builtin float is used because the numpy.float
    # alias is not available in current numpy releases.
    res = numpy.zeros(nObjs * (nObjs - 1) // 2, float)
    nSoFar = 0
    for col in range(1, nObjs):
        for row in range(col):
            t = points[row] - points[col]
            # squared distance; the square root is taken once at the end
            res[nSoFar] = numpy.sum(t * t)
            nSoFar += 1
    return numpy.sqrt(res)
55
def CalcMetricMatrix(inData, metricFunc):
    """ generates a metric matrix

      **Arguments**

       - inData is assumed to be a list of clusters (or anything with
         a GetPosition() method)

       - metricFunc is the function to be used to generate the matrix;
         it is called once with the list of positions

      **Returns**

        whatever metricFunc returns for the positions (for
        EuclideanDistance this is the metric matrix as a numpy array)

    """
    # Build a real list: on Python 3 map() returns a one-shot iterator that
    # has no len(), and metric functions such as EuclideanDistance call
    # len() on their argument.
    positions = [item.GetPosition() for item in inData]
    return metricFunc(positions)
75
def FindMinValInList(mat, nObjs, minIdx=None):
    """ locates the smallest entry of a metric matrix

      **Arguments**

       - mat: the metric matrix (flat, in the layout described in the
         module documentation)

       - nObjs: the number of objects the matrix describes

       - minIdx: (optional) the flat index of the minimum value, if it is
         already known; when omitted it is found with numpy.argmin.  The
         row, column and value are worked out from it either way.

      **Returns**

        a 3-tuple containing:

          1) the row
          2) the column
          3) the value stored at that position

      **Notes**

        - the column is found by a linear walk over the columns, so this
          is not especially fast

    """
    expectedLen = nObjs * (nObjs - 1) / 2
    assert len(mat) == expectedLen, 'bad matrix length in FindMinValInList'
    if minIdx is None:
        minIdx = numpy.argmin(mat)

    # Column c holds c entries, so walk the columns while accumulating the
    # number of entries passed until the running total exceeds minIdx.
    col = 0
    entriesThroughCol = 0
    while True:
        if entriesThroughCol > minIdx:
            break
        col += 1
        entriesThroughCol += col

    # offset of minIdx inside its column
    row = minIdx - entriesThroughCol + col
    return row, col, mat[minIdx]
113
def ShowMetricMat(metricMat, nObjs):
    """ displays a metric matrix

      **Arguments**

       - metricMat: the matrix to be displayed (flat, in the layout
         described in the module documentation)

       - nObjs: the number of objects to display

      **Notes**

        - one line is printed per row; positions on or below the diagonal
          are shown as a '---' placeholder

    """
    assert len(metricMat) == nObjs * (nObjs - 1) // 2, 'bad matrix length in ShowMetricMat'
    for row in range(nObjs):
        for col in range(nObjs):
            if col <= row:
                print(' --- ', end='')
            else:
                # Integer division: a float index ("/" under Python 3) is
                # rejected by both lists and numpy arrays.
                print('%10.6f' % metricMat[(col * (col - 1)) // 2 + row], end='')
        print()
# Registry of the available metrics.  Each entry looks like
# (name, metric function, description) -- confirm against the consumers
# of this list before relying on that layout.
methods = [("Euclidean", EuclideanDistance, "Euclidean Distance")]


if __name__ == '__main__':
    # Small demo: a 5-object metric matrix has 10 entries; show the
    # (row, col, value) triple that each flat index maps to.
    demoMat = [.1, .2, .3, .4, .5, .6, .7, .8, .9, 1.0]
    demoObjs = 5
    for idx in range(10):
        print(idx, FindMinValInList(demoMat, demoObjs, idx))