RDKit
Open-source cheminformatics and machine learning.
CorrMatGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2006 Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef _RD_CORRMATGENERATOR_H_
11 #define _RD_CORRMATGENERATOR_H_
12 
13 #include <RDGeneral/types.h>
14 #include <DataStructs/BitVects.h>
15 #include <boost/dynamic_bitset.hpp>
16 
17 namespace RDInfoTheory {
18  //FIX: won't worry about it now, but this class can be templated by the type of
19  // container for the bit list and type of descriptors (fingerprint vs. real valued)
21  /*! \brief A class to generate a correlation matrix for a bunch of fingerprints
22  *
23  * The correlation matrix is computed only for the bit IDs that are set by a call to the
24  * function setBitIdList
25  *
26  * cr = BitCorrMatGenerator();
27  * cr.setBitIdList(descList);
28  * for each fingerprint in list of fingerprints {
29  * cr.collectVotes(fingerprint);
30  * }
31  * double *corrMat = cr.getCorrMat()
32  *
33  * The resulting correlation matrix is a one-dimensional array holding only the lower triangle elements
34  * of the symmetric matrix (the diagonal is not stored)
35  */
36  public:
38  this->initGenerator();
39  };
40 
42  delete [] dp_corrMat;
43  }
44 
45  void initGenerator() {
46  dp_corrMat = 0;
47  d_descs.resize(0);
48  d_nExamples = 0;
49  };
50 
51  /*! \brief Set the list bits that we are interested in correlating
52  *
53  * \param bitIdList is a list of bit ids that need to be correlated e.g. a list top ranked ensemble
54  * of bits
55  */
56  void setBitIdList(const RDKit::INT_VECT &bitIdList) {
57  d_descs = bitIdList;
58  int i, nd = d_descs.size();
59  int nelem = nd*(nd-1)/2;
60  delete [] dp_corrMat;
61 
62  dp_corrMat = new double[nd*(nd-1)/2];
63  for (i = 0; i < nelem; i++) {
64  dp_corrMat[i] = 0.0;
65  }
66  };
67 
68  //! \brief get the number of examples we used so far to compute the correlation matrix
69  int getNumExamples() const {
70  return d_nExamples;
71  };
72 
73  //! \brief Get the list of bits ID that are used to generate the correlation matrix
75  return d_descs;
76  };
77 
78  //! \brief Gets a pointer to the correlation matrix
79  double *getCorrMat() {
80  return dp_corrMat;
81  };
82 
83  //! \brief For each pair of on bits (bi, bj) in fp increase the correlation count
84  // for the pair by 1
85  void collectVotes(const BitVect &fp) {
86  unsigned int nd = d_descs.size();
87  // use a temporary bit vector to first mask the fingerprint
88  ExplicitBitVect ebv(nd);
89  int bi;
90  for (unsigned int i = 0; i < nd; i++) {
91  bi = d_descs[i];
92  if (fp[bi]) {
93  ebv.setBit(i);
94  }
95  }
96  for (unsigned i = 1; i < nd; i++) {
97  unsigned int itab = i*(i-1)/2;
98  if (ebv[i]) {
99  for (unsigned int j = 0; j < i; j++) {
100  if ( ebv[j]) {
101  dp_corrMat[itab + j] += 1;
102  }
103  }
104  }
105  }
106  d_nExamples++;
107  };
108 
109  private:
110  RDKit::INT_VECT d_descs;
111  double *dp_corrMat;
112  int d_nExamples;
113  };
114 
115 }
116 
117 #endif
118 
119 
Pulls in all the BitVect classes.
RDKit::INT_VECT getCorrBitList() const
Get the list of bits ID that are used to generate the correlation matrix.
int getNumExamples() const
get the number of examples we used so far to compute the correlation matrix
Class used to rank bits based on a specified measure of information.
void setBitIdList(const RDKit::INT_VECT &bitIdList)
Set the list bits that we are interested in correlating.
std::vector< int > INT_VECT
Definition: types.h:146
bool setBit(const unsigned int which)
sets a particular bit and returns its original value
double * getCorrMat()
Gets a pointer to the correlation matrix.
a class for bit vectors that are densely occupied
Abstract base class for storing BitVectors.
Definition: BitVect.h:23
BitCorrMatGenerator()
A class to generate a correlation matrix for a bunch of fingerprints.
void collectVotes(const BitVect &fp)
For each pair of on bits (bi, bj) in fp increase the correlation count.