RDKit
Open-source cheminformatics and machine learning.
CorrMatGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2006 Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_CORRMATGENERATOR_H_
12 #define _RD_CORRMATGENERATOR_H_
13 
14 #include <RDGeneral/types.h>
15 #include <DataStructs/BitVects.h>
16 #include <boost/dynamic_bitset.hpp>
17 
18 namespace RDInfoTheory {
19 // FIX: won't worry about it now, but this class can be templated by the type of
20 // container for the bit list and type of descriptors (fingerprint vs. real
21 // valued)
23  /*! \brief A class to generate a correlation matrix for a bunch of
24  *fingerprints
25  *
26  * The correlation matrix is computed only for the bit IDs that are set by a
27  * call to the
28  * function setBitIdList
29  *
30  * cr = BitCorrMatGenerator();
31  * cr.setBitIdList(descList);
32  * for each fingerprint in list of fingerprints {
33  * cr.collectVotes(fingerprint);
34  * }
35  * double *corrMat = cr.getCorrMat()
36  *
37  * The resulting correlation matrix is a one dimension matrix with only the
38  *lower triangle elements
39  * of the symmetric matrix
40  */
41  public:
43 
44  ~BitCorrMatGenerator() { delete[] dp_corrMat; }
45 
46  void initGenerator() {
47  dp_corrMat = 0;
48  d_descs.resize(0);
49  d_nExamples = 0;
50  };
51 
52  /*! \brief Set the list of bits that we are interested in correlating
53  *
54  * \param bitIdList is a list of bit ids that need to be correlated e.g. a
55  * list of the top ranked ensemble
56  * of bits
57  */
58  void setBitIdList(const RDKit::INT_VECT &bitIdList) {
59  d_descs = bitIdList;
60  int i, nd = d_descs.size();
61  int nelem = nd * (nd - 1) / 2;
62  delete[] dp_corrMat;
63 
64  dp_corrMat = new double[nd * (nd - 1) / 2];
65  for (i = 0; i < nelem; i++) {
66  dp_corrMat[i] = 0.0;
67  }
68  };
69 
70  //! \brief Get the number of examples we used so far to compute the
71  //! correlation matrix
72  int getNumExamples() const { return d_nExamples; };
73 
74  //! \brief Get the list of bit IDs that are used to generate the correlation
75  //! matrix
76  RDKit::INT_VECT getCorrBitList() const { return d_descs; };
77 
78  //! \brief Gets a pointer to the correlation matrix
79  double *getCorrMat() { return dp_corrMat; };
80 
81  //! \brief For each pair of on bits (bi, bj) in fp increase the correlation
82  //! count
83  //! for the pair by 1
84  void collectVotes(const BitVect &fp) {
85  unsigned int nd = d_descs.size();
86  // use a temporary bit vector to first mask the fingerprint
87  ExplicitBitVect ebv(nd);
88  int bi;
89  for (unsigned int i = 0; i < nd; i++) {
90  bi = d_descs[i];
91  if (fp[bi]) {
92  ebv.setBit(i);
93  }
94  }
95  for (unsigned i = 1; i < nd; i++) {
96  unsigned int itab = i * (i - 1) / 2;
97  if (ebv[i]) {
98  for (unsigned int j = 0; j < i; j++) {
99  if (ebv[j]) {
100  dp_corrMat[itab + j] += 1;
101  }
102  }
103  }
104  }
105  d_nExamples++;
106  };
107 
108  private:
109  RDKit::INT_VECT d_descs;
110  double *dp_corrMat;
111  int d_nExamples;
112 };
113 }
114 
115 #endif
Pulls in all the BitVect classes.
RDKit::INT_VECT getCorrBitList() const
Get the list of bit IDs that are used to generate the correlation matrix.
Class used to rank bits based on a specified measure of information.
void setBitIdList(const RDKit::INT_VECT &bitIdList)
Set the list of bits that we are interested in correlating.
std::vector< int > INT_VECT
Definition: types.h:247
int getNumExamples() const
Get the number of examples we used so far to compute the correlation matrix.
bool setBit(const unsigned int which)
sets a particular bit and returns its original value
double * getCorrMat()
Gets a pointer to the correlation matrix.
a class for bit vectors that are densely occupied
Abstract base class for storing BitVectors.
Definition: BitVect.h:24
BitCorrMatGenerator()
A class to generate a correlation matrix for a bunch of fingerprints.
void collectVotes(const BitVect &fp)
For each pair of on bits (bi, bj) in fp, increase the correlation count for the pair by 1.