RDKit
Open-source cheminformatics and machine learning.
Vector.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef __RD_VECTOR_H__
11 #define __RD_VECTOR_H__
12 
13 #include <RDGeneral/Invariant.h>
14 #include <RDGeneral/utils.h>
15 #include <math.h>
16 #include <iostream>
17 #include <iomanip>
18 #include <cstdlib>
19 #include <cstring>
20 #include <time.h>
21 #include <boost/random.hpp>
22 #include <boost/smart_ptr.hpp>
23 
24 namespace RDNumeric {
25 
26 
27  //! A class to represent vectors of numbers.
28  template <class TYPE> class Vector {
29 
30  public:
31 
32  typedef boost::shared_array<TYPE> DATA_SPTR;
33 
34  //! Initialize with only a size.
35  explicit Vector(unsigned int N) {
36  d_size = N;
37  TYPE *data = new TYPE[N];
38  memset(static_cast<void *>(data),0,d_size*sizeof(TYPE));
39  d_data.reset(data);
40  }
41 
42  //! Initialize with a size and default value.
43  Vector(unsigned int N, TYPE val) { //: Vector(N) {
44  d_size = N;
45  TYPE *data = new TYPE[N];
46 
47  unsigned int i;
48  for (i = 0; i < N; i++) {
49  data[i] = val;
50  }
51  d_data.reset(data);
52  }
53 
54  //! Initialize from a smart pointer.
55  /*!
56  <b>NOTE:</b> the data is not copied in this case
57  */
58  Vector(unsigned int N, DATA_SPTR data) {//TYPE *data) {
59  d_size = N;
60  d_data = data;
61  }
62 
63  //! copy constructor
64  /*! We make a copy of the other vector's data.
65  */
66  Vector(const Vector &other) {
67  d_size = other.size();
68  const TYPE *otherData = other.getData();
69  TYPE *data = new TYPE[d_size];
70 
71  memcpy(static_cast<void *>(data), static_cast<const void *>(otherData), d_size*sizeof(TYPE));
72  d_data.reset(data);
73  }
74 
75  ~Vector() {
76  }
77 
78  //! return the size (dimension) of the vector
79  unsigned int size() const {
80  return d_size;
81  }
82 
83  //! returns the value at a particular index
84  inline TYPE getVal(unsigned int i) const {
85  PRECONDITION(i<d_size,"bad index");
86  return d_data[i];
87  }
88 
89  //! sets the index at a particular value
90  inline void setVal(unsigned int i, TYPE val) {
91  PRECONDITION(i<d_size,"bad index");
92  d_data[i] = val;
93  }
94 
95  inline TYPE operator[](unsigned int i) const {
96  PRECONDITION(i<d_size,"bad index");
97  return d_data[i];
98  }
99 
100  inline TYPE& operator[](unsigned int i) {
101  PRECONDITION(i<d_size,"bad index");
102  return d_data[i];
103  }
104 
105  //! returns a pointer to our data array
106  inline TYPE *getData() {
107  return d_data.get();
108  }
109 
110  //! returns a const pointer to our data array
111  inline const TYPE *getData() const {
112  //return dp_data;
113  return d_data.get();
114  }
115 
116  //! Copy operator.
117  /*! We make a copy of the other Vector's data.
118  */
119 
121  PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
122  const TYPE *otherData = other.getData();
123  memcpy(static_cast<void *>(d_data.get()), static_cast<const void *>(otherData), d_size*sizeof(TYPE));
124  return *this;
125  }
126 
127  //! elementwise addition, vectors must be the same size.
129  PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
130  const TYPE *otherData = other.getData();
131  TYPE *data = d_data.get();
132  unsigned int i;
133  for (i = 0; i < d_size; i++) {
134  data[i] += otherData[i];
135  }
136  return *this;
137  }
138 
139  //! elementwise subtraction, vectors must be the same size.
141  PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
142  const TYPE *otherData = other.getData();
143  TYPE *data = d_data.get();
144  unsigned int i;
145  for (i = 0; i < d_size; i++) {
146  data[i] -= otherData[i];
147  }
148  return *this;
149  }
150 
151  //! multiplication by a scalar
152  Vector<TYPE>& operator *=(TYPE scale) {
153  unsigned int i;
154  for (i = 0; i < d_size; i++) {
155  d_data[i] *= scale;
156  }
157  return *this;
158  }
159 
160  //! division by a scalar
161  Vector<TYPE>& operator /=(TYPE scale) {
162  unsigned int i;
163  for (i = 0; i < d_size; i++) {
164  d_data[i] /= scale;
165  }
166  return *this;
167  }
168 
169  //! L2 norm squared
170  inline TYPE normL2Sq() const {
171  TYPE res = (TYPE)0.0;
172  unsigned int i;
173  TYPE *data = d_data.get();
174  for (i = 0; i < d_size; i++) {
175  res += data[i]*data[i];
176  }
177  return res;
178  }
179 
180  //! L2 norm
181  inline TYPE normL2() const {
182  return sqrt(this->normL2Sq());
183  }
184 
185  //! L1 norm
186  inline TYPE normL1() const {
187  TYPE res = (TYPE)0.0;
188  unsigned int i;
189  TYPE *data = d_data.get();
190  for (i = 0; i < d_size; i++) {
191  res += fabs(data[i]);
192  }
193  return res;
194  }
195 
196  //! L-infinity norm
197  inline TYPE normLinfinity() const {
198  TYPE res = (TYPE)(-1.0);
199  unsigned int i;
200  TYPE *data = d_data.get();
201  for (i = 0; i < d_size; i++) {
202  if (fabs(data[i]) > res) {
203  res = fabs(data[i]);
204  }
205  }
206  return res;
207  }
208 
209  //! \brief Gets the ID of the entry that has the largest absolute value
210  //! i.e. the entry being used for the L-infinity norm
211  inline unsigned int largestAbsValId() const {
212  TYPE res = (TYPE)(-1.0);
213  unsigned int i, id=d_size;
214  TYPE *data = d_data.get();
215  for (i = 0; i < d_size; i++) {
216  if (fabs(data[i]) > res) {
217  res = fabs(data[i]);
218  id = i;
219  }
220  }
221  return id;
222  }
223 
224  //! \brief Gets the ID of the entry that has the largest value
225  inline unsigned int largestValId() const {
226  TYPE res = (TYPE)(-1.e8);
227  unsigned int i, id=d_size;
228  TYPE *data = d_data.get();
229  for (i = 0; i < d_size; i++) {
230  if (data[i] > res) {
231  res = data[i];
232  id = i;
233  }
234  }
235  return id;
236  }
237 
238  //! \brief Gets the ID of the entry that has the smallest value
239  inline unsigned int smallestValId() const {
240  TYPE res = (TYPE)(1.e8);
241  unsigned int i, id=d_size;
242  TYPE *data = d_data.get();
243  for (i = 0; i < d_size; i++) {
244  if (data[i] < res) {
245  res = data[i];
246  id = i;
247  }
248  }
249  return id;
250  }
251 
252  //! returns the dot product between two Vectors
253  inline TYPE dotProduct(const Vector<TYPE> other) const {
254  PRECONDITION(d_size == other.size(), "Size mismatch in vector doct product");
255  const TYPE *oData = other.getData();
256  unsigned int i;
257  TYPE res = (TYPE)(0.0);
258  TYPE *data = d_data.get();
259  for (i = 0; i < d_size; i++) {
260  res += (data[i]*oData[i]);
261  }
262  return res;
263  }
264 
265  //! Normalize the vector using the L2 norm
266  inline void normalize() {
267  TYPE val = this->normL2();
268  (*this) /= val;
269  }
270 
271  //! Set to a random unit vector
272  inline void setToRandom(unsigned int seed=0) {
273  // we want to get our own RNG here instead of using the global
274  // one. This is related to Issue285.
275  RDKit::rng_type generator(42u);
276  RDKit::uniform_double dist(0,1.0);
277  RDKit::double_source_type randSource(generator,dist);
278  if (seed > 0) {
279  generator.seed(seed);
280  } else {
281  // we can't initialize using only clock(), because it's possible
282  // that we'll get here fast enough that clock() will return 0
283  // and generator.seed(0) is an error:
284  generator.seed(clock()+1);
285  }
286 
287  unsigned int i;
288  TYPE *data = d_data.get();
289  for (i = 0; i < d_size; i++) {
290  data[i] = randSource();
291  }
292  this->normalize();
293  }
294 
295  private:
296  unsigned int d_size; //! < our length
297  DATA_SPTR d_data;
298  Vector<TYPE>& operator=(const Vector<TYPE> &other);
299  };
300 
302 
303  //! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
304  template <typename T>
305  double
306  TanimotoSimilarity(const Vector<T> &v1,const Vector<T> &v2){
307  double numer=v1.dotProduct(v2);
308  if(numer==0.0) return 0.0;
309  double denom=v1.normL2Sq()+v2.normL2Sq()-numer;
310  if(denom==0.0) return 0.0;
311  return numer/denom;
312  }
313 } // end of namespace RDNumeric
314 
315 //! ostream operator for Vectors
316 template <typename TYPE> std::ostream & operator<<(std::ostream& target,
317  const RDNumeric::Vector<TYPE> &vec) {
318  unsigned int siz = vec.size();
319  target << "Size: " << siz << " [";
320  unsigned int i;
321  for (i = 0; i < siz; i++) {
322  target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
323  }
324  target << "]\n";
325  return target;
326 }
327 
328 #endif
329 
330 
void normalize()
Normalize the vector using the L2 norm.
Definition: Vector.h:266
unsigned int largestValId() const
Gets the ID of the entry that has the largest value.
Definition: Vector.h:225
TYPE normL2() const
L2 norm.
Definition: Vector.h:181
boost::shared_array< TYPE > DATA_SPTR
Definition: Vector.h:32
boost::minstd_rand rng_type
Definition: utils.h:32
std::ostream & operator<<(std::ostream &target, const RDNumeric::Vector< TYPE > &vec)
ostream operator for Vectors
Definition: Vector.h:316
double TanimotoSimilarity(const Vector< T > &v1, const Vector< T > &v2)
returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
Definition: Vector.h:306
Vector< TYPE > & operator-=(const Vector< TYPE > &other)
elementwise subtraction, vectors must be the same size.
Definition: Vector.h:140
TYPE & operator[](unsigned int i)
Definition: Vector.h:100
Vector< double > DoubleVector
Definition: Vector.h:301
unsigned int smallestValId() const
Gets the ID of the entry that has the smallest value.
Definition: Vector.h:239
Vector< TYPE > & assign(const Vector< TYPE > &other)
Copy operator.
Definition: Vector.h:120
unsigned int size() const
return the size (dimension) of the vector
Definition: Vector.h:79
TYPE normLinfinity() const
L-infinity norm.
Definition: Vector.h:197
void setVal(unsigned int i, TYPE val)
sets the value at a particular index
Definition: Vector.h:90
Vector< TYPE > & operator/=(TYPE scale)
division by a scalar
Definition: Vector.h:161
TYPE normL2Sq() const
L2 norm squared.
Definition: Vector.h:170
Vector(const Vector &other)
copy constructor
Definition: Vector.h:66
TYPE normL1() const
L1 norm.
Definition: Vector.h:186
boost::uniform_real uniform_double
Definition: utils.h:34
const TYPE * getData() const
returns a const pointer to our data array
Definition: Vector.h:111
TYPE operator[](unsigned int i) const
Definition: Vector.h:95
Vector< TYPE > & operator+=(const Vector< TYPE > &other)
elementwise addition, vectors must be the same size.
Definition: Vector.h:128
unsigned int largestAbsValId() const
Gets the ID of the entry that has the largest absolute value, i.e. the entry being used for the L-infinity norm.
Definition: Vector.h:211
boost::variate_generator< rng_type &, uniform_double > double_source_type
Definition: utils.h:36
TYPE dotProduct(const Vector< TYPE > other) const
returns the dot product between two Vectors
Definition: Vector.h:253
void setToRandom(unsigned int seed=0)
Set to a random unit vector.
Definition: Vector.h:272
Vector(unsigned int N, DATA_SPTR data)
Initialize from a smart pointer.
Definition: Vector.h:58
Vector< TYPE > & operator*=(TYPE scale)
multiplication by a scalar
Definition: Vector.h:152
Vector(unsigned int N, TYPE val)
Initialize with a size and default value.
Definition: Vector.h:43
TYPE getVal(unsigned int i) const
returns the value at a particular index
Definition: Vector.h:84
#define PRECONDITION(expr, mess)
Definition: Invariant.h:119
TYPE * getData()
returns a pointer to our data array
Definition: Vector.h:106
A class to represent vectors of numbers.
Definition: Vector.h:28
Vector(unsigned int N)
Initialize with only a size.
Definition: Vector.h:35