RDKit
Open-source cheminformatics and machine learning.
Vector.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef __RD_VECTOR_H__
12 #define __RD_VECTOR_H__
13 
14 #include <RDGeneral/Invariant.h>
15 #include <RDGeneral/utils.h>
16 #include <math.h>
17 #include <iostream>
18 #include <iomanip>
19 #include <cstdlib>
20 #include <cstring>
21 #include <time.h>
22 #include <boost/random.hpp>
23 #include <boost/smart_ptr.hpp>
24 
25 namespace RDNumeric {
26 
27 //! A class to represent vectors of numbers.
28 template <class TYPE>
29 class Vector {
30  public:
31  typedef boost::shared_array<TYPE> DATA_SPTR;
32 
33  //! Initialize with only a size.
34  explicit Vector(unsigned int N) {
35  d_size = N;
36  TYPE *data = new TYPE[N];
37  memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
38  d_data.reset(data);
39  }
40 
41  //! Initialize with a size and default value.
42  Vector(unsigned int N, TYPE val) { //: Vector(N) {
43  d_size = N;
44  TYPE *data = new TYPE[N];
45 
46  unsigned int i;
47  for (i = 0; i < N; i++) {
48  data[i] = val;
49  }
50  d_data.reset(data);
51  }
52 
53  //! Initialize from a smart pointer.
54  /*!
55  <b>NOTE:</b> the data is not copied in this case
56  */
57  Vector(unsigned int N, DATA_SPTR data) { // TYPE *data) {
58  d_size = N;
59  d_data = data;
60  }
61 
62  //! copy constructor
63  /*! We make a copy of the other vector's data.
64  */
65  Vector(const Vector &other) {
66  d_size = other.size();
67  const TYPE *otherData = other.getData();
68  TYPE *data = new TYPE[d_size];
69 
70  memcpy(static_cast<void *>(data), static_cast<const void *>(otherData),
71  d_size * sizeof(TYPE));
72  d_data.reset(data);
73  }
74 
75  ~Vector() {}
76 
77  //! return the size (dimension) of the vector
78  unsigned int size() const { return d_size; }
79 
80  //! returns the value at a particular index
81  inline TYPE getVal(unsigned int i) const {
82  PRECONDITION(i < d_size, "bad index");
83  return d_data[i];
84  }
85 
86  //! sets the index at a particular value
87  inline void setVal(unsigned int i, TYPE val) {
88  PRECONDITION(i < d_size, "bad index");
89  d_data[i] = val;
90  }
91 
92  inline TYPE operator[](unsigned int i) const {
93  PRECONDITION(i < d_size, "bad index");
94  return d_data[i];
95  }
96 
97  inline TYPE &operator[](unsigned int i) {
98  PRECONDITION(i < d_size, "bad index");
99  return d_data[i];
100  }
101 
102  //! returns a pointer to our data array
103  inline TYPE *getData() { return d_data.get(); }
104 
105  //! returns a const pointer to our data array
106  inline const TYPE *getData() const {
107  // return dp_data;
108  return d_data.get();
109  }
110 
111  //! Copy operator.
112  /*! We make a copy of the other Vector's data.
113  */
114 
116  PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
117  const TYPE *otherData = other.getData();
118  memcpy(static_cast<void *>(d_data.get()),
119  static_cast<const void *>(otherData), d_size * sizeof(TYPE));
120  return *this;
121  }
122 
123  //! elementwise addition, vectors must be the same size.
125  PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
126  const TYPE *otherData = other.getData();
127  TYPE *data = d_data.get();
128  unsigned int i;
129  for (i = 0; i < d_size; i++) {
130  data[i] += otherData[i];
131  }
132  return *this;
133  }
134 
135  //! elementwise subtraction, vectors must be the same size.
137  PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
138  const TYPE *otherData = other.getData();
139  TYPE *data = d_data.get();
140  unsigned int i;
141  for (i = 0; i < d_size; i++) {
142  data[i] -= otherData[i];
143  }
144  return *this;
145  }
146 
147  //! multiplication by a scalar
148  Vector<TYPE> &operator*=(TYPE scale) {
149  unsigned int i;
150  for (i = 0; i < d_size; i++) {
151  d_data[i] *= scale;
152  }
153  return *this;
154  }
155 
156  //! division by a scalar
157  Vector<TYPE> &operator/=(TYPE scale) {
158  unsigned int i;
159  for (i = 0; i < d_size; i++) {
160  d_data[i] /= scale;
161  }
162  return *this;
163  }
164 
165  //! L2 norm squared
166  inline TYPE normL2Sq() const {
167  TYPE res = (TYPE)0.0;
168  unsigned int i;
169  TYPE *data = d_data.get();
170  for (i = 0; i < d_size; i++) {
171  res += data[i] * data[i];
172  }
173  return res;
174  }
175 
176  //! L2 norm
177  inline TYPE normL2() const { return sqrt(this->normL2Sq()); }
178 
179  //! L1 norm
180  inline TYPE normL1() const {
181  TYPE res = (TYPE)0.0;
182  unsigned int i;
183  TYPE *data = d_data.get();
184  for (i = 0; i < d_size; i++) {
185  res += fabs(data[i]);
186  }
187  return res;
188  }
189 
190  //! L-infinity norm
191  inline TYPE normLinfinity() const {
192  TYPE res = (TYPE)(-1.0);
193  unsigned int i;
194  TYPE *data = d_data.get();
195  for (i = 0; i < d_size; i++) {
196  if (fabs(data[i]) > res) {
197  res = fabs(data[i]);
198  }
199  }
200  return res;
201  }
202 
203  //! \brief Gets the ID of the entry that has the largest absolute value
204  //! i.e. the entry being used for the L-infinity norm
205  inline unsigned int largestAbsValId() const {
206  TYPE res = (TYPE)(-1.0);
207  unsigned int i, id = d_size;
208  TYPE *data = d_data.get();
209  for (i = 0; i < d_size; i++) {
210  if (fabs(data[i]) > res) {
211  res = fabs(data[i]);
212  id = i;
213  }
214  }
215  return id;
216  }
217 
218  //! \brief Gets the ID of the entry that has the largest value
219  inline unsigned int largestValId() const {
220  TYPE res = (TYPE)(-1.e8);
221  unsigned int i, id = d_size;
222  TYPE *data = d_data.get();
223  for (i = 0; i < d_size; i++) {
224  if (data[i] > res) {
225  res = data[i];
226  id = i;
227  }
228  }
229  return id;
230  }
231 
232  //! \brief Gets the ID of the entry that has the smallest value
233  inline unsigned int smallestValId() const {
234  TYPE res = (TYPE)(1.e8);
235  unsigned int i, id = d_size;
236  TYPE *data = d_data.get();
237  for (i = 0; i < d_size; i++) {
238  if (data[i] < res) {
239  res = data[i];
240  id = i;
241  }
242  }
243  return id;
244  }
245 
246  //! returns the dot product between two Vectors
247  inline TYPE dotProduct(const Vector<TYPE> other) const {
248  PRECONDITION(d_size == other.size(),
249  "Size mismatch in vector doct product");
250  const TYPE *oData = other.getData();
251  unsigned int i;
252  TYPE res = (TYPE)(0.0);
253  TYPE *data = d_data.get();
254  for (i = 0; i < d_size; i++) {
255  res += (data[i] * oData[i]);
256  }
257  return res;
258  }
259 
260  //! Normalize the vector using the L2 norm
261  inline void normalize() {
262  TYPE val = this->normL2();
263  (*this) /= val;
264  }
265 
266  //! Set to a random unit vector
267  inline void setToRandom(unsigned int seed = 0) {
268  // we want to get our own RNG here instead of using the global
269  // one. This is related to Issue285.
270  RDKit::rng_type generator(42u);
271  RDKit::uniform_double dist(0, 1.0);
272  RDKit::double_source_type randSource(generator, dist);
273  if (seed > 0) {
274  generator.seed(seed);
275  } else {
276  // we can't initialize using only clock(), because it's possible
277  // that we'll get here fast enough that clock() will return 0
278  // and generator.seed(0) is an error:
279  generator.seed(clock() + 1);
280  }
281 
282  unsigned int i;
283  TYPE *data = d_data.get();
284  for (i = 0; i < d_size; i++) {
285  data[i] = randSource();
286  }
287  this->normalize();
288  }
289 
290  private:
291  unsigned int d_size; //! < our length
292  DATA_SPTR d_data;
293  Vector<TYPE> &operator=(const Vector<TYPE> &other);
294 };
295 
297 
298 //! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
299 template <typename T>
300 double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
301  double numer = v1.dotProduct(v2);
302  if (numer == 0.0) return 0.0;
303  double denom = v1.normL2Sq() + v2.normL2Sq() - numer;
304  if (denom == 0.0) return 0.0;
305  return numer / denom;
306 }
307 } // end of namespace RDNumeric
308 
309 //! ostream operator for Vectors
310 template <typename TYPE>
311 std::ostream &operator<<(std::ostream &target,
312  const RDNumeric::Vector<TYPE> &vec) {
313  unsigned int siz = vec.size();
314  target << "Size: " << siz << " [";
315  unsigned int i;
316  for (i = 0; i < siz; i++) {
317  target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
318  }
319  target << "]\n";
320  return target;
321 }
322 
323 #endif
void normalize()
Normalize the vector using the L2 norm.
Definition: Vector.h:261
TYPE normL2() const
L2 norm.
Definition: Vector.h:177
boost::shared_array< TYPE > DATA_SPTR
Definition: Vector.h:31
boost::minstd_rand rng_type
Definition: utils.h:36
std::ostream & operator<<(std::ostream &target, const RDNumeric::Vector< TYPE > &vec)
ostream operator for Vectors
Definition: Vector.h:311
double TanimotoSimilarity(const Vector< T > &v1, const Vector< T > &v2)
returns the algebraic tanimoto similarity [defn&#39; from JCIM 46:587-96 (2006)]
Definition: Vector.h:300
unsigned int size() const
return the size (dimension) of the vector
Definition: Vector.h:78
Vector< TYPE > & operator-=(const Vector< TYPE > &other)
elementwise subtraction, vectors must be the same size.
Definition: Vector.h:136
TYPE & operator[](unsigned int i)
Definition: Vector.h:97
Vector< double > DoubleVector
Definition: Vector.h:296
unsigned int largestValId() const
Gets the ID of the entry that has the largest value.
Definition: Vector.h:219
Vector< TYPE > & assign(const Vector< TYPE > &other)
Copy operator.
Definition: Vector.h:115
void setVal(unsigned int i, TYPE val)
sets the index at a particular value
Definition: Vector.h:87
unsigned int largestAbsValId() const
Gets the ID of the entry that has the largest absolute value i.e. the entry being used for the L-infi...
Definition: Vector.h:205
Vector< TYPE > & operator/=(TYPE scale)
division by a scalar
Definition: Vector.h:157
TYPE normL2Sq() const
L2 norm squared.
Definition: Vector.h:166
TYPE operator[](unsigned int i) const
Definition: Vector.h:92
Vector(const Vector &other)
copy constructor
Definition: Vector.h:65
boost::uniform_real uniform_double
Definition: utils.h:38
Vector< TYPE > & operator+=(const Vector< TYPE > &other)
elementwise addition, vectors must be the same size.
Definition: Vector.h:124
const TYPE * getData() const
returns a const pointer to our data array
Definition: Vector.h:106
void setToRandom(unsigned int seed=0)
Set to a random unit vector.
Definition: Vector.h:267
TYPE dotProduct(const Vector< TYPE > other) const
returns the dot product between two Vectors
Definition: Vector.h:247
Vector(unsigned int N, DATA_SPTR data)
Initialize from a smart pointer.
Definition: Vector.h:57
Vector< TYPE > & operator*=(TYPE scale)
multiplication by a scalar
Definition: Vector.h:148
Vector(unsigned int N, TYPE val)
Initialize with a size and default value.
Definition: Vector.h:42
#define PRECONDITION(expr, mess)
Definition: Invariant.h:108
boost::variate_generator< rng_type &, uniform_double > double_source_type
Definition: utils.h:40
TYPE * getData()
returns a pointer to our data array
Definition: Vector.h:103
unsigned int smallestValId() const
Gets the ID of the entry that has the smallest value.
Definition: Vector.h:233
A class to represent vectors of numbers.
Definition: Vector.h:29
TYPE getVal(unsigned int i) const
returns the value at a particular index
Definition: Vector.h:81
TYPE normL1() const
L1 norm.
Definition: Vector.h:180
TYPE normLinfinity() const
L-infinity norm.
Definition: Vector.h:191
Vector(unsigned int N)
Initialize with only a size.
Definition: Vector.h:34