RDKit
Open-source cheminformatics and machine learning.
RDValue-doublemagic.h
Go to the documentation of this file.
1 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #include <RDGeneral/export.h>
32 #ifndef RDKIT_RDVALUE_PTRMAGIC_H
33 #define RDKIT_RDVALUE_PTRMAGIC_H
34 
35 #include <boost/cstdint.hpp>
36 #include <cassert>
37 #include <boost/any.hpp>
38 #include "Invariant.h"
39 #include <iostream>
40 #include <iomanip>
41 #include <sstream>
42 #include <vector>
43 #include <string>
45 #include <boost/utility.hpp>
46 #include <boost/lexical_cast.hpp>
47 #include <boost/type_traits.hpp>
48 #include <boost/static_assert.hpp>
50 #include <cmath>
51 #include "LocaleSwitcher.h"
52 
53 #define RDVALUE_HASBOOL
54 
55 namespace RDKit {
56 
57  // Inspired by
58  // https://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html
59 // 16 bit storage for value types using Quiet NaN spaces in
60 // doubles
61 // Won't work on Solaris and some other OSes, because mmap there hands out
62 // addresses from the top of memory down (pointers may not fit in 48 bits)
63 // Example check:
64 // std::string *pointer = new std::string(v);
65 // assert((reinterpret_cast<boost::uint64_t>(pointer) & StringTag) == 0);
66 
67 // NOTE: the layout depends on the implementation's endianness; a compile-time
68 // typedef is needed to figure this out. The current implementation probably
68 // assumes little endian - this still needs to be checked.
69 
70 /*
71  Encoding for storing other things as a double. Use
72  Quiet NaN
73  Quiet NaN: // used to encode types
74  F F F 1XXX < - X = type bits (first bit is set to one)
75 
76  seeeeeee|eeeemmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm
77  s1111111|11111ppp|pppppppp|pppppppp|pppppppp|pppppppp|pppppppp|pppppppp
78  ^- first mantissa bit 1 everything else is "payload" -^
79  ^- exponent bits all 1 and mustn't be all-zero (as it
80  ^- any sign bit would be INF then)
81 
82  Available
83  8 = 1000 MaxDouble // Not really a tag, is a sentinel
84  9 = 1001 Float
85 A = 1010 Int32
86 B = 1011 Uint32
87 C = 1100 Bool
88  D = 1101 <none>
89  E = 1110 <none>
90  F = 1111 PtrTag (look at lower 3 bits for type)
91 */
92 
93 namespace RDTypeTag {
94 static const boost::uint64_t NaN = 0xfff7FFFFFFFFFFFF; // signalling NaN
95 static const boost::uint64_t MaxDouble = 0xfff8000000000000; //
96 static const boost::uint64_t DoubleTag = 0xfff8000000000000; //
97 static const boost::uint64_t FloatTag = 0xfff9000000000000; //
98 static const boost::uint64_t IntTag = 0xfffa000000000000; //
99 static const boost::uint64_t UnsignedIntTag = 0xfffb000000000000; //
100 static const boost::uint64_t BoolTag = 0xfffc000000000000; //
101 
102 // PTR Tags use the last 3 bits for typing info
103 static const boost::uint64_t PtrTag = 0xffff000000000000;
104 static const boost::uint64_t StringTag = 0xffff000000000001; // 001
105 static const boost::uint64_t VecDoubleTag = 0xffff000000000002; // 010
106 static const boost::uint64_t VecFloatTag = 0xffff000000000003; // 011
107 static const boost::uint64_t VecIntTag = 0xffff000000000004; // 100
108 static const boost::uint64_t VecUnsignedIntTag = 0xffff000000000005; // 101
109 static const boost::uint64_t VecStringTag = 0xffff000000000006; // 110
110 static const boost::uint64_t AnyTag = 0xffff000000000007; // 111
111 
112 // Retrieves the tag (and PtrMask) from the type
113 template <class T>
114 inline boost::uint64_t GetTag() {
115  return AnyTag; }
116  template<> inline boost::uint64_t GetTag<double>() { return MaxDouble; }
117  template<> inline boost::uint64_t GetTag<float>() { return FloatTag; }
118  template<> inline boost::uint64_t GetTag<int>() { return IntTag; }
119  template<> inline boost::uint64_t GetTag<unsigned int>() { return UnsignedIntTag; }
120  template<> inline boost::uint64_t GetTag<bool>() { return BoolTag; }
121  template<> inline boost::uint64_t GetTag<std::string>() { return StringTag; }
122  template<> inline boost::uint64_t GetTag<std::vector<double> >() { return VecDoubleTag; }
123  template<> inline boost::uint64_t GetTag<std::vector<float> >() { return VecFloatTag; }
124  template<> inline boost::uint64_t GetTag<std::vector<int> >() { return VecIntTag; }
125  template<> inline boost::uint64_t GetTag<std::vector<unsigned int> >() { return VecUnsignedIntTag; }
126  template<> inline boost::uint64_t GetTag<std::vector<std::string> >() { return VecStringTag; }
127  template<> inline boost::uint64_t GetTag<boost::any>() { return AnyTag; }
128 }
129 
130 
131 struct RDValue {
132  // Bit Twidling for conversion from the Tag to a Pointer
133  static const boost::uint64_t TagMask = 0xFFFF000000000000;
134  static const boost::uint64_t PointerTagMask = 0xFFFF000000000007;
135  static const boost::uint64_t ApplyMask = 0x0000FFFFFFFFFFFF;
136  static const boost::uint64_t ApplyPtrMask = 0x0000FFFFFFFFFFF8;
137 
138  union {
139  double doubleBits;
140  boost::uint64_t otherBits;
141  };
142 
143  inline RDValue() : doubleBits(0.0) {}
144 
145  inline RDValue(double number) {
146  if (boost::math::isnan(number)) {
147  // Store a signalling NaN for NaN's.
148  // quiet NaNs are used for other types.
149  otherBits = RDTypeTag::NaN;
150  assert(boost::math::isnan(doubleBits));
151  }
152  else
153  doubleBits = number;
154  }
155 
156  inline RDValue(float number) {
157  otherBits = 0 | RDTypeTag::FloatTag;
158  memcpy(((char*)&otherBits), &number, sizeof(float));
159  }
160 
161  inline RDValue(int32_t number) {
162  otherBits = (((boost::uint64_t)number) & ApplyMask ) | RDTypeTag::IntTag;
163  }
164 
165  inline RDValue(unsigned int number) {
166  otherBits = (((boost::uint64_t)number) & ApplyMask ) | RDTypeTag::UnsignedIntTag;
167  }
168 
169  inline RDValue(bool number) {
170  otherBits = (static_cast<boost::uint64_t>(number) & ApplyMask) | RDTypeTag::BoolTag;
171  }
172 
173  inline RDValue(boost::any *pointer) {
174  // ensure that the pointer really is only 48 bit
175  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
176  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
177  }
178 
179  inline RDValue(const boost::any &any) {
180  // ensure that the pointer really is only 48 bit
181  boost::any *pointer = new boost::any(any);
182  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
183  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
184  }
185 
186  // Unknown types are stored as boost::any
187  template <class T>
188  inline RDValue(const T&v) {
189  boost::any *pointer = new boost::any(v);
190  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
191  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
192  }
193 
194  inline RDValue(const std::string &v) {
195  std::string *pointer = new std::string(v);
196  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::StringTag) == 0);
197  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::StringTag;
198  }
199 
200  inline RDValue(const std::vector<double> &v) {
201  std::vector<double> *pointer = new std::vector<double>(v);
202  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecDoubleTag) == 0);
203  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecDoubleTag;
204  }
205 
206  inline RDValue(const std::vector<float> &v) {
207  std::vector<float> *pointer = new std::vector<float>(v);
208  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecFloatTag) == 0);
209  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecFloatTag;
210  }
211 
212  inline RDValue(const std::vector<int> &v) {
213  std::vector<int> *pointer = new std::vector<int>(v);
214  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecIntTag) == 0);
215  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecIntTag;
216  }
217 
218  inline RDValue(const std::vector<unsigned int> &v) {
219  std::vector<unsigned int> *pointer = new std::vector<unsigned int>(v);
220  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecIntTag) == 0);
221  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecUnsignedIntTag;
222  }
223 
224  inline RDValue(const std::vector<std::string> &v) {
225  std::vector<std::string> *pointer = new std::vector<std::string>(v);
226  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecStringTag) == 0);
227  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecStringTag;
228  }
229 
230  boost::uint64_t getTag() const {
231  if (otherBits < RDTypeTag::MaxDouble ||
232  (otherBits & RDTypeTag::NaN) == RDTypeTag::NaN) {
233  return RDTypeTag::DoubleTag;
234  }
235 
236  boost::uint64_t tag = otherBits & TagMask;
237  if (tag == RDTypeTag::PtrTag)
238  return otherBits & PointerTagMask;
239  return tag;
240  }
241 
242  // ptrCast - unsafe, use rdvalue_cast instead.
243  template<class T>
244  inline T* ptrCast() const {
245  return reinterpret_cast<T*>(otherBits & ~RDTypeTag::GetTag<T>());
246  }
247 
248  // RDValue doesn't have an explicit destructor, it must
249  // be wrapped in a container.
250  // The idea is that POD types don't need to be destroyed
251  // and this allows the container optimization possibilities.
252  inline void destroy() {
253  switch(getTag()) {
255  delete ptrCast<std::string>();
256  break;
258  delete ptrCast<std::vector<double> >();
259  break;
261  delete ptrCast<std::vector<float> >();
262  break;
264  delete ptrCast<std::vector<int> >();
265  break;
267  delete ptrCast<std::vector<unsigned int> >();
268  break;
270  delete ptrCast<std::vector<std::string> >();
271  break;
272  case RDTypeTag::AnyTag:
273  delete ptrCast<boost::any>();
274  break;
275  default:
276  break;
277  }
278  }
279 
280  static
281  inline void cleanup_rdvalue(RDValue v) { v.destroy(); }
282 
283 };
284 
285 /////////////////////////////////////////////////////////////////////////////////////
286 // Given two RDValue::Values - copy the appropriate structure
287 // RDValue doesn't have a copy constructor, the default
288 // copy act's like a move for better value semantics.
289 // Containers may need to copy though.
290 inline void copy_rdvalue(RDValue &dest,
291  const RDValue &src) {
292  dest.destroy();
293  switch(src.getTag()) {
295  dest = RDValue(*src.ptrCast<std::string>());
296  break;
298  dest = RDValue(*src.ptrCast<std::vector<double> >());
299  break;
301  dest = RDValue(*src.ptrCast<std::vector<float> >());
302  break;
304  dest = RDValue(*src.ptrCast<std::vector<int> >());
305  break;
307  dest = RDValue(*src.ptrCast<std::vector<unsigned int> >());
308  break;
310  dest = RDValue(*src.ptrCast<std::vector<std::string> >());
311  break;
312  case RDTypeTag::AnyTag:
313  dest = RDValue(*src.ptrCast<boost::any>());
314  break;
315  default:
316  dest = src;
317  }
318 }
319 
320 /////////////////////////////////////////////////////////////////////////////////////
321 // rdvalue_is<T>
322 
323 template<class T>
324 inline bool rdvalue_is(RDValue v) {
325  return v.getTag() == RDTypeTag::GetTag<typename boost::remove_reference<T>::type>();
326 }
327 
328 template<>
329 inline bool rdvalue_is<double>(RDValue v) {
330  return v.otherBits < RDTypeTag::MaxDouble ||
331  (v.otherBits & RDTypeTag::NaN) == RDTypeTag::NaN;
332 }
333 
334 template<>
335 inline bool rdvalue_is<const double &>(RDValue v) {
336  return rdvalue_is<double>(v);
337 }
338 
339 /*
340 template<>
341 inline bool rdvalue_is<bool>(RDValue v) {
342  return (v.getTag() == RDTypeTag::IntTag &&
343  (static_cast<int32_t>(v.otherBits & ~RDTypeTag::IntTag) == 1 ||
344  static_cast<int32_t>(v.otherBits & ~RDTypeTag::IntTag) == 0 ));
345 }
346 
347 template<>
348 inline bool rdvalue_is<const bool&>(RDValue v) {
349  return rdvalue_is<bool>(v);
350 }
351 */
352 
353 /////////////////////////////////////////////////////////////////////////////////////
354 // rdvalue_cast<T>
355 //
356 // POD types do not support reference semantics. Other types do.
357 // rdvalue_cast<const std::vector<double> &>(RDValue); // ok
358 // rdvalue_cast<const float &>(RDValue); // bad_any_cast
359 
361 // Get stuff stored in boost any
362 template<class T>
363 inline T rdvalue_cast(RDValue v) {
364  // Disable reference and pointer casts to POD data.
365  BOOST_STATIC_ASSERT( !(
366  (boost::is_pointer<T>::value && (
367  boost::is_integral<typename boost::remove_pointer<T>::type>::value ||
368  boost::is_floating_point<typename boost::remove_pointer<T>::type>::value)) ||
369  (boost::is_reference<T>::value && (
370  boost::is_integral<typename boost::remove_reference<T>::type>::value ||
371  boost::is_floating_point<typename boost::remove_reference<T>::type>::value))
372  ));
373 
374  if (rdvalue_is<boost::any>(v)) {
375  return boost::any_cast<T>(*v.ptrCast<boost::any>());
376  }
377  throw boost::bad_any_cast();
378 }
379 
380 // POD casts
381 template<>
382 inline double rdvalue_cast<double>(RDValue v) {
383  if (rdvalue_is<double>(v)) return v.doubleBits;
384  throw boost::bad_any_cast();
385 }
386 
387 template<>
388 inline float rdvalue_cast<float>(RDValue v) {
389  if (rdvalue_is<float>(v)) {
390  float f;
391  memcpy(&f, ((char*)&v.otherBits), sizeof(float));
392  return f;
393  }
394  throw boost::bad_any_cast();
395 }
396 
397 // n.b. with const expressions, could use ~RDTagTypes::GetTag<T>()
398 // and enable_if
399 template<>
400 inline int rdvalue_cast<int>(RDValue v) {
401  if (rdvalue_is<int>(v)) return static_cast<int32_t>(v.otherBits &
403  throw boost::bad_any_cast();
404 }
405 template<>
406 inline unsigned int rdvalue_cast<unsigned int>(RDValue v) {
407  if (rdvalue_is<unsigned int>(v)) return static_cast<uint32_t>(
408  v.otherBits & ~RDTypeTag::UnsignedIntTag);
409  throw boost::bad_any_cast();
410 }
411 
412 template<>
413 inline bool rdvalue_cast<bool>(RDValue v) {
414  if (rdvalue_is<bool>(v)) return static_cast<bool>(
415  v.otherBits & ~RDTypeTag::BoolTag);
416  throw boost::bad_any_cast();
417 }
418 
419 } // namespace rdkit
420 #endif
bool rdvalue_is< double >(RDValue v)
RDValue(double number)
RDValue(const std::vector< unsigned int > &v)
RDValue(boost::any *pointer)
boost::uint64_t GetTag< float >()
static const boost::uint64_t MaxDouble
void copy_rdvalue(RDValue &dest, const RDValue &src)
static const boost::uint64_t VecDoubleTag
boost::uint64_t GetTag< int >()
static const boost::uint64_t UnsignedIntTag
RDValue(unsigned int number)
RDValue(int32_t number)
T rdvalue_cast(RDValue v)
static const boost::uint64_t AnyTag
static const boost::uint64_t DoubleTag
RDValue(const std::vector< float > &v)
static const boost::uint64_t FloatTag
static void cleanup_rdvalue(RDValue v)
static const boost::uint64_t StringTag
bool rdvalue_is(RDValue v)
RDValue(const std::vector< std::string > &v)
RDValue(bool number)
static const boost::uint64_t VecIntTag
static const boost::uint64_t VecUnsignedIntTag
Std stuff.
Definition: Atom.h:30
static const boost::uint64_t VecStringTag
boost::uint64_t GetTag()
static const boost::uint64_t NaN
static const boost::uint64_t IntTag
boost::uint64_t GetTag< unsigned int >()
RDValue RDValue_cast_t
static const boost::uint64_t BoolTag
boost::uint64_t getTag() const
RDValue(float number)
static const boost::uint64_t VecFloatTag
RDValue(const std::vector< double > &v)
boost::uint64_t GetTag< double >()
RDValue(const boost::any &any)
boost::uint64_t GetTag< bool >()
static const boost::uint64_t PtrTag
RDValue(const std::vector< int > &v)
RDValue(const std::string &v)
boost::uint64_t otherBits