RDKit
Open-source cheminformatics and machine learning.
StreamOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_STREAMOPS_H
13 #define _RD_STREAMOPS_H
14 
15 #include "types.h"
16 #include "Invariant.h"
17 #include "RDProps.h"
18 #include <string>
19 #include <sstream>
20 #include <iostream>
21 #include <boost/cstdint.hpp>
22 #include <boost/detail/endian.hpp>
23 
24 namespace RDKit {
25 // this code block for handling endian problems is from :
26 // http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
// Byte-order tags; the functions below take them as the `from` / `to`
// template arguments of EndianSwapBytes.
// HOST_ENDIAN_ORDER is chosen at preprocessing time from the
// BOOST_LITTLE_ENDIAN / BOOST_BIG_ENDIAN macros; compilation stops with
// #error when neither macro is defined.
// NOTE(review): the embedded listing numbers jump from 27 to 30, so the
// lines that declare LITTLE_ENDIAN_ORDER and BIG_ENDIAN_ORDER are not
// visible here -- confirm them against the real header before editing.
27 enum EEndian {
30 #if defined(BOOST_LITTLE_ENDIAN)
31  HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
32 #elif defined(BOOST_BIG_ENDIAN)
33  HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
34 #else
35 #error "Failed to determine the system endian value"
36 #endif
37 };
38 
//! Returns a copy of \c value with the order of its first \c size bytes
//! reversed.
//!
//! \tparam T     type of the value; it is handled purely as raw bytes, so it
//!               should be a plain arithmetic type
//! \tparam size  number of bytes to reverse (EndianSwapBytes passes sizeof(T));
//!               values below 2 return \c value unchanged
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  static_assert(size <= sizeof(T),
                "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) {
    return value;
  }

  // Reverse the object representation in place through a char pointer.
  // The previous implementation wrote one member of a union and read the
  // other, which is undefined behaviour in C++.
  char *bytes = reinterpret_cast<char *>(&value);
  for (unsigned int lo = 0, hi = size - 1; lo < hi; ++lo, --hi) {
    const char tmp = bytes[lo];
    bytes[lo] = bytes[hi];
    bytes[hi] = tmp;
  }
  return value;
}
59 
60 // Here is the function you will use. Again there is two compile-time assertion
61 // that use the boost librarie. You could probably comment them out, but if you
62 // do be cautious not to use this function for anything else than integers
63 // types. This function need to be calles like this :
64 //
65 // int x = someValue;
66 // int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
67 //
68 template <EEndian from, EEndian to, class T>
69 inline T EndianSwapBytes(T value) {
70  // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
71  BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
72  sizeof(T) == 8);
73  if (sizeof(T) == 1) return value;
74 
75  // A : La donnée à swapper est d'un type arithmetic
76  // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
77 
78  // Si from et to sont du même type on ne swap pas.
79  if (from == to) return value;
80 
81  return SwapBytes<T, sizeof(T)>(value);
82 }
83 template <EEndian from, EEndian to>
84 inline char EndianSwapBytes(char value) {
85  return value;
86 }
87 template <EEndian from, EEndian to>
88 inline unsigned char EndianSwapBytes(unsigned char value) {
89  return value;
90 }
91 template <EEndian from, EEndian to>
92 inline signed char EndianSwapBytes(signed char value) {
93  return value;
94 }
95 // --------------------------------------
96 
97 //! Packs an integer and outputs it to a stream
98 inline void appendPackedIntToStream(std::stringstream &ss,
99  boost::uint32_t num) {
100  int nbytes, bix;
101  unsigned int val, res;
102  char tc;
103 
104  res = num;
105  while (1) {
106  if (res < (1 << 7)) {
107  val = (res << 1);
108  nbytes = 1;
109  break;
110  }
111  res -= (1 << 7);
112  if (res < (1 << 14)) {
113  val = ((res << 2) | 1);
114  nbytes = 2;
115  break;
116  }
117  res -= (1 << 14);
118  if (res < (1 << 21)) {
119  val = ((res << 3) | 3);
120  nbytes = 3;
121  break;
122  }
123  res -= (1 << 21);
124  if (res < (1 << 29)) {
125  val = ((res << 3) | 7);
126  nbytes = 4;
127  break;
128  } else {
129  CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
130  }
131  }
132  // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
133 
134  for (bix = 0; bix < nbytes; bix++) {
135  tc = (char)(val & 255);
136  ss.write(&tc, 1);
137  val >>= 8;
138  }
139 }
140 
141 //! Reads an integer from a stream in packed format and returns the result.
142 inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
143  boost::uint32_t val, num;
144  int shift, offset;
145  char tmp;
146  ss.read(&tmp, sizeof(tmp));
147  val = UCHAR(tmp);
148  offset = 0;
149  if ((val & 1) == 0) {
150  shift = 1;
151  } else if ((val & 3) == 1) {
152  ss.read((char *)&tmp, sizeof(tmp));
153  val |= (UCHAR(tmp) << 8);
154  shift = 2;
155  offset = (1 << 7);
156  } else if ((val & 7) == 3) {
157  ss.read((char *)&tmp, sizeof(tmp));
158  val |= (UCHAR(tmp) << 8);
159  ss.read((char *)&tmp, sizeof(tmp));
160  val |= (UCHAR(tmp) << 16);
161  shift = 3;
162  offset = (1 << 7) + (1 << 14);
163  } else {
164  ss.read((char *)&tmp, sizeof(tmp));
165  val |= (UCHAR(tmp) << 8);
166  ss.read((char *)&tmp, sizeof(tmp));
167  val |= (UCHAR(tmp) << 16);
168  ss.read((char *)&tmp, sizeof(tmp));
169  val |= (UCHAR(tmp) << 24);
170  shift = 3;
171  offset = (1 << 7) + (1 << 14) + (1 << 21);
172  }
173  num = (val >> shift) + offset;
174  // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
175  return num;
176 }
177 
178 //! Reads an integer from a char * in packed format and returns the result.
179 //! The argument is advanced
180 inline boost::uint32_t pullPackedIntFromString(const char *&text) {
181  boost::uint32_t val, num;
182  int shift, offset;
183  char tmp;
184  tmp = *text;
185  text++;
186  val = UCHAR(tmp);
187  offset = 0;
188  if ((val & 1) == 0) {
189  shift = 1;
190  } else if ((val & 3) == 1) {
191  tmp = *text;
192  text++;
193  val |= (UCHAR(tmp) << 8);
194  shift = 2;
195  offset = (1 << 7);
196  } else if ((val & 7) == 3) {
197  tmp = *text;
198  text++;
199  val |= (UCHAR(tmp) << 8);
200  tmp = *text;
201  text++;
202  val |= (UCHAR(tmp) << 16);
203  shift = 3;
204  offset = (1 << 7) + (1 << 14);
205  } else {
206  tmp = *text;
207  text++;
208  val |= (UCHAR(tmp) << 8);
209  tmp = *text;
210  text++;
211  val |= (UCHAR(tmp) << 16);
212  tmp = *text;
213  text++;
214  val |= (UCHAR(tmp) << 24);
215  shift = 3;
216  offset = (1 << 7) + (1 << 14) + (1 << 21);
217  }
218  num = (val >> shift) + offset;
219  // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
220  return num;
221 }
222 
223 //! does a binary write of an object to a stream
224 template <typename T>
225 void streamWrite(std::ostream &ss, const T &val) {
226  T tval = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(val);
227  ss.write((const char *)&tval, sizeof(T));
228 }
229 
230 //! special case for string
231 inline void streamWrite(std::ostream &ss, const std::string &what) {
232  unsigned int l = rdcast<unsigned int>(what.length());
233  ss.write((const char *)&l, sizeof(l));
234  ss.write(what.c_str(), sizeof(char) * l);
235 };
236 
237 template <typename T>
238 void streamWriteVec(std::ostream &ss, const T &val) {
239  streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
240  for (size_t i = 0; i < val.size(); ++i) streamWrite(ss, val[i]);
241 }
242 
243 //! does a binary read of an object from a stream
244 template <typename T>
245 void streamRead(std::istream &ss, T &loc) {
246  T tloc;
247  ss.read((char *)&tloc, sizeof(T));
248  loc = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(tloc);
249 }
250 
251 //! special case for string
252 template <class T>
253 void streamRead(std::istream &ss, T &obj, int version) {
254  RDUNUSED_PARAM(version);
255  streamRead(ss, obj);
256 }
257 
//! special case for string
//!
//! Reads an unsigned int character count (raw bytes, no byte swapping)
//! followed by that many characters into \c what, replacing its previous
//! contents. Embedded '\0' characters are preserved.
//! \param version  accepted for signature compatibility; not used
inline void streamRead(std::istream &ss, std::string &what, int version) {
  static_cast<void>(version);  // unused
  unsigned int l = 0;  // stays 0 (empty result) when the count cannot be read
  ss.read(reinterpret_cast<char *>(&l), sizeof(l));

  // Read straight into the string's own storage. This removes the manual
  // new[]/delete[] pair (which leaked if the stream threw) and keeps the full
  // length: assigning from a char* stopped at the first embedded '\0'.
  what.assign(l, '\0');
  if (l > 0) {
    ss.read(&what[0], sizeof(char) * l);
  }
}
268 
269 template <class T>
270 void streamReadVec(std::istream &ss, T &val) {
271  boost::uint64_t size;
272  streamRead(ss, size);
273  val.resize(size);
274 
275  for (size_t i = 0; i < size; ++i) streamRead(ss, val[i]);
276 }
277 
278 inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
279  int version) {
280  boost::uint64_t size;
281  streamRead(ss, size);
282  val.resize(size);
283 
284  for (size_t i = 0; i < size; ++i) streamRead(ss, val[i], version);
285 }
286 
//! grabs the next line from an instream and returns it.
//! A single trailing carriage return (as left by CRLF line endings) is
//! removed.
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  if (!line.empty() && *line.rbegin() == '\r') {
    line.resize(line.size() - 1);
  }
  return line;
}
//! grabs the next line from an instream and returns it.
inline std::string getLine(std::istream &inStream) {
  std::istream *streamPtr = &inStream;
  return getLine(streamPtr);
}
300 
// n.b. We can't use RDTypeTag directly, they are implementation
//  specific
//
// One-byte type tags; streamReadProp reads one of these after each property
// key and dispatches on it.
namespace DTags {
// scalar values
const unsigned char StringTag = 0;
const unsigned char IntTag = 1;
const unsigned char UnsignedIntTag = 2;
const unsigned char BoolTag = 3;
const unsigned char FloatTag = 4;
const unsigned char DoubleTag = 5;

// vector values
const unsigned char VecStringTag = 6;
const unsigned char VecIntTag = 7;
const unsigned char VecUIntTag = 8;
// NOTE(review): streamReadProp has no case for VecBoolTag -- confirm whether
// that is intentional.
const unsigned char VecBoolTag = 9;
const unsigned char VecFloatTag = 10;
const unsigned char VecDoubleTag = 11;

const unsigned char EndTag = 0xFF;
}
318 
// Returns true when the tag of the entry's value is one of the cases listed
// in the switch, false otherwise. streamWriteProp and streamWriteProps use it
// to decide which entries are written.
// NOTE(review): the embedded listing numbers skip lines inside the switch
// (321, 323, 326 and 328-332 are absent), so the full set of accepted tags
// cannot be read from here; only IntTag, BoolTag and FloatTag are visible.
// Confirm against the real header before editing.
319 inline bool isSerializable(const Dict::Pair &pair) {
320  switch (pair.val.getTag()) {
322  case RDTypeTag::IntTag:
324  case RDTypeTag::BoolTag:
325  case RDTypeTag::FloatTag:
327 
333 
334  return true;
335  default:
336  return false;
337  }
338 }
339 
// Writes one dictionary entry to the stream.
// Returns false without writing anything when isSerializable() rejects the
// entry. Otherwise the key is written first, then the value is written by the
// branch that matches its tag, and true is returned. A tag with no matching
// branch reports "Failed to write <key>" on std::cerr and returns false
// (the key has already been written at that point).
// NOTE(review): many source lines are missing from this listing (the embedded
// numbers skip 345-346, 350, 353-355, 358-359, ...), so most case labels and
// several write statements are not visible. streamReadProp expects a
// one-byte type tag after the key; the statements that write it are
// presumably among the missing lines -- confirm against the real header.
340 inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair) {
341  if (!isSerializable(pair)) return false;
342 
343  streamWrite(ss, pair.key);
344  switch (pair.val.getTag()) {
347  streamWrite(ss, rdvalue_cast<std::string>(pair.val));
348  break;
349  case RDTypeTag::IntTag:
351  streamWrite(ss, rdvalue_cast<int>(pair.val));
352  break;
356  break;
357  case RDTypeTag::BoolTag:
360  break;
361  case RDTypeTag::FloatTag:
364  break;
368  break;
369 
372  streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(pair.val));
373  break;
376  streamWriteVec(ss, rdvalue_cast<std::vector<double>>(pair.val));
377  break;
380  streamWriteVec(ss, rdvalue_cast<std::vector<float>>(pair.val));
381  break;
384  streamWriteVec(ss, rdvalue_cast<std::vector<int>>(pair.val));
385  break;
388  streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(pair.val));
389  break;
390  default:
391  std::cerr << "Failed to write " << pair.key << std::endl;
392  return false;
393  }
394  return true;
395 }
396 
397 inline bool streamWriteProps(std::ostream &ss, const RDProps &props,
398  bool savePrivate=false, bool saveComputed=false) {
399  STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
400  std::set<std::string> propnames(propsToSave.begin(), propsToSave.end());
401 
402  const Dict &dict = props.getDict();
403  unsigned int count = 0;
404  for(Dict::DataType::const_iterator it = dict.getData().begin();
405  it != dict.getData().end();
406  ++it) {
407  if(isSerializable(*it) && propnames.find(it->key) != propnames.end()) {
408  count ++;
409  }
410  }
411 
412  streamWrite(ss, count); // packed int?
413 
414  unsigned int writtenCount = 0;
415  for(Dict::DataType::const_iterator it = dict.getData().begin();
416  it != dict.getData().end();
417  ++it) {
418  if (propnames.find(it->key) != propnames.end()) {
419  if (isSerializable(*it)) {
420  // note - not all properties are serializable, this may be
421  // a null op
422  if (streamWriteProp(ss, *it)) {
423  writtenCount++;
424  }
425  }
426  }
427  }
428  POSTCONDITION(count==writtenCount, "Estimated property count not equal to written");
429  return true;
430 }
431 
432 template<class T>
433 void readRDValue(std::istream &ss, RDValue &value) {
434  T v;
435  streamRead(ss, v);
436  value = v;
437 }
438 
439 template <class T>
440 void readRDVecValue(std::istream &ss, RDValue &value) {
441  std::vector<T> v;
442  streamReadVec(ss, v);
443  value = v;
444 }
445 
446 inline void readRDValueString(std::istream &ss, RDValue &value) {
447  std::string v;
448  int version=0;
449  streamRead(ss, v, version);
450  value = v;
451 }
452 
453 inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
454  std::vector<std::string> v;
455  int version = 0;
456  streamReadStringVec(ss, v, version);
457  value = v;
458 }
459 
460 inline bool streamReadProp(std::istream &ss, Dict::Pair &pair) {
461  int version=0;
462  streamRead(ss, pair.key, version);
463 
464  unsigned char type;
465  streamRead(ss, type);
466  switch(type) {
467  case DTags::StringTag: readRDValueString(ss, pair.val); break;
468  case DTags::IntTag: readRDValue<int>(ss, pair.val); break;
469  case DTags::UnsignedIntTag: readRDValue<unsigned int>(ss, pair.val); break;
470  case DTags::BoolTag: readRDValue<bool>(ss, pair.val); break;
471  case DTags::FloatTag: readRDValue<float>(ss, pair.val); break;
472  case DTags::DoubleTag: readRDValue<double>(ss, pair.val); break;
473 
474  case DTags::VecStringTag:
475  readRDStringVecValue(ss, pair.val);
476  break;
477  case DTags::VecIntTag:
478  readRDVecValue<int>(ss, pair.val);
479  break;
480  case DTags::VecUIntTag:
481  readRDVecValue<unsigned int>(ss, pair.val);
482  break;
483  case DTags::VecFloatTag:
484  readRDVecValue<float>(ss, pair.val);
485  break;
486  case DTags::VecDoubleTag:
487  readRDVecValue<double>(ss, pair.val);
488  break;
489 
490  default:
491  return false;
492  }
493  return true;
494 }
495 
496 inline unsigned int streamReadProps(std::istream &ss, RDProps &props) {
497  unsigned int count;
498  streamRead(ss, count);
499 
500  Dict &dict = props.getDict();
501  dict.getData().resize(count);
502  for(unsigned index = 0; index<count; ++index) {
503  CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index]),
504  "Corrupted property serialization detected");
505  }
506 
507  return count;
508 }
509 
510 }
511 
512 #endif
const unsigned char FloatTag
Definition: StreamOps.h:308
#define POSTCONDITION(expr, mess)
Definition: Invariant.h:116
const unsigned char EndTag
Definition: StreamOps.h:316
static const boost::uint64_t VecDoubleTag
static const boost::uint64_t UnsignedIntTag
T EndianSwapBytes(T value)
Definition: StreamOps.h:69
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:100
const DataType & getData() const
Access to the underlying data.
Definition: Dict.h:114
RDValue val
Definition: Dict.h:40
T rdvalue_cast(RDValue v)
unsigned char UCHAR
Definition: types.h:245
EEndian
Definition: StreamOps.h:27
static const boost::uint64_t DoubleTag
const unsigned char VecUIntTag
Definition: StreamOps.h:312
const unsigned char UnsignedIntTag
Definition: StreamOps.h:306
double rdvalue_cast< double >(RDValue v)
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition: StreamOps.h:245
void readRDValueString(std::istream &ss, RDValue &value)
Definition: StreamOps.h:446
void readRDValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:433
boost::uint32_t readPackedIntFromStream(std::stringstream &ss)
Reads an integer from a stream in packed format and returns the result.
Definition: StreamOps.h:142
static const boost::uint64_t FloatTag
boost::uint32_t pullPackedIntFromString(const char *&text)
Definition: StreamOps.h:180
bool streamWriteProps(std::ostream &ss, const RDProps &props, bool savePrivate=false, bool saveComputed=false)
Definition: StreamOps.h:397
bool rdvalue_cast< bool >(RDValue v)
const unsigned char VecBoolTag
Definition: StreamOps.h:313
void streamReadStringVec(std::istream &ss, std::vector< std::string > &val, int version)
Definition: StreamOps.h:278
static const boost::uint64_t StringTag
void readRDVecValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:440
std::string key
Definition: Dict.h:39
const unsigned char VecFloatTag
Definition: StreamOps.h:314
static const boost::uint64_t VecIntTag
static const boost::uint64_t VecUnsignedIntTag
unsigned int rdvalue_cast< unsigned int >(RDValue v)
const unsigned char VecIntTag
Definition: StreamOps.h:311
const unsigned char DoubleTag
Definition: StreamOps.h:309
T SwapBytes(T value)
Definition: StreamOps.h:42
Std stuff.
Definition: Atom.h:30
const Dict & getDict() const
gets the underlying Dictionary
Definition: RDProps.h:26
static const boost::uint64_t VecStringTag
bool streamReadProp(std::istream &ss, Dict::Pair &pair)
Definition: StreamOps.h:460
unsigned int streamReadProps(std::istream &ss, RDProps &props)
Definition: StreamOps.h:496
void streamWriteVec(std::ostream &ss, const T &val)
Definition: StreamOps.h:238
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:195
float rdvalue_cast< float >(RDValue v)
static const boost::uint64_t IntTag
const unsigned char BoolTag
Definition: StreamOps.h:307
int rdvalue_cast< int >(RDValue v)
bool isSerializable(const Dict::Pair &pair)
Definition: StreamOps.h:319
static const boost::uint64_t BoolTag
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition: StreamOps.h:225
std::string getLine(std::istream *inStream)
grabs the next line from an instream and returns it.
Definition: StreamOps.h:288
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
Definition: RDProps.h:35
const unsigned char StringTag
Definition: StreamOps.h:304
boost::uint64_t getTag() const
const unsigned char VecStringTag
Definition: StreamOps.h:310
static const boost::uint64_t VecFloatTag
void appendPackedIntToStream(std::stringstream &ss, boost::uint32_t num)
Packs an integer and outputs it to a stream.
Definition: StreamOps.h:98
const unsigned char IntTag
Definition: StreamOps.h:305
const unsigned char VecDoubleTag
Definition: StreamOps.h:315
void readRDStringVecValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:453
The Dict class can be used to store objects of arbitrary type keyed by strings.
Definition: Dict.h:36
std::vector< std::string > STR_VECT
Definition: Dict.h:29
void streamReadVec(std::istream &ss, T &val)
Definition: StreamOps.h:270
bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair)
Definition: StreamOps.h:340