JsonCpp project page JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 // Copyright 2007-2011 Baptiste Lepilleur
2 // Distributed under MIT license, or public domain if desired and
3 // recognized in your jurisdiction.
4 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
5 
6 #if !defined(JSON_IS_AMALGAMATION)
7 #include <json/assertions.h>
8 #include <json/reader.h>
9 #include <json/value.h>
10 #include "json_tool.h"
11 #endif // if !defined(JSON_IS_AMALGAMATION)
12 #include <utility>
13 #include <cstdio>
14 #include <cassert>
15 #include <cstring>
16 #include <istream>
17 #include <sstream>
18 #include <memory>
19 #include <set>
20 
21 #if defined(_MSC_VER) && _MSC_VER < 1500 // VC++ 8.0 and below
22 #define snprintf _snprintf
23 #endif
24 
25 #if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
26 // Disable warning about strdup being deprecated.
27 #pragma warning(disable : 4996)
28 #endif
29 
// Maximum nesting depth Reader::readValue() accepts before raising an error.
static const int stackLimit_g = 1000;
// Current nesting depth of Reader::readValue(). This is file-scope state
// shared by every Reader; Reader::parse() resets it before each parse.
static int stackDepth_g = 0;
32 
33 namespace Json {
34 
35 #if __cplusplus >= 201103L
36 typedef std::unique_ptr<CharReader> CharReaderPtr;
37 #else
38 typedef std::auto_ptr<CharReader> CharReaderPtr;
39 #endif
40 
41 // Implementation of class Features
42 // ////////////////////////////////
43 
45  : allowComments_(true), strictRoot_(false),
46  allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
47 
49 
51  Features features;
52  features.allowComments_ = false;
53  features.strictRoot_ = true;
54  features.allowDroppedNullPlaceholders_ = false;
55  features.allowNumericKeys_ = false;
56  return features;
57 }
58 
59 // Implementation of class Reader
60 // ////////////////////////////////
61 
63  for (; begin < end; ++begin)
64  if (*begin == '\n' || *begin == '\r')
65  return true;
66  return false;
67 }
68 
69 // Class Reader
70 // //////////////////////////////////////////////////////////////////
71 
73  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
74  lastValue_(), commentsBefore_(), features_(Features::all()),
75  collectComments_() {}
76 
77 Reader::Reader(const Features& features)
78  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
79  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
80 }
81 
82 bool
83 Reader::parse(const std::string& document, Value& root, bool collectComments) {
84  document_ = document;
85  const char* begin = document_.c_str();
86  const char* end = begin + document_.length();
87  return parse(begin, end, root, collectComments);
88 }
89 
90 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
91  // std::istream_iterator<char> begin(sin);
92  // std::istream_iterator<char> end;
93  // Those would allow streamed input from a file, if parse() were a
94  // template function.
95 
96  // Since std::string is reference-counted, this at least does not
97  // create an extra copy.
98  std::string doc;
99  std::getline(sin, doc, (char)EOF);
100  return parse(doc, root, collectComments);
101 }
102 
103 bool Reader::parse(const char* beginDoc,
104  const char* endDoc,
105  Value& root,
106  bool collectComments) {
107  if (!features_.allowComments_) {
108  collectComments = false;
109  }
110 
111  begin_ = beginDoc;
112  end_ = endDoc;
113  collectComments_ = collectComments;
114  current_ = begin_;
115  lastValueEnd_ = 0;
116  lastValue_ = 0;
117  commentsBefore_ = "";
118  errors_.clear();
119  while (!nodes_.empty())
120  nodes_.pop();
121  nodes_.push(&root);
122 
123  stackDepth_g = 0; // Yes, this is bad coding, but options are limited.
124  bool successful = readValue();
125  Token token;
126  skipCommentTokens(token);
127  if (collectComments_ && !commentsBefore_.empty())
128  root.setComment(commentsBefore_, commentAfter);
129  if (features_.strictRoot_) {
130  if (!root.isArray() && !root.isObject()) {
131  // Set error location to start of doc, ideally should be first token found
132  // in doc
133  token.type_ = tokenError;
134  token.start_ = beginDoc;
135  token.end_ = endDoc;
136  addError(
137  "A valid JSON document must be either an array or an object value.",
138  token);
139  return false;
140  }
141  }
142  return successful;
143 }
144 
145 bool Reader::readValue() {
146  // This is a non-reentrant way to support a stackLimit. Terrible!
147  // But this deprecated class has a security problem: Bad input can
148  // cause a seg-fault. This seems like a fair, binary-compatible way
149  // to prevent the problem.
150  if (stackDepth_g >= stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
151  ++stackDepth_g;
152 
153  Token token;
154  skipCommentTokens(token);
155  bool successful = true;
156 
157  if (collectComments_ && !commentsBefore_.empty()) {
158  currentValue().setComment(commentsBefore_, commentBefore);
159  commentsBefore_ = "";
160  }
161 
162  switch (token.type_) {
163  case tokenObjectBegin:
164  successful = readObject(token);
165  currentValue().setOffsetLimit(current_ - begin_);
166  break;
167  case tokenArrayBegin:
168  successful = readArray(token);
169  currentValue().setOffsetLimit(current_ - begin_);
170  break;
171  case tokenNumber:
172  successful = decodeNumber(token);
173  break;
174  case tokenString:
175  successful = decodeString(token);
176  break;
177  case tokenTrue:
178  {
179  Value v(true);
180  currentValue().swapPayload(v);
181  currentValue().setOffsetStart(token.start_ - begin_);
182  currentValue().setOffsetLimit(token.end_ - begin_);
183  }
184  break;
185  case tokenFalse:
186  {
187  Value v(false);
188  currentValue().swapPayload(v);
189  currentValue().setOffsetStart(token.start_ - begin_);
190  currentValue().setOffsetLimit(token.end_ - begin_);
191  }
192  break;
193  case tokenNull:
194  {
195  Value v;
196  currentValue().swapPayload(v);
197  currentValue().setOffsetStart(token.start_ - begin_);
198  currentValue().setOffsetLimit(token.end_ - begin_);
199  }
200  break;
201  case tokenArraySeparator:
202  case tokenObjectEnd:
203  case tokenArrayEnd:
204  if (features_.allowDroppedNullPlaceholders_) {
205  // "Un-read" the current token and mark the current value as a null
206  // token.
207  current_--;
208  Value v;
209  currentValue().swapPayload(v);
210  currentValue().setOffsetStart(current_ - begin_ - 1);
211  currentValue().setOffsetLimit(current_ - begin_);
212  break;
213  } // Else, fall through...
214  default:
215  currentValue().setOffsetStart(token.start_ - begin_);
216  currentValue().setOffsetLimit(token.end_ - begin_);
217  return addError("Syntax error: value, object or array expected.", token);
218  }
219 
220  if (collectComments_) {
221  lastValueEnd_ = current_;
222  lastValue_ = &currentValue();
223  }
224 
225  --stackDepth_g;
226  return successful;
227 }
228 
229 void Reader::skipCommentTokens(Token& token) {
230  if (features_.allowComments_) {
231  do {
232  readToken(token);
233  } while (token.type_ == tokenComment);
234  } else {
235  readToken(token);
236  }
237 }
238 
239 bool Reader::readToken(Token& token) {
240  skipSpaces();
241  token.start_ = current_;
242  Char c = getNextChar();
243  bool ok = true;
244  switch (c) {
245  case '{':
246  token.type_ = tokenObjectBegin;
247  break;
248  case '}':
249  token.type_ = tokenObjectEnd;
250  break;
251  case '[':
252  token.type_ = tokenArrayBegin;
253  break;
254  case ']':
255  token.type_ = tokenArrayEnd;
256  break;
257  case '"':
258  token.type_ = tokenString;
259  ok = readString();
260  break;
261  case '/':
262  token.type_ = tokenComment;
263  ok = readComment();
264  break;
265  case '0':
266  case '1':
267  case '2':
268  case '3':
269  case '4':
270  case '5':
271  case '6':
272  case '7':
273  case '8':
274  case '9':
275  case '-':
276  token.type_ = tokenNumber;
277  readNumber();
278  break;
279  case 't':
280  token.type_ = tokenTrue;
281  ok = match("rue", 3);
282  break;
283  case 'f':
284  token.type_ = tokenFalse;
285  ok = match("alse", 4);
286  break;
287  case 'n':
288  token.type_ = tokenNull;
289  ok = match("ull", 3);
290  break;
291  case ',':
292  token.type_ = tokenArraySeparator;
293  break;
294  case ':':
295  token.type_ = tokenMemberSeparator;
296  break;
297  case 0:
298  token.type_ = tokenEndOfStream;
299  break;
300  default:
301  ok = false;
302  break;
303  }
304  if (!ok)
305  token.type_ = tokenError;
306  token.end_ = current_;
307  return true;
308 }
309 
310 void Reader::skipSpaces() {
311  while (current_ != end_) {
312  Char c = *current_;
313  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
314  ++current_;
315  else
316  break;
317  }
318 }
319 
320 bool Reader::match(Location pattern, int patternLength) {
321  if (end_ - current_ < patternLength)
322  return false;
323  int index = patternLength;
324  while (index--)
325  if (current_[index] != pattern[index])
326  return false;
327  current_ += patternLength;
328  return true;
329 }
330 
331 bool Reader::readComment() {
332  Location commentBegin = current_ - 1;
333  Char c = getNextChar();
334  bool successful = false;
335  if (c == '*')
336  successful = readCStyleComment();
337  else if (c == '/')
338  successful = readCppStyleComment();
339  if (!successful)
340  return false;
341 
342  if (collectComments_) {
343  CommentPlacement placement = commentBefore;
344  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
345  if (c != '*' || !containsNewLine(commentBegin, current_))
346  placement = commentAfterOnSameLine;
347  }
348 
349  addComment(commentBegin, current_, placement);
350  }
351  return true;
352 }
353 
354 static std::string normalizeEOL(Reader::Location begin, Reader::Location end) {
355  std::string normalized;
356  normalized.reserve(end - begin);
357  Reader::Location current = begin;
358  while (current != end) {
359  char c = *current++;
360  if (c == '\r') {
361  if (current != end && *current == '\n')
362  // convert dos EOL
363  ++current;
364  // convert Mac EOL
365  normalized += '\n';
366  } else {
367  normalized += c;
368  }
369  }
370  return normalized;
371 }
372 
373 void
374 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
375  assert(collectComments_);
376  const std::string& normalized = normalizeEOL(begin, end);
377  if (placement == commentAfterOnSameLine) {
378  assert(lastValue_ != 0);
379  lastValue_->setComment(normalized, placement);
380  } else {
381  commentsBefore_ += normalized;
382  }
383 }
384 
385 bool Reader::readCStyleComment() {
386  while (current_ != end_) {
387  Char c = getNextChar();
388  if (c == '*' && *current_ == '/')
389  break;
390  }
391  return getNextChar() == '/';
392 }
393 
394 bool Reader::readCppStyleComment() {
395  while (current_ != end_) {
396  Char c = getNextChar();
397  if (c == '\n')
398  break;
399  if (c == '\r') {
400  // Consume DOS EOL. It will be normalized in addComment.
401  if (current_ != end_ && *current_ == '\n')
402  getNextChar();
403  // Break on Moc OS 9 EOL.
404  break;
405  }
406  }
407  return true;
408 }
409 
410 void Reader::readNumber() {
411  const char *p = current_;
412  char c = '0'; // stopgap for already consumed character
413  // integral part
414  while (c >= '0' && c <= '9')
415  c = (current_ = p) < end_ ? *p++ : 0;
416  // fractional part
417  if (c == '.') {
418  c = (current_ = p) < end_ ? *p++ : 0;
419  while (c >= '0' && c <= '9')
420  c = (current_ = p) < end_ ? *p++ : 0;
421  }
422  // exponential part
423  if (c == 'e' || c == 'E') {
424  c = (current_ = p) < end_ ? *p++ : 0;
425  if (c == '+' || c == '-')
426  c = (current_ = p) < end_ ? *p++ : 0;
427  while (c >= '0' && c <= '9')
428  c = (current_ = p) < end_ ? *p++ : 0;
429  }
430 }
431 
432 bool Reader::readString() {
433  Char c = 0;
434  while (current_ != end_) {
435  c = getNextChar();
436  if (c == '\\')
437  getNextChar();
438  else if (c == '"')
439  break;
440  }
441  return c == '"';
442 }
443 
444 bool Reader::readObject(Token& tokenStart) {
445  Token tokenName;
446  std::string name;
447  Value init(objectValue);
448  currentValue().swapPayload(init);
449  currentValue().setOffsetStart(tokenStart.start_ - begin_);
450  while (readToken(tokenName)) {
451  bool initialTokenOk = true;
452  while (tokenName.type_ == tokenComment && initialTokenOk)
453  initialTokenOk = readToken(tokenName);
454  if (!initialTokenOk)
455  break;
456  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
457  return true;
458  name = "";
459  if (tokenName.type_ == tokenString) {
460  if (!decodeString(tokenName, name))
461  return recoverFromError(tokenObjectEnd);
462  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
463  Value numberName;
464  if (!decodeNumber(tokenName, numberName))
465  return recoverFromError(tokenObjectEnd);
466  name = numberName.asString();
467  } else {
468  break;
469  }
470 
471  Token colon;
472  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
473  return addErrorAndRecover(
474  "Missing ':' after object member name", colon, tokenObjectEnd);
475  }
476  Value& value = currentValue()[name];
477  nodes_.push(&value);
478  bool ok = readValue();
479  nodes_.pop();
480  if (!ok) // error already set
481  return recoverFromError(tokenObjectEnd);
482 
483  Token comma;
484  if (!readToken(comma) ||
485  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
486  comma.type_ != tokenComment)) {
487  return addErrorAndRecover(
488  "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
489  }
490  bool finalizeTokenOk = true;
491  while (comma.type_ == tokenComment && finalizeTokenOk)
492  finalizeTokenOk = readToken(comma);
493  if (comma.type_ == tokenObjectEnd)
494  return true;
495  }
496  return addErrorAndRecover(
497  "Missing '}' or object member name", tokenName, tokenObjectEnd);
498 }
499 
500 bool Reader::readArray(Token& tokenStart) {
501  Value init(arrayValue);
502  currentValue().swapPayload(init);
503  currentValue().setOffsetStart(tokenStart.start_ - begin_);
504  skipSpaces();
505  if (*current_ == ']') // empty array
506  {
507  Token endArray;
508  readToken(endArray);
509  return true;
510  }
511  int index = 0;
512  for (;;) {
513  Value& value = currentValue()[index++];
514  nodes_.push(&value);
515  bool ok = readValue();
516  nodes_.pop();
517  if (!ok) // error already set
518  return recoverFromError(tokenArrayEnd);
519 
520  Token token;
521  // Accept Comment after last item in the array.
522  ok = readToken(token);
523  while (token.type_ == tokenComment && ok) {
524  ok = readToken(token);
525  }
526  bool badTokenType =
527  (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
528  if (!ok || badTokenType) {
529  return addErrorAndRecover(
530  "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
531  }
532  if (token.type_ == tokenArrayEnd)
533  break;
534  }
535  return true;
536 }
537 
538 bool Reader::decodeNumber(Token& token) {
539  Value decoded;
540  if (!decodeNumber(token, decoded))
541  return false;
542  currentValue().swapPayload(decoded);
543  currentValue().setOffsetStart(token.start_ - begin_);
544  currentValue().setOffsetLimit(token.end_ - begin_);
545  return true;
546 }
547 
548 bool Reader::decodeNumber(Token& token, Value& decoded) {
549  // Attempts to parse the number as an integer. If the number is
550  // larger than the maximum supported value of an integer then
551  // we decode the number as a double.
552  Location current = token.start_;
553  bool isNegative = *current == '-';
554  if (isNegative)
555  ++current;
556  // TODO: Help the compiler do the div and mod at compile time or get rid of them.
557  Value::LargestUInt maxIntegerValue =
560  Value::LargestUInt threshold = maxIntegerValue / 10;
561  Value::LargestUInt value = 0;
562  while (current < token.end_) {
563  Char c = *current++;
564  if (c < '0' || c > '9')
565  return decodeDouble(token, decoded);
566  Value::UInt digit(c - '0');
567  if (value >= threshold) {
568  // We've hit or exceeded the max value divided by 10 (rounded down). If
569  // a) we've only just touched the limit, b) this is the last digit, and
570  // c) it's small enough to fit in that rounding delta, we're okay.
571  // Otherwise treat this number as a double to avoid overflow.
572  if (value > threshold || current != token.end_ ||
573  digit > maxIntegerValue % 10) {
574  return decodeDouble(token, decoded);
575  }
576  }
577  value = value * 10 + digit;
578  }
579  if (isNegative)
580  decoded = -Value::LargestInt(value);
581  else if (value <= Value::LargestUInt(Value::maxInt))
582  decoded = Value::LargestInt(value);
583  else
584  decoded = value;
585  return true;
586 }
587 
588 bool Reader::decodeDouble(Token& token) {
589  Value decoded;
590  if (!decodeDouble(token, decoded))
591  return false;
592  currentValue().swapPayload(decoded);
593  currentValue().setOffsetStart(token.start_ - begin_);
594  currentValue().setOffsetLimit(token.end_ - begin_);
595  return true;
596 }
597 
598 bool Reader::decodeDouble(Token& token, Value& decoded) {
599  double value = 0;
600  std::string buffer(token.start_, token.end_);
601  std::istringstream is(buffer);
602  if (!(is >> value))
603  return addError("'" + std::string(token.start_, token.end_) +
604  "' is not a number.",
605  token);
606  decoded = value;
607  return true;
608 }
609 
610 bool Reader::decodeString(Token& token) {
611  std::string decoded_string;
612  if (!decodeString(token, decoded_string))
613  return false;
614  Value decoded(decoded_string);
615  currentValue().swapPayload(decoded);
616  currentValue().setOffsetStart(token.start_ - begin_);
617  currentValue().setOffsetLimit(token.end_ - begin_);
618  return true;
619 }
620 
621 bool Reader::decodeString(Token& token, std::string& decoded) {
622  decoded.reserve(token.end_ - token.start_ - 2);
623  Location current = token.start_ + 1; // skip '"'
624  Location end = token.end_ - 1; // do not include '"'
625  while (current != end) {
626  Char c = *current++;
627  if (c == '"')
628  break;
629  else if (c == '\\') {
630  if (current == end)
631  return addError("Empty escape sequence in string", token, current);
632  Char escape = *current++;
633  switch (escape) {
634  case '"':
635  decoded += '"';
636  break;
637  case '/':
638  decoded += '/';
639  break;
640  case '\\':
641  decoded += '\\';
642  break;
643  case 'b':
644  decoded += '\b';
645  break;
646  case 'f':
647  decoded += '\f';
648  break;
649  case 'n':
650  decoded += '\n';
651  break;
652  case 'r':
653  decoded += '\r';
654  break;
655  case 't':
656  decoded += '\t';
657  break;
658  case 'u': {
659  unsigned int unicode;
660  if (!decodeUnicodeCodePoint(token, current, end, unicode))
661  return false;
662  decoded += codePointToUTF8(unicode);
663  } break;
664  default:
665  return addError("Bad escape sequence in string", token, current);
666  }
667  } else {
668  decoded += c;
669  }
670  }
671  return true;
672 }
673 
674 bool Reader::decodeUnicodeCodePoint(Token& token,
675  Location& current,
676  Location end,
677  unsigned int& unicode) {
678 
679  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
680  return false;
681  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
682  // surrogate pairs
683  if (end - current < 6)
684  return addError(
685  "additional six characters expected to parse unicode surrogate pair.",
686  token,
687  current);
688  unsigned int surrogatePair;
689  if (*(current++) == '\\' && *(current++) == 'u') {
690  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
691  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
692  } else
693  return false;
694  } else
695  return addError("expecting another \\u token to begin the second half of "
696  "a unicode surrogate pair",
697  token,
698  current);
699  }
700  return true;
701 }
702 
703 bool Reader::decodeUnicodeEscapeSequence(Token& token,
704  Location& current,
705  Location end,
706  unsigned int& unicode) {
707  if (end - current < 4)
708  return addError(
709  "Bad unicode escape sequence in string: four digits expected.",
710  token,
711  current);
712  unicode = 0;
713  for (int index = 0; index < 4; ++index) {
714  Char c = *current++;
715  unicode *= 16;
716  if (c >= '0' && c <= '9')
717  unicode += c - '0';
718  else if (c >= 'a' && c <= 'f')
719  unicode += c - 'a' + 10;
720  else if (c >= 'A' && c <= 'F')
721  unicode += c - 'A' + 10;
722  else
723  return addError(
724  "Bad unicode escape sequence in string: hexadecimal digit expected.",
725  token,
726  current);
727  }
728  return true;
729 }
730 
731 bool
732 Reader::addError(const std::string& message, Token& token, Location extra) {
733  ErrorInfo info;
734  info.token_ = token;
735  info.message_ = message;
736  info.extra_ = extra;
737  errors_.push_back(info);
738  return false;
739 }
740 
741 bool Reader::recoverFromError(TokenType skipUntilToken) {
742  int errorCount = int(errors_.size());
743  Token skip;
744  for (;;) {
745  if (!readToken(skip))
746  errors_.resize(errorCount); // discard errors caused by recovery
747  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
748  break;
749  }
750  errors_.resize(errorCount);
751  return false;
752 }
753 
754 bool Reader::addErrorAndRecover(const std::string& message,
755  Token& token,
756  TokenType skipUntilToken) {
757  addError(message, token);
758  return recoverFromError(skipUntilToken);
759 }
760 
761 Value& Reader::currentValue() { return *(nodes_.top()); }
762 
763 Reader::Char Reader::getNextChar() {
764  if (current_ == end_)
765  return 0;
766  return *current_++;
767 }
768 
769 void Reader::getLocationLineAndColumn(Location location,
770  int& line,
771  int& column) const {
772  Location current = begin_;
773  Location lastLineStart = current;
774  line = 0;
775  while (current < location && current != end_) {
776  Char c = *current++;
777  if (c == '\r') {
778  if (*current == '\n')
779  ++current;
780  lastLineStart = current;
781  ++line;
782  } else if (c == '\n') {
783  lastLineStart = current;
784  ++line;
785  }
786  }
787  // column & line start at 1
788  column = int(location - lastLineStart) + 1;
789  ++line;
790 }
791 
792 std::string Reader::getLocationLineAndColumn(Location location) const {
793  int line, column;
794  getLocationLineAndColumn(location, line, column);
795  char buffer[18 + 16 + 16 + 1];
796 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
797 #if defined(WINCE)
798  _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
799 #else
800  sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
801 #endif
802 #else
803  snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
804 #endif
805  return buffer;
806 }
807 
808 // Deprecated. Preserved for backward compatibility
809 std::string Reader::getFormatedErrorMessages() const {
810  return getFormattedErrorMessages();
811 }
812 
814  std::string formattedMessage;
815  for (Errors::const_iterator itError = errors_.begin();
816  itError != errors_.end();
817  ++itError) {
818  const ErrorInfo& error = *itError;
819  formattedMessage +=
820  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
821  formattedMessage += " " + error.message_ + "\n";
822  if (error.extra_)
823  formattedMessage +=
824  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
825  }
826  return formattedMessage;
827 }
828 
829 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
830  std::vector<Reader::StructuredError> allErrors;
831  for (Errors::const_iterator itError = errors_.begin();
832  itError != errors_.end();
833  ++itError) {
834  const ErrorInfo& error = *itError;
835  Reader::StructuredError structured;
836  structured.offset_start = error.token_.start_ - begin_;
837  structured.offset_limit = error.token_.end_ - begin_;
838  structured.message = error.message_;
839  allErrors.push_back(structured);
840  }
841  return allErrors;
842 }
843 
844 bool Reader::pushError(const Value& value, const std::string& message) {
845  size_t length = end_ - begin_;
846  if(value.getOffsetStart() > length
847  || value.getOffsetLimit() > length)
848  return false;
849  Token token;
850  token.type_ = tokenError;
851  token.start_ = begin_ + value.getOffsetStart();
852  token.end_ = end_ + value.getOffsetLimit();
853  ErrorInfo info;
854  info.token_ = token;
855  info.message_ = message;
856  info.extra_ = 0;
857  errors_.push_back(info);
858  return true;
859 }
860 
861 bool Reader::pushError(const Value& value, const std::string& message, const Value& extra) {
862  size_t length = end_ - begin_;
863  if(value.getOffsetStart() > length
864  || value.getOffsetLimit() > length
865  || extra.getOffsetLimit() > length)
866  return false;
867  Token token;
868  token.type_ = tokenError;
869  token.start_ = begin_ + value.getOffsetStart();
870  token.end_ = begin_ + value.getOffsetLimit();
871  ErrorInfo info;
872  info.token_ = token;
873  info.message_ = message;
874  info.extra_ = begin_ + extra.getOffsetStart();
875  errors_.push_back(info);
876  return true;
877 }
878 
879 bool Reader::good() const {
880  return !errors_.size();
881 }
882 
// Feature switches for OurReader: the four switches of Features plus
// single-quote, extra-input, duplicate-key and stack-limit settings.
class OurFeatures {
public:
  // Returns a default-constructed feature set.
  static OurFeatures all();
  // Initialises every field; see the constructor definition for the values.
  OurFeatures();
  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  int stackLimit_; // nesting depth at which OurReader::readValue() gives up
}; // OurFeatures

// Implementation of class OurFeatures
// ////////////////////////////////

// Defaults: comments allowed, every other switch off, stack limit 1000 (the
// same limit the legacy Reader uses). rejectDupKeys_ and stackLimit_ were
// previously left uninitialised, so a default-constructed object made
// OurReader::readValue() compare against an indeterminate limit.
OurFeatures::OurFeatures()
    : allowComments_(true), strictRoot_(false)
    , allowDroppedNullPlaceholders_(false), allowNumericKeys_(false)
    , allowSingleQuotes_(false)
    , failIfExtra_(false)
    , rejectDupKeys_(false)
    , stackLimit_(1000)
{
}

OurFeatures OurFeatures::all() { return OurFeatures(); }
910 
911 // Implementation of class Reader
912 // ////////////////////////////////
913 
914 // exact copy of Reader, renamed to OurReader
915 class OurReader {
916 public:
917  typedef char Char;
918  typedef const Char* Location;
919  struct StructuredError {
920  size_t offset_start;
921  size_t offset_limit;
922  std::string message;
923  };
924 
925  OurReader(OurFeatures const& features);
926  bool parse(const char* beginDoc,
927  const char* endDoc,
928  Value& root,
929  bool collectComments = true);
930  std::string getFormattedErrorMessages() const;
931  std::vector<StructuredError> getStructuredErrors() const;
932  bool pushError(const Value& value, const std::string& message);
933  bool pushError(const Value& value, const std::string& message, const Value& extra);
934  bool good() const;
935 
936 private:
937  OurReader(OurReader const&); // no impl
938  void operator=(OurReader const&); // no impl
939 
940  enum TokenType {
941  tokenEndOfStream = 0,
942  tokenObjectBegin,
943  tokenObjectEnd,
944  tokenArrayBegin,
945  tokenArrayEnd,
946  tokenString,
947  tokenNumber,
948  tokenTrue,
949  tokenFalse,
950  tokenNull,
951  tokenArraySeparator,
952  tokenMemberSeparator,
953  tokenComment,
954  tokenError
955  };
956 
957  class Token {
958  public:
959  TokenType type_;
960  Location start_;
961  Location end_;
962  };
963 
964  class ErrorInfo {
965  public:
966  Token token_;
967  std::string message_;
968  Location extra_;
969  };
970 
971  typedef std::deque<ErrorInfo> Errors;
972 
973  bool readToken(Token& token);
974  void skipSpaces();
975  bool match(Location pattern, int patternLength);
976  bool readComment();
977  bool readCStyleComment();
978  bool readCppStyleComment();
979  bool readString();
980  bool readStringSingleQuote();
981  void readNumber();
982  bool readValue();
983  bool readObject(Token& token);
984  bool readArray(Token& token);
985  bool decodeNumber(Token& token);
986  bool decodeNumber(Token& token, Value& decoded);
987  bool decodeString(Token& token);
988  bool decodeString(Token& token, std::string& decoded);
989  bool decodeDouble(Token& token);
990  bool decodeDouble(Token& token, Value& decoded);
991  bool decodeUnicodeCodePoint(Token& token,
992  Location& current,
993  Location end,
994  unsigned int& unicode);
995  bool decodeUnicodeEscapeSequence(Token& token,
996  Location& current,
997  Location end,
998  unsigned int& unicode);
999  bool addError(const std::string& message, Token& token, Location extra = 0);
1000  bool recoverFromError(TokenType skipUntilToken);
1001  bool addErrorAndRecover(const std::string& message,
1002  Token& token,
1003  TokenType skipUntilToken);
1004  void skipUntilSpace();
1005  Value& currentValue();
1006  Char getNextChar();
1007  void
1008  getLocationLineAndColumn(Location location, int& line, int& column) const;
1009  std::string getLocationLineAndColumn(Location location) const;
1010  void addComment(Location begin, Location end, CommentPlacement placement);
1011  void skipCommentTokens(Token& token);
1012 
1013  typedef std::stack<Value*> Nodes;
1014  Nodes nodes_;
1015  Errors errors_;
1016  std::string document_;
1017  Location begin_;
1018  Location end_;
1019  Location current_;
1020  Location lastValueEnd_;
1021  Value* lastValue_;
1022  std::string commentsBefore_;
1023  int stackDepth_;
1024 
1025  OurFeatures const features_;
1026  bool collectComments_;
1027 }; // OurReader
1028 
1029 // complete copy of Reader impl, for OurReader
1030 
1031 OurReader::OurReader(OurFeatures const& features)
1032  : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
1033  lastValue_(), commentsBefore_(), features_(features), collectComments_() {
1034 }
1035 
1036 bool OurReader::parse(const char* beginDoc,
1037  const char* endDoc,
1038  Value& root,
1039  bool collectComments) {
1040  if (!features_.allowComments_) {
1041  collectComments = false;
1042  }
1043 
1044  begin_ = beginDoc;
1045  end_ = endDoc;
1046  collectComments_ = collectComments;
1047  current_ = begin_;
1048  lastValueEnd_ = 0;
1049  lastValue_ = 0;
1050  commentsBefore_ = "";
1051  errors_.clear();
1052  while (!nodes_.empty())
1053  nodes_.pop();
1054  nodes_.push(&root);
1055 
1056  stackDepth_ = 0;
1057  bool successful = readValue();
1058  Token token;
1059  skipCommentTokens(token);
1060  if (features_.failIfExtra_) {
1061  if (token.type_ != tokenError && token.type_ != tokenEndOfStream) {
1062  addError("Extra non-whitespace after JSON value.", token);
1063  return false;
1064  }
1065  }
1066  if (collectComments_ && !commentsBefore_.empty())
1067  root.setComment(commentsBefore_, commentAfter);
1068  if (features_.strictRoot_) {
1069  if (!root.isArray() && !root.isObject()) {
1070  // Set error location to start of doc, ideally should be first token found
1071  // in doc
1072  token.type_ = tokenError;
1073  token.start_ = beginDoc;
1074  token.end_ = endDoc;
1075  addError(
1076  "A valid JSON document must be either an array or an object value.",
1077  token);
1078  return false;
1079  }
1080  }
1081  return successful;
1082 }
1083 
1084 bool OurReader::readValue() {
1085  if (stackDepth_ >= features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
1086  ++stackDepth_;
1087  Token token;
1088  skipCommentTokens(token);
1089  bool successful = true;
1090 
1091  if (collectComments_ && !commentsBefore_.empty()) {
1092  currentValue().setComment(commentsBefore_, commentBefore);
1093  commentsBefore_ = "";
1094  }
1095 
1096  switch (token.type_) {
1097  case tokenObjectBegin:
1098  successful = readObject(token);
1099  currentValue().setOffsetLimit(current_ - begin_);
1100  break;
1101  case tokenArrayBegin:
1102  successful = readArray(token);
1103  currentValue().setOffsetLimit(current_ - begin_);
1104  break;
1105  case tokenNumber:
1106  successful = decodeNumber(token);
1107  break;
1108  case tokenString:
1109  successful = decodeString(token);
1110  break;
1111  case tokenTrue:
1112  {
1113  Value v(true);
1114  currentValue().swapPayload(v);
1115  currentValue().setOffsetStart(token.start_ - begin_);
1116  currentValue().setOffsetLimit(token.end_ - begin_);
1117  }
1118  break;
1119  case tokenFalse:
1120  {
1121  Value v(false);
1122  currentValue().swapPayload(v);
1123  currentValue().setOffsetStart(token.start_ - begin_);
1124  currentValue().setOffsetLimit(token.end_ - begin_);
1125  }
1126  break;
1127  case tokenNull:
1128  {
1129  Value v;
1130  currentValue().swapPayload(v);
1131  currentValue().setOffsetStart(token.start_ - begin_);
1132  currentValue().setOffsetLimit(token.end_ - begin_);
1133  }
1134  break;
1135  case tokenArraySeparator:
1136  case tokenObjectEnd:
1137  case tokenArrayEnd:
1138  if (features_.allowDroppedNullPlaceholders_) {
1139  // "Un-read" the current token and mark the current value as a null
1140  // token.
1141  current_--;
1142  Value v;
1143  currentValue().swapPayload(v);
1144  currentValue().setOffsetStart(current_ - begin_ - 1);
1145  currentValue().setOffsetLimit(current_ - begin_);
1146  break;
1147  } // else, fall through ...
1148  default:
1149  currentValue().setOffsetStart(token.start_ - begin_);
1150  currentValue().setOffsetLimit(token.end_ - begin_);
1151  return addError("Syntax error: value, object or array expected.", token);
1152  }
1153 
1154  if (collectComments_) {
1155  lastValueEnd_ = current_;
1156  lastValue_ = &currentValue();
1157  }
1158 
1159  --stackDepth_;
1160  return successful;
1161 }
1162 
1163 void OurReader::skipCommentTokens(Token& token) {
1164  if (features_.allowComments_) {
1165  do {
1166  readToken(token);
1167  } while (token.type_ == tokenComment);
1168  } else {
1169  readToken(token);
1170  }
1171 }
1172 
1173 bool OurReader::readToken(Token& token) {
1174  skipSpaces();
1175  token.start_ = current_;
1176  Char c = getNextChar();
1177  bool ok = true;
1178  switch (c) {
1179  case '{':
1180  token.type_ = tokenObjectBegin;
1181  break;
1182  case '}':
1183  token.type_ = tokenObjectEnd;
1184  break;
1185  case '[':
1186  token.type_ = tokenArrayBegin;
1187  break;
1188  case ']':
1189  token.type_ = tokenArrayEnd;
1190  break;
1191  case '"':
1192  token.type_ = tokenString;
1193  ok = readString();
1194  break;
1195  case '\'':
1196  if (features_.allowSingleQuotes_) {
1197  token.type_ = tokenString;
1198  ok = readStringSingleQuote();
1199  break;
1200  } // else continue
1201  case '/':
1202  token.type_ = tokenComment;
1203  ok = readComment();
1204  break;
1205  case '0':
1206  case '1':
1207  case '2':
1208  case '3':
1209  case '4':
1210  case '5':
1211  case '6':
1212  case '7':
1213  case '8':
1214  case '9':
1215  case '-':
1216  token.type_ = tokenNumber;
1217  readNumber();
1218  break;
1219  case 't':
1220  token.type_ = tokenTrue;
1221  ok = match("rue", 3);
1222  break;
1223  case 'f':
1224  token.type_ = tokenFalse;
1225  ok = match("alse", 4);
1226  break;
1227  case 'n':
1228  token.type_ = tokenNull;
1229  ok = match("ull", 3);
1230  break;
1231  case ',':
1232  token.type_ = tokenArraySeparator;
1233  break;
1234  case ':':
1235  token.type_ = tokenMemberSeparator;
1236  break;
1237  case 0:
1238  token.type_ = tokenEndOfStream;
1239  break;
1240  default:
1241  ok = false;
1242  break;
1243  }
1244  if (!ok)
1245  token.type_ = tokenError;
1246  token.end_ = current_;
1247  return true;
1248 }
1249 
1250 void OurReader::skipSpaces() {
1251  while (current_ != end_) {
1252  Char c = *current_;
1253  if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
1254  ++current_;
1255  else
1256  break;
1257  }
1258 }
1259 
1260 bool OurReader::match(Location pattern, int patternLength) {
1261  if (end_ - current_ < patternLength)
1262  return false;
1263  int index = patternLength;
1264  while (index--)
1265  if (current_[index] != pattern[index])
1266  return false;
1267  current_ += patternLength;
1268  return true;
1269 }
1270 
1271 bool OurReader::readComment() {
1272  Location commentBegin = current_ - 1;
1273  Char c = getNextChar();
1274  bool successful = false;
1275  if (c == '*')
1276  successful = readCStyleComment();
1277  else if (c == '/')
1278  successful = readCppStyleComment();
1279  if (!successful)
1280  return false;
1281 
1282  if (collectComments_) {
1283  CommentPlacement placement = commentBefore;
1284  if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
1285  if (c != '*' || !containsNewLine(commentBegin, current_))
1286  placement = commentAfterOnSameLine;
1287  }
1288 
1289  addComment(commentBegin, current_, placement);
1290  }
1291  return true;
1292 }
1293 
1294 void
1295 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
1296  assert(collectComments_);
1297  const std::string& normalized = normalizeEOL(begin, end);
1298  if (placement == commentAfterOnSameLine) {
1299  assert(lastValue_ != 0);
1300  lastValue_->setComment(normalized, placement);
1301  } else {
1302  commentsBefore_ += normalized;
1303  }
1304 }
1305 
1306 bool OurReader::readCStyleComment() {
1307  while (current_ != end_) {
1308  Char c = getNextChar();
1309  if (c == '*' && *current_ == '/')
1310  break;
1311  }
1312  return getNextChar() == '/';
1313 }
1314 
1315 bool OurReader::readCppStyleComment() {
1316  while (current_ != end_) {
1317  Char c = getNextChar();
1318  if (c == '\n')
1319  break;
1320  if (c == '\r') {
1321  // Consume DOS EOL. It will be normalized in addComment.
1322  if (current_ != end_ && *current_ == '\n')
1323  getNextChar();
1324  // Break on Moc OS 9 EOL.
1325  break;
1326  }
1327  }
1328  return true;
1329 }
1330 
1331 void OurReader::readNumber() {
1332  const char *p = current_;
1333  char c = '0'; // stopgap for already consumed character
1334  // integral part
1335  while (c >= '0' && c <= '9')
1336  c = (current_ = p) < end_ ? *p++ : 0;
1337  // fractional part
1338  if (c == '.') {
1339  c = (current_ = p) < end_ ? *p++ : 0;
1340  while (c >= '0' && c <= '9')
1341  c = (current_ = p) < end_ ? *p++ : 0;
1342  }
1343  // exponential part
1344  if (c == 'e' || c == 'E') {
1345  c = (current_ = p) < end_ ? *p++ : 0;
1346  if (c == '+' || c == '-')
1347  c = (current_ = p) < end_ ? *p++ : 0;
1348  while (c >= '0' && c <= '9')
1349  c = (current_ = p) < end_ ? *p++ : 0;
1350  }
1351 }
1352 bool OurReader::readString() {
1353  Char c = 0;
1354  while (current_ != end_) {
1355  c = getNextChar();
1356  if (c == '\\')
1357  getNextChar();
1358  else if (c == '"')
1359  break;
1360  }
1361  return c == '"';
1362 }
1363 
1364 
1365 bool OurReader::readStringSingleQuote() {
1366  Char c = 0;
1367  while (current_ != end_) {
1368  c = getNextChar();
1369  if (c == '\\')
1370  getNextChar();
1371  else if (c == '\'')
1372  break;
1373  }
1374  return c == '\'';
1375 }
1376 
1377 bool OurReader::readObject(Token& tokenStart) {
1378  Token tokenName;
1379  std::string name;
1380  Value init(objectValue);
1381  currentValue().swapPayload(init);
1382  currentValue().setOffsetStart(tokenStart.start_ - begin_);
1383  while (readToken(tokenName)) {
1384  bool initialTokenOk = true;
1385  while (tokenName.type_ == tokenComment && initialTokenOk)
1386  initialTokenOk = readToken(tokenName);
1387  if (!initialTokenOk)
1388  break;
1389  if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
1390  return true;
1391  name = "";
1392  if (tokenName.type_ == tokenString) {
1393  if (!decodeString(tokenName, name))
1394  return recoverFromError(tokenObjectEnd);
1395  } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
1396  Value numberName;
1397  if (!decodeNumber(tokenName, numberName))
1398  return recoverFromError(tokenObjectEnd);
1399  name = numberName.asString();
1400  } else {
1401  break;
1402  }
1403 
1404  Token colon;
1405  if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
1406  return addErrorAndRecover(
1407  "Missing ':' after object member name", colon, tokenObjectEnd);
1408  }
1409  if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
1410  if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
1411  std::string msg = "Duplicate key: '" + name + "'";
1412  return addErrorAndRecover(
1413  msg, tokenName, tokenObjectEnd);
1414  }
1415  Value& value = currentValue()[name];
1416  nodes_.push(&value);
1417  bool ok = readValue();
1418  nodes_.pop();
1419  if (!ok) // error already set
1420  return recoverFromError(tokenObjectEnd);
1421 
1422  Token comma;
1423  if (!readToken(comma) ||
1424  (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
1425  comma.type_ != tokenComment)) {
1426  return addErrorAndRecover(
1427  "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
1428  }
1429  bool finalizeTokenOk = true;
1430  while (comma.type_ == tokenComment && finalizeTokenOk)
1431  finalizeTokenOk = readToken(comma);
1432  if (comma.type_ == tokenObjectEnd)
1433  return true;
1434  }
1435  return addErrorAndRecover(
1436  "Missing '}' or object member name", tokenName, tokenObjectEnd);
1437 }
1438 
1439 bool OurReader::readArray(Token& tokenStart) {
1440  Value init(arrayValue);
1441  currentValue().swapPayload(init);
1442  currentValue().setOffsetStart(tokenStart.start_ - begin_);
1443  skipSpaces();
1444  if (*current_ == ']') // empty array
1445  {
1446  Token endArray;
1447  readToken(endArray);
1448  return true;
1449  }
1450  int index = 0;
1451  for (;;) {
1452  Value& value = currentValue()[index++];
1453  nodes_.push(&value);
1454  bool ok = readValue();
1455  nodes_.pop();
1456  if (!ok) // error already set
1457  return recoverFromError(tokenArrayEnd);
1458 
1459  Token token;
1460  // Accept Comment after last item in the array.
1461  ok = readToken(token);
1462  while (token.type_ == tokenComment && ok) {
1463  ok = readToken(token);
1464  }
1465  bool badTokenType =
1466  (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
1467  if (!ok || badTokenType) {
1468  return addErrorAndRecover(
1469  "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
1470  }
1471  if (token.type_ == tokenArrayEnd)
1472  break;
1473  }
1474  return true;
1475 }
1476 
1477 bool OurReader::decodeNumber(Token& token) {
1478  Value decoded;
1479  if (!decodeNumber(token, decoded))
1480  return false;
1481  currentValue().swapPayload(decoded);
1482  currentValue().setOffsetStart(token.start_ - begin_);
1483  currentValue().setOffsetLimit(token.end_ - begin_);
1484  return true;
1485 }
1486 
1487 bool OurReader::decodeNumber(Token& token, Value& decoded) {
1488  // Attempts to parse the number as an integer. If the number is
1489  // larger than the maximum supported value of an integer then
1490  // we decode the number as a double.
1491  Location current = token.start_;
1492  bool isNegative = *current == '-';
1493  if (isNegative)
1494  ++current;
1495  // TODO: Help the compiler do the div and mod at compile time or get rid of them.
1496  Value::LargestUInt maxIntegerValue =
1499  Value::LargestUInt threshold = maxIntegerValue / 10;
1500  Value::LargestUInt value = 0;
1501  while (current < token.end_) {
1502  Char c = *current++;
1503  if (c < '0' || c > '9')
1504  return decodeDouble(token, decoded);
1505  Value::UInt digit(c - '0');
1506  if (value >= threshold) {
1507  // We've hit or exceeded the max value divided by 10 (rounded down). If
1508  // a) we've only just touched the limit, b) this is the last digit, and
1509  // c) it's small enough to fit in that rounding delta, we're okay.
1510  // Otherwise treat this number as a double to avoid overflow.
1511  if (value > threshold || current != token.end_ ||
1512  digit > maxIntegerValue % 10) {
1513  return decodeDouble(token, decoded);
1514  }
1515  }
1516  value = value * 10 + digit;
1517  }
1518  if (isNegative)
1519  decoded = -Value::LargestInt(value);
1520  else if (value <= Value::LargestUInt(Value::maxInt))
1521  decoded = Value::LargestInt(value);
1522  else
1523  decoded = value;
1524  return true;
1525 }
1526 
1527 bool OurReader::decodeDouble(Token& token) {
1528  Value decoded;
1529  if (!decodeDouble(token, decoded))
1530  return false;
1531  currentValue().swapPayload(decoded);
1532  currentValue().setOffsetStart(token.start_ - begin_);
1533  currentValue().setOffsetLimit(token.end_ - begin_);
1534  return true;
1535 }
1536 
1537 bool OurReader::decodeDouble(Token& token, Value& decoded) {
1538  double value = 0;
1539 
1540  std::string buffer( token.start_, token.end_ );
1541  std::istringstream is(buffer);
1542 
1543  if (!(is >> value))
1544  return addError("'" + std::string(token.start_, token.end_) +
1545  "' is not a number.",
1546  token);
1547  decoded = value;
1548  return true;
1549 }
1550 
1551 bool OurReader::decodeString(Token& token) {
1552  std::string decoded_string;
1553  if (!decodeString(token, decoded_string))
1554  return false;
1555  Value decoded(decoded_string);
1556  currentValue().swapPayload(decoded);
1557  currentValue().setOffsetStart(token.start_ - begin_);
1558  currentValue().setOffsetLimit(token.end_ - begin_);
1559  return true;
1560 }
1561 
1562 bool OurReader::decodeString(Token& token, std::string& decoded) {
1563  decoded.reserve(token.end_ - token.start_ - 2);
1564  Location current = token.start_ + 1; // skip '"'
1565  Location end = token.end_ - 1; // do not include '"'
1566  while (current != end) {
1567  Char c = *current++;
1568  if (c == '"')
1569  break;
1570  else if (c == '\\') {
1571  if (current == end)
1572  return addError("Empty escape sequence in string", token, current);
1573  Char escape = *current++;
1574  switch (escape) {
1575  case '"':
1576  decoded += '"';
1577  break;
1578  case '/':
1579  decoded += '/';
1580  break;
1581  case '\\':
1582  decoded += '\\';
1583  break;
1584  case 'b':
1585  decoded += '\b';
1586  break;
1587  case 'f':
1588  decoded += '\f';
1589  break;
1590  case 'n':
1591  decoded += '\n';
1592  break;
1593  case 'r':
1594  decoded += '\r';
1595  break;
1596  case 't':
1597  decoded += '\t';
1598  break;
1599  case 'u': {
1600  unsigned int unicode;
1601  if (!decodeUnicodeCodePoint(token, current, end, unicode))
1602  return false;
1603  decoded += codePointToUTF8(unicode);
1604  } break;
1605  default:
1606  return addError("Bad escape sequence in string", token, current);
1607  }
1608  } else {
1609  decoded += c;
1610  }
1611  }
1612  return true;
1613 }
1614 
1615 bool OurReader::decodeUnicodeCodePoint(Token& token,
1616  Location& current,
1617  Location end,
1618  unsigned int& unicode) {
1619 
1620  if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
1621  return false;
1622  if (unicode >= 0xD800 && unicode <= 0xDBFF) {
1623  // surrogate pairs
1624  if (end - current < 6)
1625  return addError(
1626  "additional six characters expected to parse unicode surrogate pair.",
1627  token,
1628  current);
1629  unsigned int surrogatePair;
1630  if (*(current++) == '\\' && *(current++) == 'u') {
1631  if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
1632  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
1633  } else
1634  return false;
1635  } else
1636  return addError("expecting another \\u token to begin the second half of "
1637  "a unicode surrogate pair",
1638  token,
1639  current);
1640  }
1641  return true;
1642 }
1643 
1644 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
1645  Location& current,
1646  Location end,
1647  unsigned int& unicode) {
1648  if (end - current < 4)
1649  return addError(
1650  "Bad unicode escape sequence in string: four digits expected.",
1651  token,
1652  current);
1653  unicode = 0;
1654  for (int index = 0; index < 4; ++index) {
1655  Char c = *current++;
1656  unicode *= 16;
1657  if (c >= '0' && c <= '9')
1658  unicode += c - '0';
1659  else if (c >= 'a' && c <= 'f')
1660  unicode += c - 'a' + 10;
1661  else if (c >= 'A' && c <= 'F')
1662  unicode += c - 'A' + 10;
1663  else
1664  return addError(
1665  "Bad unicode escape sequence in string: hexadecimal digit expected.",
1666  token,
1667  current);
1668  }
1669  return true;
1670 }
1671 
1672 bool
1673 OurReader::addError(const std::string& message, Token& token, Location extra) {
1674  ErrorInfo info;
1675  info.token_ = token;
1676  info.message_ = message;
1677  info.extra_ = extra;
1678  errors_.push_back(info);
1679  return false;
1680 }
1681 
1682 bool OurReader::recoverFromError(TokenType skipUntilToken) {
1683  int errorCount = int(errors_.size());
1684  Token skip;
1685  for (;;) {
1686  if (!readToken(skip))
1687  errors_.resize(errorCount); // discard errors caused by recovery
1688  if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
1689  break;
1690  }
1691  errors_.resize(errorCount);
1692  return false;
1693 }
1694 
1695 bool OurReader::addErrorAndRecover(const std::string& message,
1696  Token& token,
1697  TokenType skipUntilToken) {
1698  addError(message, token);
1699  return recoverFromError(skipUntilToken);
1700 }
1701 
1702 Value& OurReader::currentValue() { return *(nodes_.top()); }
1703 
1704 OurReader::Char OurReader::getNextChar() {
1705  if (current_ == end_)
1706  return 0;
1707  return *current_++;
1708 }
1709 
1710 void OurReader::getLocationLineAndColumn(Location location,
1711  int& line,
1712  int& column) const {
1713  Location current = begin_;
1714  Location lastLineStart = current;
1715  line = 0;
1716  while (current < location && current != end_) {
1717  Char c = *current++;
1718  if (c == '\r') {
1719  if (*current == '\n')
1720  ++current;
1721  lastLineStart = current;
1722  ++line;
1723  } else if (c == '\n') {
1724  lastLineStart = current;
1725  ++line;
1726  }
1727  }
1728  // column & line start at 1
1729  column = int(location - lastLineStart) + 1;
1730  ++line;
1731 }
1732 
1733 std::string OurReader::getLocationLineAndColumn(Location location) const {
1734  int line, column;
1735  getLocationLineAndColumn(location, line, column);
1736  char buffer[18 + 16 + 16 + 1];
1737 #if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__)
1738 #if defined(WINCE)
1739  _snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1740 #else
1741  sprintf_s(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1742 #endif
1743 #else
1744  snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
1745 #endif
1746  return buffer;
1747 }
1748 
1749 std::string OurReader::getFormattedErrorMessages() const {
1750  std::string formattedMessage;
1751  for (Errors::const_iterator itError = errors_.begin();
1752  itError != errors_.end();
1753  ++itError) {
1754  const ErrorInfo& error = *itError;
1755  formattedMessage +=
1756  "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
1757  formattedMessage += " " + error.message_ + "\n";
1758  if (error.extra_)
1759  formattedMessage +=
1760  "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
1761  }
1762  return formattedMessage;
1763 }
1764 
1765 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
1766  std::vector<OurReader::StructuredError> allErrors;
1767  for (Errors::const_iterator itError = errors_.begin();
1768  itError != errors_.end();
1769  ++itError) {
1770  const ErrorInfo& error = *itError;
1771  OurReader::StructuredError structured;
1772  structured.offset_start = error.token_.start_ - begin_;
1773  structured.offset_limit = error.token_.end_ - begin_;
1774  structured.message = error.message_;
1775  allErrors.push_back(structured);
1776  }
1777  return allErrors;
1778 }
1779 
1780 bool OurReader::pushError(const Value& value, const std::string& message) {
1781  size_t length = end_ - begin_;
1782  if(value.getOffsetStart() > length
1783  || value.getOffsetLimit() > length)
1784  return false;
1785  Token token;
1786  token.type_ = tokenError;
1787  token.start_ = begin_ + value.getOffsetStart();
1788  token.end_ = end_ + value.getOffsetLimit();
1789  ErrorInfo info;
1790  info.token_ = token;
1791  info.message_ = message;
1792  info.extra_ = 0;
1793  errors_.push_back(info);
1794  return true;
1795 }
1796 
1797 bool OurReader::pushError(const Value& value, const std::string& message, const Value& extra) {
1798  size_t length = end_ - begin_;
1799  if(value.getOffsetStart() > length
1800  || value.getOffsetLimit() > length
1801  || extra.getOffsetLimit() > length)
1802  return false;
1803  Token token;
1804  token.type_ = tokenError;
1805  token.start_ = begin_ + value.getOffsetStart();
1806  token.end_ = begin_ + value.getOffsetLimit();
1807  ErrorInfo info;
1808  info.token_ = token;
1809  info.message_ = message;
1810  info.extra_ = begin_ + extra.getOffsetStart();
1811  errors_.push_back(info);
1812  return true;
1813 }
1814 
1815 bool OurReader::good() const {
1816  return !errors_.size();
1817 }
1818 
1819 
1820 class OurCharReader : public CharReader {
1821  bool const collectComments_;
1822  OurReader reader_;
1823 public:
1824  OurCharReader(
1825  bool collectComments,
1826  OurFeatures const& features)
1827  : collectComments_(collectComments)
1828  , reader_(features)
1829  {}
1830  virtual bool parse(
1831  char const* beginDoc, char const* endDoc,
1832  Value* root, std::string* errs) {
1833  bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
1834  if (errs) {
1835  *errs = reader_.getFormattedErrorMessages();
1836  }
1837  return ok;
1838  }
1839 };
1840 
1842 {
1843  setDefaults(&settings_);
1844 }
1846 {}
1848 {
1849  bool collectComments = settings_["collectComments"].asBool();
1850  OurFeatures features = OurFeatures::all();
1851  features.allowComments_ = settings_["allowComments"].asBool();
1852  features.strictRoot_ = settings_["strictRoot"].asBool();
1853  features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
1854  features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
1855  features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
1856  features.stackLimit_ = settings_["stackLimit"].asInt();
1857  features.failIfExtra_ = settings_["failIfExtra"].asBool();
1858  features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
1859  return new OurCharReader(collectComments, features);
1860 }
// Replaces the contents of *valid_keys with the names of all settings the
// reader understands.
static void getValidReaderKeys(std::set<std::string>* valid_keys)
{
  static const char* const knownKeys[] = {
    "collectComments",
    "allowComments",
    "strictRoot",
    "allowDroppedNullPlaceholders",
    "allowNumericKeys",
    "allowSingleQuotes",
    "stackLimit",
    "failIfExtra",
    "rejectDupKeys"
  };
  const size_t keyCount = sizeof(knownKeys) / sizeof(knownKeys[0]);

  valid_keys->clear();
  for (size_t i = 0; i < keyCount; ++i)
    valid_keys->insert(knownKeys[i]);
}
1875 {
1876  Json::Value my_invalid;
1877  if (!invalid) invalid = &my_invalid; // so we do not need to test for NULL
1878  Json::Value& inv = *invalid;
1879  std::set<std::string> valid_keys;
1880  getValidReaderKeys(&valid_keys);
1881  Value::Members keys = settings_.getMemberNames();
1882  size_t n = keys.size();
1883  for (size_t i = 0; i < n; ++i) {
1884  std::string const& key = keys[i];
1885  if (valid_keys.find(key) == valid_keys.end()) {
1886  inv[key] = settings_[key];
1887  }
1888  }
1889  return 0u == inv.size();
1890 }
1892 {
1893  return settings_[key];
1894 }
1895 // static
1897 {
1899  (*settings)["allowComments"] = false;
1900  (*settings)["strictRoot"] = true;
1901  (*settings)["allowDroppedNullPlaceholders"] = false;
1902  (*settings)["allowNumericKeys"] = false;
1903  (*settings)["allowSingleQuotes"] = false;
1904  (*settings)["failIfExtra"] = true;
1905  (*settings)["rejectDupKeys"] = true;
1907 }
1908 // static
1910 {
1912  (*settings)["collectComments"] = true;
1913  (*settings)["allowComments"] = true;
1914  (*settings)["strictRoot"] = false;
1915  (*settings)["allowDroppedNullPlaceholders"] = false;
1916  (*settings)["allowNumericKeys"] = false;
1917  (*settings)["allowSingleQuotes"] = false;
1918  (*settings)["stackLimit"] = 1000;
1919  (*settings)["failIfExtra"] = false;
1920  (*settings)["rejectDupKeys"] = false;
1922 }
1923 
1925 // global functions
1926 
1928  CharReader::Factory const& fact, std::istream& sin,
1929  Value* root, std::string* errs)
1930 {
1931  std::ostringstream ssin;
1932  ssin << sin.rdbuf();
1933  std::string doc = ssin.str();
1934  char const* begin = doc.data();
1935  char const* end = begin + doc.size();
1936  // Note that we do not actually need a null-terminator.
1937  CharReaderPtr const reader(fact.newCharReader());
1938  return reader->parse(begin, end, root, errs);
1939 }
1940 
1941 std::istream& operator>>(std::istream& sin, Value& root) {
1943  std::string errs;
1944  bool ok = parseFromStream(b, sin, &root, &errs);
1945  if (!ok) {
1946  fprintf(stderr,
1947  "Error from reader: %s",
1948  errs.c_str());
1949 
1950  throwRuntimeError("reader error");
1951  }
1952  return sin;
1953 }
1954 
1955 } // namespace Json
static std::string codePointToUTF8(unsigned int cp)
Converts a unicode code-point to UTF-8.
Definition: json_tool.h:18
std::string asString() const
Embedded zeroes are possible.
Definition: json_value.cpp:601
std::vector< std::string > Members
Definition: value.h:165
virtual CharReader * newCharReader() const
Allocate a CharReader via operator new().
static void strictMode(Json::Value *settings)
Same as old Features::strictMode().
array value (ordered list)
Definition: value.h:85
std::auto_ptr< CharReader > CharReaderPtr
Definition: json_reader.cpp:38
bool parseFromStream(CharReader::Factory const &, std::istream &, Value *root, std::string *errs)
Consume entire stream and use its begin/end.
object value (collection of name/value pairs).
Definition: value.h:86
std::istream & operator>>(std::istream &, Value &)
Read from 'sin' into 'root'.
char Char
Definition: reader.h:35
std::string getFormatedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
void swapPayload(Value &other)
Swap values but leave comments and source offsets in place.
Definition: json_value.cpp:457
void setOffsetStart(size_t start)
Value & operator[](std::string key)
A simple way to update a specific setting.
static const Int maxInt
Maximum signed int value that can be stored in a Json::Value.
Definition: value.h:190
Json::LargestUInt LargestUInt
Definition: value.h:175
Features()
Initialize the configuration like JsonConfig::allFeatures;.
Definition: json_reader.cpp:44
An error tagged with where in the JSON text it was encountered.
Definition: reader.h:44
std::vector< StructuredError > getStructuredErrors() const
Returns a vector of structured errors encountered while parsing.
bool isObject() const
void setComment(const char *comment, CommentPlacement placement)
static const LargestInt minLargestInt
Minimum signed integer value that can be stored in a Json::Value.
Definition: value.h:181
bool allowComments_
true if comments are allowed. Default: true.
Definition: features.h:42
CommentPlacement
Definition: value.h:89
const Char * Location
Definition: reader.h:36
bool allowNumericKeys_
true if numeric object key are allowed. Default: false.
Definition: features.h:52
size_t getOffsetLimit() const
bool good() const
Return whether there are any errors.
bool parse(const std::string &document, Value &root, bool collectComments=true)
Read a Value from a JSON document.
Definition: json_reader.cpp:83
JSON (JavaScript Object Notation).
Definition: config.h:87
bool allowDroppedNullPlaceholders_
true if dropped null placeholders are allowed. Default: false.
Definition: features.h:49
bool validate(Json::Value *invalid) const
Json::LargestInt LargestInt
Definition: value.h:174
static int const stackLimit_g
Definition: json_reader.cpp:30
void throwRuntimeError(std::string const &msg)
used internally
Definition: json_value.cpp:170
Json::UInt UInt
Definition: value.h:168
static void setDefaults(Json::Value *settings)
Called by ctor, but you can use this to reset settings_.
Interface for reading JSON from a char array.
Definition: reader.h:247
ArrayIndex size() const
Number of values in array or object.
Definition: json_value.cpp:838
Represents a JSON value.
Definition: value.h:162
void setOffsetLimit(size_t limit)
static Features all()
A configuration that allows all features and assumes all strings are UTF-8.
Definition: json_reader.cpp:48
static std::string normalizeEOL(Reader::Location begin, Reader::Location end)
a comment on the line after a value (only makes sense for the root value)
Definition: value.h:92
bool pushError(const Value &value, const std::string &message)
Add a semantic error message.
static Features strictMode()
A configuration that is strictly compatible with the JSON specification.
Definition: json_reader.cpp:50
bool strictRoot_
true if root must be either an array or an object value.
Definition: features.h:46
bool isArray() const
Build a CharReader implementation.
Definition: reader.h:293
size_t getOffsetStart() const
static int stackDepth_g
Definition: json_reader.cpp:31
#define snprintf
Definition: json_reader.cpp:22
static void getValidReaderKeys(std::set< std::string > *valid_keys)
static bool containsNewLine(Reader::Location begin, Reader::Location end)
Definition: json_reader.cpp:62
Configuration passed to reader and writer.
Definition: features.h:19
virtual CharReader * newCharReader() const =0
Allocate a CharReader via operator new().
a comment placed on the line before a value
Definition: value.h:90
Reader()
Constructs a Reader allowing all features for parsing.
Definition: json_reader.cpp:72
std::string getFormattedErrorMessages() const
Returns a user friendly string that list errors in the parsed document.
a comment just after a value on the same line
Definition: value.h:91
static const LargestUInt maxLargestUInt
Maximum unsigned integer value that can be stored in a Json::Value.
Definition: value.h:185