1 ////////////////////////////////////////////////////////////////////////////////
3 // Copyright 2006 - 2017, Paul Beckingham, Federico Hernandez.
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
12 // The above copyright notice and this permission notice shall be included
13 // in all copies or substantial portions of the Software.
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 // http://www.opensource.org/licenses/mit-license.php
25 ////////////////////////////////////////////////////////////////////////////////
35 ////////////////////////////////////////////////////////////////////////////////
36 bool json::SAX::parse (const std::string& input, SAX::Sink& sink)
38 sink.eventDocStart ();
39 std::string::size_type cursor = 0;
40 ignoreWhitespace (input, cursor);
41 if (isObject (input, cursor, sink) ||
42 isArray (input, cursor, sink))
44 ignoreWhitespace (input, cursor);
45 if (cursor < input.length ())
46 error ("Error: extra characters found at position ", cursor);
52 error ("Error: Missing '{' or '[' at position ", cursor);
56 ////////////////////////////////////////////////////////////////////////////////
57 // Complete Unicode whitespace list.
59 // http://en.wikipedia.org/wiki/Whitespace_character
61 void json::SAX::ignoreWhitespace (const std::string& input, std::string::size_type& cursor)
63 int c = input[cursor];
64 while (c == 0x0020 || // space Common Separator, space
65 c == 0x0009 || // Common Other, control HT, Horizontal Tab
66 c == 0x000A || // Common Other, control LF, Line feed
67 c == 0x000B || // Common Other, control VT, Vertical Tab
68 c == 0x000C || // Common Other, control FF, Form feed
69 c == 0x000D || // Common Other, control CR, Carriage return
70 c == 0x0085 || // Common Other, control NEL, Next line
71 c == 0x00A0 || // no-break space Common Separator, space
72 c == 0x1680 || // ogham space mark Ogham Separator, space
73 c == 0x180E || // mongolian vowel separator Mongolian Separator, space
74 c == 0x2000 || // en quad Common Separator, space
75 c == 0x2001 || // em quad Common Separator, space
76 c == 0x2002 || // en space Common Separator, space
77 c == 0x2003 || // em space Common Separator, space
78 c == 0x2004 || // three-per-em space Common Separator, space
79 c == 0x2005 || // four-per-em space Common Separator, space
80 c == 0x2006 || // six-per-em space Common Separator, space
81 c == 0x2007 || // figure space Common Separator, space
82 c == 0x2008 || // punctuation space Common Separator, space
83 c == 0x2009 || // thin space Common Separator, space
84 c == 0x200A || // hair space Common Separator, space
85 c == 0x200B || // zero width space
86 c == 0x200C || // zero width non-joiner
87 c == 0x200D || // zero width joiner
88 c == 0x2028 || // line separator Common Separator, line
89 c == 0x2029 || // paragraph separator Common Separator, paragraph
90 c == 0x202F || // narrow no-break space Common Separator, space
91 c == 0x205F || // medium mathematical space Common Separator, space
92 c == 0x2060 || // word joiner
93 c == 0x3000) // ideographic space Common Separator, space
99 ////////////////////////////////////////////////////////////////////////////////
100 // object := '{' [<pair> [, <pair> ...]] '}'
101 bool json::SAX::isObject (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
103 ignoreWhitespace (input, cursor);
104 auto backup = cursor;
106 if (isLiteral (input, '{', cursor))
108 sink.eventObjectStart ();
111 if (isPair (input, cursor, sink))
114 while (isLiteral (input, ',', cursor) &&
115 isPair (input, cursor, sink))
121 ignoreWhitespace (input, cursor);
122 if (isLiteral (input, '}', cursor))
124 sink.eventObjectEnd (counter);
128 error ("Error: Missing '}' at position ", cursor);
135 ////////////////////////////////////////////////////////////////////////////////
136 // array := '[' [<value> [, <value> ...]] ']'
137 bool json::SAX::isArray (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
139 ignoreWhitespace (input, cursor);
140 auto backup = cursor;
142 if (isLiteral (input, '[', cursor))
144 sink.eventArrayStart ();
147 if (isValue (input, cursor, sink))
150 while (isLiteral (input, ',', cursor) &&
151 isValue (input, cursor, sink))
157 ignoreWhitespace (input, cursor);
158 if (isLiteral (input, ']', cursor))
160 sink.eventArrayEnd (counter);
164 error ("Error: Missing ']' at position ", cursor);
171 ////////////////////////////////////////////////////////////////////////////////
172 // pair := <string> ':' <value>
173 bool json::SAX::isPair (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
175 ignoreWhitespace (input, cursor);
176 auto backup = cursor;
178 if (isKey (input, cursor, sink))
180 if (isLiteral (input, ':', cursor))
182 if (isValue (input, cursor, sink))
185 error ("Error: Missing value at position ", cursor);
188 error ("Error: Missing ':' at position ", cursor);
195 ////////////////////////////////////////////////////////////////////////////////
203 bool json::SAX::isValue (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
205 ignoreWhitespace (input, cursor);
207 return isString (input, cursor, sink) ||
208 isNumber (input, cursor, sink) ||
209 isObject (input, cursor, sink) ||
210 isArray (input, cursor, sink) ||
211 isBool (input, cursor, sink) ||
212 isNull (input, cursor, sink);
215 ////////////////////////////////////////////////////////////////////////////////
216 bool json::SAX::isKey (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
218 ignoreWhitespace (input, cursor);
221 if (isStringValue (input, cursor, value))
223 sink.eventName (value);
230 ////////////////////////////////////////////////////////////////////////////////
231 bool json::SAX::isString (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
233 ignoreWhitespace (input, cursor);
236 if (isStringValue (input, cursor, value))
238 sink.eventValueString (value);
245 ////////////////////////////////////////////////////////////////////////////////
246 // string := '"' [<chars> ...] '"'
247 // chars := <unicode>
257 bool json::SAX::isStringValue (const std::string& input, std::string::size_type& cursor, std::string& value)
259 auto backup = cursor;
261 if (isLiteral (input, '"', cursor))
265 while ((c = input[cursor]))
275 // Unicode \uXXXX codepoint.
276 else if (input[cursor + 0] == '\\' &&
277 input[cursor + 1] == 'u' &&
278 isHexDigit (input[cursor + 2]) &&
279 isHexDigit (input[cursor + 3]) &&
280 isHexDigit (input[cursor + 4]) &&
281 isHexDigit (input[cursor + 5]))
283 word += utf8_character (
298 case '"': word += (char) 0x22; ++cursor; break;
299 case '\'': word += (char) 0x27; ++cursor; break;
300 case '\\': word += (char) 0x5C; ++cursor; break;
301 case 'b': word += (char) 0x08; ++cursor; break;
302 case 'f': word += (char) 0x0C; ++cursor; break;
303 case 'n': word += (char) 0x0A; ++cursor; break;
304 case 'r': word += (char) 0x0D; ++cursor; break;
305 case 't': word += (char) 0x09; ++cursor; break;
306 case 'v': word += (char) 0x0B; ++cursor; break;
308 // This pass-through default case means that anything can be escaped
309 // harmlessly. In particular 'quote' is included, if it not one of the
311 default: word += (char) c; ++cursor; break;
315 // Ordinary character.
323 error ("Error: Missing '\"' at position ", cursor);
330 ////////////////////////////////////////////////////////////////////////////////
331 // number := <int> [<frac>] [<exp>]
332 bool json::SAX::isNumber (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
334 ignoreWhitespace (input, cursor);
335 auto backup = cursor;
337 std::string integerPart;
338 if (isInt (input, cursor, integerPart))
340 std::string fractionalPart;
341 isFrac (input, cursor, fractionalPart);
343 std::string exponentPart;
344 isExp (input, cursor, exponentPart);
346 // Does it fit in a long?
347 std::string combined = integerPart + fractionalPart + exponentPart;
349 long longValue = strtol (combined.c_str (), &end, 10);
350 if (! *end && errno != ERANGE)
352 sink.eventValueInt (longValue);
356 // Does it fit in an unsigned long?
357 unsigned long ulongValue = strtoul (combined.c_str (), &end, 10);
358 if (! *end && errno != ERANGE)
360 sink.eventValueUint (ulongValue);
364 // If the above fail, allow this one to be capped at imax.
365 double doubleValue = strtod (combined.c_str (), &end);
368 sink.eventValueDouble (doubleValue);
377 ////////////////////////////////////////////////////////////////////////////////
378 // int := ['-'] <digits>
379 bool json::SAX::isInt (const std::string& input, std::string::size_type& cursor, std::string& value)
381 auto backup = cursor;
383 isLiteral (input, '-', cursor);
384 if (isDigits (input, cursor))
386 value = input.substr (backup, cursor - backup);
390 // No restore necessary.
394 ////////////////////////////////////////////////////////////////////////////////
395 // frac := '.' <digits>
396 bool json::SAX::isFrac (const std::string& input, std::string::size_type& cursor, std::string& value)
398 auto backup = cursor;
400 if (isLiteral (input, '.', cursor) &&
401 isDigits (input, cursor))
403 value = input.substr (backup, cursor - backup);
411 ////////////////////////////////////////////////////////////////////////////////
412 // digits := <digit> [<digit> ...]
413 bool json::SAX::isDigits (const std::string& input, std::string::size_type& cursor)
415 int c = input[cursor];
420 while (isDecDigit (c))
429 ////////////////////////////////////////////////////////////////////////////////
430 // digit := 0x30 ('0') .. 0x39 ('9')
431 bool json::SAX::isDecDigit (int c)
433 return c >= 0x30 && c <= 0x39;
436 ////////////////////////////////////////////////////////////////////////////////
437 // hex := 0x30 ('0') .. 0x39 ('9')
438 bool json::SAX::isHexDigit (int c)
440 return (c >= 0x30 && c <= 0x39) ||
441 (c >= 0x61 && c <= 0x66) ||
442 (c >= 0x41 && c <= 0x46);
445 ////////////////////////////////////////////////////////////////////////////////
446 // exp := <e> <digits>
447 bool json::SAX::isExp (const std::string& input, std::string::size_type& cursor, std::string& value)
449 auto backup = cursor;
451 if (isE (input, cursor) &&
452 isDigits (input, cursor))
454 value = input.substr (backup, cursor - backup);
462 ////////////////////////////////////////////////////////////////////////////////
// Recognises the <e> part of 'exp := <e> <digits>' on behalf of isExp.
// NOTE(review): only the first statement of the body is visible here.
// Presumably the remainder accepts 'e' or 'E' followed by an optional sign
// and advances the cursor past them - confirm against the full definition.
469 bool json::SAX::isE (const std::string& input, std::string::size_type& cursor)
// Fetch the character at the cursor. When the cursor equals input.length ()
// this reads the string's terminating NUL.
471 int c = input[cursor];
489 ////////////////////////////////////////////////////////////////////////////////
490 bool json::SAX::isBool (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
492 ignoreWhitespace (input, cursor);
494 if (input[cursor + 0] == 't' &&
495 input[cursor + 1] == 'r' &&
496 input[cursor + 2] == 'u' &&
497 input[cursor + 3] == 'e')
500 sink.eventValueBool (true);
503 else if (input[cursor + 0] == 'f' &&
504 input[cursor + 1] == 'a' &&
505 input[cursor + 2] == 'l' &&
506 input[cursor + 3] == 's' &&
507 input[cursor + 4] == 'e')
510 sink.eventValueBool (false);
517 ////////////////////////////////////////////////////////////////////////////////
518 bool json::SAX::isNull (const std::string& input, std::string::size_type& cursor, SAX::Sink& sink)
520 ignoreWhitespace (input, cursor);
522 if (input[cursor + 0] == 'n' &&
523 input[cursor + 1] == 'u' &&
524 input[cursor + 2] == 'l' &&
525 input[cursor + 3] == 'l')
528 sink.eventValueNull ();
535 ////////////////////////////////////////////////////////////////////////////////
536 bool json::SAX::isLiteral (const std::string& input, char literal, std::string::size_type& cursor)
538 ignoreWhitespace (input, cursor);
540 if (input[cursor] == literal)
549 ////////////////////////////////////////////////////////////////////////////////
554 int json::SAX::hexToInt (int c)
556 if (c >= 0x30 && c <= 0x39) return (c - 0x30);
557 else if (c >= 0x41 && c <= 0x46) return (c - 0x41 + 10);
558 else return (c - 0x61 + 10);
561 ////////////////////////////////////////////////////////////////////////////////
562 int json::SAX::hexToInt (int c0, int c1, int c2, int c3)
564 return (hexToInt (c0) << 12) +
565 (hexToInt (c1) << 8) +
566 (hexToInt (c2) << 4) +
570 ////////////////////////////////////////////////////////////////////////////////
571 void json::SAX::error (const std::string& message, std::string::size_type cursor)
573 std::stringstream error;
574 error << message << cursor;
578 ////////////////////////////////////////////////////////////////////////////////