1 ////////////////////////////////////////////////////////////////////////////////
3 // Copyright 2015 - 2017, Paul Beckingham, Federico Hernandez.
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
12 // The above copyright notice and this permission notice shall be included
13 // in all copies or substantial portions of the Software.
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 // http://www.opensource.org/licenses/mit-license.php
25 ////////////////////////////////////////////////////////////////////////////////
37 ////////////////////////////////////////////////////////////////////////////////
// Constructs a Pig over its own copy of 'text': the argument is copied into a
// newly allocated std::string that is held through the shared pointer _text.
38 Pig::Pig (const std::string& text)
39 : _text {std::make_shared <std::string> (text)}
43 ////////////////////////////////////////////////////////////////////////////////
// Tests whether the element at the cursor equals 'c'.
// The comparison reads one element of the string through operator[], so 'c'
// is compared with a single char, not with a decoded multi-byte character.
// NOTE(review): presumably advances the cursor and returns true on a match,
// false otherwise - confirm.
44 bool Pig::skip (int c)
46 if ((*_text)[_cursor] == c)
55 ////////////////////////////////////////////////////////////////////////////////
// Steps over up to 'quantity' characters, calling utf8_next_char on the text
// and cursor once per step.
// NOTE(review): presumably returns false (and restores the cursor) when the
// text runs out before 'quantity' characters were skipped - confirm.
56 bool Pig::skipN (const int quantity)
// 'count' is post-incremented in the test, so the body runs at most
// 'quantity' times.
61 while (count++ < quantity)
// A zero result from utf8_next_char aborts the walk.
63 if (! utf8_next_char (*_text, _cursor))
73 ////////////////////////////////////////////////////////////////////////////////
80 while ((c = utf8_next_char (*_text, _cursor)))
82 if (! unicodeWhitespace (c))
90 return _cursor > save;
93 ////////////////////////////////////////////////////////////////////////////////
// Skips 'literal' when the text contains it starting exactly at the cursor.
94 bool Pig::skipLiteral (const std::string& literal)
// find() searches forward from the cursor; only a hit located at the cursor
// itself is accepted.
// NOTE(review): when there is no match at the cursor, find() still scans the
// remainder of the text before the comparison fails.
96 if (_text->find (literal, _cursor) == _cursor)
// Move past the matched literal; the step is its length in bytes.
98 _cursor += literal.length ();
105 ////////////////////////////////////////////////////////////////////////////////
// Compares 'reference' with the text at the cursor, element by element, and
// hands the common leading part back through 'result'.
// NOTE(review): presumably returns true and advances the cursor only when at
// least one element matched - confirm.
106 bool Pig::skipPartial (const std::string& reference, std::string& result)
108 // Walk the common substring.
// The walk stops at a '\0' element in either string (which is what operator[]
// yields at the end of a std::string) or at the first mismatch.
110 while (reference[pos] &&
111 (*_text)[_cursor + pos] &&
112 reference[pos] == (*_text)[_cursor + pos])
// The matched portion: 'pos' bytes of the text starting at the cursor.
117 result = _text->substr (_cursor, pos);
125 ////////////////////////////////////////////////////////////////////////////////
// Scans forward one character at a time with utf8_next_char and copies the
// span [save, _cursor) of the text into 'result'.
// NOTE(review): 'save' presumably records the cursor at entry, and the scan
// presumably stops at the first character equal to 'end' - confirm.
126 bool Pig::getUntil (int end, std::string& result)
// The loop ends when utf8_next_char yields 0.
132 while ((c = utf8_next_char (*_text, _cursor)))
137 result = _text->substr (save, _cursor - save);
143 result = _text->substr (save, _cursor - save);
// True when the cursor ended up beyond 'save'.
150 return _cursor > save;
153 ////////////////////////////////////////////////////////////////////////////////
// Scans forward with utf8_next_char until a character classified as
// whitespace by unicodeWhitespace, copying the span [save, _cursor) of the
// text into 'result'.
// NOTE(review): 'save' presumably records the cursor at entry, and the
// whitespace character itself is presumably left unconsumed - confirm.
154 bool Pig::getUntilWS (std::string& result)
// The loop ends when utf8_next_char yields 0.
160 while ((c = utf8_next_char (*_text, _cursor)))
162 if (unicodeWhitespace (c))
165 result = _text->substr (save, _cursor - save);
169 // Note: This test must follow the above unicodeWhitespace(c) test because
170 // it is testing the value of 'c', and eos() is testing _cursor,
171 // which has already been advanced.
174 result = _text->substr (save, _cursor - save);
// True when the cursor ended up beyond 'save'.
181 return _cursor > save;
184 ////////////////////////////////////////////////////////////////////////////////
// Reads the element at the cursor.
// (*_text)[_cursor] is a single char widened to int, so one byte is read
// here rather than a decoded multi-byte character.
// NOTE(review): presumably stores the value in 'result' and advances the
// cursor when it is non-zero - confirm.
185 bool Pig::getCharacter (int& result)
187 int c = (*_text)[_cursor];
198 ////////////////////////////////////////////////////////////////////////////////
// Examines the single element at the cursor and tests it with
// unicodeLatinDigit.
// NOTE(review): presumably yields the digit's numeric value through 'result'
// and advances the cursor on success - confirm.
199 bool Pig::getDigit (int& result)
201 int c = (*_text)[_cursor];
203 unicodeLatinDigit (c))
213 ////////////////////////////////////////////////////////////////////////////////
// Reads two digits at the cursor as one base-10 integer in 'result'.
// Both elements, at offsets 0 and 1 from the cursor, must satisfy
// unicodeLatinDigit.
// NOTE(review): presumably advances the cursor by 2 on success - confirm.
214 bool Pig::getDigit2 (int& result)
216 if (unicodeLatinDigit ((*_text)[_cursor + 0]))
218 if (unicodeLatinDigit ((*_text)[_cursor + 1]))
// Convert the two-element slice; strtoimax returns intmax_t, which is
// narrowed to int by the assignment.
220 result = strtoimax (_text->substr (_cursor, 2).c_str (), NULL, 10);
229 ////////////////////////////////////////////////////////////////////////////////
// Reads three digits at the cursor as one base-10 integer in 'result'.
// The elements at offsets 0, 1 and 2 from the cursor must all satisfy
// unicodeLatinDigit.
// NOTE(review): presumably advances the cursor by 3 on success - confirm.
230 bool Pig::getDigit3 (int& result)
232 if (unicodeLatinDigit ((*_text)[_cursor + 0]))
234 if (unicodeLatinDigit ((*_text)[_cursor + 1]))
236 if (unicodeLatinDigit ((*_text)[_cursor + 2]))
// Convert the three-element slice; strtoimax returns intmax_t, which is
// narrowed to int by the assignment.
238 result = strtoimax (_text->substr (_cursor, 3).c_str (), NULL, 10);
248 ////////////////////////////////////////////////////////////////////////////////
// Reads four digits at the cursor as one base-10 integer in 'result'.
// The elements at offsets 0 through 3 from the cursor must all satisfy
// unicodeLatinDigit.
// NOTE(review): presumably advances the cursor by 4 on success - confirm.
249 bool Pig::getDigit4 (int& result)
251 if (unicodeLatinDigit ((*_text)[_cursor + 0]))
253 if (unicodeLatinDigit ((*_text)[_cursor + 1]))
255 if (unicodeLatinDigit ((*_text)[_cursor + 2]))
257 if (unicodeLatinDigit ((*_text)[_cursor + 3]))
// Convert the four-element slice; strtoimax returns intmax_t, which is
// narrowed to int by the assignment.
259 result = strtoimax (_text->substr (_cursor, 4).c_str (), NULL, 10);
270 ////////////////////////////////////////////////////////////////////////////////
// Scans a run of digits (as classified by unicodeLatinDigit) and converts the
// scanned text to a base-10 integer in 'result'.
// NOTE(review): 'save' presumably records the cursor at entry, and the first
// non-digit is presumably left unconsumed - confirm.
271 bool Pig::getDigits (int& result)
277 while ((c = utf8_next_char (*_text, _cursor)))
279 if (! unicodeLatinDigit (c))
// Converts the span [save, _cursor). strtoimax returns intmax_t and the
// assignment narrows it to int, so long digit runs can exceed the range of
// 'result'.
290 result = strtoimax (_text->substr (save, _cursor - save).c_str (), NULL, 10);
297 ////////////////////////////////////////////////////////////////////////////////
// Reads the element at the cursor and classifies it as a hexadecimal digit.
// NOTE(review): presumably advances the cursor and returns true when one of
// the three ranges below matches - confirm.
298 bool Pig::getHexDigit (int& result)
300 int c = (*_text)[_cursor];
// Decimal digits '0'..'9'.
304 if (c >= '0' && c <= '9')
// Upper-case 'A'..'F' map to 10..15.
310 else if (c >= 'A' && c <= 'F')
312 result = c - 'A' + 10;
// Lower-case 'a'..'f' map to 10..15.
316 else if (c >= 'a' && c <= 'f')
318 result = c - 'a' + 10;
327 ////////////////////////////////////////////////////////////////////////////////
// Scans a number starting at the cursor and returns its text through
// 'result'. The scan walks a separate index 'i'; the text handed back is the
// slice substr (_cursor, i - _cursor).
// Shape, as far as the visible tests show: a leading sign test, digits, an
// optional '.' followed by digits, and an optional exponent part.
// NOTE(review): two assignments to 'result' are visible; presumably one
// covers the path with a complete exponent and the other the path without -
// confirm, together with how the cursor is advanced.
343 bool Pig::getNumber (std::string& result)
349 ((*_text)[i] == '-' ||
355 unicodeLatinDigit ((*_text)[i]))
// Integer digits.
359 while ((*_text)[i] && unicodeLatinDigit ((*_text)[i]))
// Optional decimal point and fraction digits.
363 if ((*_text)[i] && (*_text)[i] == '.')
367 while ((*_text)[i] && unicodeLatinDigit ((*_text)[i]))
371 // ( [eE] [+-]? digit+ )?
373 ((*_text)[i] == 'e' ||
379 ((*_text)[i] == '+' ||
// At least one digit is required after the exponent marker and sign.
383 if ((*_text)[i] && unicodeLatinDigit ((*_text)[i]))
387 while ((*_text)[i] && unicodeLatinDigit ((*_text)[i]))
390 result = _text->substr (_cursor, i - _cursor);
398 result = _text->substr (_cursor, i - _cursor);
406 ////////////////////////////////////////////////////////////////////////////////
// Overload that yields the number as a double.
// NOTE(review): 's' presumably holds the text produced by the std::string
// overload of getNumber - confirm.
407 bool Pig::getNumber (double& result)
// Converts the text 's' with std::strtod; no end pointer is requested.
412 result = std::strtod (s.c_str (), NULL);
419 ////////////////////////////////////////////////////////////////////////////////
420 // [ + | - ] \d+ [ . [ \d+ ]]
// Scans a decimal number (no exponent part) starting at the cursor and
// returns its text through 'result'. The scan walks a separate index 'i'; the
// text handed back is the slice substr (_cursor, i - _cursor).
// NOTE(review): presumably advances the cursor to 'i' and returns true on
// success - confirm.
421 bool Pig::getDecimal (std::string& result)
427 ((*_text)[i] == '-' ||
// At least one digit is required.
432 if ((*_text)[i] && unicodeLatinDigit ((*_text)[i]))
436 while ((*_text)[i] && unicodeLatinDigit ((*_text)[i]))
// Optional decimal point and fraction digits.
440 if ((*_text)[i] && (*_text)[i] == '.')
444 while ((*_text)[i] && unicodeLatinDigit ((*_text)[i]))
448 result = _text->substr (_cursor, i - _cursor);
456 ////////////////////////////////////////////////////////////////////////////////
// Overload that yields the decimal as a double.
// NOTE(review): 's' presumably holds the text produced by the std::string
// overload of getDecimal - confirm.
457 bool Pig::getDecimal (double& result)
// Converts the text 's' with std::strtod; no end pointer is requested.
462 result = std::strtod (s.c_str (), NULL);
469 ////////////////////////////////////////////////////////////////////////////////
470 // Gets quote content: "foobar" -> foobar (for quote = '"')
471 // Handles escaped quotes: "foo\"bar" -> foo\"bar (for quote = '"')
472 // Returns false if first character is not quote, or if there is no closing quote.
473 // Does not modify content between quotes.
// On success 'result' receives the text between the opening and closing
// 'quote' characters and the cursor is placed after the closing one.
474 bool Pig::getQuoted (int quote, std::string& result)
// The element at the cursor must be the opening quote.
476 if (! (*_text)[_cursor] ||
477 (*_text)[_cursor] != quote)
// 'start' is the offset of the first element after the opening quote.
480 auto start = _cursor + utf8_sequence (quote);
// NOTE(review): find() receives the int 'quote' converted to a single char,
// while the offsets use utf8_sequence (quote) - confirm the intended handling
// of quote characters wider than one byte.
485 i = _text->find (quote, i);
486 if (i == std::string::npos)
487 return false; // Unclosed quote. Short cut, not definitive.
492 _cursor += 2 * utf8_sequence (quote); // Skip both quote chars
// A quote preceded by a backslash may be escaped; the run of backslashes to
// its left decides.
497 if ((*_text)[i - 1] == '\\')
499 // Check for escaped backslashes. Backtracking like this is not very
500 // efficient, but is only done in extreme corner cases.
502 auto j = i - 2; // Start one character further left
503 bool is_escaped_quote = true;
// Walk left, but never past 'start'.
504 while (j >= start && (*_text)[j] == '\\')
506 // Toggle flag for each further backslash encountered.
507 is_escaped_quote = is_escaped_quote ? false : true;
511 if (is_escaped_quote)
519 // None of the above applied, we must have found the closing quote char.
// Copies the span [start, i) of the text, unmodified.
520 result.assign (*_text, start, i - start);
521 _cursor = i + utf8_sequence (quote); // Skip closing quote char
525 // This should never be reached. We could throw here instead.
529 ////////////////////////////////////////////////////////////////////////////////
530 // Assumes that the options are sorted by decreasing length, so that if the
531 // options contain 'fourteen' and 'four', the stream is first matched against
534 const std::vector <std::string>& options,
537 for (const auto& option : options)
539 if (skipLiteral (option))
549 ////////////////////////////////////////////////////////////////////////////////
// Reads hours, minutes and seconds using the digit helpers.
// NOTE(review): the separators between the fields and the handling of
// 'minutes' are presumably part of the same condition - confirm.
550 bool Pig::getHMS (int& hours, int& minutes, int& seconds)
// Hours: a two-digit match is attempted first, then a single digit.
554 if ((getDigit2 (hours) || getDigit (hours)) &&
560 ! getDigit2 (seconds))
570 ////////////////////////////////////////////////////////////////////////////////
// Hands back everything from the cursor to the end of the text and moves the
// cursor past it.
// NOTE(review): presumably returns true in that case and false when nothing
// is left - confirm.
571 bool Pig::getRemainder (std::string& result)
// Only acts when the element at the cursor is non-zero.
573 if ((*_text)[_cursor])
575 result = _text->substr (_cursor);
// Advance by the number of bytes just copied.
576 _cursor += result.length ();
583 ////////////////////////////////////////////////////////////////////////////////
// Reports whether the element at the cursor is '\0'.
// std::string's operator[] yields '\0' at index size(), so this is true at
// the end of the text; a '\0' byte inside the text reports true as well.
584 bool Pig::eos () const
586 return (*_text)[_cursor] == '\0';
589 ////////////////////////////////////////////////////////////////////////////////
590 // Peeks ahead - does not move cursor.
// Returns the element at the cursor, widened to int. This is a single char
// of the text, and it is '\0' when the cursor is at the end of the text.
591 int Pig::peek () const
593 return (*_text)[_cursor];
596 ////////////////////////////////////////////////////////////////////////////////
597 // Peeks ahead - does not move cursor.
// Returns up to 'quantity' bytes of text starting at the cursor.
// NOTE(review): presumably returns an empty string when the element at the
// cursor is '\0' - confirm.
598 std::string Pig::peek (const int quantity) const
// Clamp the request to the number of bytes left after the cursor. 'quantity'
// is converted to an unsigned type first, so a negative value becomes very
// large and is clamped to the remaining length as well.
600 std::string::size_type adjusted = std::min (static_cast <std::string::size_type> (quantity), _text->length () - _cursor);
601 if ((*_text)[_cursor])
602 return _text->substr (_cursor, adjusted);
607 ////////////////////////////////////////////////////////////////////////////////
// Const accessor returning a std::string::size_type.
// NOTE(review): presumably returns the current value of _cursor - confirm.
608 std::string::size_type Pig::cursor () const
613 ////////////////////////////////////////////////////////////////////////////////
614 // Note: never called internally, otherwise the client cannot rely on it.
// Records the current cursor in _saved and returns that value.
615 std::string::size_type Pig::save ()
617 return _saved = _cursor;
620 ////////////////////////////////////////////////////////////////////////////////
621 // Note: never called internally, otherwise the client cannot rely on it.
// Sets the cursor back to the value held in _saved and returns it.
622 std::string::size_type Pig::restore ()
624 return _cursor = _saved;
627 ////////////////////////////////////////////////////////////////////////////////
// Sets the cursor to 'previous' and returns it. The value is assigned exactly
// as given.
628 std::string::size_type Pig::restoreTo (std::string::size_type previous)
630 return _cursor = previous;
633 ////////////////////////////////////////////////////////////////////////////////
// Returns the text in the half-open range [start, end). Both arguments are
// offsets into the text; 'end' is a position, not a length.
634 std::string Pig::substr (
635 std::string::size_type start,
636 std::string::size_type end) const
// The length is computed with unsigned arithmetic, so end < start wraps to a
// very large count and std::string::substr then returns everything from
// 'start' onwards.
638 return _text->substr (start, end - start);
641 ////////////////////////////////////////////////////////////////////////////////
// Returns a copy of the text from the cursor to the end.
642 std::string Pig::str () const
644 return _text->substr (_cursor);
647 ////////////////////////////////////////////////////////////////////////////////
648 // Show the text, with the matched part in white on green, and the unmatched
649 // part white on red, followed by the index equivalent.
650 std::string Pig::dump () const
652 std::stringstream out;
// Text before the cursor.
655 << _text->substr (0, _cursor)
// Text from the cursor to the end.
659 << _text->substr (_cursor)
// NOTE(review): str_replace presumably substitutes each newline with the
// two-character sequence backslash-n, keeping the dump on one line - confirm.
665 return str_replace (out.str (), "\n", "\\n");
668 ////////////////////////////////////////////////////////////////////////////////