1 ////////////////////////////////////////////////////////////////////////////////
3 // Copyright 2013 - 2017, Paul Beckingham, Federico Hernandez.
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
12 // The above copyright notice and this permission notice shall be included
13 // in all copies or substantial portions of the Software.
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
23 // http://www.opensource.org/licenses/mit-license.php
25 ////////////////////////////////////////////////////////////////////////////////
27 #ifndef INCLUDED_LEXER
28 #define INCLUDED_LEXER
39 // These are overridable.
// Class-wide (static) string shared by every Lexer; this line only declares it,
// the definition is not visible in this chunk.
// NOTE(review): presumably the format used when recognising dates (see isDate)
// and meant to be assigned by client code before lexing - confirm.
40 static std::string dateFormat;
42 enum class Type { uuid, number, hex,
// Construction and the primary token interface. Only declarations are visible
// here; the definitions live outside this chunk.

// Builds a Lexer from a string taken by const reference. Marked explicit, so a
// std::string is never implicitly converted into a Lexer.
51 explicit Lexer (const std::string&);

// Non-static; returns bool and takes a std::string and a Lexer::Type by
// non-const reference.
// NOTE(review): presumably both references are outputs (token text and token
// type) and the return value says whether a token was produced - confirm
// against the implementation.
52 bool token (std::string&, Lexer::Type&);

// Static; maps a Lexer::Type value to a std::string.
// NOTE(review): presumably a human-readable name for the type - confirm.
53 static std::string typeToString (Lexer::Type);
// Static helpers. None of them needs a Lexer instance. Only the declarations
// are visible in this chunk, so the notes below describe the signatures and
// mark anything inferred from a name as an assumption to be confirmed.

// Takes a string and returns a vector of (std::string, Lexer::Type) tuples.
// NOTE(review): presumably one tuple per token found in the input - confirm.
56 static std::vector <std::tuple <std::string, Lexer::Type>> tokenize (const std::string&);

// Returns a std::string for a Lexer::Type.
// NOTE(review): the result is returned by value as 'const std::string'; the
// top-level const stops callers from moving out of the returned temporary.
// Dropping it would also require changing the out-of-view definition.
// NOTE(review): looks like it overlaps with typeToString - confirm whether
// both are needed.
57 static const std::string typeName (const Lexer::Type&);

// Predicates over int arguments.
// NOTE(review): the ints are presumably character code points - confirm.
// NOTE(review): isDoubleCharOperator takes three ints and isTripleCharOperator
// takes four, i.e. one more than the operator length suggests; presumably the
// extra argument is the character that follows the operator - confirm.
58 static bool isSingleCharOperator (int);
59 static bool isDoubleCharOperator (int, int, int);
60 static bool isTripleCharOperator (int, int, int, int);
61 static bool isBoundary (int, int);
62 static bool isHardBoundary (int, int);
63 static bool isPunctuation (int);

// Predicate over a string.
// NOTE(review): presumably reports whether the text was originally quoted -
// confirm what it actually inspects.
64 static bool wasQuoted (const std::string&);

// Two overloads that differ only by an extra leading const std::string&. Both
// take a std::string::size_type and a std::string by non-const reference.
// NOTE(review): presumably (text, [quote characters], cursor in/out, word out)
// with the return value signalling success - confirm.
65 static bool readWord (const std::string&, const std::string&, std::string::size_type&, std::string&);
66 static bool readWord (const std::string&, std::string::size_type&, std::string&);

// Overloads taking one, two or four ints and returning int.
// NOTE(review): presumably each argument is a hex digit character and the
// result is the combined numeric value - confirm.
67 static int hexToInt (int);
68 static int hexToInt (int, int);
69 static int hexToInt (int, int, int, int);

// String utilities returning a new std::string. 't' defaults to a single space.
// NOTE(review): presumably 't' is the set of characters to strip from the
// left end, right end, or both ends of 'in' - confirm.
70 static std::string trimLeft (const std::string& in, const std::string& t = " ");
71 static std::string trimRight (const std::string& in, const std::string& t = " ");
72 static std::string trim (const std::string& in, const std::string& t = " ");

// 'quotes' defaults to the two characters ' and ".
// NOTE(review): presumably removes a matching pair of surrounding quote
// characters from the first argument - confirm.
73 static std::string dequote (const std::string&, const std::string& quotes = "'\"");
75 // Stream Classifiers.
// Non-static classifier members. Every one returns bool and takes a
// std::string and a Lexer::Type by non-const reference; only the declarations
// are visible in this chunk.
// NOTE(review): presumably each one tries to recognise its token kind at the
// current cursor, fills in the two references on success and returns whether
// it matched - confirm against the implementation.
// An _enableXxx flag exists for each classifier except isNumber and isInteger.
// NOTE(review): how those flags gate the classifiers is not visible here.

// Extra const std::string& argument.
// NOTE(review): presumably the accepted quote characters - confirm.
77 bool isString (std::string&, Lexer::Type&, const std::string&);
78 bool isDate (std::string&, Lexer::Type&);
79 bool isDuration (std::string&, Lexer::Type&);

// Extra bool argument; its meaning cannot be determined from the declaration.
80 bool isUUID (std::string&, Lexer::Type&, bool);
81 bool isNumber (std::string&, Lexer::Type&);
82 bool isInteger (std::string&, Lexer::Type&);
83 bool isHexNumber (std::string&, Lexer::Type&);
84 bool isURL (std::string&, Lexer::Type&);
85 bool isPath (std::string&, Lexer::Type&);
86 bool isPattern (std::string&, Lexer::Type&);
87 bool isOperator (std::string&, Lexer::Type&);
88 bool isWord (std::string&, Lexer::Type&);
90 // Disabling features.
91 void noString () { _enableString = false; }
92 void noDate () { _enableDate = false; }
93 void noDuration () { _enableDuration = false; }
94 void noUUID () { _enableUUID = false; }
95 void noHexNumber () { _enableHexNumber = false; }
96 void noWord () { _enableWord = false; }
97 void noURL () { _enableURL = false; }
98 void noPath () { _enablePath = false; }
99 void noPattern () { _enablePattern = false; }
100 void noOperator () { _enableOperator = false; }
// Input state: an initially empty string plus two size counters that start
// at zero.
// NOTE(review): presumably _cursor is the read position within _text and _eos
// marks the end of the input - confirm against the constructor.
std::string _text;
std::size_t _cursor = 0;
std::size_t _eos = 0;

// Feature flags. All start out true; each noXxx() member sets the matching
// flag to false.
bool _enableString = true;
bool _enableDate = true;
bool _enableDuration = true;
bool _enableUUID = true;
bool _enableHexNumber = true;
bool _enableWord = true;
bool _enableURL = true;
bool _enablePath = true;
bool _enablePattern = true;
bool _enableOperator = true;