1 ////////////////////////////////////////////////////////////////////////////////
3 // Copyright 2015 - 2017, Paul Beckingham, Federico Hernandez.
5 // Permission is hereby granted, free of charge, to any person obtaining a copy
6 // of this software and associated documentation files (the "Software"), to deal
7 // in the Software without restriction, including without limitation the rights
8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 // copies of the Software, and to permit persons to whom the Software is
10 // furnished to do so, subject to the following conditions:
12 // The above copyright notice and this permission notice shall be included
13 // in all copies or substantial portions of the Software.
15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 // http://www.opensource.org/licenses/mit-license.php
25 ////////////////////////////////////////////////////////////////////////////////
31 ////////////////////////////////////////////////////////////////////////////////
// Complete Unicode whitespace test.
//
// Returns true when c is accepted by either unicodeHorizontalWhitespace or
// unicodeVerticalWhitespace, and false otherwise.
bool unicodeWhitespace (unsigned int c)
{
  return unicodeHorizontalWhitespace (c) ||
         unicodeVerticalWhitespace (c);
}
39 ////////////////////////////////////////////////////////////////////////////////
// Horizontal Unicode whitespace list.
//
// http://en.wikipedia.org/wiki/Whitespace_character
//
// Returns true when c is exactly one of the code points enumerated below, and
// false for every other value. Each entry is annotated with the character
// name, followed by its script and category where the list records them.
//
// TODO This list should be derived from the Unicode database.
bool unicodeHorizontalWhitespace (unsigned int c)
{
  switch (c)
  {
  case 0x0020:  // space                      (Common; Separator, space)
  case 0x0009:  // HT, horizontal tab         (Common; Other, control)
  case 0x00A0:  // no-break space             (Common; Separator, space)
  case 0x1680:  // ogham space mark           (Ogham; Separator, space)
  case 0x180E:  // mongolian vowel separator  (Mongolian; Separator, space)
  case 0x2000:  // en quad                    (Common; Separator, space)
  case 0x2001:  // em quad                    (Common; Separator, space)
  case 0x2002:  // en space                   (Common; Separator, space)
  case 0x2003:  // em space                   (Common; Separator, space)
  case 0x2004:  // three-per-em space         (Common; Separator, space)
  case 0x2005:  // four-per-em space          (Common; Separator, space)
  case 0x2006:  // six-per-em space           (Common; Separator, space)
  case 0x2007:  // figure space               (Common; Separator, space)
  case 0x2008:  // punctuation space          (Common; Separator, space)
  case 0x2009:  // thin space                 (Common; Separator, space)
  case 0x200A:  // hair space                 (Common; Separator, space)
  case 0x200B:  // zero width space
  case 0x200C:  // zero width non-joiner
  case 0x200D:  // zero width joiner
  case 0x202F:  // narrow no-break space      (Common; Separator, space)
  case 0x205F:  // medium mathematical space  (Common; Separator, space)
  case 0x2060:  // word joiner
  case 0x3000:  // ideographic space          (Common; Separator, space)
    return true;

  default:
    return false;
  }
}
74 ////////////////////////////////////////////////////////////////////////////////
// Vertical Unicode whitespace list.
//
// http://en.wikipedia.org/wiki/Whitespace_character
//
// Returns true when c is exactly one of the code points enumerated below, and
// false for every other value. Each entry is annotated with the character
// name, followed by its script and category.
//
// TODO This list should be derived from the Unicode database.
bool unicodeVerticalWhitespace (unsigned int c)
{
  switch (c)
  {
  case 0x000A:  // LF, line feed         (Common; Other, control)
  case 0x000B:  // VT, vertical tab      (Common; Other, control)
  case 0x000C:  // FF, form feed         (Common; Other, control)
  case 0x000D:  // CR, carriage return   (Common; Other, control)
  case 0x0085:  // NEL, next line        (Common; Other, control)
  case 0x2028:  // line separator        (Common; Separator, line)
  case 0x2029:  // paragraph separator   (Common; Separator, paragraph)
    return true;

  default:
    return false;
  }
}
93 ////////////////////////////////////////////////////////////////////////////////
// Reports whether c is a punctuation character, as decided by iswpunct.
//
// iswpunct returns a nonzero int for a match; that is normalized to a bool
// here. Classification follows the locale in effect when this is called.
bool unicodePunctuation (unsigned int c)
{
  return iswpunct (c) != 0;
}
99 ////////////////////////////////////////////////////////////////////////////////
// Classifies c using iswprint (c) combined, via &&, with the negation of
// unicodeWhitespace (c).
//
// NOTE(review): as it appears here the definition has no enclosing braces,
// each line carries a leading number token, and the condition list may be
// missing a term between the two visible operands. Confirm against the
// authoritative copy of this file before relying on or rewriting it.
100 bool unicodeAlpha (unsigned int c)
102 return iswprint (c) &&
104 ! unicodeWhitespace (c);
107 ////////////////////////////////////////////////////////////////////////////////
// Reports whether c lies in one of the ranges 'A'-'Z' or 'a'-'z'.
//
// TODO Needs better definition.
bool unicodeLatinAlpha (unsigned int c)
{
  return (c >= 'A' && c <= 'Z') ||
         (c >= 'a' && c <= 'z');
}
115 ////////////////////////////////////////////////////////////////////////////////
// Reports whether c is in the range 0x30 through 0x39 inclusive, i.e. the
// digits 0-9.
//
// TODO This list should be derived from the Unicode database.
bool unicodeLatinDigit (unsigned int c)
{
  return c >= 0x30 && c <= 0x39;
}
124 ////////////////////////////////////////////////////////////////////////////////
// Digits 0-9 a-f A-F.
//
// Returns true when c falls in any of those three ranges, and false
// otherwise; both letter cases are accepted.
bool unicodeHexDigit (unsigned int c)
{
  return (c >= '0' && c <= '9') ||
         (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F');
}
133 ////////////////////////////////////////////////////////////////////////////////