Unravel Engine C++ Reference
Loading...
Searching...
No Matches
checked.h
Go to the documentation of this file.
1// Copyright 2006-2016 Nemanja Trifunovic
2
3/*
4Permission is hereby granted, free of charge, to any person or organization
5obtaining a copy of the software and accompanying documentation covered by
6this license (the "Software") to use, reproduce, display, distribute,
7execute, and transmit the Software, and to prepare derivative works of the
8Software, and to permit third-parties to whom the Software is furnished to
9do so, all subject to the following:
10
11The copyright notices in the Software and this entire statement, including
12the above license grant, this restriction and the following disclaimer,
13must be included in all copies of the Software, in whole or in part, and
14all derivative works of the Software, unless such copies or derivative
15works are solely in the form of machine-executable object code generated by
16a source language processor.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24DEALINGS IN THE SOFTWARE.
25*/
26
27
28#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
30
31#include "core.h"
32#include <stdexcept>
33
34namespace utf8
35{
36 // Base for the exceptions that may be thrown from the library
// Root of the library's exception hierarchy. The concrete error types below
// derive from it, so a single catch of utf8::exception handles all of them.
class exception : public ::std::exception
{
};
39
40 // Exceptions that may be thrown from the library functions.
42 utfchar32_t cp;
43 public:
44 invalid_code_point(utfchar32_t codepoint) : cp(codepoint) {}
45 virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid code point"; }
46 utfchar32_t code_point() const {return cp;}
47 };
48
49 class invalid_utf8 : public exception {
50 utfchar8_t u8;
51 public:
52 invalid_utf8 (utfchar8_t u) : u8(u) {}
53 invalid_utf8 (char c) : u8(static_cast<utfchar8_t>(c)) {}
54 virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; }
55 utfchar8_t utf8_octet() const {return u8;}
56 };
57
58 class invalid_utf16 : public exception {
59 utfchar16_t u16;
60 public:
61 invalid_utf16 (utfchar16_t u) : u16(u) {}
62 virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-16"; }
63 utfchar16_t utf16_word() const {return u16;}
64 };
65
66 class not_enough_room : public exception {
67 public:
68 virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Not enough space"; }
69 };
70
72
73 template <typename octet_iterator>
74 octet_iterator append(utfchar32_t cp, octet_iterator result)
75 {
77 throw invalid_code_point(cp);
78
79 return internal::append(cp, result);
80 }
81
82 inline void append(utfchar32_t cp, std::string& s)
83 {
84 append(cp, std::back_inserter(s));
85 }
86
87 template <typename word_iterator>
88 word_iterator append16(utfchar32_t cp, word_iterator result)
89 {
91 throw invalid_code_point(cp);
92
93 return internal::append16(cp, result);
94 }
95
96 template <typename octet_iterator, typename output_iterator>
97 output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
98 {
99 while (start != end) {
100 octet_iterator sequence_start = start;
102 switch (err_code) {
103 case internal::UTF8_OK :
104 for (octet_iterator it = sequence_start; it != start; ++it)
105 *out++ = *it;
106 break;
108 out = utf8::append (replacement, out);
109 start = end;
110 break;
112 out = utf8::append (replacement, out);
113 ++start;
114 break;
118 out = utf8::append (replacement, out);
119 ++start;
120 // just one replacement mark for the sequence
121 while (start != end && utf8::internal::is_trail(*start))
122 ++start;
123 break;
124 }
125 }
126 return out;
127 }
128
129 template <typename octet_iterator, typename output_iterator>
130 inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
131 {
132 static const utfchar32_t replacement_marker = utf8::internal::mask16(0xfffd);
133 return utf8::replace_invalid(start, end, out, replacement_marker);
134 }
135
136 inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
137 {
138 std::string result;
139 replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
140 return result;
141 }
142
143 inline std::string replace_invalid(const std::string& s)
144 {
145 std::string result;
146 replace_invalid(s.begin(), s.end(), std::back_inserter(result));
147 return result;
148 }
149
150 template <typename octet_iterator>
151 utfchar32_t next(octet_iterator& it, octet_iterator end)
152 {
153 utfchar32_t cp = 0;
155 switch (err_code) {
156 case internal::UTF8_OK :
157 break;
159 throw not_enough_room();
163 throw invalid_utf8(static_cast<utfchar8_t>(*it));
165 throw invalid_code_point(cp);
166 }
167 return cp;
168 }
169
170 template <typename word_iterator>
171 utfchar32_t next16(word_iterator& it, word_iterator end)
172 {
173 utfchar32_t cp = 0;
175 if (err_code == internal::NOT_ENOUGH_ROOM)
176 throw not_enough_room();
177 return cp;
178 }
179
180 template <typename octet_iterator>
181 utfchar32_t peek_next(octet_iterator it, octet_iterator end)
182 {
183 return utf8::next(it, end);
184 }
185
186 template <typename octet_iterator>
187 utfchar32_t prior(octet_iterator& it, octet_iterator start)
188 {
189 // can't do much if it == start
190 if (it == start)
191 throw not_enough_room();
192
193 octet_iterator end = it;
194 // Go back until we hit either a lead octet or start
195 while (utf8::internal::is_trail(*(--it)))
196 if (it == start)
197 throw invalid_utf8(*it); // error - no lead byte in the sequence
198 return utf8::peek_next(it, end);
199 }
200
201 template <typename octet_iterator, typename distance_type>
202 void advance (octet_iterator& it, distance_type n, octet_iterator end)
203 {
204 const distance_type zero(0);
205 if (n < zero) {
206 // backward
207 for (distance_type i = n; i < zero; ++i)
208 utf8::prior(it, end);
209 } else {
210 // forward
211 for (distance_type i = zero; i < n; ++i)
212 utf8::next(it, end);
213 }
214 }
215
216 template <typename octet_iterator>
217 typename std::iterator_traits<octet_iterator>::difference_type
218 distance (octet_iterator first, octet_iterator last)
219 {
220 typename std::iterator_traits<octet_iterator>::difference_type dist;
221 for (dist = 0; first < last; ++dist)
222 utf8::next(first, last);
223 return dist;
224 }
225
226 template <typename u16bit_iterator, typename octet_iterator>
227 octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
228 {
229 while (start != end) {
230 utfchar32_t cp = utf8::internal::mask16(*start++);
231 // Take care of surrogate pairs first
233 if (start != end) {
234 const utfchar32_t trail_surrogate = utf8::internal::mask16(*start++);
235 if (utf8::internal::is_trail_surrogate(trail_surrogate))
236 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
237 else
238 throw invalid_utf16(static_cast<utfchar16_t>(trail_surrogate));
239 }
240 else
241 throw invalid_utf16(static_cast<utfchar16_t>(cp));
242
243 }
244 // Lone trail surrogate
246 throw invalid_utf16(static_cast<utfchar16_t>(cp));
247
248 result = utf8::append(cp, result);
249 }
250 return result;
251 }
252
253 template <typename u16bit_iterator, typename octet_iterator>
254 u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
255 {
256 while (start < end) {
257 const utfchar32_t cp = utf8::next(start, end);
258 if (cp > 0xffff) { //make a surrogate pair
259 *result++ = static_cast<utfchar16_t>((cp >> 10) + internal::LEAD_OFFSET);
260 *result++ = static_cast<utfchar16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
261 }
262 else
263 *result++ = static_cast<utfchar16_t>(cp);
264 }
265 return result;
266 }
267
268 template <typename octet_iterator, typename u32bit_iterator>
269 octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
270 {
271 while (start != end)
272 result = utf8::append(*(start++), result);
273
274 return result;
275 }
276
277 template <typename octet_iterator, typename u32bit_iterator>
278 u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
279 {
280 while (start < end)
281 (*result++) = utf8::next(start, end);
282
283 return result;
284 }
285
286 // The iterator class
287 template <typename octet_iterator>
288 class iterator {
289 octet_iterator it;
290 octet_iterator range_start;
291 octet_iterator range_end;
292 public:
296 typedef std::ptrdiff_t difference_type;
297 typedef std::bidirectional_iterator_tag iterator_category;
299 explicit iterator (const octet_iterator& octet_it,
300 const octet_iterator& rangestart,
301 const octet_iterator& rangeend) :
302 it(octet_it), range_start(rangestart), range_end(rangeend)
303 {
304 if (it < range_start || it > range_end)
305 throw std::out_of_range("Invalid utf-8 iterator position");
306 }
307 // the default "big three" are OK
308 octet_iterator base () const { return it; }
310 {
311 octet_iterator temp = it;
312 return utf8::next(temp, range_end);
313 }
314 bool operator == (const iterator& rhs) const
315 {
316 if (range_start != rhs.range_start || range_end != rhs.range_end)
317 throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
318 return (it == rhs.it);
319 }
320 bool operator != (const iterator& rhs) const
321 {
322 return !(operator == (rhs));
323 }
325 {
326 utf8::next(it, range_end);
327 return *this;
328 }
330 {
331 iterator temp = *this;
332 utf8::next(it, range_end);
333 return temp;
334 }
336 {
337 utf8::prior(it, range_start);
338 return *this;
339 }
341 {
342 iterator temp = *this;
343 utf8::prior(it, range_start);
344 return temp;
345 }
346 }; // class iterator
347
348} // namespace utf8
349
350#if UTF_CPP_CPLUSPLUS >= 202002L // C++ 20 or later
351#include "cpp20.h"
352#elif UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later
353#include "cpp17.h"
354#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
355#include "cpp11.h"
356#endif // C++ 11 or later
357
358#endif //header guard
359
virtual const char * what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
Definition checked.h:45
invalid_code_point(utfchar32_t codepoint)
Definition checked.h:44
utfchar32_t code_point() const
Definition checked.h:46
utfchar16_t utf16_word() const
Definition checked.h:63
virtual const char * what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
Definition checked.h:62
invalid_utf16(utfchar16_t u)
Definition checked.h:61
invalid_utf8(char c)
Definition checked.h:53
utfchar8_t utf8_octet() const
Definition checked.h:55
invalid_utf8(utfchar8_t u)
Definition checked.h:52
virtual const char * what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
Definition checked.h:54
utfchar32_t value_type
Definition checked.h:293
bool operator==(const iterator &rhs) const
Definition checked.h:314
octet_iterator base() const
Definition checked.h:308
utfchar32_t & reference
Definition checked.h:295
iterator & operator--()
Definition checked.h:335
utfchar32_t operator*() const
Definition checked.h:309
std::ptrdiff_t difference_type
Definition checked.h:296
iterator & operator++()
Definition checked.h:324
std::bidirectional_iterator_tag iterator_category
Definition checked.h:297
bool operator!=(const iterator &rhs) const
Definition checked.h:320
utfchar32_t * pointer
Definition checked.h:294
iterator(const octet_iterator &octet_it, const octet_iterator &rangestart, const octet_iterator &rangeend)
Definition checked.h:299
virtual const char * what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE
Definition checked.h:68
#define UTF_CPP_OVERRIDE
Definition core.h:48
#define UTF_CPP_NOEXCEPT
Definition core.h:49
bool is_lead_surrogate(utfchar32_t cp)
Definition core.h:107
octet_iterator append(utfchar32_t cp, octet_iterator result)
Definition core.h:357
const utfchar32_t SURROGATE_OFFSET
Definition core.h:85
bool is_trail_surrogate(utfchar32_t cp)
Definition core.h:112
utfchar16_t mask16(u16_type oc)
Definition core.h:96
word_iterator append16(utfchar32_t cp, word_iterator result)
Definition core.h:406
@ INCOMPLETE_SEQUENCE
Definition core.h:165
@ OVERLONG_SEQUENCE
Definition core.h:165
@ INVALID_CODE_POINT
Definition core.h:165
@ NOT_ENOUGH_ROOM
Definition core.h:165
utf_error validate_next16(word_iterator &it, word_iterator end, utfchar32_t &code_point)
Definition core.h:314
bool is_code_point_valid(utfchar32_t cp)
Definition core.h:122
bool is_trail(octet_type oc)
Definition core.h:102
utf_error validate_next(octet_iterator &it, octet_iterator end, utfchar32_t &code_point)
Definition core.h:254
const utfchar16_t TRAIL_SURROGATE_MIN
Definition core.h:82
const utfchar16_t LEAD_OFFSET
Definition core.h:84
Definition checked.h:35
utfchar32_t peek_next(octet_iterator it, octet_iterator end)
Definition checked.h:181
unsigned int utfchar32_t
Definition core.h:71
word_iterator append16(utfchar32_t cp, word_iterator result)
Definition checked.h:88
void advance(octet_iterator &it, distance_type n, octet_iterator end)
Definition checked.h:202
unsigned short utfchar16_t
Definition core.h:70
octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
Definition checked.h:269
utfchar32_t next16(word_iterator &it, word_iterator end)
Definition checked.h:171
u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
Definition checked.h:254
unsigned char utfchar8_t
Definition core.h:69
utfchar32_t next(octet_iterator &it, octet_iterator end)
Definition checked.h:151
u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
Definition checked.h:278
octet_iterator append(utfchar32_t cp, octet_iterator result)
The library API - functions intended to be called by the users.
Definition checked.h:74
octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
Definition checked.h:227
utfchar32_t prior(octet_iterator &it, octet_iterator start)
Definition checked.h:187
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
Definition checked.h:97
float distance