Unravel Engine C++ Reference
Loading...
Searching...
No Matches
unchecked.h
Go to the documentation of this file.
1// Copyright 2006 Nemanja Trifunovic
2
3/*
4Permission is hereby granted, free of charge, to any person or organization
5obtaining a copy of the software and accompanying documentation covered by
6this license (the "Software") to use, reproduce, display, distribute,
7execute, and transmit the Software, and to prepare derivative works of the
8Software, and to permit third-parties to whom the Software is furnished to
9do so, all subject to the following:
10
11The copyright notices in the Software and this entire statement, including
12the above license grant, this restriction and the following disclaimer,
13must be included in all copies of the Software, in whole or in part, and
14all derivative works of the Software, unless such copies or derivative
15works are solely in the form of machine-executable object code generated by
16a source language processor.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24DEALINGS IN THE SOFTWARE.
25*/
26
27
28#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
30
31#include "core.h"
32
33namespace utf8
34{
35 namespace unchecked
36 {
// unchecked::append -- thin forwarder. Passes the code point `cp` and the
// output iterator `result` straight to internal::append and returns whatever
// that call returns; no work is done at this level.
37 template <typename octet_iterator>
38 octet_iterator append(utfchar32_t cp, octet_iterator result)
39 {
40 return internal::append(cp, result);
41 }
42
// unchecked::append16 -- thin forwarder. Passes the code point `cp` and the
// output iterator `result` straight to internal::append16 and returns whatever
// that call returns; no work is done at this level.
43 template <typename word_iterator>
44 word_iterator append16(utfchar32_t cp, word_iterator result)
45 {
46 return internal::append16(cp, result);
47 }
48
// Walks the octets in [start, end), writing to `out`, and substitutes
// `replacement` (encoded via utf8::unchecked::append) for the sequences the
// switch below classifies as bad. Returns the advanced output iterator.
//
// NOTE(review): listing lines 54, 56, 60, 64 and 68-70 are absent from this
// extract, so the declaration of `err_code` and every `case` label are not
// visible. Presumably err_code is the result of utf8::internal::validate_next
// and the labels are the utf_error enumerators -- confirm against the original
// header. The branch notes below describe only the statements that are visible.
49 template <typename octet_iterator, typename output_iterator>
50 output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
51 {
52 while (start != end) {
// Remember where this sequence began so it can be copied verbatim below.
53 octet_iterator sequence_start = start;
55 switch (err_code) {
// First visible branch: copy the octets [sequence_start, start) to `out` unchanged.
57 for (octet_iterator it = sequence_start; it != start; ++it)
58 *out++ = *it;
59 break;
// Second visible branch: emit one replacement, then set start = end, which
// makes the enclosing while loop terminate.
61 out = utf8::unchecked::append(replacement, out);
62 start = end;
63 break;
// Third visible branch: emit one replacement and step over a single octet.
65 out = utf8::unchecked::append(replacement, out);
66 ++start;
67 break;
// Fourth visible branch: emit one replacement, step over one octet, then also
// step over every following octet for which is_trail() is true.
71 out = utf8::unchecked::append(replacement, out);
72 ++start;
73 // just one replacement mark for the sequence
74 while (start != end && utf8::internal::is_trail(*start))
75 ++start;
76 break;
77 }
78 }
79 return out;
80 }
81
// Convenience overload of replace_invalid that supplies the replacement code
// point itself: 0xfffd (U+FFFD, the Unicode replacement character), passed
// through utf8::internal::mask16. Delegates to the four-argument
// utf8::unchecked::replace_invalid and returns its result.
82 template <typename octet_iterator, typename output_iterator>
83 inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
84 {
// Function-local static: initialised once, then reused by later calls.
85 static const utfchar32_t replacement_marker = utf8::internal::mask16(0xfffd);
86 return utf8::unchecked::replace_invalid(start, end, out, replacement_marker);
87 }
88
// std::string convenience overload. Runs the iterator-based replace_invalid
// over s.begin()..s.end() with the caller-supplied `replacement`, appending the
// output to a fresh string through std::back_inserter. `s` is taken by const
// reference and is not modified; the new string is returned by value.
89 inline std::string replace_invalid(const std::string& s, utfchar32_t replacement)
90 {
91 std::string result;
92 replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
93 return result;
94 }
95
// std::string convenience overload without an explicit replacement. Runs the
// three-argument iterator-based replace_invalid over s.begin()..s.end(),
// appending the output to a fresh string through std::back_inserter. `s` is
// taken by const reference and is not modified; the new string is returned by
// value.
96 inline std::string replace_invalid(const std::string& s)
97 {
98 std::string result;
99 replace_invalid(s.begin(), s.end(), std::back_inserter(result));
100 return result;
101 }
102
// Decodes one code point beginning at `it` and returns it. `it` is taken by
// reference and is advanced: once per continuation octet inside the switch and
// once more after it, so it ends one past the last octet read.
//
// NOTE(review): listing lines 106-107 are absent from this extract, so the
// declaration of `cp` and the `switch` header are not visible. Presumably `cp`
// starts as the masked lead octet and the switch is on
// utf8::internal::sequence_length(it) -- confirm against the original header.
103 template <typename octet_iterator>
104 utfchar32_t next(octet_iterator& it)
105 {
// case 1: cp is used as-is.
108 case 1:
109 break;
// case 2: shift cp left by 6 (kept to 11 bits by 0x7ff) and add the low
// 6 bits of the next octet.
110 case 2:
111 it++;
112 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
113 break;
// case 3: shift cp left by 12 (kept to 16 bits by 0xffff), add the next octet
// shifted by 6 (kept to 12 bits by 0xfff), then the low 6 bits of the octet
// after that.
114 case 3:
115 ++it;
116 cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
117 ++it;
118 cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
119 break;
// case 4: shift cp left by 18 (kept to 21 bits by 0x1fffff), then add the next
// three octets at shifts 12, 6 and 0 with the masks shown.
120 case 4:
121 ++it;
122 cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
123 ++it;
124 cp = static_cast<utfchar32_t>(cp + ((utf8::internal::mask8(*it) << 6) & 0xfff));
125 ++it;
126 cp = static_cast<utfchar32_t>(cp + ((*it) & 0x3f));
127 break;
128 }
// Executed for every case: step past the last octet that was read.
129 ++it;
130 return cp;
131 }
132
// Returns the code point that utf8::unchecked::next would decode at `it`
// without moving the caller's iterator: `it` is received by value, so next()
// advances only this local copy.
133 template <typename octet_iterator>
134 utfchar32_t peek_next(octet_iterator it)
135 {
136 return utf8::unchecked::next(it);
137 }
138
// Reads one code point from a sequence of 16-bit units at `it`; `it` is taken
// by reference.
//
// NOTE(review): listing lines 142-143 are absent from this extract, so the
// declaration of `cp` and the condition guarding the first `return` are not
// visible. Presumably `cp` is read from `*it++` and the guard is
// utf8::internal::is_lead_surrogate(cp) -- confirm against the original header.
139 template <typename word_iterator>
140 utfchar32_t next16(word_iterator& it)
141 {
// Combining path: cp shifted left by 10, plus the next unit (consumed by the
// post-increment), plus SURROGATE_OFFSET.
144 return (cp << 10) + *it++ + utf8::internal::SURROGATE_OFFSET;
// Otherwise cp is returned unchanged.
145 return cp;
146 }
147
// Moves `it` (taken by reference) backward to the previous octet for which
// is_trail() is false, then returns the code point decoded from that position.
// Decoding uses a copy, so `it` stays at the start of that sequence.
// The backward scan has no lower bound: nothing here stops it from running
// past the start of the underlying range.
148 template <typename octet_iterator>
149 utfchar32_t prior(octet_iterator& it)
150 {
// Pre-decrement at least once; keep going while the octet is a trail octet.
151 while (utf8::internal::is_trail(*(--it))) ;
152 octet_iterator temp = it;
153 return utf8::unchecked::next(temp);
154 }
155
// Moves `it` (taken by reference) by `n` steps: the first loop runs -n times
// when n is negative, the second runs n times otherwise.
//
// NOTE(review): listing lines 163 and 167 -- the bodies of both for loops --
// are absent from this extract. Presumably they call utf8::unchecked::prior(it)
// and utf8::unchecked::next(it) respectively -- confirm against the original
// header.
156 template <typename octet_iterator, typename distance_type>
157 void advance(octet_iterator& it, distance_type n)
158 {
// A zero of the caller's distance type, used for the comparisons below.
159 const distance_type zero(0);
160 if (n < zero) {
161 // backward
162 for (distance_type i = n; i < zero; ++i)
164 } else {
165 // forward
166 for (distance_type i = zero; i < n; ++i)
168 }
169 }
170
// Returns the number of loop iterations needed for `first` to reach or pass
// `last`, as the iterator's difference_type. The loop condition uses
// `first < last`, so octet_iterator must support operator<.
//
// NOTE(review): listing line 177 -- the body of the for loop -- is absent from
// this extract. Presumably it is utf8::unchecked::next(first), which would make
// the result a count of code points -- confirm against the original header.
171 template <typename octet_iterator>
172 typename std::iterator_traits<octet_iterator>::difference_type
173 distance(octet_iterator first, octet_iterator last)
174 {
175 typename std::iterator_traits<octet_iterator>::difference_type dist;
176 for (dist = 0; first < last; ++dist)
178 return dist;
179 }
180
// Converts the 16-bit units in [start, end) to octets written through `result`
// (one utf8::unchecked::append call per code point) and returns the advanced
// output iterator.
//
// NOTE(review): listing line 187 is absent from this extract; it opens the
// block that listing line 192 closes. Presumably it reads
// `if (utf8::internal::is_lead_surrogate(cp)) {` -- confirm against the
// original header.
181 template <typename u16bit_iterator, typename octet_iterator>
182 octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
183 {
184 while (start != end) {
185 utfchar32_t cp = utf8::internal::mask16(*start++);
186 // Take care of surrogate pairs first
// If the input ends before the second unit, return immediately; nothing is
// written for the unit already read into cp.
188 if (start == end)
189 return result;
190 utfchar32_t trail_surrogate = utf8::internal::mask16(*start++);
// Merge the two units into a single code point.
191 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
192 }
193 result = utf8::unchecked::append(cp, result);
194 }
195 return result;
196 }
197
// Converts the octets in [start, end) to 16-bit units written through `result`
// and returns the advanced output iterator. The loop condition uses
// `start < end`, so octet_iterator must support operator<.
//
// NOTE(review): listing line 202 is absent from this extract, so the
// declaration of `cp` is not visible. Presumably it is
// `utfchar32_t cp = utf8::unchecked::next(start);` -- confirm against the
// original header.
198 template <typename u16bit_iterator, typename octet_iterator>
199 u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
200 {
201 while (start < end) {
// Values above 0xffff are written as two units: (cp >> 10) + LEAD_OFFSET, then
// the low 10 bits + TRAIL_SURROGATE_MIN. Everything else is written as one unit.
203 if (cp > 0xffff) { //make a surrogate pair
204 *result++ = static_cast<utfchar16_t>((cp >> 10) + internal::LEAD_OFFSET);
205 *result++ = static_cast<utfchar16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
206 }
207 else
208 *result++ = static_cast<utfchar16_t>(cp);
209 }
210 return result;
211 }
212
// Passes every element of [start, end) to utf8::unchecked::append, writing
// through `result`, and returns the advanced output iterator. An empty input
// range returns `result` unchanged.
213 template <typename octet_iterator, typename u32bit_iterator>
214 octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
215 {
216 while (start != end)
217 result = utf8::unchecked::append(*(start++), result);
218
219 return result;
220 }
221
// Decodes code points from [start, end) with utf8::unchecked::next, which
// advances `start`, and writes each one through `result`. Returns the advanced
// output iterator. The loop condition uses `start < end`, so octet_iterator
// must support operator<.
222 template <typename octet_iterator, typename u32bit_iterator>
223 u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
224 {
225 while (start < end)
226 (*result++) = utf8::unchecked::next(start);
227
228 return result;
229 }
230
// Adapter that wraps an octet_iterator and is tagged as a bidirectional
// iterator; the dereference body below yields the code point decoded by
// utf8::unchecked::next.
//
// NOTE(review): listing lines 236-238, 241, 245, 258, 263, 269, 271, 274 and
// 277 are absent from this extract. The symbol index at the end of this page
// places `utfchar32_t & reference` at line 238 and
// `utfchar32_t operator*() const` at line 245. The other missing lines are
// presumably the remaining typedefs, the default constructor, the
// operator++ / operator-- signatures and the calls inside the decrement
// bodies -- confirm against the original header.
231 // The iterator class
232 template <typename octet_iterator>
233 class iterator {
// The wrapped position in the underlying octet sequence.
234 octet_iterator it;
235 public:
239 typedef std::ptrdiff_t difference_type;
240 typedef std::bidirectional_iterator_tag iterator_category;
242 explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
243 // the default "big three" are OK
// Returns a copy of the wrapped octet iterator.
244 octet_iterator base () const { return it; }
// Body of operator*() (signature at missing listing line 245): decodes from a
// copy so the stored iterator is not advanced.
246 {
247 octet_iterator temp = it;
248 return utf8::unchecked::next(temp);
249 }
// Two iterators are equal when their wrapped octet iterators are equal.
250 bool operator == (const iterator& rhs) const
251 {
252 return (it == rhs.it);
253 }
254 bool operator != (const iterator& rhs) const
255 {
256 return !(operator == (rhs));
257 }
// Signature missing (listing line 258; presumably prefix operator++): advances
// by the value of sequence_length(it) and returns *this.
259 {
260 ::std::advance(it, utf8::internal::sequence_length(it));
261 return *this;
262 }
// Signature missing (listing line 263; presumably postfix operator++): same
// advance, but returns a copy taken beforehand.
264 {
265 iterator temp = *this;
266 ::std::advance(it, utf8::internal::sequence_length(it));
267 return temp;
268 }
// Signature and the statement at listing line 271 missing (presumably prefix
// operator-- calling utf8::unchecked::prior(it)).
270 {
272 return *this;
273 }
// Signature and the statement at listing line 277 missing (presumably postfix
// operator--): returns the copy taken before the missing statement.
275 {
276 iterator temp = *this;
278 return temp;
279 }
280 }; // class iterator
281
282 } // namespace utf8::unchecked
283} // namespace utf8
284
285
286#endif // header guard
287
iterator(const octet_iterator &octet_it)
Definition unchecked.h:242
bool operator!=(const iterator &rhs) const
Definition unchecked.h:254
utfchar32_t & reference
Definition unchecked.h:238
octet_iterator base() const
Definition unchecked.h:244
std::ptrdiff_t difference_type
Definition unchecked.h:239
utfchar32_t operator*() const
Definition unchecked.h:245
bool operator==(const iterator &rhs) const
Definition unchecked.h:250
std::bidirectional_iterator_tag iterator_category
Definition unchecked.h:240
bool is_lead_surrogate(utfchar32_t cp)
Definition core.h:107
octet_iterator append(utfchar32_t cp, octet_iterator result)
Definition core.h:357
const utfchar32_t SURROGATE_OFFSET
Definition core.h:85
utfchar8_t mask8(octet_type oc)
Definition core.h:91
utfchar16_t mask16(u16_type oc)
Definition core.h:96
word_iterator append16(utfchar32_t cp, word_iterator result)
Definition core.h:406
@ INCOMPLETE_SEQUENCE
Definition core.h:165
@ OVERLONG_SEQUENCE
Definition core.h:165
@ INVALID_CODE_POINT
Definition core.h:165
@ NOT_ENOUGH_ROOM
Definition core.h:165
bool is_trail(octet_type oc)
Definition core.h:102
utf_error validate_next(octet_iterator &it, octet_iterator end, utfchar32_t &code_point)
Definition core.h:254
int sequence_length(octet_iterator lead_it)
Definition core.h:133
const utfchar16_t TRAIL_SURROGATE_MIN
Definition core.h:82
const utfchar16_t LEAD_OFFSET
Definition core.h:84
octet_iterator append(utfchar32_t cp, octet_iterator result)
Definition unchecked.h:38
utfchar32_t next16(word_iterator &it)
Definition unchecked.h:140
void advance(octet_iterator &it, distance_type n)
Definition unchecked.h:157
word_iterator append16(utfchar32_t cp, word_iterator result)
Definition unchecked.h:44
u32bit_iterator utf8to32(octet_iterator start, octet_iterator end, u32bit_iterator result)
Definition unchecked.h:223
utfchar32_t prior(octet_iterator &it)
Definition unchecked.h:149
utfchar32_t next(octet_iterator &it)
Definition unchecked.h:104
output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, utfchar32_t replacement)
Definition unchecked.h:50
octet_iterator utf16to8(u16bit_iterator start, u16bit_iterator end, octet_iterator result)
Definition unchecked.h:182
u16bit_iterator utf8to16(octet_iterator start, octet_iterator end, u16bit_iterator result)
Definition unchecked.h:199
octet_iterator utf32to8(u32bit_iterator start, u32bit_iterator end, octet_iterator result)
Definition unchecked.h:214
utfchar32_t peek_next(octet_iterator it)
Definition unchecked.h:134
Definition checked.h:35
unsigned int utfchar32_t
Definition core.h:71
unsigned short utfchar16_t
Definition core.h:70
float distance