28#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
39#if !defined UTF_CPP_CPLUSPLUS
40 #define UTF_CPP_CPLUSPLUS __cplusplus
43#if UTF_CPP_CPLUSPLUS >= 201103L
// Selected when UTF_CPP_CPLUSPLUS >= 201103L: forward to the native keywords
// and to the built-in static_assert.
44 #define UTF_CPP_OVERRIDE override
45 #define UTF_CPP_NOEXCEPT noexcept
// The expansion already ends in ';', so it forms a complete declaration by itself.
46 #define UTF_CPP_STATIC_ASSERT(condition) static_assert(condition, "UTFCPP static assert");
// Fallback definitions: UTF_CPP_OVERRIDE expands to nothing and UTF_CPP_NOEXCEPT
// to the dynamic exception specification `throw()`.
// NOTE(review): these repeat the macro names defined just above, so they presumably
// sit in the #else arm of the `UTF_CPP_CPLUSPLUS >= 201103L` check — the #else line
// itself is not visible in this listing; confirm against the full header.
48 #define UTF_CPP_OVERRIDE
49 #define UTF_CPP_NOEXCEPT throw()
// Stand-in for static_assert on compilers that lack it: a call to
// StaticAssert<Condition>::assert() compiles only when Condition is true.
template <bool Condition>
struct StaticAssert {
    // The name is wrapped in parentheses so that the function-like `assert` macro
    // from <cassert>/<assert.h> is not expanded here. Without the parentheses this
    // declaration fails to compile whenever that header was included beforehand.
    static void (assert)() {
        // char[-1] is ill-formed, so a false Condition is rejected at compile time
        // when this member is instantiated. sizeof is used instead of declaring a
        // local array so that no unused-variable warning is produced.
        (void)sizeof(char[Condition ? 1 : -1]);
    }
};
// Expands to a complete statement (the trailing ';' is part of the macro).
// The condition is parenthesised so that an expression containing '>' is not taken
// as the end of the template argument list, and the callee is parenthesised to keep
// the `assert` macro from being expanded at the call site.
#define UTF_CPP_STATIC_ASSERT(condition) (StaticAssert<(condition)>::assert)();
60#if UTF_CPP_CPLUSPLUS >= 201103L
61 #if UTF_CPP_CPLUSPLUS >= 202002L
90 template<
typename octet_type>
95 template<
typename u16_type>
101 template<
typename octet_type>
132 template <
typename octet_iterator>
138 else if ((lead >> 5) == 0x6)
140 else if ((lead >> 4) == 0xe)
142 else if ((lead >> 3) == 0x1e)
154 else if (cp < 0x800) {
158 else if (cp < 0x10000) {
168 template <
typename octet_iterator>
// Calls increase_safely(IT, END) and, if the result is anything other than UTF8_OK,
// returns that utf_error from the function the macro is expanded in.
// The surrounding braces keep `ret` local to each expansion, so the macro can appear
// more than once in the same function. It is #undef'd again further down the file.
180 #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
183 template <
typename octet_iterator>
194 template <
typename octet_iterator>
204 code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
209 template <
typename octet_iterator>
223 code_point =
static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
228 template <
typename octet_iterator>
246 code_point =
static_cast<utfchar32_t>(code_point + ((*it) & 0x3f));
251 #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
253 template <
typename octet_iterator>
261 octet_iterator original_it = it;
307 template <
typename octet_iterator>
313 template <
typename word_iterator>
317 typedef typename std::iterator_traits<word_iterator>::value_type word_type;
324 word_iterator original_it = it;
330 code_point = first_word;
356 template <
typename octet_iterator,
typename octet_type>
359 *(result++) =
static_cast<octet_type
>(cp);
360 else if (cp < 0x800) {
361 *(result++) =
static_cast<octet_type
>((cp >> 6) | 0xc0);
362 *(result++) =
static_cast<octet_type
>((cp & 0x3f) | 0x80);
364 else if (cp < 0x10000) {
365 *(result++) =
static_cast<octet_type
>((cp >> 12) | 0xe0);
366 *(result++) =
static_cast<octet_type
>(((cp >> 6) & 0x3f) | 0x80);
367 *(result++) =
static_cast<octet_type
>((cp & 0x3f) | 0x80);
370 *(result++) =
static_cast<octet_type
>((cp >> 18) | 0xf0);
371 *(result++) =
static_cast<octet_type
>(((cp >> 12) & 0x3f)| 0x80);
372 *(result++) =
static_cast<octet_type
>(((cp >> 6) & 0x3f) | 0x80);
373 *(result++) =
static_cast<octet_type
>((cp & 0x3f) | 0x80);
387 template<
typename container_type>
388 std::back_insert_iterator<container_type>
append
389 (
utfchar32_t cp, std::back_insert_iterator<container_type> result) {
391 typename container_type::value_type>(cp, result);
397 template <
typename octet_iterator>
405 template <
typename word_iterator,
typename word_type>
409 *(result++) =
static_cast<word_type
>(cp);
412 *(result++) =
static_cast<word_type
>(
LEAD_OFFSET + (cp >> 10));
420 template<
typename container_type>
422 (
utfchar32_t cp, std::back_insert_iterator<container_type> result) {
424 typename container_type::value_type>(cp, result);
430 template <
typename word_iterator>
442 template <
typename octet_iterator>
445 octet_iterator result = start;
446 while (result != end) {
456 const char* end = str + std::strlen(str);
462 std::string::const_iterator invalid =
find_invalid(s.begin(), s.end());
463 return (invalid == s.end()) ? std::string::npos :
static_cast<std::size_t
>(invalid - s.begin());
466 template <
typename octet_iterator>
467 inline bool is_valid(octet_iterator start, octet_iterator end)
479 return is_valid(s.begin(), s.end());
484 template <
typename octet_iterator>
std::shared_ptr< btRigidBody > internal
#define UTF_CPP_STATIC_ASSERT(condition)
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END)
bool is_lead_surrogate(utfchar32_t cp)
utf_error get_sequence_2(octet_iterator &it, octet_iterator end, utfchar32_t &code_point)
octet_iterator append(utfchar32_t cp, octet_iterator result)
const utfchar32_t SURROGATE_OFFSET
const utfchar16_t LEAD_SURROGATE_MIN
bool is_surrogate(utfchar32_t cp)
bool is_in_bmp(utfchar32_t cp)
bool is_trail_surrogate(utfchar32_t cp)
utf_error get_sequence_1(octet_iterator &it, octet_iterator end, utfchar32_t &code_point)
The get_sequence_x functions decode UTF-8 sequences of length x.
utfchar8_t mask8(octet_type oc)
utf_error get_sequence_4(octet_iterator &it, octet_iterator end, utfchar32_t &code_point)
const utfchar32_t CODE_POINT_MAX
utfchar16_t mask16(u16_type oc)
word_iterator append16(utfchar32_t cp, word_iterator result)
utf_error get_sequence_3(octet_iterator &it, octet_iterator end, utfchar32_t &code_point)
utf_error validate_next16(word_iterator &it, word_iterator end, utfchar32_t &code_point)
bool is_code_point_valid(utfchar32_t cp)
bool is_trail(octet_type oc)
const utfchar16_t TRAIL_SURROGATE_MAX
const utfchar16_t LEAD_SURROGATE_MAX
utf_error validate_next(octet_iterator &it, octet_iterator end, utfchar32_t &code_point)
bool is_overlong_sequence(utfchar32_t cp, int length)
int sequence_length(octet_iterator lead_it)
const utfchar16_t TRAIL_SURROGATE_MIN
const utfchar16_t LEAD_OFFSET
utf_error increase_safely(octet_iterator &it, const octet_iterator end)
Helper for get_sequence_x.
unsigned short utfchar16_t
bool starts_with_bom(octet_iterator it, octet_iterator end)
const utfchar8_t bom[]
The library API - functions intended to be called by the users.
bool is_valid(octet_iterator start, octet_iterator end)
octet_iterator find_invalid(octet_iterator start, octet_iterator end)