64 template <
bool RANGE_CHECK,
bool SUPPORT_MB4>
118 template <
bool RANGE_CHECK,
bool SUPPORT_MB4>
135 if ((s[1] & 0xc0) != 0x80)
146 uint16_t two_bytes = 0;
147 memcpy(&two_bytes, s + 1,
sizeof(two_bytes));
148 if ((two_bytes & 0xc0c0) != 0x8080)
151 *pwc = ((
my_wc_t)(c & 0x0f) << 12) + ((
my_wc_t)(s[1] & 0x3f) << 6) +
159 if (*pwc >= 0xd800 && *pwc <= 0xdfff)
return MY_CS_ILSEQ;
164 if (RANGE_CHECK && s + 4 > e)
171 uint32_t four_bytes = 0;
172 memcpy(&four_bytes, s,
sizeof(four_bytes));
173 #ifdef WORDS_BIGENDIAN
174 if ((four_bytes & 0xf8c0c0c0) != 0xf0808080)
176 if ((four_bytes & 0xc0c0c0f8) != 0x808080f0)
180 *pwc = ((
my_wc_t)(c & 0x07) << 18) + ((
my_wc_t)(s[1] & 0x3f) << 12) +
182 if (*pwc < 0x10000 || *pwc > 0x10ffff)
return MY_CS_ILSEQ;
228 const uint8_t *s,
const uint8_t *e);
231 const uint8_t *s,
const uint8_t *e);
Functor that uses a function pointer to convert a multibyte sequence to a wide character.
Definition: mb_wc.h:101
Mb_wc_through_function_pointer(const CHARSET_INFO *cs)
Definition: mb_wc.h:103
const mbwc_func_t m_funcptr
Definition: mb_wc.h:114
const CHARSET_INFO *const m_cs
Definition: mb_wc.h:115
int operator()(my_wc_t *pwc, const uint8_t *s, const uint8_t *e) const
Definition: mb_wc.h:106
int(* mbwc_func_t)(const CHARSET_INFO *, my_wc_t *, const uint8_t *, const uint8_t *)
Definition: mb_wc.h:111
A better implementation of the UNIX ctype(3) library.
static constexpr int MY_CS_TOOSMALL4
Definition: m_ctype.h:97
static constexpr int MY_CS_TOOSMALL
Definition: m_ctype.h:89
static constexpr int MY_CS_TOOSMALL3
Definition: m_ctype.h:93
unsigned long my_wc_t
Our own version of wchar_t, i.e., a type that holds a single Unicode code point ("wide character").
Definition: m_ctype.h:57
static constexpr int MY_CS_ILSEQ
Definition: m_ctype.h:85
static constexpr int MY_CS_TOOSMALL2
Definition: m_ctype.h:91
int my_mb_wc_utf8mb4_thunk(const CHARSET_INFO *cs, my_wc_t *pwc, const uint8_t *s, const uint8_t *e)
A thunk to be able to use my_mb_wc_utf8mb4 in MY_CHARSET_HANDLER structs.
Definition: ctype-utf8.cc:7194
int my_mb_wc_utf8mb3_thunk(const CHARSET_INFO *cs, my_wc_t *pwc, const uint8_t *s, const uint8_t *e)
A thunk to be able to use my_mb_wc_utf8mb3 in MY_CHARSET_HANDLER structs.
Definition: ctype-utf8.cc:7178
static int my_mb_wc_utf8mb4(my_wc_t *pwc, const uint8_t *s, const uint8_t *e)
Parses a single UTF-8 character from a byte string.
Definition: mb_wc.h:217
static int my_mb_wc_utf8_prototype(my_wc_t *pwc, const uint8_t *s, const uint8_t *e)
static int my_mb_wc_utf8mb3(my_wc_t *pwc, const uint8_t *s, const uint8_t *e)
Parses a single UTF-8 character from a byte string.
Definition: mb_wc.h:199
Header for compiler-dependent features.
#define ALWAYS_INLINE
Definition: my_compiler.h:99
Definition: commit_order_queue.h:34
Definition: m_ctype.h:423
Functor that converts a UTF-8 multibyte sequence (up to three bytes) to a wide character.
Definition: mb_wc.h:75
ALWAYS_INLINE int operator()(my_wc_t *pwc, const uint8_t *s, const uint8_t *e) const
Definition: mb_wc.h:79
Functor that converts a UTF-8 multibyte sequence (up to four bytes) to a wide character.
Definition: mb_wc.h:88
ALWAYS_INLINE int operator()(my_wc_t *pwc, const uint8_t *s, const uint8_t *e) const
Definition: mb_wc.h:92