57template <
bool RANGE_CHECK,
bool SUPPORT_MB4>
111template <
bool RANGE_CHECK,
bool SUPPORT_MB4>
128 if ((s[1] & 0xc0) != 0x80)
140 memcpy(&two_bytes, s + 1,
sizeof(two_bytes));
141 if ((two_bytes & 0xc0c0) != 0x8080)
144 *pwc = ((
my_wc_t)(c & 0x0f) << 12) + ((
my_wc_t)(s[1] & 0x3f) << 6) +
152 if (*pwc >= 0xd800 && *pwc <= 0xdfff)
return MY_CS_ILSEQ;
157 if (RANGE_CHECK && s + 4 > e)
165 memcpy(&four_bytes, s,
sizeof(four_bytes));
166#ifdef WORDS_BIGENDIAN
167 if ((four_bytes & 0xf8c0c0c0) != 0xf0808080)
169 if ((four_bytes & 0xc0c0c0f8) != 0x808080f0)
173 *pwc = ((
my_wc_t)(c & 0x07) << 18) + ((
my_wc_t)(s[1] & 0x3f) << 12) +
175 if (*pwc < 0x10000 || *pwc > 0x10ffff)
return MY_CS_ILSEQ;
Functor that uses a function pointer to convert a multibyte sequence to a wide character.
Definition: mb_wc.h:94
Mb_wc_through_function_pointer(const CHARSET_INFO *cs)
Definition: mb_wc.h:96
const mbwc_func_t m_funcptr
Definition: mb_wc.h:107
int(* mbwc_func_t)(const CHARSET_INFO *, my_wc_t *, const uchar *, const uchar *)
Definition: mb_wc.h:104
const CHARSET_INFO *const m_cs
Definition: mb_wc.h:108
int operator()(my_wc_t *pwc, const uchar *s, const uchar *e) const
Definition: mb_wc.h:99
A better implementation of the UNIX ctype(3) library.
#define MY_CS_TOOSMALL2
Definition: m_ctype.h:97
#define MY_CS_ILSEQ
Definition: m_ctype.h:94
#define MY_CS_TOOSMALL
Definition: m_ctype.h:96
#define MY_CS_TOOSMALL3
Definition: m_ctype.h:98
ulong my_wc_t
Our own version of wchar_t, i.e., a type that holds a single Unicode code point ("wide character").
Definition: m_ctype.h:59
#define MY_CS_TOOSMALL4
Definition: m_ctype.h:100
int my_mb_wc_utf8mb3_thunk(const CHARSET_INFO *cs, my_wc_t *pwc, const uchar *s, const uchar *e)
A thunk to be able to use my_mb_wc_utf8mb3 in MY_CHARSET_HANDLER structs.
Definition: ctype-utf8.cc:7163
static int my_mb_wc_utf8_prototype(my_wc_t *pwc, const uchar *s, const uchar *e)
static int my_mb_wc_utf8mb4(my_wc_t *pwc, const uchar *s, const uchar *e)
Parses a single UTF-8 character from a byte string.
Definition: mb_wc.h:210
static int my_mb_wc_utf8mb3(my_wc_t *pwc, const uchar *s, const uchar *e)
Parses a single UTF-8 character from a byte string.
Definition: mb_wc.h:192
int my_mb_wc_utf8mb4_thunk(const CHARSET_INFO *cs, my_wc_t *pwc, const uchar *s, const uchar *e)
A thunk to be able to use my_mb_wc_utf8mb4 in MY_CHARSET_HANDLER structs.
Definition: ctype-utf8.cc:7179
Header for compiler-dependent features.
#define ALWAYS_INLINE
Definition: my_compiler.h:110
unsigned char uchar
Definition: my_inttypes.h:52
uint16_t uint16
Definition: my_inttypes.h:65
uint32_t uint32
Definition: my_inttypes.h:67
Definition: commit_order_queue.h:34
Definition: m_ctype.h:385
Functor that converts a UTF-8 multibyte sequence (up to three bytes) to a wide character.
Definition: mb_wc.h:68
ALWAYS_INLINE int operator()(my_wc_t *pwc, const uchar *s, const uchar *e) const
Definition: mb_wc.h:72
Functor that converts a UTF-8 multibyte sequence (up to four bytes) to a wide character.
Definition: mb_wc.h:81
ALWAYS_INLINE int operator()(my_wc_t *pwc, const uchar *s, const uchar *e) const
Definition: mb_wc.h:85