61 template <
bool RANGE_CHECK,
bool SUPPORT_MB4>
115 template <
bool RANGE_CHECK,
bool SUPPORT_MB4>
132 if ((s[1] & 0xc0) != 0x80)
144 memcpy(&two_bytes, s + 1,
sizeof(two_bytes));
145 if ((two_bytes & 0xc0c0) != 0x8080)
148 *pwc = ((
my_wc_t)(c & 0x0f) << 12) + ((
my_wc_t)(s[1] & 0x3f) << 6) +
156 if (*pwc >= 0xd800 && *pwc <= 0xdfff)
return MY_CS_ILSEQ;
161 if (RANGE_CHECK && s + 4 > e)
169 memcpy(&four_bytes, s,
sizeof(four_bytes));
170 #ifdef WORDS_BIGENDIAN
171 if ((four_bytes & 0xf8c0c0c0) != 0xf0808080)
173 if ((four_bytes & 0xc0c0c0f8) != 0x808080f0)
177 *pwc = ((
my_wc_t)(c & 0x07) << 18) + ((
my_wc_t)(s[1] & 0x3f) << 12) +
179 if (*pwc < 0x10000 || *pwc > 0x10ffff)
return MY_CS_ILSEQ;
Functor that uses a function pointer to convert a multibyte sequence to a wide character.
Definition: mb_wc.h:98
Mb_wc_through_function_pointer(const CHARSET_INFO *cs)
Definition: mb_wc.h:100
const mbwc_func_t m_funcptr
Definition: mb_wc.h:111
int(* mbwc_func_t)(const CHARSET_INFO *, my_wc_t *, const uchar *, const uchar *)
Definition: mb_wc.h:108
const CHARSET_INFO *const m_cs
Definition: mb_wc.h:112
int operator()(my_wc_t *pwc, const uchar *s, const uchar *e) const
Definition: mb_wc.h:103
A better implementation of the UNIX ctype(3) library.
#define MY_CS_TOOSMALL2
Definition: m_ctype.h:97
#define MY_CS_ILSEQ
Definition: m_ctype.h:94
#define MY_CS_TOOSMALL
Definition: m_ctype.h:96
#define MY_CS_TOOSMALL3
Definition: m_ctype.h:98
ulong my_wc_t
Our own version of wchar_t, i.e., a type that holds a single Unicode code point ("wide character").
Definition: m_ctype.h:59
#define MY_CS_TOOSMALL4
Definition: m_ctype.h:100
int my_mb_wc_utf8mb3_thunk(const CHARSET_INFO *cs, my_wc_t *pwc, const uchar *s, const uchar *e)
A thunk to be able to use my_mb_wc_utf8mb3 in MY_CHARSET_HANDLER structs.
Definition: ctype-utf8.cc:7162
static int my_mb_wc_utf8_prototype(my_wc_t *pwc, const uchar *s, const uchar *e)
static int my_mb_wc_utf8mb4(my_wc_t *pwc, const uchar *s, const uchar *e)
Parses a single UTF-8 character from a byte string.
Definition: mb_wc.h:214
static int my_mb_wc_utf8mb3(my_wc_t *pwc, const uchar *s, const uchar *e)
Parses a single UTF-8 character from a byte string.
Definition: mb_wc.h:196
int my_mb_wc_utf8mb4_thunk(const CHARSET_INFO *cs, my_wc_t *pwc, const uchar *s, const uchar *e)
A thunk to be able to use my_mb_wc_utf8mb4 in MY_CHARSET_HANDLER structs.
Definition: ctype-utf8.cc:7178
Header for compiler-dependent features.
#define ALWAYS_INLINE
Definition: my_compiler.h:110
unsigned char uchar
Definition: my_inttypes.h:52
uint16_t uint16
Definition: my_inttypes.h:65
uint32_t uint32
Definition: my_inttypes.h:67
Definition: commit_order_queue.h:34
Definition: m_ctype.h:385
Functor that converts a UTF-8 multibyte sequence (up to three bytes) to a wide character.
Definition: mb_wc.h:72
ALWAYS_INLINE int operator()(my_wc_t *pwc, const uchar *s, const uchar *e) const
Definition: mb_wc.h:76
Functor that converts a UTF-8 multibyte sequence (up to four bytes) to a wide character.
Definition: mb_wc.h:85
ALWAYS_INLINE int operator()(my_wc_t *pwc, const uchar *s, const uchar *e) const
Definition: mb_wc.h:89