MySQL 8.4.3
Source Code Documentation
|
#include "my_config.h"
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include <type_traits>
#include "my_byteorder.h"
#include "my_compiler.h"
#include "mysql/strings/m_ctype.h"
#include "strings/m_ctype_internals.h"
#include "strings/mb_wc.h"
#include "template_utils.h"
Namespaces | |
namespace | anonymous_namespace{ctype-utf8.cc} |
Macros | |
#define | MY_FILENAME_ESCAPE '@' |
Functions | |
static int | MY_CS_TOOSMALLN (int n) |
static int | my_valid_mbcharlen_utf8mb3 (const uint8_t *s, const uint8_t *e) |
static void | my_tosort_unicode (const MY_UNICASE_INFO *uni_plane, my_wc_t *wc, uint flags) |
static int | my_wildcmp_unicode_impl (const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, const MY_UNICASE_INFO *weights, int recurse_level) |
int | my_wildcmp_unicode (const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, const MY_UNICASE_INFO *weights) |
template<class Pointee , class Offset > | |
Pointee * | anonymous_namespace{ctype-utf8.cc}::add_with_saturate (Pointee *ptr, Offset offset) |
static size_t | my_strxfrm_pad_nweights_unicode (uint8_t *str, uint8_t *strend, size_t nweights) |
Pad buffer with weights for space characters. More... | |
static size_t | my_strxfrm_pad_unicode (uint8_t *str, uint8_t *strend) |
Pad buffer with weights for space characters. More... | |
template<class Mb_wc > | |
static size_t | my_strnxfrm_unicode_tmpl (const CHARSET_INFO *cs, Mb_wc mb_wc, uint8_t *dst, size_t dstlen, uint nweights, const uint8_t *src, size_t srclen, uint flags) |
size_t | my_strnxfrm_unicode (const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, uint nweights, const uint8_t *src, size_t srclen, uint flags) |
size_t | my_strnxfrm_unicode_full_bin (const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, uint nweights, const uint8_t *src, size_t srclen, uint flags) |
size_t | my_strnxfrmlen_unicode_full_bin (const CHARSET_INFO *cs, size_t len) |
static int | bincmp (const uint8_t *s, const uint8_t *se, const uint8_t *t, const uint8_t *te) |
static int | my_mb_wc_utf8mb3_no_range (my_wc_t *pwc, const uint8_t *s) |
static int | my_uni_utf8mb3 (const CHARSET_INFO *cs, my_wc_t wc, uint8_t *r, uint8_t *e) |
static int | my_uni_utf8mb3_no_range (const CHARSET_INFO *cs, my_wc_t wc, uint8_t *r) |
static void | my_tolower_utf8mb3 (const MY_UNICASE_INFO *uni_plane, my_wc_t *wc) |
static void | my_toupper_utf8mb3 (const MY_UNICASE_INFO *uni_plane, my_wc_t *wc) |
static size_t | my_caseup_utf8mb3 (const CHARSET_INFO *cs, char *src, size_t srclen, char *dst, size_t dstlen) |
static void | my_hash_sort_utf8mb3 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64 *n1, uint64 *n2) |
static size_t | my_caseup_str_utf8mb3 (const CHARSET_INFO *cs, char *src) |
static size_t | my_casedn_utf8mb3 (const CHARSET_INFO *cs, char *src, size_t srclen, char *dst, size_t dstlen) |
static size_t | my_casedn_str_utf8mb3 (const CHARSET_INFO *cs, char *src) |
static int | my_strnncoll_utf8mb3 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) |
static int | my_strnncollsp_utf8mb3 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) |
static int | my_strcasecmp_utf8mb3 (const CHARSET_INFO *cs, const char *s, const char *t) |
static int | my_wildcmp_utf8mb3 (const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) |
static size_t | my_strnxfrmlen_utf8mb3 (const CHARSET_INFO *cs, size_t len) |
static size_t | my_well_formed_len_utf8mb3 (const CHARSET_INFO *, const char *b, const char *e, size_t pos, int *error) |
static uint | my_ismbchar_utf8mb3 (const CHARSET_INFO *, const char *b, const char *e) |
static uint | my_mbcharlen_utf8mb3 (const CHARSET_INFO *cs, uint c) |
static int | hexlo (int x) |
static int | my_mb_wc_filename (const CHARSET_INFO *cs, my_wc_t *pwc, const uint8_t *s, const uint8_t *e) |
static int | my_wc_mb_filename (const CHARSET_INFO *cs, my_wc_t wc, uint8_t *s, uint8_t *e) |
static int | bincmp_utf8mb4 (const uint8_t *s, const uint8_t *se, const uint8_t *t, const uint8_t *te) |
int | my_mb_wc_utf8mb3_thunk (const CHARSET_INFO *cs, my_wc_t *pwc, const uint8_t *s, const uint8_t *e) |
A thunk to be able to use my_mb_wc_utf8mb3 in MY_CHARSET_HANDLER structs. More... | |
int | my_mb_wc_utf8mb4_thunk (const CHARSET_INFO *cs, my_wc_t *pwc, const uint8_t *s, const uint8_t *e) |
A thunk to be able to use my_mb_wc_utf8mb4 in MY_CHARSET_HANDLER structs. More... | |
static int | my_mb_wc_utf8mb4_no_range (const CHARSET_INFO *cs, my_wc_t *pwc, const uint8_t *s) |
static int | my_wc_mb_utf8mb4 (const CHARSET_INFO *cs, my_wc_t wc, uint8_t *r, uint8_t *e) |
static int | my_wc_mb_utf8mb4_no_range (const CHARSET_INFO *cs, my_wc_t wc, uint8_t *r) |
static void | my_tolower_utf8mb4 (const MY_UNICASE_INFO *uni_plane, my_wc_t *wc) |
static void | my_toupper_utf8mb4 (const MY_UNICASE_INFO *uni_plane, my_wc_t *wc) |
static size_t | my_caseup_utf8mb4 (const CHARSET_INFO *cs, char *src, size_t srclen, char *dst, size_t dstlen) |
static void | my_hash_sort_utf8mb4 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64 *n1, uint64 *n2) |
static size_t | my_caseup_str_utf8mb4 (const CHARSET_INFO *cs, char *src) |
static size_t | my_casedn_utf8mb4 (const CHARSET_INFO *cs, char *src, size_t srclen, char *dst, size_t dstlen) |
static size_t | my_casedn_str_utf8mb4 (const CHARSET_INFO *cs, char *src) |
static int | my_strnncoll_utf8mb4 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) |
static int | my_strnncollsp_utf8mb4 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) |
Compare strings, discarding end space. More... | |
static int | my_strcasecmp_utf8mb4 (const CHARSET_INFO *cs, const char *s, const char *t) |
Compare 0-terminated UTF8 strings. More... | |
static int | my_wildcmp_utf8mb4 (const CHARSET_INFO *cs, const char *str, const char *strend, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) |
static size_t | my_strnxfrmlen_utf8mb4 (const CHARSET_INFO *cs, size_t len) |
static ALWAYS_INLINE int | my_valid_mbcharlen_utf8mb4 (const CHARSET_INFO *cs, const uint8_t *s, const uint8_t *e) |
static size_t | my_well_formed_len_utf8mb4 (const CHARSET_INFO *cs, const char *b, const char *e, size_t pos, int *error) |
static uint ALWAYS_INLINE | my_ismbchar_utf8mb4_inl (const CHARSET_INFO *cs, const char *b, const char *e) |
static uint | my_ismbchar_utf8mb4 (const CHARSET_INFO *cs, const char *b, const char *e) |
size_t | my_charpos_mb4 (const CHARSET_INFO *cs, const char *pos, const char *end, size_t length) |
static uint | my_mbcharlen_utf8mb4 (const CHARSET_INFO *cs, uint c) |
#define MY_FILENAME_ESCAPE '@' |
|
inline static |
|
inline static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
size_t my_charpos_mb4 | ( | const CHARSET_INFO * | cs, |
const char * | pos, | ||
const char * | end, | ||
size_t | length | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
int my_mb_wc_utf8mb3_thunk | ( | const CHARSET_INFO * | cs, |
my_wc_t * | pwc, | ||
const uint8_t * | s, | ||
const uint8_t * | e | ||
) |
A thunk to be able to use my_mb_wc_utf8mb3 in MY_CHARSET_HANDLER structs.
cs | Unused. |
pwc | [output] The parsed character, if any. |
s | The string to read from. |
e | The end of the string; will not read past this. |
|
static |
int my_mb_wc_utf8mb4_thunk | ( | const CHARSET_INFO * | cs, |
my_wc_t * | pwc, | ||
const uint8_t * | s, | ||
const uint8_t * | e | ||
) |
A thunk to be able to use my_mb_wc_utf8mb4 in MY_CHARSET_HANDLER structs.
cs | Unused. |
pwc | [output] The parsed character, if any. |
s | The string to read from. |
e | The end of the string; will not read past this. |
|
static |
|
static |
|
static |
|
static |
Compare 0-terminated UTF8 strings.
cs | character set handler |
s | First 0-terminated string to compare |
t | Second 0-terminated string to compare |
negative | number if s < t |
positive | number if s > t |
0 | if the strings are equal |
|
static |
|
static |
|
static |
|
static |
Compare strings, discarding end space.
If one string is shorter than the other, the shorter string is extended with spaces so that both strings have equal length.
This will ensure that the following things hold:
"a" == "a ", "a\0" < "a", "a\0" < "a "
cs | Character set pointer. |
s | First string to compare. |
slen | Length of 's'. |
t | Second string to compare. |
tlen | Length of 't'. |
Negative | number, if s is less than t. |
0 | if s is equal to t |
Positive | number, if s > t |
size_t my_strnxfrm_unicode | ( | const CHARSET_INFO * | cs, |
uint8_t * | dst, | ||
size_t | dstlen, | ||
uint | nweights, | ||
const uint8_t * | src, | ||
size_t | srclen, | ||
uint | flags | ||
) |
size_t my_strnxfrm_unicode_full_bin | ( | const CHARSET_INFO * | cs, |
uint8_t * | dst, | ||
size_t | dstlen, | ||
uint | nweights, | ||
const uint8_t * | src, | ||
size_t | srclen, | ||
uint | flags | ||
) |
|
inline static |
size_t my_strnxfrmlen_unicode_full_bin | ( | const CHARSET_INFO * | cs, |
size_t | len | ||
) |
|
static |
|
static |
|
static |
Pad buffer with weights for space characters.
This function fills the buffer pointed to by "str" with weights of the space character. No more than "nweights" weights are written. If at some iteration step only half of a weight can fit (which is possible if the buffer length is an odd number), then half of that weight is written - this gives a slightly better ORDER BY result for long strings.
str | Buffer |
strend | End of buffer |
nweights | Number of weights |
|
static |
Pad buffer with weights for space characters.
This function fills the buffer pointed to by "str" with weights of the space character. Writing half of a weight (when the buffer length is an odd number) is OK.
str | Buffer |
strend | End of buffer |
|
inline static |
|
inline static |
|
inline static |
|
inline static |
|
inline static |
|
static |
|
static |
|
inline static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
int my_wildcmp_unicode | ( | const CHARSET_INFO * | cs, |
const char * | str, | ||
const char * | str_end, | ||
const char * | wildstr, | ||
const char * | wildend, | ||
int | escape, | ||
int | w_one, | ||
int | w_many, | ||
const MY_UNICASE_INFO * | weights | ||
) |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
CHARSET_INFO my_charset_filename |
|
static |
CHARSET_INFO my_charset_utf8mb3_bin |
CHARSET_INFO my_charset_utf8mb3_general_ci |
CHARSET_INFO my_charset_utf8mb3_general_mysql500_ci |
MY_CHARSET_HANDLER my_charset_utf8mb3_handler |
CHARSET_INFO my_charset_utf8mb3_tolower_ci |
CHARSET_INFO my_charset_utf8mb4_bin |
CHARSET_INFO my_charset_utf8mb4_general_ci |
MY_CHARSET_HANDLER my_charset_utf8mb4_handler |
|
static |
|
static |
|
static |
|
static |
|
static |
MY_UNICASE_INFO my_unicase_default = {0xFFFF, my_unicase_pages_default} |
MY_UNICASE_INFO my_unicase_mysql500 = {0xFFFF, my_unicase_pages_mysql500} |
|
static |
|
static |
|
static |
const MY_UNICASE_CHARACTER* my_unicase_pages_unicode520[4352] |
MY_UNICASE_INFO my_unicase_turkish = {0xFFFF, my_unicase_pages_turkish} |
MY_UNICASE_INFO my_unicase_unicode520 = {0x10FFFF, my_unicase_pages_unicode520} |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |