MySQL 8.4.0
Source Code Documentation
ctype.cc File Reference
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include "my_byteorder.h"
#include "my_sys.h"
#include "my_xml.h"
#include "mysql/my_loglevel.h"
#include "mysql/strings/collations.h"
#include "mysql/strings/m_ctype.h"
#include "mysys_err.h"
#include "sql_chars.h"
#include "strings/collations_internal.h"
#include "strings/m_ctype_internals.h"
#include "template_utils.h"

Classes

struct  my_cs_file_section_st
 
struct  my_cs_file_info
 

Macros

#define _CS_MISC   1
 
#define _CS_ID   2
 
#define _CS_CSNAME   3
 
#define _CS_FAMILY   4
 
#define _CS_ORDER   5
 
#define _CS_COLNAME   6
 
#define _CS_FLAG   7
 
#define _CS_CHARSET   8
 
#define _CS_COLLATION   9
 
#define _CS_UPPERMAP   10
 
#define _CS_LOWERMAP   11
 
#define _CS_UNIMAP   12
 
#define _CS_COLLMAP   13
 
#define _CS_CTYPEMAP   14
 
#define _CS_PRIMARY_ID   15
 
#define _CS_BINARY_ID   16
 
#define _CS_CSDESCRIPT   17
 
#define _CS_UCA_VERSION   100
 
#define _CS_CL_SUPPRESS_CONTRACTIONS   101
 
#define _CS_CL_OPTIMIZE   102
 
#define _CS_CL_SHIFT_AFTER_METHOD   103
 
#define _CS_ST_SETTINGS   200
 
#define _CS_ST_STRENGTH   201
 
#define _CS_ST_ALTERNATE   202
 
#define _CS_ST_BACKWARDS   203
 
#define _CS_ST_NORMALIZATION   204
 
#define _CS_ST_CASE_LEVEL   205
 
#define _CS_ST_CASE_FIRST   206
 
#define _CS_ST_HIRAGANA_QUATERNARY   207
 
#define _CS_ST_NUMERIC   208
 
#define _CS_ST_VARIABLE_TOP   209
 
#define _CS_ST_MATCH_BOUNDARIES   210
 
#define _CS_ST_MATCH_STYLE   211
 
#define _CS_RULES   300
 
#define _CS_RESET   301
 
#define _CS_DIFF1   302
 
#define _CS_DIFF2   303
 
#define _CS_DIFF3   304
 
#define _CS_DIFF4   305
 
#define _CS_IDENTICAL   306
 
#define _CS_EXP_X   320
 
#define _CS_EXP_EXTEND   321
 
#define _CS_EXP_DIFF1   322
 
#define _CS_EXP_DIFF2   323
 
#define _CS_EXP_DIFF3   324
 
#define _CS_EXP_DIFF4   325
 
#define _CS_EXP_IDENTICAL   326
 
#define _CS_A_DIFF1   351
 
#define _CS_A_DIFF2   352
 
#define _CS_A_DIFF3   353
 
#define _CS_A_DIFF4   354
 
#define _CS_A_IDENTICAL   355
 
#define _CS_CONTEXT   370
 
#define _CS_RESET_BEFORE   380
 
#define _CS_RESET_FIRST_PRIMARY_IGNORABLE   401
 
#define _CS_RESET_LAST_PRIMARY_IGNORABLE   402
 
#define _CS_RESET_FIRST_SECONDARY_IGNORABLE   403
 
#define _CS_RESET_LAST_SECONDARY_IGNORABLE   404
 
#define _CS_RESET_FIRST_TERTIARY_IGNORABLE   405
 
#define _CS_RESET_LAST_TERTIARY_IGNORABLE   406
 
#define _CS_RESET_FIRST_TRAILING   407
 
#define _CS_RESET_LAST_TRAILING   408
 
#define _CS_RESET_FIRST_VARIABLE   409
 
#define _CS_RESET_LAST_VARIABLE   410
 
#define _CS_RESET_FIRST_NON_IGNORABLE   411
 
#define _CS_RESET_LAST_NON_IGNORABLE   412
 
#define MY_CS_CSDESCR_SIZE   64
 
#define MY_CS_CONTEXT_SIZE   64
 

Typedefs

typedef struct my_cs_file_info MY_CHARSET_FILE
 

Functions

static char * mstr (char *str, const char *src, size_t l1, size_t l2)
 
static struct my_cs_file_section_st * cs_file_sec (const char *attr, size_t len)
 
static void my_charset_file_reset_charset (MY_CHARSET_FILE *i)
 
static void my_charset_file_reset_collation (MY_CHARSET_FILE *i)
 
static void my_charset_file_init (MY_CHARSET_FILE *i)
 
static void my_charset_file_free (MY_CHARSET_FILE *i)
 
static int my_charset_file_tailoring_realloc (MY_CHARSET_FILE *i, size_t newlen)
 
static int fill_uchar (uint8_t *a, unsigned size, const char *str, size_t len)
 
static int fill_uint16 (uint16_t *a, unsigned size, const char *str, size_t len)
 
static int tailoring_append (MY_XML_PARSER *st, const char *fmt, size_t len, const char *attr)
 
static int tailoring_append2 (MY_XML_PARSER *st, const char *fmt, size_t len1, const char *attr1, size_t len2, const char *attr2)
 
static size_t scan_one_character (const char *s, const char *e, my_wc_t *wc)
 
static int tailoring_append_abbreviation (MY_XML_PARSER *st, const char *fmt, size_t len, const char *attr)
 
static int cs_enter (MY_XML_PARSER *st, const char *attr, size_t len)
 
static int cs_leave (MY_XML_PARSER *st, const char *attr, size_t len)
 
static int cs_value (MY_XML_PARSER *st, const char *attr, size_t len)
 
bool my_parse_charset_xml (MY_CHARSET_LOADER *loader, const char *buf, size_t len, MY_CHARSET_ERRMSG *errmsg)
 
unsigned my_string_repertoire (const CHARSET_INFO *cs, const char *str, size_t length)
 
unsigned my_charset_repertoire (const CHARSET_INFO *cs)
 
bool my_charset_is_8bit_pure_ascii (const CHARSET_INFO *cs)
 
bool my_charset_is_ascii_compatible (const CHARSET_INFO *cs)
 
static size_t my_convert_internal (char *to, size_t to_length, const CHARSET_INFO *to_cs, const char *from, size_t from_length, const CHARSET_INFO *from_cs, unsigned *errors)
 Convert a string between two character sets. More...
 
size_t my_convert (char *to, size_t to_length, const CHARSET_INFO *to_cs, const char *from, size_t from_length, const CHARSET_INFO *from_cs, unsigned *errors)
 Convert a string between two character sets. More...
 
unsigned my_mbcharlen_ptr (const CHARSET_INFO *cs, const char *s, const char *e)
 Get the length of the first code in given sequence of chars. More...
 
bool my_is_prefixidx_cand (const CHARSET_INFO *cs, const char *wildstr, const char *wildend, int escape, int w_many, size_t *prefix_len)
 Identify whether given like pattern looks like a prefix pattern, which can become candidate for index only scan on prefix indexes. More...
 
static void * once_memdup (MY_CHARSET_LOADER *loader, const void *from, size_t size)
 
static const char * once_strdup (MY_CHARSET_LOADER *loader, const char *from)
 
static void simple_cs_init_functions (CHARSET_INFO *cs)
 
static bool cs_copy_data (MY_CHARSET_LOADER *loader, CHARSET_INFO *to, CHARSET_INFO *from)
 
static bool simple_cs_is_full (CHARSET_INFO *cs)
 
static void copy_uca_collation (CHARSET_INFO *to, CHARSET_INFO *from)
 
static void clear_cs_info (CHARSET_INFO *cs)
 

Variables

CHARSET_INFO my_charset_ucs2_unicode_ci
 
CHARSET_INFO my_charset_utf16_unicode_ci
 
CHARSET_INFO my_charset_utf8mb4_unicode_ci
 
static struct my_cs_file_section_st sec []
 
static const char * diff_fmt [5]
 
static const char * context_diff_fmt [5]
 

Macro Definition Documentation

◆ _CS_A_DIFF1

#define _CS_A_DIFF1   351

◆ _CS_A_DIFF2

#define _CS_A_DIFF2   352

◆ _CS_A_DIFF3

#define _CS_A_DIFF3   353

◆ _CS_A_DIFF4

#define _CS_A_DIFF4   354

◆ _CS_A_IDENTICAL

#define _CS_A_IDENTICAL   355

◆ _CS_BINARY_ID

#define _CS_BINARY_ID   16

◆ _CS_CHARSET

#define _CS_CHARSET   8

◆ _CS_CL_OPTIMIZE

#define _CS_CL_OPTIMIZE   102

◆ _CS_CL_SHIFT_AFTER_METHOD

#define _CS_CL_SHIFT_AFTER_METHOD   103

◆ _CS_CL_SUPPRESS_CONTRACTIONS

#define _CS_CL_SUPPRESS_CONTRACTIONS   101

◆ _CS_COLLATION

#define _CS_COLLATION   9

◆ _CS_COLLMAP

#define _CS_COLLMAP   13

◆ _CS_COLNAME

#define _CS_COLNAME   6

◆ _CS_CONTEXT

#define _CS_CONTEXT   370

◆ _CS_CSDESCRIPT

#define _CS_CSDESCRIPT   17

◆ _CS_CSNAME

#define _CS_CSNAME   3

◆ _CS_CTYPEMAP

#define _CS_CTYPEMAP   14

◆ _CS_DIFF1

#define _CS_DIFF1   302

◆ _CS_DIFF2

#define _CS_DIFF2   303

◆ _CS_DIFF3

#define _CS_DIFF3   304

◆ _CS_DIFF4

#define _CS_DIFF4   305

◆ _CS_EXP_DIFF1

#define _CS_EXP_DIFF1   322

◆ _CS_EXP_DIFF2

#define _CS_EXP_DIFF2   323

◆ _CS_EXP_DIFF3

#define _CS_EXP_DIFF3   324

◆ _CS_EXP_DIFF4

#define _CS_EXP_DIFF4   325

◆ _CS_EXP_EXTEND

#define _CS_EXP_EXTEND   321

◆ _CS_EXP_IDENTICAL

#define _CS_EXP_IDENTICAL   326

◆ _CS_EXP_X

#define _CS_EXP_X   320

◆ _CS_FAMILY

#define _CS_FAMILY   4

◆ _CS_FLAG

#define _CS_FLAG   7

◆ _CS_ID

#define _CS_ID   2

◆ _CS_IDENTICAL

#define _CS_IDENTICAL   306

◆ _CS_LOWERMAP

#define _CS_LOWERMAP   11

◆ _CS_MISC

#define _CS_MISC   1

◆ _CS_ORDER

#define _CS_ORDER   5

◆ _CS_PRIMARY_ID

#define _CS_PRIMARY_ID   15

◆ _CS_RESET

#define _CS_RESET   301

◆ _CS_RESET_BEFORE

#define _CS_RESET_BEFORE   380

◆ _CS_RESET_FIRST_NON_IGNORABLE

#define _CS_RESET_FIRST_NON_IGNORABLE   411

◆ _CS_RESET_FIRST_PRIMARY_IGNORABLE

#define _CS_RESET_FIRST_PRIMARY_IGNORABLE   401

◆ _CS_RESET_FIRST_SECONDARY_IGNORABLE

#define _CS_RESET_FIRST_SECONDARY_IGNORABLE   403

◆ _CS_RESET_FIRST_TERTIARY_IGNORABLE

#define _CS_RESET_FIRST_TERTIARY_IGNORABLE   405

◆ _CS_RESET_FIRST_TRAILING

#define _CS_RESET_FIRST_TRAILING   407

◆ _CS_RESET_FIRST_VARIABLE

#define _CS_RESET_FIRST_VARIABLE   409

◆ _CS_RESET_LAST_NON_IGNORABLE

#define _CS_RESET_LAST_NON_IGNORABLE   412

◆ _CS_RESET_LAST_PRIMARY_IGNORABLE

#define _CS_RESET_LAST_PRIMARY_IGNORABLE   402

◆ _CS_RESET_LAST_SECONDARY_IGNORABLE

#define _CS_RESET_LAST_SECONDARY_IGNORABLE   404

◆ _CS_RESET_LAST_TERTIARY_IGNORABLE

#define _CS_RESET_LAST_TERTIARY_IGNORABLE   406

◆ _CS_RESET_LAST_TRAILING

#define _CS_RESET_LAST_TRAILING   408

◆ _CS_RESET_LAST_VARIABLE

#define _CS_RESET_LAST_VARIABLE   410

◆ _CS_RULES

#define _CS_RULES   300

◆ _CS_ST_ALTERNATE

#define _CS_ST_ALTERNATE   202

◆ _CS_ST_BACKWARDS

#define _CS_ST_BACKWARDS   203

◆ _CS_ST_CASE_FIRST

#define _CS_ST_CASE_FIRST   206

◆ _CS_ST_CASE_LEVEL

#define _CS_ST_CASE_LEVEL   205

◆ _CS_ST_HIRAGANA_QUATERNARY

#define _CS_ST_HIRAGANA_QUATERNARY   207

◆ _CS_ST_MATCH_BOUNDARIES

#define _CS_ST_MATCH_BOUNDARIES   210

◆ _CS_ST_MATCH_STYLE

#define _CS_ST_MATCH_STYLE   211

◆ _CS_ST_NORMALIZATION

#define _CS_ST_NORMALIZATION   204

◆ _CS_ST_NUMERIC

#define _CS_ST_NUMERIC   208

◆ _CS_ST_SETTINGS

#define _CS_ST_SETTINGS   200

◆ _CS_ST_STRENGTH

#define _CS_ST_STRENGTH   201

◆ _CS_ST_VARIABLE_TOP

#define _CS_ST_VARIABLE_TOP   209

◆ _CS_UCA_VERSION

#define _CS_UCA_VERSION   100

◆ _CS_UNIMAP

#define _CS_UNIMAP   12

◆ _CS_UPPERMAP

#define _CS_UPPERMAP   10

◆ MY_CS_CONTEXT_SIZE

#define MY_CS_CONTEXT_SIZE   64

◆ MY_CS_CSDESCR_SIZE

#define MY_CS_CSDESCR_SIZE   64

Typedef Documentation

◆ MY_CHARSET_FILE

typedef struct my_cs_file_info MY_CHARSET_FILE

Function Documentation

◆ clear_cs_info()

static void clear_cs_info ( CHARSET_INFO *  cs)
static

◆ copy_uca_collation()

static void copy_uca_collation ( CHARSET_INFO *  to,
CHARSET_INFO *  from 
)
static

◆ cs_copy_data()

static bool cs_copy_data ( MY_CHARSET_LOADER *  loader,
CHARSET_INFO *  to,
CHARSET_INFO *  from 
)
static

◆ cs_enter()

static int cs_enter ( MY_XML_PARSER *  st,
const char *  attr,
size_t  len 
)
static

◆ cs_file_sec()

static struct my_cs_file_section_st * cs_file_sec ( const char *  attr,
size_t  len 
)
static

◆ cs_leave()

static int cs_leave ( MY_XML_PARSER *  st,
const char *  attr,
size_t  len 
)
static

◆ cs_value()

static int cs_value ( MY_XML_PARSER *  st,
const char *  attr,
size_t  len 
)
static

◆ fill_uchar()

static int fill_uchar ( uint8_t *  a,
unsigned  size,
const char *  str,
size_t  len 
)
static

◆ fill_uint16()

static int fill_uint16 ( uint16_t *  a,
unsigned  size,
const char *  str,
size_t  len 
)
static

◆ mstr()

static char * mstr ( char *  str,
const char *  src,
size_t  l1,
size_t  l2 
)
static

◆ my_charset_file_free()

static void my_charset_file_free ( MY_CHARSET_FILE *  i)
static

◆ my_charset_file_init()

static void my_charset_file_init ( MY_CHARSET_FILE *  i)
static

◆ my_charset_file_reset_charset()

static void my_charset_file_reset_charset ( MY_CHARSET_FILE *  i)
static

◆ my_charset_file_reset_collation()

static void my_charset_file_reset_collation ( MY_CHARSET_FILE *  i)
static

◆ my_charset_file_tailoring_realloc()

static int my_charset_file_tailoring_realloc ( MY_CHARSET_FILE *  i,
size_t  newlen 
)
static

◆ my_charset_is_8bit_pure_ascii()

bool my_charset_is_8bit_pure_ascii ( const CHARSET_INFO *  cs)

◆ my_charset_is_ascii_compatible()

bool my_charset_is_ascii_compatible ( const CHARSET_INFO *  cs)

◆ my_charset_repertoire()

unsigned my_charset_repertoire ( const CHARSET_INFO *  cs)

◆ my_convert()

size_t my_convert ( char *  to,
size_t  to_length,
const CHARSET_INFO *  to_cs,
const char *  from,
size_t  from_length,
const CHARSET_INFO *  from_cs,
unsigned *  errors 
)

Convert a string between two character sets.

Optimized for quick copying of ASCII characters in the range 0x00..0x7F. 'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.

Parameters
[out] to: Store result here
to_length: Size of "to" buffer
to_cs: Character set of result string
from: Copy from here
from_length: Length of the "from" string
from_cs: Character set of the "from" string
[out] errors: Number of conversion errors
Returns
Number of bytes copied to 'to' string

◆ my_convert_internal()

static size_t my_convert_internal ( char *  to,
size_t  to_length,
const CHARSET_INFO *  to_cs,
const char *  from,
size_t  from_length,
const CHARSET_INFO *  from_cs,
unsigned *  errors 
)
static

Convert a string between two character sets.

'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.

Parameters
[out] to: Store result here
to_length: Size of "to" buffer
to_cs: Character set of result string
from: Copy from here
from_length: Length of the "from" string
from_cs: Character set of the "from" string
[out] errors: Number of conversion errors
Returns
Number of bytes copied to 'to' string

◆ my_is_prefixidx_cand()

bool my_is_prefixidx_cand ( const CHARSET_INFO *  cs,
const char *  wildstr,
const char *  wildend,
int  escape,
int  w_many,
size_t *  prefix_len 
)

Identify whether given like pattern looks like a prefix pattern, which can become candidate for index only scan on prefix indexes.

Parameters
cs: Character set and collation pointer.
wildstr: Pointer to LIKE pattern.
wildend: Pointer to end of LIKE pattern.
escape: Escape character pattern, typically '\'.
w_many: 'Many characters' pattern, typically '%'.
[out] prefix_len: Length of LIKE pattern.
Returns
Optimization status.
Return values
TRUE: if LIKE pattern can be used for prefix index only scan.
FALSE: otherwise.

◆ my_mbcharlen_ptr()

unsigned my_mbcharlen_ptr ( const CHARSET_INFO *  cs,
const char *  s,
const char *  e 
)

Get the length of the first code in given sequence of chars.

This function was introduced because, for gb18030, the length cannot be determined by checking the first byte only. We therefore first try my_mbcharlen, and then my_mbcharlen_2 if necessary, to get the length.

Parameters
[in] cs: charset_info
[in] s: start of the char sequence
[in] e: end of the char sequence
Returns
The length of the first code, or 0 for invalid code

◆ my_parse_charset_xml()

bool my_parse_charset_xml ( MY_CHARSET_LOADER *  loader,
const char *  buf,
size_t  len,
MY_CHARSET_ERRMSG *  errmsg 
)

◆ my_string_repertoire()

unsigned my_string_repertoire ( const CHARSET_INFO *  cs,
const char *  str,
size_t  length 
)

◆ once_memdup()

static void * once_memdup ( MY_CHARSET_LOADER *  loader,
const void *  from,
size_t  size 
)
static

◆ once_strdup()

static const char * once_strdup ( MY_CHARSET_LOADER *  loader,
const char *  from 
)
static

◆ scan_one_character()

static size_t scan_one_character ( const char *  s,
const char *  e,
my_wc_t *  wc 
)
static

◆ simple_cs_init_functions()

static void simple_cs_init_functions ( CHARSET_INFO *  cs)
static

◆ simple_cs_is_full()

static bool simple_cs_is_full ( CHARSET_INFO *  cs)
static

◆ tailoring_append()

static int tailoring_append ( MY_XML_PARSER *  st,
const char *  fmt,
size_t  len,
const char *  attr 
)
static

◆ tailoring_append2()

static int tailoring_append2 ( MY_XML_PARSER *  st,
const char *  fmt,
size_t  len1,
const char *  attr1,
size_t  len2,
const char *  attr2 
)
static

◆ tailoring_append_abbreviation()

static int tailoring_append_abbreviation ( MY_XML_PARSER *  st,
const char *  fmt,
size_t  len,
const char *  attr 
)
static

Variable Documentation

◆ context_diff_fmt

const char* context_diff_fmt[5]
static
Initial value:
= {
"<%.*s|%.*s", "<<%.*s|%.*s", "<<<%.*s|%.*s", "<<<<%.*s|%.*s", "=%.*s|%.*s"}

◆ diff_fmt

const char* diff_fmt[5]
static
Initial value:
= {"<%.*s", "<<%.*s", "<<<%.*s", "<<<<%.*s",
"=%.*s"}

◆ my_charset_ucs2_unicode_ci

CHARSET_INFO my_charset_ucs2_unicode_ci
extern

◆ my_charset_utf16_unicode_ci

CHARSET_INFO my_charset_utf16_unicode_ci
extern

◆ my_charset_utf8mb4_unicode_ci

CHARSET_INFO my_charset_utf8mb4_unicode_ci
extern

◆ sec

struct my_cs_file_section_st sec[]
static