MySQL 8.4.3
Source Code Documentation
|
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iterator>
#include <map>
#include <vector>
#include "m_string.h"
#include "my_byteorder.h"
#include "my_compiler.h"
#include "mysql/my_loglevel.h"
#include "mysql/strings/m_ctype.h"
#include "mysys_err.h"
#include "string_with_len.h"
#include "strings/m_ctype_internals.h"
#include "strings/mb_wc.h"
#include "strings/str_uca_type.h"
#include "strings/uca900_data.h"
#include "strings/uca900_ja_data.h"
#include "strings/uca900_zh_data.h"
#include "strings/uca_data.h"
#include "template_utils.h"
Classes | |
class | my_uca_scanner |
Unicode Collation Algorithm: Collation element (weight) scanner, for consecutive scanning of collation weights from a string. More... | |
struct | uca_scanner_any< Mb_wc > |
class | uca_scanner_900< Mb_wc, LEVELS_FOR_COMPARE > |
struct | trie_node_cmp |
struct | MY_COLL_LEXEM |
struct | MY_COLL_RULE |
struct | MY_COLL_RULES |
struct | MY_COLL_RULE_PARSER |
Macros | |
#define | HANGUL_JAMO_MAX_LENGTH 3 |
#define | MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */ |
#define | MY_CS_UTF8MB3_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE) |
#define | MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT) |
#define | MY_CS_UTF32_UCA_FLAGS |
#define | MY_CS_UTF16_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII) |
Typedefs | |
typedef enum my_coll_lexem_num_en | my_coll_lexem_num |
Enumerations | |
enum | my_coll_lexem_num_en { MY_COLL_LEXEM_EOF = 0 , MY_COLL_LEXEM_SHIFT = 1 , MY_COLL_LEXEM_RESET = 4 , MY_COLL_LEXEM_CHAR = 5 , MY_COLL_LEXEM_ERROR = 6 , MY_COLL_LEXEM_OPTION = 7 , MY_COLL_LEXEM_EXTEND = 8 , MY_COLL_LEXEM_CONTEXT = 9 } |
enum | my_coll_shift_method { my_shift_method_simple = 0 , my_shift_method_expand } |
Functions | |
static void | my_uca_add_contraction_flag (MY_UCA_INFO::flags_type *flags, my_wc_t wc, int flag) |
Mark a code point as a contraction part. More... | |
static bool | my_uca_have_contractions (const MY_UCA_INFO *uca) |
Check if UCA level data has contractions. More... | |
static std::vector< MY_CONTRACTION >::const_iterator | find_contraction_part_in_trie (const std::vector< MY_CONTRACTION > &cont_nodes, my_wc_t ch) |
static std::vector< MY_CONTRACTION >::iterator | find_contraction_part_in_trie (std::vector< MY_CONTRACTION > &cont_nodes, my_wc_t ch) |
const uint16_t * | my_uca_contraction2_weight (const std::vector< MY_CONTRACTION > *cont_nodes, my_wc_t wc1, my_wc_t wc2) |
Find a contraction consisting of two code points and return its weight array. More... | |
static bool | my_uca_can_be_previous_context_head (const MY_UCA_INFO::flags_type *flags, my_wc_t wc) |
Check if a code point can be previous context head. More... | |
static bool | my_uca_can_be_previous_context_tail (const MY_UCA_INFO::flags_type *flags, my_wc_t wc) |
Check if a code point can be previous context tail. More... | |
static const uint16_t * | my_uca_contraction_weight (const std::vector< MY_CONTRACTION > *cont_nodes, const my_wc_t *wc, size_t len) |
Check if a string is a contraction of exactly the given length, and return its weight array on success. More... | |
static size_t | my_wstrnlen (my_wc_t *s, size_t maxlen) |
Return length of a 0-terminated wide string, analogous to strnlen(). More... | |
static int | my_decompose_hangul_syllable (my_wc_t syllable, my_wc_t *jamo) |
Check if a code point is a Hangul syllable. More... |
static uint16_t | change_zh_implicit (uint16_t weight) |
static bool | is_hiragana_char (my_wc_t wc) |
static bool | is_katakana_char (my_wc_t wc) |
static bool | is_katakana_iteration (my_wc_t wc) |
static bool | is_hiragana_iteration (my_wc_t wc) |
static bool | is_ja_length_mark (my_wc_t wc) |
static bool | is_tertiary_weight_upper_case (uint16_t weight) |
template<class Scanner , int LEVELS_FOR_COMPARE, class Mb_wc > | |
static int | my_strnncoll_uca (const CHARSET_INFO *cs, const Mb_wc mb_wc, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) |
static int | my_space_weight (const CHARSET_INFO *cs) |
static uint16_t * | my_char_weight_addr (MY_UCA_INFO *uca, my_wc_t wc) |
Helper function: Find address of weights of the given code point. More... | |
static uint16_t * | my_char_weight_addr_900 (MY_UCA_INFO *uca, my_wc_t wc) |
Helper function: Find address of weights of the given code point, for UCA 9.0.0 format. More... | |
template<class Mb_wc > | |
static int | my_strnncollsp_uca (const CHARSET_INFO *cs, Mb_wc mb_wc, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) |
template<class Mb_wc > | |
static void | my_hash_sort_uca (const CHARSET_INFO *cs, Mb_wc mb_wc, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *n2) |
template<class Mb_wc > | |
static size_t | my_strnxfrm_uca (const CHARSET_INFO *cs, Mb_wc mb_wc, uint8_t *dst, size_t dstlen, unsigned num_codepoints, const uint8_t *src, size_t srclen, unsigned flags) |
static int | my_uca_charcmp_900 (const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) |
static int | my_uca_charcmp (const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) |
static int | my_wildcmp_uca_impl (const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, int recurse_level) |
static int | my_strcasecmp_uca (const CHARSET_INFO *cs, const char *s, const char *t) |
static int | my_wildcmp_uca (const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) |
static const char * | my_coll_lexem_num_to_str (my_coll_lexem_num term) |
Convert collation customization lexem to string, for nice error reporting. More... | |
static void | my_coll_lexem_init (MY_COLL_LEXEM *lexem, const char *str, const char *str_end) |
static int | lex_cmp (MY_COLL_LEXEM *lexem, const char *pattern, size_t patternlen) |
Compare lexem to string with length. More... | |
static void | my_coll_lexem_print_error (MY_COLL_LEXEM *lexem, char *errstr, size_t errsize, const char *txt, const char *col_name) |
static int | ch2x (int ch) |
static my_coll_lexem_num | my_coll_lexem_next (MY_COLL_LEXEM *lexem) |
static size_t | my_coll_rule_reset_length (MY_COLL_RULE *r) |
Return length of the "reset" string of a rule. More... | |
static size_t | my_coll_rule_shift_length (MY_COLL_RULE *r) |
Return length of the "shift" string of a rule. More... | |
static int | my_coll_rule_expand (my_wc_t *wc, size_t limit, my_wc_t code) |
Append new character to the end of a 0-terminated wide string. More... | |
static void | my_coll_rule_reset (MY_COLL_RULE *r) |
Initialize collation customization rule. More... | |
static int | my_coll_rules_realloc (MY_COLL_RULES *rules, size_t n) |
Realloc rule array to a new size. More... | |
static int | my_coll_rules_add (MY_COLL_RULES *rules, MY_COLL_RULE *rule) |
Append one new rule to a rule array. More... | |
static void | my_coll_rule_shift_at_level (MY_COLL_RULE *r, int level) |
Apply difference at level. More... | |
static MY_COLL_LEXEM * | my_coll_parser_curr (MY_COLL_RULE_PARSER *p) |
Current parser token. More... | |
static MY_COLL_LEXEM * | my_coll_parser_next (MY_COLL_RULE_PARSER *p) |
Next parser token, to look ahead. More... | |
static int | my_coll_parser_scan (MY_COLL_RULE_PARSER *p) |
Scan one token from the input stream. More... | |
static void | my_coll_parser_init (MY_COLL_RULE_PARSER *p, MY_COLL_RULES *rules, const char *str, const char *str_end) |
Initialize collation customization parser. More... | |
static int | my_coll_parser_expected_error (MY_COLL_RULE_PARSER *p, my_coll_lexem_num term) |
Display error when an unexpected token is found. More... |
static int | my_coll_parser_too_long_error (MY_COLL_RULE_PARSER *p, const char *name) |
Display error when a too long character sequence is met. More... | |
static int | my_coll_parser_scan_term (MY_COLL_RULE_PARSER *p, my_coll_lexem_num term) |
Scan the given lexem from input stream, or display "expected" error. More... | |
static int | my_coll_parser_scan_setting (MY_COLL_RULE_PARSER *p) |
Scan a collation setting in brackets, for example UCA version. More... |
static int | my_coll_parser_scan_settings (MY_COLL_RULE_PARSER *p) |
Scan multiple collation settings. More... | |
static int | my_coll_parser_scan_reset_before (MY_COLL_RULE_PARSER *p) |
Scan [before xxx] reset option. More... | |
static int | my_coll_parser_scan_logical_position (MY_COLL_RULE_PARSER *p, my_wc_t *pwc, size_t limit) |
Scan logical position and add to the wide string. More... | |
static int | my_coll_parser_scan_character_list (MY_COLL_RULE_PARSER *p, my_wc_t *pwc, size_t limit, const char *name) |
Scan character list. More... | |
static int | my_coll_parser_scan_reset_sequence (MY_COLL_RULE_PARSER *p) |
Scan reset sequence. More... | |
static int | my_coll_parser_scan_shift_sequence (MY_COLL_RULE_PARSER *p) |
Scan shift sequence. More... | |
static int | my_coll_parser_scan_shift (MY_COLL_RULE_PARSER *p) |
Scan shift operator. More... | |
static int | my_coll_parser_scan_rule (MY_COLL_RULE_PARSER *p) |
Scan one rule: reset followed by a number of shifts. More... | |
static int | my_coll_parser_exec (MY_COLL_RULE_PARSER *p) |
Scan collation customization: settings followed by rules. More... | |
static int | my_coll_rule_parse (MY_COLL_RULES *rules, const char *str, const char *str_end, const char *col_name) |
static void | spread_case_mask (uint16_t *to, size_t to_stride, size_t tailored_ce_cnt, uint16_t case_mask) |
static void | change_weight_if_case_first (CHARSET_INFO *cs, const MY_UCA_INFO *dst, MY_COLL_RULE *r, uint16_t *to, size_t to_stride, size_t curr_len, size_t tailored_ce_cnt) |
static size_t | my_char_weight_put_900 (MY_UCA_INFO *dst, uint16_t *to, size_t to_stride, size_t to_length, uint16_t *to_num_ce, const MY_COLL_RULE *rule, size_t base_len) |
static size_t | my_char_weight_put (MY_UCA_INFO *dst, uint16_t *to, size_t to_stride, size_t to_length, uint16_t *to_num_ce, const MY_COLL_RULE *rule, size_t base_len, enum_uca_ver uca_ver) |
Helper function: Copies UCA weights for a given "unsigned" string to the given location. More... | |
static bool | my_uca_copy_page (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, const MY_UCA_INFO *src, MY_UCA_INFO *dst, size_t page) |
Alloc new page and copy the default UCA weights. More... | |
static bool | apply_primary_shift_900 (MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16_t *to, size_t to_stride, size_t nweights, uint16_t *const last_weight_ptr) |
static bool | apply_secondary_shift_900 (MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16_t *to, size_t to_stride, size_t nweights, uint16_t *const last_weight_ptr) |
static bool | apply_tertiary_shift_900 (MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16_t *to, size_t to_stride, size_t nweights, uint16_t *const last_weight_ptr) |
static bool | apply_shift_900 (MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16_t *to, size_t to_stride, size_t nweights) |
static bool | apply_shift (MY_COLL_RULES *rules, MY_COLL_RULE *r, int level, uint16_t *to, size_t to_stride, size_t nweights) |
static MY_CONTRACTION * | add_contraction_to_trie (std::vector< MY_CONTRACTION > *cont_nodes, MY_COLL_RULE *r) |
static bool | apply_one_rule (CHARSET_INFO *cs, MY_COLL_RULES *rules, MY_COLL_RULE *r, int level, MY_UCA_INFO *dst) |
static int | check_rules (const MY_COLL_RULES *rules, const MY_UCA_INFO *dst, const MY_UCA_INFO *src) |
Check if collation rules are valid, i.e. More... | |
static void | synthesize_lengths_900 (uint8_t *lengths, const uint16_t *const *weights, unsigned npages) |
static void | copy_ja_han_pages (const CHARSET_INFO *cs, MY_UCA_INFO *dst) |
static void | copy_zh_han_pages (MY_UCA_INFO *dst, MY_CHARSET_LOADER *loader) |
static my_wc_t | convert_implicit_to_ch (uint16_t first, uint16_t second) |
static void | modify_all_zh_pages (Reorder_param *reorder_param, MY_UCA_INFO *dst, int npages) |
static bool | init_weight_level (CHARSET_INFO *cs, MY_COLL_RULES *rules, int level, MY_UCA_INFO *dst, const MY_UCA_INFO *src) |
static bool | my_comp_in_rulelist (const MY_COLL_RULES *rules, my_wc_t wc) |
Check whether the composition character is already in rule list. More... | |
static bool | my_compchar_is_normal_char (unsigned dec_ind) |
Check whether a composition character in the decomposition list is a normal character. More... | |
static bool | my_compchar_is_normal_char (const Unidata_decomp *decomp) |
static Unidata_decomp * | get_decomposition (my_wc_t ch) |
static Combining_mark * | my_find_combining_mark (my_wc_t code) |
static bool | my_is_inheritance_of_origin (const my_wc_t *origin_dec, const my_wc_t *dec_codes, my_wc_t *dec_diff) |
Check if a list of combining marks contains the whole list of origin decomposed combining marks. More... | |
static int | my_coll_add_inherit_rules (MY_COLL_RULES *rules, MY_COLL_RULE *r, const Unidata_decomp *decomp_rec, std::bitset< array_elements(uni_dec)> *comp_added) |
Add new rules recursively if one rule's characters are in the decomposition list. More... |
static bool | combining_mark_in_rulelist (const my_wc_t *dec_codes, const MY_COLL_RULE *r_start, const MY_COLL_RULE *r_end) |
static int | add_normalization_rules (const CHARSET_INFO *cs, MY_COLL_RULES *rules) |
static int | my_coll_check_rule_and_inherit (const CHARSET_INFO *cs, MY_COLL_RULES *rules) |
For every rule in rule list, check and add new rules if it is in decomposition list. More... | |
static void | my_set_weight_rec (Reorder_wt_rec(&wt_rec)[2 *UCA_MAX_CHAR_GRP], int rec_ind, uint16_t old_begin, uint16_t old_end, uint16_t new_begin, uint16_t new_end) |
Helper function to store weight boundary values. More... | |
static void | my_calc_char_grp_param (const CHARSET_INFO *cs, int &rec_ind) |
Calculate the reorder parameters for the character groups. More... | |
static void | my_calc_char_grp_gap_param (CHARSET_INFO *cs, int &rec_ind) |
Calculate the reorder parameters for the gap between character groups. More... | |
static int | my_prepare_reorder (CHARSET_INFO *cs) |
Prepare reorder parameters. More... | |
static void | adjust_japanese_weight (CHARSET_INFO *cs, int rec_ind) |
static bool | my_prepare_coll_param (CHARSET_INFO *cs, MY_COLL_RULES *rules) |
Prepare parametric tailoring, like reorder, etc. More... | |
static bool | create_tailoring (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, MY_CHARSET_ERRMSG *errmsg) |
static void | my_coll_uninit_uca (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) |
static bool | my_coll_init_uca (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, MY_CHARSET_ERRMSG *errmsg) |
static int | my_strnncoll_any_uca (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) |
static int | my_strnncollsp_any_uca (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) |
static void | my_hash_sort_any_uca (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *n2) |
static size_t | my_strnxfrm_any_uca (const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, unsigned num_codepoints, const uint8_t *src, size_t srclen, unsigned flags) |
static int | my_strnncoll_uca_900 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) |
static int | my_strnncollsp_uca_900 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) |
template<class Mb_wc , int LEVELS_FOR_COMPARE> | |
static void | my_hash_sort_uca_900_tmpl (const CHARSET_INFO *cs, const Mb_wc mb_wc, const uint8_t *s, size_t slen, uint64_t *n1) |
static void | my_hash_sort_uca_900 (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *) |
bool | my_propagate_uca_900 (const CHARSET_INFO *cs, const uint8_t *str, size_t length) |
template<class Mb_wc , int LEVELS_FOR_COMPARE> | |
static size_t | my_strnxfrm_uca_900_tmpl (const CHARSET_INFO *cs, const Mb_wc mb_wc, uint8_t *dst, size_t dstlen, const uint8_t *src, size_t srclen, unsigned flags) |
static size_t | my_strnxfrm_uca_900 (const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, unsigned num_codepoints, const uint8_t *src, size_t srclen, unsigned flags) |
static size_t | my_strnxfrmlen_uca_900 (const CHARSET_INFO *cs, size_t len) |
static int | my_strnncoll_ucs2_uca (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen, bool t_is_prefix) |
static int | my_strnncollsp_ucs2_uca (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) |
static void | my_hash_sort_ucs2_uca (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, uint64_t *n1, uint64_t *n2) |
static size_t | my_strnxfrm_ucs2_uca (const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, unsigned num_codepoints, const uint8_t *src, size_t srclen, unsigned flags) |
static size_t | my_strnxfrm_utf8mb4_0900_bin (const CHARSET_INFO *cs, uint8_t *dst, size_t dstlen, unsigned nweights, const uint8_t *src, size_t srclen, unsigned flags) |
static int | my_strnncollsp_utf8mb4_0900_bin (const CHARSET_INFO *cs, const uint8_t *s, size_t slen, const uint8_t *t, size_t tlen) |
#define HANGUL_JAMO_MAX_LENGTH 3 |
#define MY_CS_UTF16_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII) |
#define MY_CS_UTF32_UCA_FLAGS |
#define MY_CS_UTF8MB3_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE) |
#define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT) |
#define MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */ |
typedef enum my_coll_lexem_num_en my_coll_lexem_num |
enum my_coll_lexem_num_en |
enum my_coll_shift_method |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Check if collation rules are valid, i.e.
characters are not outside of the collation supported range.
|
static |
|
inlinestatic |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
|
static |
|
static |
Compare lexem to string with length.
lexem | lexem |
pattern | string |
patternlen | string length |
0 | if lexem is equal to string, non-0 otherwise. |
|
static |
|
static |
Calculate the reorder parameters for the gap between character groups.
cs | Character set info |
rec_ind | The position from where to store weight boundary |
|
static |
Calculate the reorder parameters for the character groups.
cs | Character set info | |
[out] | rec_ind | The position from where to store weight boundary |
|
inlinestatic |
Helper function: Find address of weights of the given code point.
uca | Pointer to UCA data |
wc | character Unicode code point |
pointer | to weight array for the given code point, or nullptr if this page does not have implicit weights. |
|
inlinestatic |
Helper function: Find address of weights of the given code point, for UCA 9.0.0 format.
uca | Pointer to UCA data |
wc | character Unicode code point |
pointer | to weight array for the given code point, or nullptr if this page does not have implicit weights. |
|
static |
Helper function: Copies UCA weights for a given "unsigned" string to the given location.
dst | destination UCA weight data |
to | destination address |
to_stride | number of bytes between each successive weight in "to" |
to_length | size of destination |
to_num_ce | where to put the number of CEs generated |
rule | The rule that contains the characters whose weights are to be copied |
base_len | The length of base character list |
uca_ver | UCA version |
|
static |
|
static |
Add new rules recursively if one rule's characters are in the decomposition list.
rules | The rule list |
r | The rule to check |
decomp_rec | The decomposition of the character in rule. |
comp_added | Bitset which marks whether the comp character has been added to rule list. |
|
static |
For every rule in rule list, check and add new rules if it is in decomposition list.
cs | Character set info |
rules | The rule list |
|
static |
|
static |
|
static |
|
static |
Convert collation customization lexem to string, for nice error reporting.
term | lexem code |
|
static |
|
static |
Current parser token.
p | Collation customization parser |
|
static |
Scan collation customization: settings followed by rules.
<collation customization> ::= [ <setting> ... ] [ <rule>... ]
p | Collation customization parser |
0 | if collation customization expression was not scanned. |
1 | if collation customization expression was scanned. |
|
static |
Display error when an unexpected token is found.
p | Collation customization parser |
term | Which lexem was expected |
|
static |
Initialize collation customization parser.
p | Collation customization parser |
rules | Where to store rules |
str | Beginning of a collation customization string |
str_end | End of the collation customizations string |
|
static |
Next parser token, to look ahead.
p | Collation customization parser |
|
static |
Scan one token from the input stream.
p | Collation customization parser |
|
static |
Scan character list.
<character list> ::= CHAR [ CHAR... ]
p | Collation customization parser |
pwc | Character string to add code to |
limit | The result string cannot be longer than 'limit' characters |
name | E.g. "contraction", "expansion" |
0 | if character sequence was not scanned. |
1 | if character sequence was scanned. |
|
static |
Scan logical position and add to the wide string.
p | Collation customization parser |
pwc | Wide string to add code to |
limit | The result string cannot be longer than 'limit' characters |
0 | if logical position was not scanned. |
1 | if logical position was scanned. |
|
static |
Scan [before xxx] reset option.
p | Collation customization parser |
0 | if reset option was not scanned. |
1 | if reset option was scanned. |
|
static |
Scan reset sequence.
<reset sequence> ::= [ <reset before option> ] <character list> | [ <reset before option> ] <logical reset position>
p | Collation customization parser |
0 | if reset sequence was not scanned. |
1 | if reset sequence was scanned. |
|
static |
Scan one rule: reset followed by a number of shifts.
<rule> ::= & <reset sequence> <shift> <shift sequence> [ { <shift> <shift sequence> }... ]
p | Collation customization parser |
0 | if rule was not scanned. |
1 | if rule was scanned. |
|
static |
Scan a collation setting in brackets, for example UCA version.
p | Collation customization parser |
0 | if setting was not scanned. |
1 | if setting was scanned. |
|
static |
Scan multiple collation settings.
p | Collation customization parser |
0 | if no settings were scanned. |
1 | if one or more settings were scanned. |
|
static |
Scan shift operator.
<shift> ::= < | << | <<< | <<<< | =
p | Collation customization parser |
0 | if shift operator was not scanned. |
1 | if shift operator was scanned. |
|
static |
Scan shift sequence.
<shift sequence> ::= <character list> [ / <character list> ] | <character list> [ | <character list> ]
p | Collation customization parser |
0 | if shift sequence was not scanned. |
1 | if shift sequence was scanned. |
|
static |
Scan the given lexem from input stream, or display "expected" error.
p | Collation customization parser |
term | Which lexem is expected. |
0 | if the required term was not found. |
1 | if the required term was found. |
|
static |
Display error when a too long character sequence is met.
p | Collation customization parser |
name | Which kind of sequence: contraction, expansion, etc. |
Append new character to the end of a 0-terminated wide string.
wc | Wide string |
limit | Maximum possible result length |
code | Character to add |
|
static |
|
static |
Initialize collation customization rule.
r | Rule |
|
inlinestatic |
Return length of the "reset" string of a rule.
r | Collation customization rule |
|
static |
Apply difference at level.
r | Rule |
level | Level (0,1,2,3,4) |
|
inlinestatic |
Return length of the "shift" string of a rule.
r | Collation customization rule |
|
static |
Append one new rule to a rule array.
rules | Rule container |
rule | New rule to add |
|
static |
Realloc rule array to a new size.
Reallocate memory for 128 additional rules at once, to reduce the number of reallocs, which is important for long tailorings (e.g. for East Asian collations).
rules | Rule container |
n | new number of rules |
|
static |
|
static |
Check whether the composition character is already in rule list.
rules | The rule list |
wc | The composition character |
|
inlinestatic |
|
inlinestatic |
Check whether a composition character in the decomposition list is a normal character.
dec_ind | The index of composition character in list |
Check if a code point is a Hangul syllable.
Decompose it to jamos if it is, and return tailored weights.
syllable | Hangul syllable to be decomposed | |
[out] | jamo | Corresponding jamos |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Check if a list of combining marks contains the whole list of origin decomposed combining marks.
origin_dec | The origin list of combining marks decomposed from character in tailoring rule. |
dec_codes | The list of combining marks decomposed from character in decomposition list. |
dec_diff | The combining marks that exist in dec_codes but not in origin_dec. |
|
static |
Prepare parametric tailoring, like reorder, etc.
cs | Character set info |
rules | Collation rule list to add to. |
|
static |
Prepare reorder parameters.
cs | Character set info |
bool my_propagate_uca_900 | ( | const CHARSET_INFO * | cs, |
const uint8_t * | str, | ||
size_t | length | ||
) |
|
inlinestatic |
Helper function to store weight boundary values.
[out] | wt_rec | Weight boundary for each character group and gap between groups |
rec_ind | The position from where to store weight boundary | |
old_begin | Beginning weight of character group before reorder | |
old_end | End weight of character group before reorder | |
new_begin | Beginning weight of character group after reorder | |
new_end | End weight of character group after reorder |
|
inlinestatic |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inlinestatic |
Mark a code point as a contraction part.
flags | Pointer to UCA contraction flag data |
wc | Unicode code point |
flag | flag: "is contraction head", "is contraction tail" |
|
inlinestatic |
Check if a code point can be previous context head.
flags | Pointer to UCA contraction flag data |
wc | Code point |
false | - cannot be previous context head |
true | - can be previous context head |
|
inlinestatic |
Check if a code point can be previous context tail.
flags | Pointer to UCA contraction flag data |
wc | Code point |
false | - cannot be previous context tail |
true | - can be previous context tail |
|
static |
|
static |
const uint16_t * my_uca_contraction2_weight | ( | const std::vector< MY_CONTRACTION > * | cont_nodes, |
my_wc_t | wc1, | ||
my_wc_t | wc2 | ||
) |
Find a contraction consisting of two code points and return its weight array.
cont_nodes | Vector that contains contraction nodes |
wc1 | First code point |
wc2 | Second code point |
NULL | - no contraction found |
ptr | - contraction weight array |
|
inlinestatic |
Check if a string is a contraction of exactly the given length, and return its weight array on success.
cont_nodes | Vector that contains contraction nodes |
wc | Pointer to wide string |
len | String length |
NULL | - Input string is not a known contraction |
ptr | - contraction weight array |
|
static |
Alloc new page and copy the default UCA weights.
cs | Character set |
loader | Character set loader |
src | Default UCA data to copy from |
dst | UCA data to copy weights to |
page | page number |
false | on success |
true | on error |
|
inlinestatic |
Check if UCA level data has contractions.
uca | Pointer to UCA data |
0 | - no contractions |
1 | - there are some contractions |
|
static |
|
static |
|
static |
Return length of a 0-terminated wide string, analogous to strnlen().
s | Pointer to wide string |
maxlen | Maximum string length |
|
static |
|
static |
|
static |
|
static |
|
static |
The array used for "type of characters" bit mask for each character.
The ctype[0] is reserved for EOF(-1), so we use ctype[(char)+1]. Also refer to strings/CHARSET_INFO.txt
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
extern |
CHARSET_INFO my_charset_gb18030_unicode_520_ci |
CHARSET_INFO my_charset_ucs2_croatian_uca_ci |
CHARSET_INFO my_charset_ucs2_czech_uca_ci |
CHARSET_INFO my_charset_ucs2_danish_uca_ci |
CHARSET_INFO my_charset_ucs2_esperanto_uca_ci |
CHARSET_INFO my_charset_ucs2_estonian_uca_ci |
CHARSET_INFO my_charset_ucs2_german2_uca_ci |
CHARSET_INFO my_charset_ucs2_hungarian_uca_ci |
CHARSET_INFO my_charset_ucs2_icelandic_uca_ci |
CHARSET_INFO my_charset_ucs2_latvian_uca_ci |
CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci |
CHARSET_INFO my_charset_ucs2_persian_uca_ci |
CHARSET_INFO my_charset_ucs2_polish_uca_ci |
CHARSET_INFO my_charset_ucs2_roman_uca_ci |
CHARSET_INFO my_charset_ucs2_romanian_uca_ci |
CHARSET_INFO my_charset_ucs2_sinhala_uca_ci |
CHARSET_INFO my_charset_ucs2_slovak_uca_ci |
CHARSET_INFO my_charset_ucs2_slovenian_uca_ci |
CHARSET_INFO my_charset_ucs2_spanish2_uca_ci |
CHARSET_INFO my_charset_ucs2_spanish_uca_ci |
CHARSET_INFO my_charset_ucs2_swedish_uca_ci |
CHARSET_INFO my_charset_ucs2_turkish_uca_ci |
CHARSET_INFO my_charset_ucs2_unicode_520_ci |
CHARSET_INFO my_charset_ucs2_unicode_ci |
CHARSET_INFO my_charset_ucs2_vietnamese_ci |
CHARSET_INFO my_charset_utf16_croatian_uca_ci |
CHARSET_INFO my_charset_utf16_czech_uca_ci |
CHARSET_INFO my_charset_utf16_danish_uca_ci |
CHARSET_INFO my_charset_utf16_esperanto_uca_ci |
CHARSET_INFO my_charset_utf16_estonian_uca_ci |
CHARSET_INFO my_charset_utf16_german2_uca_ci |
|
extern |
CHARSET_INFO my_charset_utf16_hungarian_uca_ci |
CHARSET_INFO my_charset_utf16_icelandic_uca_ci |
CHARSET_INFO my_charset_utf16_latvian_uca_ci |
CHARSET_INFO my_charset_utf16_lithuanian_uca_ci |
CHARSET_INFO my_charset_utf16_persian_uca_ci |
CHARSET_INFO my_charset_utf16_polish_uca_ci |
CHARSET_INFO my_charset_utf16_roman_uca_ci |
CHARSET_INFO my_charset_utf16_romanian_uca_ci |
CHARSET_INFO my_charset_utf16_sinhala_uca_ci |
CHARSET_INFO my_charset_utf16_slovak_uca_ci |
CHARSET_INFO my_charset_utf16_slovenian_uca_ci |
CHARSET_INFO my_charset_utf16_spanish2_uca_ci |
CHARSET_INFO my_charset_utf16_spanish_uca_ci |
CHARSET_INFO my_charset_utf16_swedish_uca_ci |
CHARSET_INFO my_charset_utf16_turkish_uca_ci |
CHARSET_INFO my_charset_utf16_unicode_520_ci |
CHARSET_INFO my_charset_utf16_unicode_ci |
CHARSET_INFO my_charset_utf16_vietnamese_ci |
CHARSET_INFO my_charset_utf32_croatian_uca_ci |
CHARSET_INFO my_charset_utf32_czech_uca_ci |
CHARSET_INFO my_charset_utf32_danish_uca_ci |
CHARSET_INFO my_charset_utf32_esperanto_uca_ci |
CHARSET_INFO my_charset_utf32_estonian_uca_ci |
CHARSET_INFO my_charset_utf32_german2_uca_ci |
|
extern |
CHARSET_INFO my_charset_utf32_hungarian_uca_ci |
CHARSET_INFO my_charset_utf32_icelandic_uca_ci |
CHARSET_INFO my_charset_utf32_latvian_uca_ci |
CHARSET_INFO my_charset_utf32_lithuanian_uca_ci |
CHARSET_INFO my_charset_utf32_persian_uca_ci |
CHARSET_INFO my_charset_utf32_polish_uca_ci |
CHARSET_INFO my_charset_utf32_roman_uca_ci |
CHARSET_INFO my_charset_utf32_romanian_uca_ci |
CHARSET_INFO my_charset_utf32_sinhala_uca_ci |
CHARSET_INFO my_charset_utf32_slovak_uca_ci |
CHARSET_INFO my_charset_utf32_slovenian_uca_ci |
CHARSET_INFO my_charset_utf32_spanish2_uca_ci |
CHARSET_INFO my_charset_utf32_spanish_uca_ci |
CHARSET_INFO my_charset_utf32_swedish_uca_ci |
CHARSET_INFO my_charset_utf32_turkish_uca_ci |
CHARSET_INFO my_charset_utf32_unicode_520_ci |
CHARSET_INFO my_charset_utf32_unicode_ci |
CHARSET_INFO my_charset_utf32_vietnamese_ci |
CHARSET_INFO my_charset_utf8mb3_croatian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_czech_uca_ci |
CHARSET_INFO my_charset_utf8mb3_danish_uca_ci |
CHARSET_INFO my_charset_utf8mb3_esperanto_uca_ci |
CHARSET_INFO my_charset_utf8mb3_estonian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_german2_uca_ci |
|
extern |
CHARSET_INFO my_charset_utf8mb3_hungarian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_icelandic_uca_ci |
CHARSET_INFO my_charset_utf8mb3_latvian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_lithuanian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_persian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_polish_uca_ci |
CHARSET_INFO my_charset_utf8mb3_roman_uca_ci |
CHARSET_INFO my_charset_utf8mb3_romanian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_sinhala_uca_ci |
CHARSET_INFO my_charset_utf8mb3_slovak_uca_ci |
CHARSET_INFO my_charset_utf8mb3_slovenian_uca_ci |
CHARSET_INFO my_charset_utf8mb3_spanish2_uca_ci |
CHARSET_INFO my_charset_utf8mb3_spanish_uca_ci |
CHARSET_INFO my_charset_utf8mb3_swedish_uca_ci |
CHARSET_INFO my_charset_utf8mb3_turkish_uca_ci |
CHARSET_INFO my_charset_utf8mb3_unicode_520_ci |
CHARSET_INFO my_charset_utf8mb3_unicode_ci |
CHARSET_INFO my_charset_utf8mb3_vietnamese_ci |
CHARSET_INFO my_charset_utf8mb4_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_0900_as_ci |
CHARSET_INFO my_charset_utf8mb4_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_0900_bin |
CHARSET_INFO my_charset_utf8mb4_bg_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_bg_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_bs_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_bs_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_croatian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_cs_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_cs_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_czech_uca_ci |
CHARSET_INFO my_charset_utf8mb4_da_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_da_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_danish_uca_ci |
CHARSET_INFO my_charset_utf8mb4_de_pb_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_de_pb_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_eo_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_eo_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_es_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_es_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_es_trad_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_es_trad_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci |
CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_et_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_et_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_german2_uca_ci |
CHARSET_INFO my_charset_utf8mb4_gl_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_gl_0900_as_cs |
|
extern |
CHARSET_INFO my_charset_utf8mb4_hr_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_hr_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_hu_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_hu_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci |
CHARSET_INFO my_charset_utf8mb4_is_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_is_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs_ks |
CHARSET_INFO my_charset_utf8mb4_la_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_la_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_lt_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_lt_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_lv_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_lv_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_nb_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_nb_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_nn_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_nn_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_persian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_pl_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_pl_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_polish_uca_ci |
CHARSET_INFO my_charset_utf8mb4_ro_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_ro_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_roman_uca_ci |
CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_ru_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_ru_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci |
CHARSET_INFO my_charset_utf8mb4_sk_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_sk_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_sl_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_sl_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci |
CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci |
CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci |
CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci |
CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_sv_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_sv_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci |
CHARSET_INFO my_charset_utf8mb4_tr_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_tr_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci |
CHARSET_INFO my_charset_utf8mb4_unicode_520_ci |
CHARSET_INFO my_charset_utf8mb4_unicode_ci |
CHARSET_INFO my_charset_utf8mb4_vi_0900_ai_ci |
CHARSET_INFO my_charset_utf8mb4_vi_0900_as_cs |
CHARSET_INFO my_charset_utf8mb4_vietnamese_ci |
CHARSET_INFO my_charset_utf8mb4_zh_0900_as_cs |
MY_COLLATION_HANDLER my_collation_any_uca_handler |
MY_COLLATION_HANDLER my_collation_gb18030_uca_handler |
MY_COLLATION_HANDLER my_collation_uca_900_handler |
MY_COLLATION_HANDLER my_collation_ucs2_uca_handler |
MY_COLLATION_HANDLER my_collation_utf16_uca_handler |
MY_COLLATION_HANDLER my_collation_utf32_uca_handler |
|
static |
MY_UCA_INFO my_uca_v400 |
MY_UCA_INFO my_uca_v520 |
|
static |
|
static constexpr |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |