![]() |
MySQL 8.0.43
Source Code Documentation
|
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <algorithm>
#include <bitset>
#include <iterator>
#include <map>
#include <utility>
#include "m_ctype.h"
#include "m_string.h"
#include "my_byteorder.h"
#include "my_compiler.h"
#include "my_inttypes.h"
#include "my_loglevel.h"
#include "my_macros.h"
#include "mysys_err.h"
#include "strings/mb_wc.h"
#include "strings/str_uca_type.h"
#include "strings/uca900_data.h"
#include "strings/uca900_ja_data.h"
#include "strings/uca900_zh_data.h"
#include "strings/uca_data.h"
#include "template_utils.h"
Classes | |
| class | my_uca_scanner |
| Unicode Collation Algorithm: Collation element (weight) scanner, for consecutive scanning of collation weights from a string. More... | |
| struct | uca_scanner_any< Mb_wc > |
| class | uca_scanner_900< Mb_wc, LEVELS_FOR_COMPARE > |
| struct | trie_node_cmp |
| struct | MY_COLL_LEXEM |
| struct | MY_COLL_RULE |
| struct | MY_COLL_RULES |
| struct | MY_COLL_RULE_PARSER |
Macros | |
| #define | HANGUL_JAMO_MAX_LENGTH 3 |
| #define | MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */ |
| #define | MY_CS_UTF8MB3_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE) |
| #define | MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT) |
| #define | MY_CS_UTF32_UCA_FLAGS |
| #define | MY_CS_UTF16_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII) |
Typedefs | |
| typedef enum my_coll_lexem_num_en | my_coll_lexem_num |
Enumerations | |
| enum | my_coll_lexem_num_en { MY_COLL_LEXEM_EOF = 0 , MY_COLL_LEXEM_SHIFT = 1 , MY_COLL_LEXEM_RESET = 4 , MY_COLL_LEXEM_CHAR = 5 , MY_COLL_LEXEM_ERROR = 6 , MY_COLL_LEXEM_OPTION = 7 , MY_COLL_LEXEM_EXTEND = 8 , MY_COLL_LEXEM_CONTEXT = 9 } |
| enum | my_coll_shift_method { my_shift_method_simple = 0 , my_shift_method_expand } |
Functions | |
| static void | my_uca_add_contraction_flag (char *flags, my_wc_t wc, int flag) |
| Mark a code point as a contraction part. More... | |
| static bool | my_uca_have_contractions (const MY_UCA_INFO *uca) |
| Check if UCA level data has contractions. More... | |
| static std::vector< MY_CONTRACTION >::const_iterator | find_contraction_part_in_trie (const std::vector< MY_CONTRACTION > &cont_nodes, my_wc_t ch) |
| static std::vector< MY_CONTRACTION >::iterator | find_contraction_part_in_trie (std::vector< MY_CONTRACTION > &cont_nodes, my_wc_t ch) |
| const uint16 * | my_uca_contraction2_weight (const std::vector< MY_CONTRACTION > *cont_nodes, my_wc_t wc1, my_wc_t wc2) |
| Find a contraction consisting of two code points and return its weight array. More... | |
| static bool | my_uca_can_be_previous_context_head (const char *flags, my_wc_t wc) |
| Check if a code point can be previous context head. More... | |
| static bool | my_uca_can_be_previous_context_tail (const char *flags, my_wc_t wc) |
| Check if a code point can be previous context tail. More... | |
| static const uint16 * | my_uca_contraction_weight (const std::vector< MY_CONTRACTION > *cont_nodes, const my_wc_t *wc, size_t len) |
| Check if a string is a contraction of exactly the given length, and return its weight array on success. More... | |
| static size_t | my_wstrnlen (my_wc_t *s, size_t maxlen) |
| Return length of a 0-terminated wide string, analogous to strnlen(). More... | |
| static int | my_decompose_hangul_syllable (my_wc_t syllable, my_wc_t *jamo) |
| Check if a code point is a Hangul syllable. More... | |
| static uint16 | change_zh_implicit (uint16 weight) |
| static bool | is_hiragana_char (my_wc_t wc) |
| static bool | is_katakana_char (my_wc_t wc) |
| static bool | is_katakana_iteration (my_wc_t wc) |
| static bool | is_hiragana_iteration (my_wc_t wc) |
| static bool | is_ja_length_mark (my_wc_t wc) |
| static bool | is_tertiary_weight_upper_case (uint16 weight) |
| template<class Scanner , int LEVELS_FOR_COMPARE, class Mb_wc > | |
| static int | my_strnncoll_uca (const CHARSET_INFO *cs, const Mb_wc mb_wc, const uchar *s, size_t slen, const uchar *t, size_t tlen, bool t_is_prefix) |
| static int | my_space_weight (const CHARSET_INFO *cs) |
| static uint16 * | my_char_weight_addr (MY_UCA_INFO *uca, my_wc_t wc) |
| Helper function: Find address of weights of the given code point. More... | |
| static uint16 * | my_char_weight_addr_900 (MY_UCA_INFO *uca, my_wc_t wc) |
| Helper function: Find address of weights of the given code point, for UCA 9.0.0 format. More... | |
| template<class Mb_wc > | |
| static int | my_strnncollsp_uca (const CHARSET_INFO *cs, Mb_wc mb_wc, const uchar *s, size_t slen, const uchar *t, size_t tlen) |
| template<class Mb_wc > | |
| static void | my_hash_sort_uca (const CHARSET_INFO *cs, Mb_wc mb_wc, const uchar *s, size_t slen, uint64 *n1, uint64 *n2) |
| template<class Mb_wc > | |
| static size_t | my_strnxfrm_uca (const CHARSET_INFO *cs, Mb_wc mb_wc, uchar *dst, size_t dstlen, uint num_codepoints, const uchar *src, size_t srclen, uint flags) |
| static int | my_uca_charcmp_900 (const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) |
| static int | my_uca_charcmp (const CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) |
| static int | my_wildcmp_uca_impl (const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many, int recurse_level) |
| static int | my_strcasecmp_uca (const CHARSET_INFO *cs, const char *s, const char *t) |
| static int | my_wildcmp_uca (const CHARSET_INFO *cs, const char *str, const char *str_end, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) |
| static const char * | my_coll_lexem_num_to_str (my_coll_lexem_num term) |
| Convert collation customization lexem to string, for nice error reporting. More... | |
| static void | my_coll_lexem_init (MY_COLL_LEXEM *lexem, const char *str, const char *str_end) |
| static int | lex_cmp (MY_COLL_LEXEM *lexem, const char *pattern, size_t patternlen) |
| Compare lexem to string with length. More... | |
| static void | my_coll_lexem_print_error (MY_COLL_LEXEM *lexem, char *errstr, size_t errsize, const char *txt, const char *col_name) |
| static int | ch2x (int ch) |
| static my_coll_lexem_num | my_coll_lexem_next (MY_COLL_LEXEM *lexem) |
| static size_t | my_coll_rule_reset_length (MY_COLL_RULE *r) |
| Return length of the "reset" string of a rule. More... | |
| static size_t | my_coll_rule_shift_length (MY_COLL_RULE *r) |
| Return length of the "shift" string of a rule. More... | |
| static int | my_coll_rule_expand (my_wc_t *wc, size_t limit, my_wc_t code) |
| Append new character to the end of a 0-terminated wide string. More... | |
| static void | my_coll_rule_reset (MY_COLL_RULE *r) |
| Initialize collation customization rule. More... | |
| static int | my_coll_rules_realloc (MY_COLL_RULES *rules, size_t n) |
| Realloc rule array to a new size. More... | |
| static int | my_coll_rules_add (MY_COLL_RULES *rules, MY_COLL_RULE *rule) |
| Append one new rule to a rule array. More... | |
| static void | my_coll_rule_shift_at_level (MY_COLL_RULE *r, int level) |
| Apply difference at level. More... | |
| static MY_COLL_LEXEM * | my_coll_parser_curr (MY_COLL_RULE_PARSER *p) |
| Current parser token. More... | |
| static MY_COLL_LEXEM * | my_coll_parser_next (MY_COLL_RULE_PARSER *p) |
| Next parser token, to look ahead. More... | |
| static int | my_coll_parser_scan (MY_COLL_RULE_PARSER *p) |
| Scan one token from the input stream. More... | |
| static void | my_coll_parser_init (MY_COLL_RULE_PARSER *p, MY_COLL_RULES *rules, const char *str, const char *str_end) |
| Initialize collation customization parser. More... | |
| static int | my_coll_parser_expected_error (MY_COLL_RULE_PARSER *p, my_coll_lexem_num term) |
| Display error when an unexpected token is found. More... | |
| static int | my_coll_parser_too_long_error (MY_COLL_RULE_PARSER *p, const char *name) |
| Display error when a too long character sequence is met. More... | |
| static int | my_coll_parser_scan_term (MY_COLL_RULE_PARSER *p, my_coll_lexem_num term) |
| Scan the given lexem from input stream, or display "expected" error. More... | |
| static int | my_coll_parser_scan_setting (MY_COLL_RULE_PARSER *p) |
| Scan a collation setting in brackets, for example UCA version. More... | |
| static int | my_coll_parser_scan_settings (MY_COLL_RULE_PARSER *p) |
| Scan multiple collation settings. More... | |
| static int | my_coll_parser_scan_reset_before (MY_COLL_RULE_PARSER *p) |
| Scan [before xxx] reset option. More... | |
| static int | my_coll_parser_scan_logical_position (MY_COLL_RULE_PARSER *p, my_wc_t *pwc, size_t limit) |
| Scan logical position and add to the wide string. More... | |
| static int | my_coll_parser_scan_character_list (MY_COLL_RULE_PARSER *p, my_wc_t *pwc, size_t limit, const char *name) |
| Scan character list. More... | |
| static int | my_coll_parser_scan_reset_sequence (MY_COLL_RULE_PARSER *p) |
| Scan reset sequence. More... | |
| static int | my_coll_parser_scan_shift_sequence (MY_COLL_RULE_PARSER *p) |
| Scan shift sequence. More... | |
| static int | my_coll_parser_scan_shift (MY_COLL_RULE_PARSER *p) |
| Scan shift operator. More... | |
| static int | my_coll_parser_scan_rule (MY_COLL_RULE_PARSER *p) |
| Scan one rule: reset followed by a number of shifts. More... | |
| static int | my_coll_parser_exec (MY_COLL_RULE_PARSER *p) |
| Scan collation customization: settings followed by rules. More... | |
| static int | my_coll_rule_parse (MY_COLL_RULES *rules, const char *str, const char *str_end, const char *col_name) |
| static void | spread_case_mask (uint16 *to, size_t to_stride, size_t tailored_ce_cnt, uint16 case_mask) |
| static void | change_weight_if_case_first (CHARSET_INFO *cs, const MY_UCA_INFO *dst, MY_COLL_RULE *r, uint16 *to, size_t to_stride, size_t curr_len, size_t tailored_ce_cnt) |
| static size_t | my_char_weight_put_900 (MY_UCA_INFO *dst, uint16 *to, size_t to_stride, size_t to_length, uint16 *to_num_ce, const MY_COLL_RULE *rule, size_t base_len) |
| static size_t | my_char_weight_put (MY_UCA_INFO *dst, uint16 *to, size_t to_stride, size_t to_length, uint16 *to_num_ce, const MY_COLL_RULE *rule, size_t base_len, enum_uca_ver uca_ver) |
| Helper function: Copies UCA weights for a given "uint" string to the given location. More... | |
| static bool | my_uca_copy_page (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, const MY_UCA_INFO *src, MY_UCA_INFO *dst, size_t page) |
| Alloc new page and copy the default UCA weights. More... | |
| static bool | apply_primary_shift_900 (MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16 *to, size_t to_stride, size_t nweights, uint16 *const last_weight_ptr) |
| static bool | apply_secondary_shift_900 (MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16 *to, size_t to_stride, size_t nweights, uint16 *const last_weight_ptr) |
| static bool | apply_tertiary_shift_900 (MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16 *to, size_t to_stride, size_t nweights, uint16 *const last_weight_ptr) |
| static bool | apply_shift_900 (MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, MY_COLL_RULE *r, uint16 *to, size_t to_stride, size_t nweights) |
| static bool | apply_shift (MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, MY_COLL_RULE *r, int level, uint16 *to, size_t to_stride, size_t nweights) |
| static MY_CONTRACTION * | add_contraction_to_trie (std::vector< MY_CONTRACTION > *cont_nodes, MY_COLL_RULE *r) |
| static bool | apply_one_rule (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, MY_COLL_RULE *r, int level, MY_UCA_INFO *dst) |
| static int | check_rules (MY_CHARSET_LOADER *loader, const MY_COLL_RULES *rules, const MY_UCA_INFO *dst, const MY_UCA_INFO *src) |
| Check if collation rules are valid, i.e. More... | |
| static void | synthesize_lengths_900 (uchar *lengths, const uint16 *const *weights, uint npages) |
| static void | copy_ja_han_pages (const CHARSET_INFO *cs, MY_UCA_INFO *dst) |
| static void | copy_zh_han_pages (MY_UCA_INFO *dst) |
| static my_wc_t | convert_implicit_to_ch (uint16 first, uint16 second) |
| static void | modify_all_zh_pages (Reorder_param *reorder_param, MY_UCA_INFO *dst, int npages) |
| static bool | init_weight_level (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader, MY_COLL_RULES *rules, int level, MY_UCA_INFO *dst, const MY_UCA_INFO *src, bool lengths_are_temporary) |
| static bool | my_comp_in_rulelist (const MY_COLL_RULES *rules, my_wc_t wc) |
| Check whether the composition character is already in rule list. More... | |
| static bool | my_compchar_is_normal_char (uint dec_ind) |
| Check whether a composition character in the decomposition list is a normal character. More... | |
| static bool | my_compchar_is_normal_char (const Unidata_decomp *decomp) |
| static Unidata_decomp * | get_decomposition (my_wc_t ch) |
| static Combining_mark * | my_find_combining_mark (my_wc_t code) |
| static bool | my_is_inheritance_of_origin (const my_wc_t *origin_dec, const my_wc_t *dec_codes, my_wc_t *dec_diff) |
| Check if a list of combining marks contains the whole list of origin decomposed combining marks. More... | |
| static int | my_coll_add_inherit_rules (MY_COLL_RULES *rules, MY_COLL_RULE *r, const Unidata_decomp *decomp_rec, std::bitset< array_elements(uni_dec)> *comp_added) |
| Add new rules recursively if one rule's characters are in decomposition list. More... | |
| static bool | combining_mark_in_rulelist (const my_wc_t *dec_codes, const MY_COLL_RULE *r_start, const MY_COLL_RULE *r_end) |
| static int | add_normalization_rules (const CHARSET_INFO *cs, MY_COLL_RULES *rules) |
| static int | my_coll_check_rule_and_inherit (const CHARSET_INFO *cs, MY_COLL_RULES *rules) |
| For every rule in rule list, check and add new rules if it is in decomposition list. More... | |
| static void | my_set_weight_rec (Reorder_wt_rec(&wt_rec)[2 *UCA_MAX_CHAR_GRP], int rec_ind, uint16 old_begin, uint16 old_end, uint16 new_begin, uint16 new_end) |
| Helper function to store weight boundary values. More... | |
| static void | my_calc_char_grp_param (const CHARSET_INFO *cs, int &rec_ind) |
| Calculate the reorder parameters for the character groups. More... | |
| static void | my_calc_char_grp_gap_param (CHARSET_INFO *cs, int &rec_ind) |
| Calculate the reorder parameters for the gap between character groups. More... | |
| static int | my_prepare_reorder (CHARSET_INFO *cs) |
| Prepare reorder parameters. More... | |
| static void | adjust_japanese_weight (CHARSET_INFO *cs, int rec_ind) |
| static bool | my_prepare_coll_param (CHARSET_INFO *cs, MY_COLL_RULES *rules) |
| Prepare parametric tailoring, like reorder, etc. More... | |
| static bool | create_tailoring (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) |
| static void | my_coll_uninit_uca (CHARSET_INFO *cs) |
| static bool | my_coll_init_uca (CHARSET_INFO *cs, MY_CHARSET_LOADER *loader) |
| static int | my_strnncoll_any_uca (const CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen, bool t_is_prefix) |
| static int | my_strnncollsp_any_uca (const CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen) |
| static void | my_hash_sort_any_uca (const CHARSET_INFO *cs, const uchar *s, size_t slen, uint64 *n1, uint64 *n2) |
| static size_t | my_strnxfrm_any_uca (const CHARSET_INFO *cs, uchar *dst, size_t dstlen, uint num_codepoints, const uchar *src, size_t srclen, uint flags) |
| static int | my_strnncoll_uca_900 (const CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen, bool t_is_prefix) |
| static int | my_strnncollsp_uca_900 (const CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen) |
| template<class Mb_wc , int LEVELS_FOR_COMPARE> | |
| static void | my_hash_sort_uca_900_tmpl (const CHARSET_INFO *cs, const Mb_wc mb_wc, const uchar *s, size_t slen, uint64 *n1) |
| static void | my_hash_sort_uca_900 (const CHARSET_INFO *cs, const uchar *s, size_t slen, uint64 *n1, uint64 *) |
| bool | my_propagate_uca_900 (const CHARSET_INFO *cs, const uchar *str, size_t length) |
| template<class Mb_wc , int LEVELS_FOR_COMPARE> | |
| static size_t | my_strnxfrm_uca_900_tmpl (const CHARSET_INFO *cs, const Mb_wc mb_wc, uchar *dst, size_t dstlen, const uchar *src, size_t srclen, uint flags) |
| static size_t | my_strnxfrm_uca_900 (const CHARSET_INFO *cs, uchar *dst, size_t dstlen, uint num_codepoints, const uchar *src, size_t srclen, uint flags) |
| static size_t | my_strnxfrmlen_uca_900 (const CHARSET_INFO *cs, size_t len) |
| static int | my_strnncoll_ucs2_uca (const CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen, bool t_is_prefix) |
| static int | my_strnncollsp_ucs2_uca (const CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen) |
| static void | my_hash_sort_ucs2_uca (const CHARSET_INFO *cs, const uchar *s, size_t slen, uint64 *n1, uint64 *n2) |
| static size_t | my_strnxfrm_ucs2_uca (const CHARSET_INFO *cs, uchar *dst, size_t dstlen, uint num_codepoints, const uchar *src, size_t srclen, uint flags) |
| static size_t | my_strnxfrm_utf8mb4_0900_bin (const CHARSET_INFO *cs, uchar *dst, size_t dstlen, uint nweights, const uchar *src, size_t srclen, uint flags) |
| static int | my_strnncollsp_utf8mb4_0900_bin (const CHARSET_INFO *cs, const uchar *s, size_t slen, const uchar *t, size_t tlen) |
| #define HANGUL_JAMO_MAX_LENGTH 3 |
| #define MY_CS_UTF16_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_NONASCII) |
| #define MY_CS_UTF32_UCA_FLAGS |
| #define MY_CS_UTF8MB3_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE) |
| #define MY_CS_UTF8MB4_UCA_FLAGS (MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE | MY_CS_UNICODE_SUPPLEMENT) |
| #define MY_UCA_MAX_EXPANSION 6 /* Maximum expansion length */ |
| typedef enum my_coll_lexem_num_en my_coll_lexem_num |
| enum my_coll_lexem_num_en |
| enum my_coll_shift_method |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Check if collation rules are valid, i.e.
characters are not outside of the collation supported range.
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inline static |
|
inline static |
|
inline static |
|
inline static |
|
inline static |
|
static |
|
static |
Compare lexem to string with length.
| lexem | lexem |
| pattern | string |
| patternlen | string length |
| 0 | if lexem is equal to string, non-0 otherwise. |
|
static |
|
static |
Calculate the reorder parameters for the gap between character groups.
| cs | Character set info |
| rec_ind | The position from where to store weight boundary |
|
static |
Calculate the reorder parameters for the character groups.
| cs | Character set info | |
| [out] | rec_ind | The position from where to store weight boundary |
|
inline static |
Helper function: Find address of weights of the given code point.
| uca | Pointer to UCA data |
| wc | character Unicode code point |
| pointer | to weight array for the given code point, or nullptr if this page does not have implicit weights. |
|
inline static |
Helper function: Find address of weights of the given code point, for UCA 9.0.0 format.
| uca | Pointer to UCA data |
| wc | character Unicode code point |
| pointer | to weight array for the given code point, or nullptr if this page does not have implicit weights. |
|
static |
Helper function: Copies UCA weights for a given "uint" string to the given location.
| dst | destination UCA weight data |
| to | destination address |
| to_stride | number of bytes between each successive weight in "to" |
| to_length | size of destination |
| to_num_ce | where to put the number of CEs generated |
| rule | The rule that contains the characters whose weights are to be copied |
| base_len | The length of base character list |
| uca_ver | UCA version |
|
static |
|
static |
Add new rules recursively if one rule's characters are in decomposition list.
| rules | The rule list |
| r | The rule to check |
| decomp_rec | The decomposition of the character in rule. |
| comp_added | Bitset which marks whether the comp character has been added to rule list. |
|
static |
For every rule in rule list, check and add new rules if it is in decomposition list.
| cs | Character set info |
| rules | The rule list |
|
static |
|
static |
|
static |
|
static |
Convert collation customization lexem to string, for nice error reporting.
| term | lexem code |
|
static |
|
static |
Current parser token.
| p | Collation customization parser |
|
static |
Scan collation customization: settings followed by rules.
<collation customization> ::= [ <setting> ... ] [ <rule>... ]
| p | Collation customization parser |
| 0 | if collation customization expression was not scanned. |
| 1 | if collation customization expression was scanned. |
|
static |
Display error when an unexpected token is found.
| p | Collation customization parser |
| term | Which lexem was expected |
|
static |
Initialize collation customization parser.
| p | Collation customization parser |
| rules | Where to store rules |
| str | Beginning of a collation customization string |
| str_end | End of the collation customizations string |
|
static |
Next parser token, to look ahead.
| p | Collation customization parser |
|
static |
Scan one token from the input stream.
| p | Collation customization parser |
|
static |
Scan character list.
<character list> ::= CHAR [ CHAR... ]
| p | Collation customization parser |
| pwc | Character string to add code to |
| limit | The result string cannot be longer than 'limit' characters |
| name | E.g. "contraction", "expansion" |
| 0 | if character sequence was not scanned. |
| 1 | if character sequence was scanned. |
|
static |
Scan logical position and add to the wide string.
| p | Collation customization parser |
| pwc | Wide string to add code to |
| limit | The result string cannot be longer than 'limit' characters |
| 0 | if logical position was not scanned. |
| 1 | if logical position was scanned. |
|
static |
Scan [before xxx] reset option.
| p | Collation customization parser |
| 0 | if reset option was not scanned. |
| 1 | if reset option was scanned. |
|
static |
Scan reset sequence.
<reset sequence> ::= [ <reset before option> ] <character list> | [ <reset before option> ] <logical reset position>
| p | Collation customization parser |
| 0 | if reset sequence was not scanned. |
| 1 | if reset sequence was scanned. |
|
static |
Scan one rule: reset followed by a number of shifts.
<rule> ::= & <reset sequence> <shift> <shift sequence> [ { <shift> <shift sequence> }... ]
| p | Collation customization parser |
| 0 | if rule was not scanned. |
| 1 | if rule was scanned. |
|
static |
Scan a collation setting in brackets, for example UCA version.
| p | Collation customization parser |
| 0 | if setting was not scanned. |
| 1 | if setting was scanned. |
|
static |
Scan multiple collation settings.
| p | Collation customization parser |
| 0 | if no settings were scanned. |
| 1 | if one or more settings were scanned. |
|
static |
Scan shift operator.
<shift> ::= < | << | <<< | <<<< | =
| p | Collation customization parser |
| 0 | if shift operator was not scanned. |
| 1 | if shift operator was scanned. |
|
static |
Scan shift sequence.
<shift sequence> ::= <character list> [ / <character list> ] | <character list> [ | <character list> ]
| p | Collation customization parser |
| 0 | if shift sequence was not scanned. |
| 1 | if shift sequence was scanned. |
|
static |
Scan the given lexem from input stream, or display "expected" error.
| p | Collation customization parser |
| term | Which lexem is expected. |
| 0 | if the required term was not found. |
| 1 | if the required term was found. |
|
static |
Display error when a too long character sequence is met.
| p | Collation customization parser |
| name | Which kind of sequence: contraction, expansion, etc. |
Append new character to the end of a 0-terminated wide string.
| wc | Wide string |
| limit | Maximum possible result length |
| code | Character to add |
|
static |
|
static |
Initialize collation customization rule.
| r | Rule |
|
inline static |
Return length of the "reset" string of a rule.
| r | Collation customization rule |
|
static |
Apply difference at level.
| r | Rule |
| level | Level (0,1,2,3,4) |
|
inline static |
Return length of the "shift" string of a rule.
| r | Collation customization rule |
|
static |
Append one new rule to a rule array.
| rules | Rule container |
| rule | New rule to add |
|
static |
Realloc rule array to a new size.
Reallocate memory for 128 additional rules at once, to reduce the number of reallocs, which is important for long tailorings (e.g. for East Asian collations).
| rules | Rule container |
| n | new number of rules |
|
static |
|
static |
Check whether the composition character is already in rule list.
| rules | The rule list |
| wc | The composition character |
|
inline static |
|
inline static |
Check whether a composition character in the decomposition list is a normal character.
| dec_ind | The index of composition character in list |
Check if a code point is a Hangul syllable.
Decompose it to jamos if it is, and return tailored weights.
| syllable | Hangul syllable to be decomposed | |
| [out] | jamo | Corresponding jamos |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Check if a list of combining marks contains the whole list of origin decomposed combining marks.
| origin_dec | The origin list of combining marks decomposed from character in tailoring rule. |
| dec_codes | The list of combining marks decomposed from character in decomposition list. |
| dec_diff | The combining marks that exist in dec_codes but not in origin_dec. |
|
static |
Prepare parametric tailoring, like reorder, etc.
| cs | Character set info |
| rules | Collation rule list to add to. |
|
static |
Prepare reorder parameters.
| cs | Character set info |
| bool my_propagate_uca_900 | ( | const CHARSET_INFO * | cs, |
| const uchar * | str, | ||
| size_t | length | ||
| ) |
|
inline static |
Helper function to store weight boundary values.
| [out] | wt_rec | Weight boundary for each character group and gap between groups |
| rec_ind | The position from where to store weight boundary | |
| old_begin | Beginning weight of character group before reorder | |
| old_end | End weight of character group before reorder | |
| new_begin | Beginning weight of character group after reorder | |
| new_end | End weight of character group after reorder |
|
inline static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
inline static |
Mark a code point as a contraction part.
| flags | Pointer to UCA contraction flag data |
| wc | Unicode code point |
| flag | flag: "is contraction head", "is contraction tail" |
|
inline static |
Check if a code point can be previous context head.
| flags | Pointer to UCA contraction flag data |
| wc | Code point |
| false | - cannot be previous context head |
| true | - can be previous context head |
|
inline static |
Check if a code point can be previous context tail.
| flags | Pointer to UCA contraction flag data |
| wc | Code point |
| false | - cannot be contraction tail |
| true | - can be contraction tail |
|
static |
|
static |
| const uint16 * my_uca_contraction2_weight | ( | const std::vector< MY_CONTRACTION > * | cont_nodes, |
| my_wc_t | wc1, | ||
| my_wc_t | wc2 | ||
| ) |
Find a contraction consisting of two code points and return its weight array.
| cont_nodes | Vector that contains contraction nodes |
| wc1 | First code point |
| wc2 | Second code point |
| NULL | - no contraction found |
| ptr | - contraction weight array |
|
inline static |
Check if a string is a contraction of exactly the given length, and return its weight array on success.
| cont_nodes | Vector that contains contraction nodes |
| wc | Pointer to wide string |
| len | String length |
| NULL | - Input string is not a known contraction |
| ptr | - contraction weight array |
|
static |
Alloc new page and copy the default UCA weights.
| cs | Character set |
| loader | Character set loader |
| src | Default UCA data to copy from |
| dst | UCA data to copy weights to |
| page | page number |
| false | on success |
| true | on error |
|
inline static |
Check if UCA level data has contractions.
| uca | Pointer to UCA data |
| 0 | - no contractions |
| 1 | - there are some contractions |
|
static |
|
static |
|
static |
Return length of a 0-terminated wide string, analogous to strnlen().
| s | Pointer to wide string |
| maxlen | Maximum string length |
|
static |
|
static |
|
static |
|
static |
|
static |
The array used for "type of characters" bit mask for each character.
The ctype[0] is reserved for EOF(-1), so we use ctype[(char)+1]. Also refer to strings/CHARSET_INFO.txt
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
extern |
| CHARSET_INFO my_charset_gb18030_unicode_520_ci |
| CHARSET_INFO my_charset_ucs2_croatian_uca_ci |
| CHARSET_INFO my_charset_ucs2_czech_uca_ci |
| CHARSET_INFO my_charset_ucs2_danish_uca_ci |
| CHARSET_INFO my_charset_ucs2_esperanto_uca_ci |
| CHARSET_INFO my_charset_ucs2_estonian_uca_ci |
| CHARSET_INFO my_charset_ucs2_german2_uca_ci |
| CHARSET_INFO my_charset_ucs2_hungarian_uca_ci |
| CHARSET_INFO my_charset_ucs2_icelandic_uca_ci |
| CHARSET_INFO my_charset_ucs2_latvian_uca_ci |
| CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci |
| CHARSET_INFO my_charset_ucs2_persian_uca_ci |
| CHARSET_INFO my_charset_ucs2_polish_uca_ci |
| CHARSET_INFO my_charset_ucs2_roman_uca_ci |
| CHARSET_INFO my_charset_ucs2_romanian_uca_ci |
| CHARSET_INFO my_charset_ucs2_sinhala_uca_ci |
| CHARSET_INFO my_charset_ucs2_slovak_uca_ci |
| CHARSET_INFO my_charset_ucs2_slovenian_uca_ci |
| CHARSET_INFO my_charset_ucs2_spanish2_uca_ci |
| CHARSET_INFO my_charset_ucs2_spanish_uca_ci |
| CHARSET_INFO my_charset_ucs2_swedish_uca_ci |
| CHARSET_INFO my_charset_ucs2_turkish_uca_ci |
| CHARSET_INFO my_charset_ucs2_unicode_520_ci |
| CHARSET_INFO my_charset_ucs2_unicode_ci |
| CHARSET_INFO my_charset_ucs2_vietnamese_ci |
| CHARSET_INFO my_charset_utf16_croatian_uca_ci |
| CHARSET_INFO my_charset_utf16_czech_uca_ci |
| CHARSET_INFO my_charset_utf16_danish_uca_ci |
| CHARSET_INFO my_charset_utf16_esperanto_uca_ci |
| CHARSET_INFO my_charset_utf16_estonian_uca_ci |
| CHARSET_INFO my_charset_utf16_german2_uca_ci |
|
extern |
| CHARSET_INFO my_charset_utf16_hungarian_uca_ci |
| CHARSET_INFO my_charset_utf16_icelandic_uca_ci |
| CHARSET_INFO my_charset_utf16_latvian_uca_ci |
| CHARSET_INFO my_charset_utf16_lithuanian_uca_ci |
| CHARSET_INFO my_charset_utf16_persian_uca_ci |
| CHARSET_INFO my_charset_utf16_polish_uca_ci |
| CHARSET_INFO my_charset_utf16_roman_uca_ci |
| CHARSET_INFO my_charset_utf16_romanian_uca_ci |
| CHARSET_INFO my_charset_utf16_sinhala_uca_ci |
| CHARSET_INFO my_charset_utf16_slovak_uca_ci |
| CHARSET_INFO my_charset_utf16_slovenian_uca_ci |
| CHARSET_INFO my_charset_utf16_spanish2_uca_ci |
| CHARSET_INFO my_charset_utf16_spanish_uca_ci |
| CHARSET_INFO my_charset_utf16_swedish_uca_ci |
| CHARSET_INFO my_charset_utf16_turkish_uca_ci |
| CHARSET_INFO my_charset_utf16_unicode_520_ci |
| CHARSET_INFO my_charset_utf16_unicode_ci |
| CHARSET_INFO my_charset_utf16_vietnamese_ci |
| CHARSET_INFO my_charset_utf32_croatian_uca_ci |
| CHARSET_INFO my_charset_utf32_czech_uca_ci |
| CHARSET_INFO my_charset_utf32_danish_uca_ci |
| CHARSET_INFO my_charset_utf32_esperanto_uca_ci |
| CHARSET_INFO my_charset_utf32_estonian_uca_ci |
| CHARSET_INFO my_charset_utf32_german2_uca_ci |
|
extern |
| CHARSET_INFO my_charset_utf32_hungarian_uca_ci |
| CHARSET_INFO my_charset_utf32_icelandic_uca_ci |
| CHARSET_INFO my_charset_utf32_latvian_uca_ci |
| CHARSET_INFO my_charset_utf32_lithuanian_uca_ci |
| CHARSET_INFO my_charset_utf32_persian_uca_ci |
| CHARSET_INFO my_charset_utf32_polish_uca_ci |
| CHARSET_INFO my_charset_utf32_roman_uca_ci |
| CHARSET_INFO my_charset_utf32_romanian_uca_ci |
| CHARSET_INFO my_charset_utf32_sinhala_uca_ci |
| CHARSET_INFO my_charset_utf32_slovak_uca_ci |
| CHARSET_INFO my_charset_utf32_slovenian_uca_ci |
| CHARSET_INFO my_charset_utf32_spanish2_uca_ci |
| CHARSET_INFO my_charset_utf32_spanish_uca_ci |
| CHARSET_INFO my_charset_utf32_swedish_uca_ci |
| CHARSET_INFO my_charset_utf32_turkish_uca_ci |
| CHARSET_INFO my_charset_utf32_unicode_520_ci |
| CHARSET_INFO my_charset_utf32_unicode_ci |
| CHARSET_INFO my_charset_utf32_vietnamese_ci |
| CHARSET_INFO my_charset_utf8mb3_croatian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_czech_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_danish_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_esperanto_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_estonian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_german2_uca_ci |
|
extern |
| CHARSET_INFO my_charset_utf8mb3_hungarian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_icelandic_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_latvian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_lithuanian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_persian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_polish_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_roman_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_romanian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_sinhala_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_slovak_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_slovenian_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_spanish2_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_spanish_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_swedish_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_turkish_uca_ci |
| CHARSET_INFO my_charset_utf8mb3_unicode_520_ci |
| CHARSET_INFO my_charset_utf8mb3_unicode_ci |
| CHARSET_INFO my_charset_utf8mb3_vietnamese_ci |
| CHARSET_INFO my_charset_utf8mb4_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_0900_as_ci |
| CHARSET_INFO my_charset_utf8mb4_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_0900_bin |
| CHARSET_INFO my_charset_utf8mb4_bg_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_bg_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_bs_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_bs_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_croatian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_cs_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_cs_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_czech_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_da_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_da_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_danish_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_de_pb_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_de_pb_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_eo_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_eo_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_es_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_es_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_es_trad_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_es_trad_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_et_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_et_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_german2_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_gl_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_gl_0900_as_cs |
|
extern |
| CHARSET_INFO my_charset_utf8mb4_hr_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_hr_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_hu_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_hu_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_is_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_is_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs_ks |
| CHARSET_INFO my_charset_utf8mb4_la_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_la_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_lt_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_lt_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_lv_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_lv_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_mn_cyrl_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_nb_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_nb_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_nn_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_nn_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_persian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_pl_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_pl_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_polish_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_ro_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_ro_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_roman_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_ru_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_ru_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_sk_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_sk_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_sl_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_sl_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_sr_latn_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_sv_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_sv_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_tr_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_tr_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci |
| CHARSET_INFO my_charset_utf8mb4_unicode_520_ci |
| CHARSET_INFO my_charset_utf8mb4_unicode_ci |
| CHARSET_INFO my_charset_utf8mb4_vi_0900_ai_ci |
| CHARSET_INFO my_charset_utf8mb4_vi_0900_as_cs |
| CHARSET_INFO my_charset_utf8mb4_vietnamese_ci |
| CHARSET_INFO my_charset_utf8mb4_zh_0900_as_cs |
| MY_COLLATION_HANDLER my_collation_any_uca_handler |
| MY_COLLATION_HANDLER my_collation_gb18030_uca_handler |
| MY_COLLATION_HANDLER my_collation_uca_900_handler |
| MY_COLLATION_HANDLER my_collation_ucs2_uca_handler |
| MY_COLLATION_HANDLER my_collation_utf16_uca_handler |
| MY_COLLATION_HANDLER my_collation_utf32_uca_handler |
|
static |
| MY_UCA_INFO my_uca_v400 |
| MY_UCA_INFO my_uca_v520 |
|
static |
|
static constexpr |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |