#include <vector>
#include "my_inttypes.h"
Go to the source code of this file.
|
enum | enum_uca_ver { UCA_V400
, UCA_V520
, UCA_V900
} |
|
enum | enum_char_grp {
CHARGRP_NONE
, CHARGRP_CORE
, CHARGRP_LATIN
, CHARGRP_CYRILLIC
, CHARGRP_ARAB
, CHARGRP_KANA
, CHARGRP_OTHERS
} |
|
enum | enum_case_first { CASE_FIRST_OFF
, CASE_FIRST_UPPER
, CASE_FIRST_LOWER
} |
|
◆ MY_UCA_CNT_FLAG_MASK
#define MY_UCA_CNT_FLAG_MASK 4095 |
◆ MY_UCA_CNT_FLAG_SIZE
#define MY_UCA_CNT_FLAG_SIZE 4096 |
◆ MY_UCA_CNT_HEAD
#define MY_UCA_CNT_HEAD 1 |
Whether the given character can be the first in any contraction.
◆ MY_UCA_CNT_MID1
#define MY_UCA_CNT_MID1 4 |
Whether the given character can be the second in any contraction.
Also defined implicitly through shifting MY_UCA_CNT_MID1:
#define MY_UCA_CNT_MID2 8
#define MY_UCA_CNT_MID3 16
#define MY_UCA_CNT_MID4 32
There's no need for MY_UCA_CNT_MID5 (which would cause us to run out of bits) since MY_UCA_MAX_CONTRACTION is 6 (so head, four in the middle, and then tail).
◆ MY_UCA_CNT_TAIL
#define MY_UCA_CNT_TAIL 2 |
Whether the given character can be the last in any contraction.
◆ MY_UCA_MAX_CONTRACTION
#define MY_UCA_MAX_CONTRACTION 6 |
◆ MY_UCA_MAX_WEIGHT_SIZE
#define MY_UCA_MAX_WEIGHT_SIZE 25 |
◆ MY_UCA_PREVIOUS_CONTEXT_HEAD
#define MY_UCA_PREVIOUS_CONTEXT_HEAD 64 |
Whether the given character is the first part of a context-sensitive contraction.
Context-sensitive contractions are like normal contractions, except that for performance reasons, they trigger on the last character instead of the first. The case given in Unicode TR35 is that in some scripts (such as katakana in Japanese), "a-" should sort as "aa" (except on the tertiary level), "e-" should sort as "ee" and so on. However, adding regular contractions on "a" and "e" would cause undue performance loss, so instead, we add a special "context-sensitive" contraction on "-" that then looks at the previous character.
We don't support context-sensitive contractions longer than two characters at the moment, since none exist in CLDR. Thus, there is no MY_UCA_PREVIOUS_CONTEXT_MID1 and so on.
◆ MY_UCA_PREVIOUS_CONTEXT_TAIL
#define MY_UCA_PREVIOUS_CONTEXT_TAIL 128 |
Like MY_UCA_PREVIOUS_CONTEXT_HEAD, but for the tail: whether the given character is the last part of a context-sensitive contraction.
◆ MY_UCA_PSHIFT
◆ MY_UCA_WEIGHT_LEVELS
#define MY_UCA_WEIGHT_LEVELS 1 |
◆ UCA_MAX_CHAR_GRP
#define UCA_MAX_CHAR_GRP 4 |
◆ enum_case_first
Enumerator |
---|
CASE_FIRST_OFF | |
CASE_FIRST_UPPER | |
CASE_FIRST_LOWER | |
◆ enum_char_grp
Enumerator |
---|
CHARGRP_NONE | |
CHARGRP_CORE | |
CHARGRP_LATIN | |
CHARGRP_CYRILLIC | |
CHARGRP_ARAB | |
CHARGRP_KANA | |
CHARGRP_OTHERS | |
◆ enum_uca_ver
Enumerator |
---|
UCA_V400 | |
UCA_V520 | |
UCA_V900 | |
◆ my_uca_can_be_contraction_head()
inline bool my_uca_can_be_contraction_head(const char *flags, my_wc_t wc)
Check if a code point can be contraction head.
- Parameters
-
flags | Pointer to UCA contraction flag data |
wc | Code point |
- Return values
-
0 | - cannot be contraction head |
1 | - can be contraction head |
◆ my_uca_can_be_contraction_tail()
inline bool my_uca_can_be_contraction_tail(const char *flags, my_wc_t wc)
Check if a code point can be contraction tail.
- Parameters
-
flags | Pointer to UCA contraction flag data |
wc | Code point |
- Return values
-
0 | - cannot be contraction tail |
1 | - can be contraction tail |
◆ my_uca_contraction2_weight()
Find a contraction consisting of two code points and return its weight array.
- Parameters
-
cont_nodes | Vector that contains contraction nodes |
wc1 | First code point |
wc2 | Second code point |
- Returns
- Weight array
- Return values
-
NULL | - no contraction found |
ptr | - contraction weight array |