#include <vector>
#include "my_inttypes.h"

Classes
struct	Weight_boundary

struct	Reorder_wt_rec

struct	Reorder_param

struct	Coll_param

struct	MY_CONTRACTION

struct	MY_UCA_INFO

Macros
#define	UCA_MAX_CHAR_GRP 4

#define	MY_UCA_MAX_CONTRACTION 6

#define	MY_UCA_MAX_WEIGHT_SIZE 25

#define	MY_UCA_WEIGHT_LEVELS 1

#define	MY_UCA_CNT_FLAG_SIZE 4096

#define	MY_UCA_CNT_FLAG_MASK 4095

#define	MY_UCA_CNT_HEAD 1
	Whether the given character can be the first in any contraction.

#define	MY_UCA_CNT_TAIL 2
	Whether the given character can be the last in any contraction.

#define	MY_UCA_CNT_MID1 4
	Whether the given character can be the second in any contraction.

#define	MY_UCA_PREVIOUS_CONTEXT_HEAD 64
	Whether the given character is the first part of a context-sensitive contraction.

#define	MY_UCA_PREVIOUS_CONTEXT_TAIL 128
	Similar to MY_UCA_PREVIOUS_CONTEXT_HEAD, just for the tail.

#define	MY_UCA_PSHIFT 8

Enumerations
enum	enum_uca_ver { UCA_V400 , UCA_V520 , UCA_V900 }

enum	enum_char_grp { CHARGRP_NONE , CHARGRP_CORE , CHARGRP_LATIN , CHARGRP_CYRILLIC , CHARGRP_ARAB , CHARGRP_KANA , CHARGRP_OTHERS }

enum	enum_case_first { CASE_FIRST_OFF , CASE_FIRST_UPPER , CASE_FIRST_LOWER }

Functions
bool	my_uca_can_be_contraction_head (const char *flags, my_wc_t wc)
	Check if a code point can be contraction head.

bool	my_uca_can_be_contraction_tail (const char *flags, my_wc_t wc)
	Check if a code point can be contraction tail.

const uint16 *	my_uca_contraction2_weight (const std::vector< MY_CONTRACTION > *cont_nodes, my_wc_t wc1, my_wc_t wc2)
	Find a contraction consisting of two code points and return its weight array.

Macro Definition Documentation

◆ MY_UCA_CNT_FLAG_MASK

#define MY_UCA_CNT_FLAG_MASK 4095

◆ MY_UCA_CNT_FLAG_SIZE

#define MY_UCA_CNT_FLAG_SIZE 4096

◆ MY_UCA_CNT_HEAD

#define MY_UCA_CNT_HEAD 1

Whether the given character can be the first in any contraction.

◆ MY_UCA_CNT_MID1

#define MY_UCA_CNT_MID1 4

Whether the given character can be the second in any contraction.

Also defined implicitly through shifting MY_UCA_CNT_MID1:

#define MY_UCA_CNT_MID2 8
#define MY_UCA_CNT_MID3 16
#define MY_UCA_CNT_MID4 32

There's no need for MY_UCA_CNT_MID5 (which would cause us to run out of bits) since MY_UCA_MAX_CONTRACTION is 6 (so head, four in the middle, and then tail).

◆ MY_UCA_CNT_TAIL

#define MY_UCA_CNT_TAIL 2

Whether the given character can be the last in any contraction.

◆ MY_UCA_MAX_CONTRACTION

#define MY_UCA_MAX_CONTRACTION 6

◆ MY_UCA_MAX_WEIGHT_SIZE

#define MY_UCA_MAX_WEIGHT_SIZE 25

◆ MY_UCA_PREVIOUS_CONTEXT_HEAD

#define MY_UCA_PREVIOUS_CONTEXT_HEAD 64

Whether the given character is the first part of a context-sensitive contraction.

Context-sensitive contractions are like normal contractions, except that for performance reasons, they trigger on the last character instead of the first. The case given in Unicode TR35 is that in some scripts (such as katakana in Japanese), "a-" should sort as "aa" (except on the tertiary level), "e-" should sort as "ee" and so on. However, adding regular contractions on "a" and "e" would cause undue performance loss, so instead, we add a special "context-sensitive" contraction on "-" that then looks at the previous character.

We don't support context-sensitive contractions longer than two characters at the moment, since none exist in CLDR. Thus, there is no MY_UCA_PREVIOUS_CONTEXT_MID1 and so on.

◆ MY_UCA_PREVIOUS_CONTEXT_TAIL

#define MY_UCA_PREVIOUS_CONTEXT_TAIL 128

Similar to MY_UCA_PREVIOUS_CONTEXT_HEAD, just for the tail.

◆ MY_UCA_PSHIFT

#define MY_UCA_PSHIFT 8

◆ MY_UCA_WEIGHT_LEVELS

#define MY_UCA_WEIGHT_LEVELS 1

◆ UCA_MAX_CHAR_GRP

#define UCA_MAX_CHAR_GRP 4

Enumeration Type Documentation

◆ enum_case_first

enum enum_case_first

Enumerator
CASE_FIRST_OFF
CASE_FIRST_UPPER
CASE_FIRST_LOWER

◆ enum_char_grp

enum enum_char_grp

Enumerator
CHARGRP_NONE
CHARGRP_CORE
CHARGRP_LATIN
CHARGRP_CYRILLIC
CHARGRP_ARAB
CHARGRP_KANA
CHARGRP_OTHERS

◆ enum_uca_ver

enum enum_uca_ver

Enumerator
UCA_V400
UCA_V520
UCA_V900

Function Documentation

◆ my_uca_can_be_contraction_head()

bool my_uca_can_be_contraction_head	(	const char *	flags,
		my_wc_t	wc
	)

inline

Check if a code point can be contraction head.

Parameters

flags	Pointer to UCA contraction flag data
wc	Code point

Return values

false	- cannot be a contraction head
true	- can be a contraction head

◆ my_uca_can_be_contraction_tail()

bool my_uca_can_be_contraction_tail	(	const char *	flags,
		my_wc_t	wc
	)

inline

Check if a code point can be contraction tail.

Parameters

flags	Pointer to UCA contraction flag data
wc	Code point

Return values

false	- cannot be a contraction tail
true	- can be a contraction tail

◆ my_uca_contraction2_weight()

const uint16 * my_uca_contraction2_weight	(	const std::vector< MY_CONTRACTION > *	cont_nodes,
		my_wc_t	wc1,
		my_wc_t	wc2
	)

Find a contraction consisting of two code points and return its weight array.

Parameters

cont_nodes	Vector that contains contraction nodes
wc1	First code point
wc2	Second code point

Returns: Weight array

Return values

NULL	- no contraction found
ptr	- contraction weight array

Classes

Macros

Enumerations

Functions

Macro Definition Documentation

◆ MY_UCA_CNT_FLAG_MASK

◆ MY_UCA_CNT_FLAG_SIZE

◆ MY_UCA_CNT_HEAD

◆ MY_UCA_CNT_MID1

◆ MY_UCA_CNT_TAIL

◆ MY_UCA_MAX_CONTRACTION

◆ MY_UCA_MAX_WEIGHT_SIZE

◆ MY_UCA_PREVIOUS_CONTEXT_HEAD

◆ MY_UCA_PREVIOUS_CONTEXT_TAIL

◆ MY_UCA_PSHIFT

◆ MY_UCA_WEIGHT_LEVELS

◆ UCA_MAX_CHAR_GRP

Enumeration Type Documentation

◆ enum_case_first

◆ enum_char_grp

◆ enum_uca_ver

Function Documentation

◆ my_uca_can_be_contraction_head()

◆ my_uca_can_be_contraction_tail()

◆ my_uca_contraction2_weight()