MySQL  8.0.18
Source Code Documentation
row0ftsort.cc File Reference

Create Full Text Index with (parallel) merge sort. More...

#include <sys/types.h>
#include "btr0bulk.h"
#include "btr0cur.h"
#include "dict0dd.h"
#include "dict0dict.h"
#include "fts0plugin.h"
#include "ha_prototypes.h"
#include "lob0lob.h"
#include "os0thread-create.h"
#include "pars0pars.h"
#include "row0ftsort.h"
#include "row0merge.h"
#include "row0row.h"
#include "my_dbug.h"

Macros

#define ROW_MERGE_READ_GET_NEXT(N)
 Read the next record to buffer N. More...
 

Functions

dict_index_t * row_merge_create_fts_sort_index (dict_index_t *index, const dict_table_t *table, ibool *opt_doc_id_size)
 Create a temporary "fts sort index" used to merge sort the tokenized doc string. More...
 
ibool row_fts_psort_info_init (trx_t *trx, row_merge_dup_t *dup, const dict_table_t *old_table, const dict_table_t *new_table, ibool opt_doc_id_size, fts_psort_t **psort, fts_psort_t **merge)
 Initialize FTS parallel sort structures. More...
 
void row_fts_psort_info_destroy (fts_psort_t *psort_info, fts_psort_t *merge_info)
 Clean up and deallocate FTS parallel sort structures, and close the merge sort files. More...
 
void row_fts_free_pll_merge_buf (fts_psort_t *psort_info)
 Free up merge buffers when merge sort is done. More...
 
static int row_merge_fts_doc_add_word_for_parser (MYSQL_FTPARSER_PARAM *param, char *word, int word_len, MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info)
 FTS plugin parser 'mysql_add_word' callback function for row merge. More...
 
static void row_merge_fts_doc_tokenize_by_parser (fts_doc_t *doc, st_mysql_ftparser *parser, fts_tokenize_ctx_t *t_ctx)
 Tokenize by fts plugin parser. More...
 
static ibool row_merge_fts_doc_tokenize (row_merge_buf_t **sort_buf, doc_id_t doc_id, fts_doc_t *doc, dtype_t *word_dtype, merge_file_t **merge_file, ibool opt_doc_id_size, fts_tokenize_ctx_t *t_ctx)
 Tokenize incoming text data and add to the sort buffer. More...
 
UNIV_INLINE void row_merge_fts_get_next_doc_item (fts_psort_t *psort_info, fts_doc_item_t **doc_item)
 Get next doc item from fts_doc_list. More...
 
static void fts_parallel_tokenization_thread (fts_psort_t *psort_info)
 Function performs parallel tokenization of the incoming doc strings. More...
 
void row_fts_start_psort (fts_psort_t *psort_info)
 Start the parallel tokenization and parallel merge sort. More...
 
static void fts_parallel_merge_thread (fts_psort_t *psort_info)
 Function performs the merge and insertion of the sorted records. More...
 
void row_fts_start_parallel_merge (fts_psort_t *merge_info)
 Kick off the parallel merge and insert thread. More...
 
static dberr_t row_merge_write_fts_node (const fts_psort_insert_t *ins_ctx, const fts_string_t *word, const fts_node_t *node)
 
Write out a single word's data as new entry/entries in the INDEX table. More...
 
static dberr_t row_merge_write_fts_word (fts_psort_insert_t *ins_ctx, fts_tokenizer_word_t *word)
 Insert processed FTS data to auxiliary index tables. More...
 
static void row_fts_insert_tuple (fts_psort_insert_t *ins_ctx, fts_tokenizer_word_t *word, ib_vector_t *positions, doc_id_t *in_doc_id, dtuple_t *dtuple)
 Read sorted FTS data files and insert data tuples to auxiliary tables. More...
 
static int row_fts_sel_tree_propagate (int propogated, int *sel_tree, const mrec_t **mrec, ulint **offsets, dict_index_t *index)
 Propagate a newly added record up one level in the selection tree. More...
 
static int row_fts_sel_tree_update (int *sel_tree, ulint propagated, ulint height, const mrec_t **mrec, ulint **offsets, dict_index_t *index)
 Readjust selection tree after popping the root and read a new value. More...
 
static void row_fts_build_sel_tree_level (int *sel_tree, ulint level, const mrec_t **mrec, ulint **offsets, dict_index_t *index)
 Build selection tree at a specified level. More...
 
static ulint row_fts_build_sel_tree (int *sel_tree, const mrec_t **mrec, ulint **offsets, dict_index_t *index)
 Build a selection tree for merge. More...
 
dberr_t row_fts_merge_insert (dict_index_t *index, dict_table_t *table, fts_psort_t *psort_info, ulint id)
 Read sorted file containing index data tuples and insert these data tuples to the index. More...
 

Variables

ulong fts_sort_pll_degree = 2
 Parallel sort degree. More...
 

Detailed Description

Create Full Text Index with (parallel) merge sort.

Created 10/13/2010 Jimmy Yang

Macro Definition Documentation

◆ ROW_MERGE_READ_GET_NEXT

#define ROW_MERGE_READ_GET_NEXT (   N)
Value:
do { \
b[N] = row_merge_read_rec(block[N], buf[N], b[N], index, fd[N], &foffs[N], \
&mrec[N], offsets[N]); \
if (UNIV_UNLIKELY(!b[N])) { \
if (mrec[N]) { \
goto exit; \
} \
} \
} while (0)
#define exit(A)
Definition: lexyy.cc:917
std::atomic< Type > N
Definition: ut0counter.h:230
char * index(const char *, int c)
Definition: mysql.cc:2862
const byte * row_merge_read_rec(row_merge_block_t *block, mrec_buf_t *buf, const byte *b, const dict_index_t *index, int fd, ulint *foffs, const mrec_t **mrec, ulint *offsets)
Read a merge record.
Definition: row0merge.cc:1132

Read the next record to buffer N.

Parameters
N - index into array of merge info structure

Function Documentation

◆ fts_parallel_merge_thread()

static void fts_parallel_merge_thread ( fts_psort_t * psort_info)
static

Function performs the merge and insertion of the sorted records.

Parameters
[in] psort_info - parallel merge info

◆ fts_parallel_tokenization_thread()

static void fts_parallel_tokenization_thread ( fts_psort_t * psort_info)
static

Function performs parallel tokenization of the incoming doc strings.

It also performs the initial in memory sort of the parsed records.

◆ row_fts_build_sel_tree()

static ulint row_fts_build_sel_tree ( int *  sel_tree,
const mrec_t **  mrec,
ulint **  offsets,
dict_index_t *  index
)
static

Build a selection tree for merge.

The selection tree is a binary tree and should have fts_sort_pll_degree / 2 levels, with the root as level 0.

Returns
number of tree levels
Parameters
sel_tree - in/out: selection tree
mrec - in: sort record
offsets - in: record offsets
index - in: index dictionary

◆ row_fts_build_sel_tree_level()

static void row_fts_build_sel_tree_level ( int *  sel_tree,
ulint  level,
const mrec_t **  mrec,
ulint **  offsets,
dict_index_t *  index
)
static

Build selection tree at a specified level.

Parameters
sel_tree - in/out: selection tree
level - in: selection tree level
mrec - in: sort record
offsets - in: record offsets
index - in: index dictionary

◆ row_fts_free_pll_merge_buf()

void row_fts_free_pll_merge_buf ( fts_psort_t * psort_info)

Free up merge buffers when merge sort is done.

in: parallel sort info

Parameters
psort_info - in: parallel sort info

◆ row_fts_insert_tuple()

static void row_fts_insert_tuple ( fts_psort_insert_t *  ins_ctx,
fts_tokenizer_word_t *  word,
ib_vector_t *  positions,
doc_id_t *  in_doc_id,
dtuple_t *  dtuple
)
static

Read sorted FTS data files and insert data tuples to auxiliary tables.

Parameters
ins_ctx - in: insert context
word - in: last processed tokenized word
positions - in: word position
in_doc_id - in: last item doc id
dtuple - in: entry to insert

◆ row_fts_merge_insert()

dberr_t row_fts_merge_insert ( dict_index_t *  index,
dict_table_t *  table,
fts_psort_t *  psort_info,
ulint  id 
)

Read sorted file containing index data tuples and insert these data tuples to the index.

Parameters
[in] index - index
[in] table - new table
[in] psort_info - parallel sort info
[in] id - which auxiliary table's data to insert to
Returns
DB_SUCCESS or error number

◆ row_fts_psort_info_destroy()

void row_fts_psort_info_destroy ( fts_psort_t *  psort_info,
fts_psort_t *  merge_info
)

Clean up and deallocate FTS parallel sort structures, and close the merge sort files.

Clean up and deallocate FTS parallel sort structures, and close temporary merge sort files.

Parameters
psort_info - parallel sort info
merge_info - parallel merge info

◆ row_fts_psort_info_init()

ibool row_fts_psort_info_init ( trx_t *  trx,
row_merge_dup_t *  dup,
const dict_table_t *  old_table,
const dict_table_t *  new_table,
ibool  opt_doc_id_size,
fts_psort_t **  psort,
fts_psort_t **  merge 
)

Initialize FTS parallel sort structures.

Returns
true if all successful
Parameters
trx - in: transaction
dup - in,own: descriptor of FTS index being created
old_table - in: Needed to fetch LOB from old table
new_table - in: table on which indexes are created
opt_doc_id_size - in: whether to use 4 bytes instead of 8 bytes integer to store Doc ID during sort
psort - out: parallel sort info to be instantiated
merge - out: parallel merge info to be instantiated

◆ row_fts_sel_tree_propagate()

static int row_fts_sel_tree_propagate ( int  propogated,
int *  sel_tree,
const mrec_t **  mrec,
ulint **  offsets,
dict_index_t *  index
)
static

Propagate a newly added record up one level in the selection tree.

Returns
parent where this value propagated to
Parameters
propogated - in: tree node propagated
sel_tree - in: selection tree
mrec - in: sort record
offsets - in: record offsets
index - in/out: FTS index

◆ row_fts_sel_tree_update()

static int row_fts_sel_tree_update ( int *  sel_tree,
ulint  propagated,
ulint  height,
const mrec_t **  mrec,
ulint **  offsets,
dict_index_t *  index
)
static

Readjust selection tree after popping the root and read a new value.

Returns
the new root
Parameters
sel_tree - in/out: selection tree
propagated - in: node to propagate up
height - in: tree height
mrec - in: sort record
offsets - in: record offsets
index - in: index dictionary

◆ row_fts_start_parallel_merge()

void row_fts_start_parallel_merge ( fts_psort_t * merge_info)

Kick off the parallel merge and insert thread.

Parameters
[in,out] merge_info - parallel sort info

◆ row_fts_start_psort()

void row_fts_start_psort ( fts_psort_t * psort_info)

Start the parallel tokenization and parallel merge sort.

Parameters
[in,out] psort_info - Parallel sort structure

◆ row_merge_create_fts_sort_index()

dict_index_t* row_merge_create_fts_sort_index ( dict_index_t *  index,
const dict_table_t *  table,
ibool *  opt_doc_id_size 
)

Create a temporary "fts sort index" used to merge sort the tokenized doc string.

The index has three "fields":

1) Tokenized word, 2) Doc ID (depending on the number of records to sort, it can be a 4-byte or 8-byte integer value), 3) Word's position in the original doc.

Returns
dict_index_t structure for the fts sort index
Parameters
index - in: Original FTS index based on which this sort index is created
table - in: table that FTS index is being created on
opt_doc_id_size - out: whether to use 4 bytes instead of 8 bytes integer to store Doc ID during sort

◆ row_merge_fts_doc_add_word_for_parser()

static int row_merge_fts_doc_add_word_for_parser ( MYSQL_FTPARSER_PARAM *  param,
char *  word,
int  word_len,
MYSQL_FTPARSER_BOOLEAN_INFO *  boolean_info
)
static

FTS plugin parser 'mysql_add_word' callback function for row merge.

Refer to 'MYSQL_FTPARSER_PARAM' for more detail.

Returns
always returns 0

◆ row_merge_fts_doc_tokenize()

static ibool row_merge_fts_doc_tokenize ( row_merge_buf_t **  sort_buf,
doc_id_t  doc_id,
fts_doc_t *  doc,
dtype_t *  word_dtype,
merge_file_t **  merge_file,
ibool  opt_doc_id_size,
fts_tokenize_ctx_t *  t_ctx
)
static

Tokenize incoming text data and add to the sort buffer.

Returns
true if the record passed, false if out of space
Parameters
sort_buf - in/out: sort buffer
doc_id - in: Doc ID
doc - in: Doc to be tokenized
word_dtype - in: data structure for word col
merge_file - in/out: merge file
opt_doc_id_size - in: whether to use 4 bytes instead of 8 bytes integer to store Doc ID during sort
t_ctx - in/out: tokenize context

◆ row_merge_fts_doc_tokenize_by_parser()

static void row_merge_fts_doc_tokenize_by_parser ( fts_doc_t *  doc,
st_mysql_ftparser *  parser,
fts_tokenize_ctx_t *  t_ctx
)
static

Tokenize by fts plugin parser.

◆ row_merge_fts_get_next_doc_item()

UNIV_INLINE void row_merge_fts_get_next_doc_item ( fts_psort_t *  psort_info,
fts_doc_item_t **  doc_item 
)

Get next doc item from fts_doc_list.

Parameters
psort_info - in: psort_info
doc_item - in/out: doc item

◆ row_merge_write_fts_node()

static dberr_t row_merge_write_fts_node ( const fts_psort_insert_t *  ins_ctx,
const fts_string_t *  word,
const fts_node_t *  node
)
static


Write out a single word's data as new entry/entries in the INDEX table.

Parameters
[in] ins_ctx - insert context
[in] word - word string
[in] node - node columns
Returns
DB_SUCCESS if insertion runs fine, otherwise error code

◆ row_merge_write_fts_word()

static dberr_t row_merge_write_fts_word ( fts_psort_insert_t *  ins_ctx,
fts_tokenizer_word_t *  word
)
static

Insert processed FTS data to auxiliary index tables.

Returns
DB_SUCCESS if insertion runs fine
Parameters
ins_ctx - in: insert context
word - in: sorted and tokenized word

Variable Documentation

◆ fts_sort_pll_degree

ulong fts_sort_pll_degree = 2

Parallel sort degree.

Variable specifying the FTS parallel sort degree.