Full Text Search functionality. More...

#include <math.h>
#include <sys/types.h>
#include <iomanip>
#include <vector>
#include "dict0dict.h"
#include "fts0ast.h"
#include "fts0fts.h"
#include "fts0pars.h"
#include "fts0plugin.h"
#include "fts0priv.h"
#include "fts0types.h"
#include "ha_prototypes.h"
#include "lob0lob.h"
#include "my_dbug.h"
#include "mysql/strings/m_ctype.h"
#include "row0sel.h"
#include "ut0new.h"
#include "ut0rbt.h"

Classes
struct	fts_query_t
	State of an FTS query. More...

struct	fts_match_t
	For phrase matching, first we collect the documents and the positions then we match. More...

struct	fts_select_t
	For matching tokens in a phrase search. More...

struct	fts_proximity_t
	Defines a set of ranges for original documents, each of which has a minimum position and maximum position. More...

struct	fts_phrase_t
	The match positions and tokens to match. More...

struct	fts_phrase_param_t
	Parameter passed to fts phrase match by parser. More...

struct	fts_doc_freq_t
	For storing the frequency of a word/term in a document. More...

struct	fts_word_freq_t
	To determine the word frequency per document. More...

Macros
#define	FTS_ELEM(t, n, i, j) (t[(i)*n + (j)])

#define	RANK_DOWNGRADE (-1.0F)

#define	RANK_UPGRADE (1.0F)

#define	MAX_PROXIMITY_ITEM 128

#define	SIZEOF_RBT_CREATE sizeof(ib_rbt_t) + sizeof(ib_rbt_node_t) * 2

#define	SIZEOF_RBT_NODE_ADD sizeof(ib_rbt_node_t)

#define	RANKING_WORDS_INIT_LEN 4

Typedefs
typedef std::vector< fts_string_t, ut::allocator< fts_string_t > >	word_vector_t

typedef std::vector< ulint, ut::allocator< ulint > >	pos_vector_t

Functions
int	innobase_fts_nocase_compare (const CHARSET_INFO *cs, const fts_string_t *s1, const fts_string_t *s2)
	Compare two FTS character strings case insensitively according to their charset. More...

static bool	fts_query_index_fetch_nodes (void *row, void *user_arg)
	Callback function to fetch the rows in an FTS INDEX record. More...

static dberr_t	fts_query_filter_doc_ids (fts_query_t *query, const fts_string_t *word, fts_word_freq_t *word_freq, const fts_node_t *node, void *data, ulint len, bool calc_doc_count)
	Read and filter nodes. More...

static dberr_t	fts_ast_visit_sub_exp (fts_ast_node_t *node, fts_ast_callback visitor, void *arg)
	Process (nested) sub-expression, create a new result set to store the sub-expression result by processing nodes under current sub-expression list. More...

static dberr_t	fts_expand_query (dict_index_t *index, fts_query_t *query)
	This function implements a simple "blind" query expansion search: words in documents found in the first search pass will be used as search arguments to search the document again, thus "expand" the search result set. More...

static bool	fts_phrase_or_proximity_search (fts_query_t *query, ib_vector_t *tokens)
	This function finds documents that contain all words in a phrase or proximity search. More...

static bool	fts_proximity_get_positions (fts_match_t **match, ulint num_match, ulint distance, fts_proximity_t *qualified_pos)
	This function checks whether words in result documents are close to each other (within proximity range as specified by "distance"). More...

static int	fts_freq_doc_id_cmp (const void *p1, const void *p2)

static int	fts_query_compare_rank (const void *p1, const void *p2)
	Compare two fts_ranking_t instance on their rank value and doc ids in descending order on the rank and ascending order on doc id. More...

static void	fts_ranking_words_create (fts_query_t *query, fts_ranking_t *ranking)
	Create words in ranking. More...

static void	fts_ranking_words_add (fts_query_t *query, fts_ranking_t *ranking, const fts_string_t *word)
	Add a word into ranking. More...

static bool	fts_ranking_words_get_next (const fts_query_t *query, fts_ranking_t *ranking, ulint *pos, fts_string_t *word)
	Get a word from a ranking. More...

static fts_word_freq_t *	fts_query_add_word_freq (fts_query_t *query, const fts_string_t *word)
	Add a word if it doesn't exist, to the term freq RB tree. More...

static fts_doc_freq_t *	fts_query_add_doc_freq (fts_query_t *query, ib_rbt_t *doc_freqs, doc_id_t doc_id)
	Add a doc id if it doesn't exist, to the doc freq RB tree. More...

static void	fts_query_union_doc_id (fts_query_t *query, doc_id_t doc_id, fts_rank_t rank)
	Add the doc id to the query set only if it's not in the deleted array. More...

static void	fts_query_remove_doc_id (fts_query_t *query, doc_id_t doc_id)
	Remove the doc id from the query set only if it's not in the deleted set. More...

static void	fts_query_change_ranking (fts_query_t *query, doc_id_t doc_id, bool downgrade)
	Find the doc id in the query set but not in the deleted set, artificially downgrade or upgrade its ranking by a value and make/initialize its ranking under or above its normal range 0 to 1. More...

static void	fts_query_intersect_doc_id (fts_query_t *query, doc_id_t doc_id, fts_rank_t rank)
	Check the doc id in the query set only if it's not in the deleted array. More...

static void	fts_query_free_doc_ids (fts_query_t *query, ib_rbt_t *doc_ids)
	Free the document ranking rb tree. More...

static void	fts_query_add_word_to_document (fts_query_t *query, doc_id_t doc_id, const fts_string_t *word)
	Add the word to the documents "list" of matching words from the query. More...

static void	fts_query_check_node (fts_query_t *query, const fts_string_t *token, const fts_node_t *node)
	Check the node ilist. More...

static ulint	fts_cache_find_wildcard (fts_query_t *query, const fts_index_cache_t *index_cache, const fts_string_t *token)
	Search index cache for word with wildcard match. More...

static dberr_t	fts_query_difference (fts_query_t *query, const fts_string_t *token)
	Set difference. More...

static dberr_t	fts_query_intersect (fts_query_t *query, const fts_string_t *token)
	Intersect the token doc ids with the current set. More...

static dberr_t	fts_query_cache (fts_query_t *query, const fts_string_t *token)
	Query index cache. More...

static dberr_t	fts_query_union (fts_query_t *query, fts_string_t *token)
	Set union. More...

static dberr_t	fts_query_process_doc_id (fts_query_t *query, doc_id_t doc_id, fts_rank_t rank)
	Depending upon the current query operator process the doc id. More...

static dberr_t	fts_merge_doc_ids (fts_query_t *query, const ib_rbt_t *doc_ids)
	Merge two result sets. More...

static byte *	fts_query_skip_word (byte *ptr, const byte *end)
	Skip non-whitespace in a string. More...

static bool	fts_query_match_phrase_terms (fts_phrase_t *phrase, byte **start, const byte *end, mem_heap_t *heap)
	Check whether the remaining terms in the phrase match the text. More...

static bool	fts_proximity_is_word_in_range (const fts_phrase_t *phrase, byte *start, ulint total_len)
	Callback function to count the number of words in position ranges, and see whether the word count is in specified "phrase->distance". More...

static int	fts_query_match_phrase_add_word_for_parser (MYSQL_FTPARSER_PARAM *param, char *word, int word_len, MYSQL_FTPARSER_BOOLEAN_INFO *info)
	FTS plugin parser 'mysql_add_word' callback function for phrase match. Refer to 'MYSQL_FTPARSER_PARAM' for more detail. More...

static bool	fts_query_match_phrase_terms_by_parser (fts_phrase_param_t *phrase_param, st_mysql_ftparser *parser, byte *text, ulint len)
	Check whether the terms in the phrase match the text. More...

static bool	fts_query_match_phrase (fts_phrase_t *phrase, byte *start, ulint cur_len, ulint prev_len, mem_heap_t *heap)
	Callback function to fetch and search the document. More...

static bool	fts_query_fetch_document (void *row, void *user_arg)
	Callback function to fetch and search the document. More...

static dberr_t	fts_query_match_document (ib_vector_t *tokens, fts_get_doc_t *get_doc, fts_match_t *match, ulint distance, st_mysql_ftparser *parser, bool *found)
	Retrieve the document and match the phrase tokens. More...

static bool	fts_query_is_in_proximity_range (const fts_query_t *query, fts_match_t **match, fts_proximity_t *qualified_pos)
	This function fetches the original documents and count the words in between matching words to see that is in specified distance. More...

static dberr_t	fts_query_search_phrase (fts_query_t *query, ib_vector_t *orig_tokens, ib_vector_t *tokens)
	Iterate over the matched document ids and search for the actual phrase in the text. More...

static void	fts_query_phrase_split (fts_query_t *query, const fts_ast_node_t *node, ib_vector_t *tokens, ib_vector_t *orig_tokens, mem_heap_t *heap)
	Split the phrase into tokens. More...

static dberr_t	fts_query_phrase_search (fts_query_t *query, const fts_ast_node_t *node)
	Text/Phrase search. More...

static dberr_t	fts_query_execute (fts_query_t *query, fts_string_t *token)
	Find the word and evaluate. More...

static byte *	fts_query_get_token (fts_ast_node_t *node, fts_string_t *token)
	Create a wildcard string. More...

static dberr_t	fts_query_visitor (fts_ast_oper_t oper, fts_ast_node_t *node, void *arg)
	Visit every node of the AST. More...

static dberr_t	fts_query_read_node (fts_query_t *query, const fts_string_t *word, que_node_t *exp)
	Read the FTS INDEX row. More...

static void	fts_query_calculate_idf (fts_query_t *query)
	Calculate the inverse document frequency (IDF) for all the terms. More...

static void	fts_query_calculate_ranking (const fts_query_t *query, fts_ranking_t *ranking)
	Calculate the ranking of the document. More...

static void	fts_query_add_ranking (fts_query_t *query, ib_rbt_t *ranking_tree, const fts_ranking_t *new_ranking)
	Add ranking to the result set. More...

float	fts_retrieve_ranking (fts_result_t *result, doc_id_t doc_id)
	Retrieve the FTS Relevance Ranking result for doc with doc_id. More...

static fts_result_t *	fts_query_prepare_result (fts_query_t *query, fts_result_t *result)
	Create the result and copy the data to it. More...

static fts_result_t *	fts_query_get_result (fts_query_t *query, fts_result_t *result)
	Get the result of the query. More...

static void	fts_query_free (fts_query_t *query)
	FTS Query free resources and reset. More...

static fts_ast_node_t *	fts_query_parse (fts_query_t *query, byte *query_str, ulint query_len)
	Parse the query using flex/bison or plugin parser. More...

static void	fts_query_can_optimize (fts_query_t *query, uint flags)
	FTS Query optimization: set FTS_OPT_RANKING if it is a simple term query. More...

dberr_t	fts_query (trx_t *trx, dict_index_t *index, uint flags, const byte *query_str, ulint query_len, fts_result_t **result, ulonglong limit)
	FTS Query entry point. More...

void	fts_query_free_result (fts_result_t *result)
	FTS Query free result, returned by fts_query(). More...

void	fts_query_sort_result_on_rank (fts_result_t *result)
	FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. More...

static void	fts_print_doc_id (fts_query_t *query)
	A debug function to print result doc_id set. More...

Detailed Description

Full Text Search functionality.

Created 2007/03/27 by Sunny Bains. Completed 2011/7/10 by Sunny and Jimmy Yang.

Macro Definition Documentation

◆ FTS_ELEM

#define FTS_ELEM	(	t,
		n,
		i,
		j
	)	(t[(i)*n + (j)])

◆ MAX_PROXIMITY_ITEM

#define MAX_PROXIMITY_ITEM 128

◆ RANK_DOWNGRADE

#define RANK_DOWNGRADE (-1.0F)

◆ RANK_UPGRADE

#define RANK_UPGRADE (1.0F)

◆ RANKING_WORDS_INIT_LEN

#define RANKING_WORDS_INIT_LEN 4

◆ SIZEOF_RBT_CREATE

#define SIZEOF_RBT_CREATE sizeof(ib_rbt_t) + sizeof(ib_rbt_node_t) * 2

◆ SIZEOF_RBT_NODE_ADD

#define SIZEOF_RBT_NODE_ADD sizeof(ib_rbt_node_t)

Typedef Documentation

◆ pos_vector_t

typedef std::vector<ulint, ut::allocator<ulint> > pos_vector_t

◆ word_vector_t

typedef std::vector<fts_string_t, ut::allocator<fts_string_t> > word_vector_t

Function Documentation

◆ fts_ast_visit_sub_exp()

static dberr_t fts_ast_visit_sub_exp	(	fts_ast_node_t *	node,
		fts_ast_callback	visitor,
		void *	arg
	)

static

Process (nested) sub-expression, create a new result set to store the sub-expression result by processing nodes under current sub-expression list.

Merge the sub-expression result with that of parent expression list.

Parameters

[in,out]	node	current root node
[in,out]	visitor	callback function
[in,out]	arg	argument for callback

Returns: DB_SUCCESS if all go well

◆ fts_cache_find_wildcard()

static ulint fts_cache_find_wildcard	(	fts_query_t *	query,
		const fts_index_cache_t *	index_cache,
		const fts_string_t *	token
	)

static

Search index cache for word with wildcard match.

Returns: number of words matched

Parameters

query	in: query instance
index_cache	in: cache to search
token	in: token to search

◆ fts_expand_query()

static dberr_t fts_expand_query	(	dict_index_t *	index,
		fts_query_t *	query
	)

static

This function implements a simple "blind" query expansion search: words in documents found in the first search pass will be used as search arguments to search the document again, thus "expand" the search result set.

Returns: DB_SUCCESS if success, otherwise the error code

Parameters

index	in: FTS index to search
query	in: FTS query instance

◆ fts_freq_doc_id_cmp()

static int fts_freq_doc_id_cmp	(	const void *	p1,
		const void *	p2
	)

inline static

Parameters

p1	in: id1
p2	in: id2

◆ fts_merge_doc_ids()

static dberr_t fts_merge_doc_ids	(	fts_query_t *	query,
		const ib_rbt_t *	doc_ids
	)

static

Merge two result sets.

Parameters

query	in,out: query instance
doc_ids	in: result set to merge

◆ fts_phrase_or_proximity_search()

static bool fts_phrase_or_proximity_search	(	fts_query_t *	query,
		ib_vector_t *	tokens
	)

static

This function finds documents that contain all words in a phrase or proximity search.

And if proximity search, verify the words are close enough to each other, as in specified distance. This function is called for phrase and proximity search.

Returns: true if documents are found, false otherwise

Parameters

query	in/out: query instance query->doc_ids might be instantiated with qualified doc IDs
tokens	in: Tokens contain words

◆ fts_print_doc_id()

static void fts_print_doc_id ( fts_query_t * query )

static

A debug function to print result doc_id set.

Parameters

query in: tree that stores doc_ids.

◆ fts_proximity_get_positions()

static bool fts_proximity_get_positions	(	fts_match_t **	match,
		ulint	num_match,
		ulint	distance,
		fts_proximity_t *	qualified_pos
	)

static

This function checks whether words in result documents are close to each other (within proximity range as specified by "distance").

If "distance" is MAX_ULINT, then it will find all combinations of positions of matching words and store min and max positions in the "qualified_pos" for later verification.

Returns: true if words are close to each other, false otherwise

Parameters

match	in: query instance
num_match	in: number of matching items
distance	in: distance value for proximity search
qualified_pos	out: the position info records ranges containing all matching words.

◆ fts_proximity_is_word_in_range()

static bool fts_proximity_is_word_in_range	(	const fts_phrase_t *	phrase,
		byte *	start,
		ulint	total_len
	)

static

Callback function to count the number of words in position ranges, and see whether the word count is in specified "phrase->distance".

Returns: true if the number of characters is less than the "distance"

Parameters

phrase	in: phrase with the search info
start	in: text to search
total_len	in: length of text

◆ fts_query()

dberr_t fts_query	(	trx_t *	trx,
		dict_index_t *	index,
		uint	flags,
		const byte *	query_str,
		ulint	query_len,
		fts_result_t **	result,
		ulonglong	limit
	)

FTS Query entry point.

Parameters

[in]	trx	transaction
[in]	index	fts index to search
[in]	flags	FTS search mode
[in]	query_str	FTS query
[in]	query_len	FTS query string len in bytes
[in,out]	result	result doc ids
[in]	limit	limit value

Returns: DB_SUCCESS if successful otherwise error code

◆ fts_query_add_doc_freq()

static fts_doc_freq_t * fts_query_add_doc_freq	(	fts_query_t *	query,
		ib_rbt_t *	doc_freqs,
		doc_id_t	doc_id
	)

static

Add a doc id if it doesn't exist, to the doc freq RB tree.

Returns: pointer to word

Parameters

query	in: query instance
doc_freqs	in: rb tree of fts_doc_freq_t
doc_id	in: doc id to add

◆ fts_query_add_ranking()

static void fts_query_add_ranking	(	fts_query_t *	query,
		ib_rbt_t *	ranking_tree,
		const fts_ranking_t *	new_ranking
	)

static

Add ranking to the result set.

Parameters

query	in: query state
ranking_tree	in: ranking tree
new_ranking	in: ranking of a document

◆ fts_query_add_word_freq()

static fts_word_freq_t * fts_query_add_word_freq	(	fts_query_t *	query,
		const fts_string_t *	word
	)

static

Add a word if it doesn't exist, to the term freq RB tree.

We store a pointer to the word that is passed in as the argument.

Returns: pointer to word

Parameters

query	in: query instance
word	in: term/word to add

◆ fts_query_add_word_to_document()

static void fts_query_add_word_to_document	(	fts_query_t *	query,
		doc_id_t	doc_id,
		const fts_string_t *	word
	)

static

Add the word to the documents "list" of matching words from the query.

We make a copy of the word from the query heap.

Parameters

query	in: query to update
doc_id	in: the document to update
word	in: the token to add

◆ fts_query_cache()

static dberr_t fts_query_cache	(	fts_query_t *	query,
		const fts_string_t *	token
	)

static

Query index cache.

Returns: DB_SUCCESS if all go well

Parameters

query	in/out: query instance
token	in: token to search

◆ fts_query_calculate_idf()

static void fts_query_calculate_idf ( fts_query_t * query )

static

Calculate the inverse document frequency (IDF) for all the terms.

Parameters

query in: Query state

◆ fts_query_calculate_ranking()

static void fts_query_calculate_ranking	(	const fts_query_t *	query,
		fts_ranking_t *	ranking
	)

static

Calculate the ranking of the document.

Parameters

query	in: query state
ranking	in: Document to rank

◆ fts_query_can_optimize()

static void fts_query_can_optimize	(	fts_query_t *	query,
		uint	flags
	)

static

FTS Query optimization: set FTS_OPT_RANKING if it is a simple term query.

Parameters

query	in/out: query instance
flags	In: FTS search mode

◆ fts_query_change_ranking()

static void fts_query_change_ranking	(	fts_query_t *	query,
		doc_id_t	doc_id,
		bool	downgrade
	)

static

Find the doc id in the query set but not in the deleted set, artificially downgrade or upgrade its ranking by a value and make/initialize its ranking under or above its normal range 0 to 1.

This is used for Boolean Search operator such as Negation operator, which makes word's contribution to the row's relevance to be negative

Parameters

query	in: query instance
doc_id	in: the doc id to add
downgrade	in: Whether to downgrade ranking

◆ fts_query_check_node()

static void fts_query_check_node	(	fts_query_t *	query,
		const fts_string_t *	token,
		const fts_node_t *	node
	)

static

Check the node ilist.

Parameters

query	in: query to update
token	in: the token to search
node	in: node to check

◆ fts_query_compare_rank()

static int fts_query_compare_rank	(	const void *	p1,
		const void *	p2
	)

static

Compare two fts_ranking_t instance on their rank value and doc ids in descending order on the rank and ascending order on doc id.

Returns: 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2

Parameters

p1	in: pointer to elem
p2	in: pointer to elem

◆ fts_query_difference()

static dberr_t fts_query_difference	(	fts_query_t *	query,
		const fts_string_t *	token
	)

static

Set difference.

Returns: DB_SUCCESS if all go well

Parameters

query	in: query instance
token	in: token to search

◆ fts_query_execute()

static dberr_t fts_query_execute	(	fts_query_t *	query,
		fts_string_t *	token
	)

static

Find the word and evaluate.

Returns: DB_SUCCESS if all go well

Parameters

query	in: query instance
token	in: token to search

◆ fts_query_fetch_document()

static bool fts_query_fetch_document	(	void *	row,
		void *	user_arg
	)

static

Callback function to fetch and search the document.

Returns: whether the phrase is found

Parameters

row	in: sel_node_t*
user_arg	in: fts_doc_t*

◆ fts_query_filter_doc_ids()

static dberr_t fts_query_filter_doc_ids	(	fts_query_t *	query,
		const fts_string_t *	word,
		fts_word_freq_t *	word_freq,
		const fts_node_t *	node,
		void *	data,
		ulint	len,
		bool	calc_doc_count
	)

static

Read and filter nodes.

Returns: DB_SUCCESS if all go well, or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT

Parameters

query	in: query instance
word	in: the current word
word_freq	in/out: word frequency
node	in: current FTS node
data	in: doc id ilist
len	in: doc id ilist size
calc_doc_count	in: whether to remember doc count

◆ fts_query_free()

static void fts_query_free ( fts_query_t * query )

static

FTS Query free resources and reset.

Parameters

query in: query instance to free

◆ fts_query_free_doc_ids()

static void fts_query_free_doc_ids	(	fts_query_t *	query,
		ib_rbt_t *	doc_ids
	)

static

Free the document ranking rb tree.

Parameters

query	in: query instance
doc_ids	in: rb tree to free

◆ fts_query_free_result()

void fts_query_free_result ( fts_result_t * result )

FTS Query free result, returned by fts_query().

Parameters

result in: result instance to free.

◆ fts_query_get_result()

static fts_result_t * fts_query_get_result	(	fts_query_t *	query,
		fts_result_t *	result
	)

static

Get the result of the query.

Calculate the similarity coefficient.

Parameters

query	in: query instance
result	in: result

◆ fts_query_get_token()

static byte * fts_query_get_token	(	fts_ast_node_t *	node,
		fts_string_t *	token
	)

static

Create a wildcard string.

It's the responsibility of the caller to free the byte* pointer. It's allocated using ut::malloc_withkey(UT_NEW_THIS_FILE_PSI_KEY).

Returns: ptr to allocated memory

Parameters

node	in: the current sub tree
token	in: token to create

◆ fts_query_index_fetch_nodes()

static bool fts_query_index_fetch_nodes	(	void *	row,
		void *	user_arg
	)

static

Callback function to fetch the rows in an FTS INDEX record.

Returns: always returns true

Parameters

row	in: sel_node_t*
user_arg	in: pointer to fts_fetch_t

◆ fts_query_intersect()

static dberr_t fts_query_intersect	(	fts_query_t *	query,
		const fts_string_t *	token
	)

static

Intersect the token doc ids with the current set.

Returns: DB_SUCCESS if all go well

Parameters

query	in: query instance
token	in: the token to search

◆ fts_query_intersect_doc_id()

static void fts_query_intersect_doc_id	(	fts_query_t *	query,
		doc_id_t	doc_id,
		fts_rank_t	rank
	)

static

Check the doc id in the query set only if it's not in the deleted array.

The doc ids that were found are stored in another rb tree (fts_query_t::intersect).

Parameters

query	in: query instance
doc_id	in: the doc id to add
rank	in: if non-zero, it is the rank associated with the doc_id

◆ fts_query_is_in_proximity_range()

static bool fts_query_is_in_proximity_range	(	const fts_query_t *	query,
		fts_match_t **	match,
		fts_proximity_t *	qualified_pos
	)

static

This function fetches the original documents and count the words in between matching words to see that is in specified distance.

Returns: DB_SUCCESS if all OK

Parameters

query	in: query instance
match	in: query instance
qualified_pos	in: position info for qualified ranges

◆ fts_query_match_document()

static dberr_t fts_query_match_document	(	ib_vector_t *	tokens,
		fts_get_doc_t *	get_doc,
		fts_match_t *	match,
		ulint	distance,
		st_mysql_ftparser *	parser,
		bool *	found
	)

static

Retrieve the document and match the phrase tokens.

Returns: DB_SUCCESS or error code

Parameters

tokens	in: phrase tokens
get_doc	in: table and prepared statements
match	in: doc id and positions
distance	in: proximity distance
parser	in: fts plugin parser
found	out: true if phrase found

◆ fts_query_match_phrase()

static bool fts_query_match_phrase	(	fts_phrase_t *	phrase,
		byte *	start,
		ulint	cur_len,
		ulint	prev_len,
		mem_heap_t *	heap
	)

static

Callback function to fetch and search the document.

Parameters

[in]	phrase	phrase to match
[in]	start	text to search, we can't make this const because we need to first convert the string to lowercase
[in]	cur_len	length of text
[in]	prev_len	total length for searched doc fields
[in]	heap	heap

Returns: true if matched else false

◆ fts_query_match_phrase_add_word_for_parser()

static int fts_query_match_phrase_add_word_for_parser	(	MYSQL_FTPARSER_PARAM *	param,
		char *	word,
		int	word_len,
		MYSQL_FTPARSER_BOOLEAN_INFO *	info
	)

static

FTS plugin parser 'mysql_add_word' callback function for phrase match. Refer to 'MYSQL_FTPARSER_PARAM' for more detail.

Returns: 0 if match, or return non-zero

Parameters

param	in: parser param
word	in: token
word_len	in: token length
info	in: token info

◆ fts_query_match_phrase_terms()

static bool fts_query_match_phrase_terms	(	fts_phrase_t *	phrase,
		byte **	start,
		const byte *	end,
		mem_heap_t *	heap
	)

static

Check whether the remaining terms in the phrase match the text.

Returns: true if matched else false

Parameters

phrase	in: phrase to match
start	in/out: text to search, we can't make this const because we need to first convert the string to lowercase
end	in: pointer to the end of the string to search
heap	in: heap

◆ fts_query_match_phrase_terms_by_parser()

static bool fts_query_match_phrase_terms_by_parser	(	fts_phrase_param_t *	phrase_param,
		st_mysql_ftparser *	parser,
		byte *	text,
		ulint	len
	)

static

Check whether the terms in the phrase match the text.

Returns: true if matched else false

◆ fts_query_parse()

static fts_ast_node_t * fts_query_parse	(	fts_query_t *	query,
		byte *	query_str,
		ulint	query_len
	)

static

Parse the query using flex/bison or plugin parser.

Returns: parse tree node.

Parameters

query	in: query instance
query_str	in: query string
query_len	in: query string length

◆ fts_query_phrase_search()

static dberr_t fts_query_phrase_search	(	fts_query_t *	query,
		const fts_ast_node_t *	node
	)

static

Text/Phrase search.

Returns: DB_SUCCESS or error code

Parameters

query	in: query instance
node	in: node to search

◆ fts_query_phrase_split()

static void fts_query_phrase_split	(	fts_query_t *	query,
		const fts_ast_node_t *	node,
		ib_vector_t *	tokens,
		ib_vector_t *	orig_tokens,
		mem_heap_t *	heap
	)

static

Split the phrase into tokens.

Parameters

[in,out]	query	query instance
[in]	node	query node to search
[in,out]	tokens	token vector
[in,out]	orig_tokens	original node tokens include stopword
[in,out]	heap	mem heap

◆ fts_query_prepare_result()

static fts_result_t * fts_query_prepare_result	(	fts_query_t *	query,
		fts_result_t *	result
	)

static

Create the result and copy the data to it.

Parameters

query	in: Query state
result	in: result this can contain data from a previous search on another FTS index

◆ fts_query_process_doc_id()

static dberr_t fts_query_process_doc_id	(	fts_query_t *	query,
		doc_id_t	doc_id,
		fts_rank_t	rank
	)

static

Depending upon the current query operator process the doc id.

Returns: DB_SUCCESS if all goes well, or DB_FTS_EXCEED_RESULT_CACHE_LIMIT

Parameters

query	in: query instance
doc_id	in: doc id to process
rank	in: if non-zero, it is the rank associated with the doc_id

◆ fts_query_read_node()

static dberr_t fts_query_read_node	(	fts_query_t *	query,
		const fts_string_t *	word,
		que_node_t *	exp
	)

static

Read the FTS INDEX row.

Returns: DB_SUCCESS if all go well.

Parameters

query	in: query instance
word	in: current word
exp	in: query graph node

◆ fts_query_remove_doc_id()

static void fts_query_remove_doc_id	(	fts_query_t *	query,
		doc_id_t	doc_id
	)

static

Remove the doc id from the query set only if it's not in the deleted set.

Parameters

query	in: query instance
doc_id	in: the doc id to remove

◆ fts_query_search_phrase()

static dberr_t fts_query_search_phrase	(	fts_query_t *	query,
		ib_vector_t *	orig_tokens,
		ib_vector_t *	tokens
	)

static

Iterate over the matched document ids and search for the actual phrase in the text.

Returns: DB_SUCCESS if all OK

Parameters

query	in: query instance
orig_tokens	in: tokens to search, with any stopwords in the original phrase
tokens	in: tokens that do not include stopwords and can be used to calculate ranking

◆ fts_query_skip_word()

static byte * fts_query_skip_word	(	byte *	ptr,
		const byte *	end
	)

inline static

Skip non-whitespace in a string.

Move ptr to the next word boundary.

Returns: pointer to first whitespace character or end

Parameters

ptr	in: start of scan
end	in: pointer to end of string

◆ fts_query_sort_result_on_rank()

void fts_query_sort_result_on_rank ( fts_result_t * result )

FTS Query sort result, returned by fts_query() on fts_ranking_t::rank.

Parameters

result out: result instance to sort.

◆ fts_query_union()

static dberr_t fts_query_union	(	fts_query_t *	query,
		fts_string_t *	token
	)

static

Set union.

Returns: DB_SUCCESS if all go well

Parameters

query	in: query instance
token	in: token to search

◆ fts_query_union_doc_id()

static void fts_query_union_doc_id	(	fts_query_t *	query,
		doc_id_t	doc_id,
		fts_rank_t	rank
	)

static

Add the doc id to the query set only if it's not in the deleted array.

Parameters

query	in: query instance
doc_id	in: the doc id to add
rank	in: if non-zero, it is the rank associated with the doc_id

◆ fts_query_visitor()

static dberr_t fts_query_visitor	(	fts_ast_oper_t	oper,
		fts_ast_node_t *	node,
		void *	arg
	)

static

Visit every node of the AST.

Parameters

oper	in: current operator
node	in: The root of the current subtree
arg	in: callback arg

◆ fts_ranking_words_add()

static void fts_ranking_words_add	(	fts_query_t *	query,
		fts_ranking_t *	ranking,
		const fts_string_t *	word
	)

static

Add a word into ranking.

Parameters

query	in: query instance
ranking	in: ranking instance
word	in: term/word to add

◆ fts_ranking_words_create()

static void fts_ranking_words_create	(	fts_query_t *	query,
		fts_ranking_t *	ranking
	)

static

Create words in ranking.

Parameters

query	in: query instance
ranking	in: ranking instance

◆ fts_ranking_words_get_next()

static bool fts_ranking_words_get_next	(	const fts_query_t *	query,
		fts_ranking_t *	ranking,
		ulint *	pos,
		fts_string_t *	word
	)

static

Get a word from a ranking.

Returns: true if it's successful

Parameters

query	in: query instance
ranking	in: ranking instance
pos	in/out: word start pos
word	in/out: term/word to add

◆ fts_retrieve_ranking()

float fts_retrieve_ranking	(	fts_result_t *	result,
		doc_id_t	doc_id
	)

Retrieve the FTS Relevance Ranking result for doc with doc_id.

Returns: the relevance ranking value, 0 if no ranking value present.

Parameters

result	in: FTS result structure
doc_id	in: doc_id of the item to retrieve

◆ innobase_fts_nocase_compare()

int innobase_fts_nocase_compare	(	const CHARSET_INFO *	cs,
		const fts_string_t *	s1,
		const fts_string_t *	s2
	)

Compare two FTS character strings case insensitively according to their charset.

This assumes that s1 is already in lower case.

Parameters

[in]	cs	character set
[in]	s1	key
[in]	s2	node

Returns: 0 if the two strings are equal

Classes

Macros

Typedefs

Functions

Detailed Description

Macro Definition Documentation

◆ FTS_ELEM

◆ MAX_PROXIMITY_ITEM

◆ RANK_DOWNGRADE

◆ RANK_UPGRADE

◆ RANKING_WORDS_INIT_LEN

◆ SIZEOF_RBT_CREATE

◆ SIZEOF_RBT_NODE_ADD

Typedef Documentation

◆ pos_vector_t

◆ word_vector_t

Function Documentation

◆ fts_ast_visit_sub_exp()

◆ fts_cache_find_wildcard()

◆ fts_expand_query()

◆ fts_freq_doc_id_cmp()

◆ fts_merge_doc_ids()

◆ fts_phrase_or_proximity_search()

◆ fts_print_doc_id()

◆ fts_proximity_get_positions()

◆ fts_proximity_is_word_in_range()

◆ fts_query()

◆ fts_query_add_doc_freq()

◆ fts_query_add_ranking()

◆ fts_query_add_word_freq()

◆ fts_query_add_word_to_document()

◆ fts_query_cache()

◆ fts_query_calculate_idf()

◆ fts_query_calculate_ranking()

◆ fts_query_can_optimize()

◆ fts_query_change_ranking()

◆ fts_query_check_node()

◆ fts_query_compare_rank()

◆ fts_query_difference()

◆ fts_query_execute()

◆ fts_query_fetch_document()

◆ fts_query_filter_doc_ids()

◆ fts_query_free()

◆ fts_query_free_doc_ids()

◆ fts_query_free_result()

◆ fts_query_get_result()

◆ fts_query_get_token()

◆ fts_query_index_fetch_nodes()

◆ fts_query_intersect()

◆ fts_query_intersect_doc_id()

◆ fts_query_is_in_proximity_range()

◆ fts_query_match_document()

◆ fts_query_match_phrase()

◆ fts_query_match_phrase_add_word_for_parser()

◆ fts_query_match_phrase_terms()

◆ fts_query_match_phrase_terms_by_parser()

◆ fts_query_parse()

◆ fts_query_phrase_search()

◆ fts_query_phrase_split()

◆ fts_query_prepare_result()

◆ fts_query_process_doc_id()

◆ fts_query_read_node()

◆ fts_query_remove_doc_id()

◆ fts_query_search_phrase()

◆ fts_query_skip_word()

◆ fts_query_sort_result_on_rank()

◆ fts_query_union()

◆ fts_query_union_doc_id()

◆ fts_query_visitor()

◆ fts_ranking_words_add()

◆ fts_ranking_words_create()

◆ fts_ranking_words_get_next()

◆ fts_retrieve_ranking()

◆ innobase_fts_nocase_compare()