MySQL 9.0.1
Source Code Documentation
plugin_ftparser.h
Go to the documentation of this file.
1/* Copyright (c) 2005, 2024, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is designed to work with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have either included with
13 the program or referenced in the documentation.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24#ifndef _my_plugin_ftparser_h
25#define _my_plugin_ftparser_h
26
27/**
28 @file include/mysql/plugin_ftparser.h
29*/
30
31/*************************************************************************
32 API for Full-text parser plugin. (MYSQL_FTPARSER_PLUGIN)
33*/
34
35#ifndef MYSQL_SERVER
36#include "plugin.h"
37#endif
38
39#ifndef MYSQL_ABI_CHECK
40struct CHARSET_INFO;
41#endif
42
43/* Parsing modes. Set in MYSQL_FTPARSER_PARAM::mode */
45 /*
46 Fast and simple mode. This mode is used for indexing, and natural
47 language queries.
48
49 The parser is expected to return only those words that go into the
50 index. Stopwords or too short/long words should not be returned. The
51 'boolean_info' argument of mysql_add_word() does not have to be set.
52 */
54
55 /*
56 Parse with stopwords mode. This mode is used in boolean searches for
57 "phrase matching."
58
59 The parser is not allowed to ignore words in this mode. Every word
60 should be returned, including stopwords and words that are too short
61 or long. The 'boolean_info' argument of mysql_add_word() does not
62 have to be set.
63 */
65
66 /*
67 Parse in boolean mode. This mode is used to parse a boolean query string.
68
69 The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO
70 structure in the 'boolean_info' argument to mysql_add_word().
71 Usually that means that the parser should recognize boolean operators
72 in the parsing stream and set appropriate fields in
73 MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As in
74 MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
75 Instead, use FT_TOKEN_STOPWORD as the token type of such a word.
76 */
78};
79
80/*
81 Token types for boolean mode searching (used for the type member of
82 MYSQL_FTPARSER_BOOLEAN_INFO struct)
83
84 FT_TOKEN_EOF: End of data.
85 FT_TOKEN_WORD: Regular word.
86 FT_TOKEN_LEFT_PAREN: Left parenthesis (start of group/sub-expression).
87 FT_TOKEN_RIGHT_PAREN: Right parenthesis (end of group/sub-expression).
88 FT_TOKEN_STOPWORD: Stopword.
89*/
90
97};
98
99/*
100 This structure is used in boolean search mode only. It conveys
101 boolean-mode metadata to the MySQL search engine for every word in
102 the search query. A valid instance of this structure must be filled
103 in by the plugin parser and passed as an argument in the call to
104 mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM
105 structure) when a query is parsed in boolean mode.
106
107 type: The token type. Should be one of the enum_ft_token_type values.
108
109 yesno: Whether the word must be present for a match to occur:
110 >0 Must be present
111 <0 Must not be present
112 0 Neither; the word is optional but its presence increases the relevance
113 With the default settings of the ft_boolean_syntax system variable,
114 >0 corresponds to the '+' operator, <0 corresponds to the '-' operator,
115 and 0 means neither operator was used.
116
117 weight_adjust: A weighting factor that determines how much a match
118 for the word counts. Positive values increase and negative values
119 decrease the word's relative importance in the query.
120
121 wasign: The sign of the word's weight in the query. If it is non-negative,
122 a match for the word increases document relevance; if it is negative,
123 a match decreases it (the word becomes a "noise word": the less of it,
124 the better).
125
126 trunc: Corresponds to the '*' operator in the default setting of the
127 ft_boolean_syntax system variable.
128
129 position: Start position in bytes of the word in the document, used by InnoDB
130 FTS.
131*/
132
135 int yesno;
137 char wasign;
138 char trunc;
140 /* These are parser state and must be removed. */
141 char prev;
142 char *quot;
143};
144
145/*
146 The following flag means that the buffer holding a string (document, word)
147 may be overwritten by the caller before the end of parsing (that is,
148 before the st_mysql_ftparser::deinit() call). If the string needs
149 to survive between two successive calls of the parsing function, the
150 receiver must save a copy of it. The flag may be set by MySQL before calling
151 st_mysql_ftparser::parse(), or it may be set by a plugin before calling
152 MYSQL_FTPARSER_PARAM::mysql_parse() or
153 MYSQL_FTPARSER_PARAM::mysql_add_word().
154*/
155#define MYSQL_FTFLAGS_NEED_COPY 1
156
157/*
158 An argument of the full-text parser plugin. This structure is
159 filled in by MySQL server and passed to the parsing function of the
160 plugin as an in/out parameter.
161
162 mysql_parse: A pointer to the built-in parser implementation of the
163 server. It's set by the server and can be used by the parser plugin
164 to invoke the MySQL default parser. If the plugin's role is to extract
165 textual data from .doc, .pdf or .xml content, it might extract
166 plaintext from the content, and then pass the text to the default
167 MySQL parser to be parsed.
168
169 mysql_add_word: A server callback to add a new word. When parsing
170 a document, the server sets this to point at a function that adds
171 the word to the MySQL full-text index. When parsing a search query,
172 this function will add the new word to the list of words to search
173 for. The boolean_info argument can be NULL for all cases except
174 when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO.
175
176 ftparser_state: A generic pointer. The plugin can set it to point
177 to information to be used internally for its own purposes.
178
179 mysql_ftparam: This is set by the server. It is used by MySQL functions
180 called via mysql_parse() and mysql_add_word() callback. The plugin
181 should not modify it.
182
183 cs: Information about the character set of the document or query string.
184
185 doc: A pointer to the document or query string to be parsed.
186
187 length: Length of the document or query string, in bytes.
188
189 flags: See MYSQL_FTFLAGS_* constants above.
190
191 mode: The parsing mode: with boolean operators, with stopwords, or
192 neither (simple mode). See enum_ftparser_mode above.
193*/
194
196 int (*mysql_parse)(MYSQL_FTPARSER_PARAM *, char *doc, int doc_len);
197 int (*mysql_add_word)(MYSQL_FTPARSER_PARAM *, char *word, int word_len,
198 MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info);
202 char *doc;
204 int flags;
206};
207
208/*
209 Full-text parser descriptor.
210
211 interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION.
212 The parsing, initialization, and deinitialization functions are
213 invoked per SQL statement for which the parser is used.
214*/
215
221};
222
223#endif
enum_ft_token_type
Definition: plugin_ftparser.h:91
@ FT_TOKEN_RIGHT_PAREN
Definition: plugin_ftparser.h:95
@ FT_TOKEN_LEFT_PAREN
Definition: plugin_ftparser.h:94
@ FT_TOKEN_STOPWORD
Definition: plugin_ftparser.h:96
@ FT_TOKEN_WORD
Definition: plugin_ftparser.h:93
@ FT_TOKEN_EOF
Definition: plugin_ftparser.h:92
enum_ftparser_mode
Definition: plugin_ftparser.h:44
@ MYSQL_FTPARSER_FULL_BOOLEAN_INFO
Definition: plugin_ftparser.h:77
@ MYSQL_FTPARSER_SIMPLE_MODE
Definition: plugin_ftparser.h:53
@ MYSQL_FTPARSER_WITH_STOPWORDS
Definition: plugin_ftparser.h:64
Definition: m_ctype.h:421
Definition: plugin_ftparser.h:133
char * quot
Definition: plugin_ftparser.h:142
char trunc
Definition: plugin_ftparser.h:138
int weight_adjust
Definition: plugin_ftparser.h:136
char prev
Definition: plugin_ftparser.h:141
enum enum_ft_token_type type
Definition: plugin_ftparser.h:134
int yesno
Definition: plugin_ftparser.h:135
int position
Definition: plugin_ftparser.h:139
char wasign
Definition: plugin_ftparser.h:137
Definition: plugin_ftparser.h:195
const CHARSET_INFO * cs
Definition: plugin_ftparser.h:201
char * doc
Definition: plugin_ftparser.h:202
enum enum_ftparser_mode mode
Definition: plugin_ftparser.h:205
int flags
Definition: plugin_ftparser.h:204
void * ftparser_state
Definition: plugin_ftparser.h:199
int(* mysql_add_word)(MYSQL_FTPARSER_PARAM *, char *word, int word_len, MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info)
Definition: plugin_ftparser.h:197
void * mysql_ftparam
Definition: plugin_ftparser.h:200
int length
Definition: plugin_ftparser.h:203
int(* mysql_parse)(MYSQL_FTPARSER_PARAM *, char *doc, int doc_len)
Definition: plugin_ftparser.h:196
Definition: plugin_ftparser.h:216
int(* init)(MYSQL_FTPARSER_PARAM *param)
Definition: plugin_ftparser.h:219
int(* deinit)(MYSQL_FTPARSER_PARAM *param)
Definition: plugin_ftparser.h:220
int(* parse)(MYSQL_FTPARSER_PARAM *param)
Definition: plugin_ftparser.h:218
int interface_version
Definition: plugin_ftparser.h:217