MySQL  8.0.19
Source Code Documentation
plugin_ftparser.h
Go to the documentation of this file.
1 /* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
2 
3  This program is free software; you can redistribute it and/or modify
4  it under the terms of the GNU General Public License, version 2.0,
5  as published by the Free Software Foundation.
6 
7  This program is also distributed with certain software (including
8  but not limited to OpenSSL) that is licensed under separate terms,
9  as designated in a particular file or component or in included license
10  documentation. The authors of MySQL hereby grant you an additional
11  permission to link the program and your derivative works with the
12  separately licensed software that they have included with MySQL.
13 
14  This program is distributed in the hope that it will be useful,
15  but WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  GNU General Public License, version 2.0, for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program; if not, write to the Free Software
21  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22 
23 #ifndef _my_plugin_ftparser_h
24 #define _my_plugin_ftparser_h
25 
26 /**
27  @file include/mysql/plugin_ftparser.h
28 */
29 
30 /*************************************************************************
31  API for Full-text parser plugin. (MYSQL_FTPARSER_PLUGIN)
32 */
33 
34 #ifndef MYSQL_SERVER
35 #include "plugin.h"
36 #endif
37 
38 /* Parsing modes. Set in MYSQL_FTPARSER_PARAM::mode */
40  /*
41  Fast and simple mode. This mode is used for indexing, and natural
42  language queries.
43 
44  The parser is expected to return only those words that go into the
45  index. Stopwords or too short/long words should not be returned. The
46  'boolean_info' argument of mysql_add_word() does not have to be set.
47  */
49 
50  /*
51  Parse with stopwords mode. This mode is used in boolean searches for
52  "phrase matching."
53 
54  The parser is not allowed to ignore words in this mode. Every word
55  should be returned, including stopwords and words that are too short
56  or long. The 'boolean_info' argument of mysql_add_word() does not
57  have to be set.
58  */
60 
61  /*
62  Parse in boolean mode. This mode is used to parse a boolean query string.
63 
64  The parser should provide a valid MYSQL_FTPARSER_BOOLEAN_INFO
65  structure in the 'boolean_info' argument to mysql_add_word().
66  Usually that means that the parser should recognize boolean operators
67  in the parsing stream and set appropriate fields in
68  MYSQL_FTPARSER_BOOLEAN_INFO structure accordingly. As in
69  MYSQL_FTPARSER_WITH_STOPWORDS mode, no word should be ignored.
70  Instead, use FT_TOKEN_STOPWORD for the token type of such a word.
71  */
73 };
74 
75 /*
76  Token types for boolean mode searching (used for the type member of
77  MYSQL_FTPARSER_BOOLEAN_INFO struct)
78 
79  FT_TOKEN_EOF: End of data.
80  FT_TOKEN_WORD: Regular word.
81  FT_TOKEN_LEFT_PAREN: Left parenthesis (start of group/sub-expression).
82  FT_TOKEN_RIGHT_PAREN: Right parenthesis (end of group/sub-expression).
83  FT_TOKEN_STOPWORD: Stopword.
84 */
85 
92 };
93 
94 /*
95  This structure is used in boolean search mode only. It conveys
96  boolean-mode metadata to the MySQL search engine for every word in
97  the search query. A valid instance of this structure must be filled
98  in by the plugin parser and passed as an argument in the call to
99  mysql_add_word (the callback function in the MYSQL_FTPARSER_PARAM
100  structure) when a query is parsed in boolean mode.
101 
102  type: The token type. Should be one of the enum_ft_token_type values.
103 
104  yesno: Whether the word must be present for a match to occur:
105  >0 Must be present
106  <0 Must not be present
107  0 Neither; the word is optional but its presence increases the relevance
108  With the default settings of the ft_boolean_syntax system variable,
109  >0 corresponds to the '+' operator, <0 corresponds to the '-' operator,
110  and 0 means neither operator was used.
111 
112  weight_adjust: A weighting factor that determines how much a match
113  for the word counts. Positive values increase and negative values
114  decrease the word's relative importance in the query.
115 
116  wasign: The sign of the word's weight in the query. If it is non-negative,
117  a match for the word increases document relevance; if it is negative,
118  a match decreases it (the word becomes a "noise word": the less of it,
119  the better).
120 
121  trunc: Corresponds to the '*' operator in the default setting of the
122  ft_boolean_syntax system variable.
123 
124  position: Start position in bytes of the word in the document, used by InnoDB
125  FTS.
126 */
127 
130  int yesno;
132  char wasign;
133  char trunc;
134  int position;
135  /* These are parser state and must be removed. */
136  char prev;
137  char *quot;
138 };
139 
140 /*
141  The following flag means that buffer with a string (document, word)
142  may be overwritten by the caller before the end of the parsing (that is
143  before the st_mysql_ftparser::deinit() call). If the string needs
144  to survive between two successive calls of the parsing function, a
145  copy of it must be saved. The flag may be set by MySQL before calling
146  st_mysql_ftparser::parse(), or it may be set by a plugin before calling
147  MYSQL_FTPARSER_PARAM::mysql_parse() or
148  MYSQL_FTPARSER_PARAM::mysql_add_word().
149 */
150 #define MYSQL_FTFLAGS_NEED_COPY 1
151 
152 /*
153  An argument of the full-text parser plugin. This structure is
154  filled in by MySQL server and passed to the parsing function of the
155  plugin as an in/out parameter.
156 
157  mysql_parse: A pointer to the built-in parser implementation of the
158  server. It's set by the server and can be used by the parser plugin
159  to invoke the MySQL default parser. If the plugin's role is to extract
160  textual data from .doc, .pdf or .xml content, it might extract
161  plaintext from the content, and then pass the text to the default
162  MySQL parser to be parsed.
163 
164  mysql_add_word: A server callback to add a new word. When parsing
165  a document, the server sets this to point at a function that adds
166  the word to MySQL full-text index. When parsing a search query,
167  this function will add the new word to the list of words to search
168  for. The boolean_info argument can be NULL for all cases except
169  when mode is MYSQL_FTPARSER_FULL_BOOLEAN_INFO.
170 
171  ftparser_state: A generic pointer. The plugin can set it to point
172  to information to be used internally for its own purposes.
173 
174  mysql_ftparam: This is set by the server. It is used by MySQL functions
175  called via mysql_parse() and mysql_add_word() callback. The plugin
176  should not modify it.
177 
178  cs: Information about the character set of the document or query string.
179 
180  doc: A pointer to the document or query string to be parsed.
181 
182  length: Length of the document or query string, in bytes.
183 
184  flags: See MYSQL_FTFLAGS_* constants above.
185 
186  mode: The parsing mode: with boolean operators, with stopwords, or
187  neither (simple mode). See enum_ftparser_mode above.
188 */
189 
191  int (*mysql_parse)(MYSQL_FTPARSER_PARAM *, char *doc, int doc_len);
192  int (*mysql_add_word)(MYSQL_FTPARSER_PARAM *, char *word, int word_len,
193  MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info);
196  const CHARSET_INFO *cs;
197  char *doc;
198  int length;
199  int flags;
201 };
202 
203 /*
204  Full-text parser descriptor.
205 
206  interface_version is, e.g., MYSQL_FTPARSER_INTERFACE_VERSION.
207  The parsing, initialization, and deinitialization functions are
208  invoked per SQL statement for which the parser is used.
209 */
210 
213  int (*parse)(MYSQL_FTPARSER_PARAM *param);
214  int (*init)(MYSQL_FTPARSER_PARAM *param);
215  int (*deinit)(MYSQL_FTPARSER_PARAM *param);
216 };
217 
218 #endif
MYSQL_FTPARSER_BOOLEAN_INFO::yesno
int yesno
Definition: plugin_ftparser.h:130
MYSQL_FTPARSER_PARAM::length
int length
Definition: plugin_ftparser.h:198
st_mysql_ftparser::interface_version
int interface_version
Definition: plugin_ftparser.h:212
st_mysql_ftparser::parse
int(* parse)(MYSQL_FTPARSER_PARAM *param)
Definition: plugin_ftparser.h:213
MYSQL_FTPARSER_BOOLEAN_INFO::quot
char * quot
Definition: plugin_ftparser.h:137
CHARSET_INFO
Definition: m_ctype.h:354
MYSQL_FTPARSER_SIMPLE_MODE
@ MYSQL_FTPARSER_SIMPLE_MODE
Definition: plugin_ftparser.h:50
MYSQL_FTPARSER_PARAM
Definition: plugin_ftparser.h:190
MYSQL_FTPARSER_BOOLEAN_INFO::position
int position
Definition: plugin_ftparser.h:134
MYSQL_FTPARSER_WITH_STOPWORDS
@ MYSQL_FTPARSER_WITH_STOPWORDS
Definition: plugin_ftparser.h:61
FT_TOKEN_RIGHT_PAREN
@ FT_TOKEN_RIGHT_PAREN
Definition: plugin_ftparser.h:90
MYSQL_FTPARSER_PARAM::ftparser_state
void * ftparser_state
Definition: plugin_ftparser.h:194
MYSQL_FTPARSER_PARAM::mysql_parse
int(* mysql_parse)(MYSQL_FTPARSER_PARAM *, char *doc, int doc_len)
Definition: plugin_ftparser.h:191
MYSQL_FTPARSER_PARAM::doc
char * doc
Definition: plugin_ftparser.h:197
FT_TOKEN_STOPWORD
@ FT_TOKEN_STOPWORD
Definition: plugin_ftparser.h:91
enum_ftparser_mode
enum_ftparser_mode
Definition: plugin_ftparser.h:39
st_mysql_ftparser::deinit
int(* deinit)(MYSQL_FTPARSER_PARAM *param)
Definition: plugin_ftparser.h:215
MYSQL_FTPARSER_PARAM::mysql_ftparam
void * mysql_ftparam
Definition: plugin_ftparser.h:195
MYSQL_FTPARSER_BOOLEAN_INFO
Definition: plugin_ftparser.h:128
MYSQL_FTPARSER_BOOLEAN_INFO::type
enum enum_ft_token_type type
Definition: plugin_ftparser.h:129
MYSQL_FTPARSER_PARAM::flags
int flags
Definition: plugin_ftparser.h:199
st_mysql_ftparser
Definition: plugin_ftparser.h:211
FT_TOKEN_LEFT_PAREN
@ FT_TOKEN_LEFT_PAREN
Definition: plugin_ftparser.h:89
MYSQL_FTPARSER_PARAM::mode
enum enum_ftparser_mode mode
Definition: plugin_ftparser.h:200
MYSQL_FTPARSER_PARAM::mysql_add_word
int(* mysql_add_word)(MYSQL_FTPARSER_PARAM *, char *word, int word_len, MYSQL_FTPARSER_BOOLEAN_INFO *boolean_info)
Definition: plugin_ftparser.h:192
MYSQL_FTPARSER_BOOLEAN_INFO::weight_adjust
int weight_adjust
Definition: plugin_ftparser.h:131
MYSQL_FTPARSER_BOOLEAN_INFO::trunc
char trunc
Definition: plugin_ftparser.h:133
FT_TOKEN_EOF
@ FT_TOKEN_EOF
Definition: plugin_ftparser.h:87
enum_ft_token_type
enum_ft_token_type
Definition: plugin_ftparser.h:86
MYSQL_FTPARSER_BOOLEAN_INFO::wasign
char wasign
Definition: plugin_ftparser.h:132
st_mysql_ftparser::init
int(* init)(MYSQL_FTPARSER_PARAM *param)
Definition: plugin_ftparser.h:214
MYSQL_FTPARSER_BOOLEAN_INFO::prev
char prev
Definition: plugin_ftparser.h:136
MYSQL_FTPARSER_FULL_BOOLEAN_INFO
@ MYSQL_FTPARSER_FULL_BOOLEAN_INFO
Definition: plugin_ftparser.h:74
MYSQL_FTPARSER_PARAM::cs
const CHARSET_INFO * cs
Definition: plugin_ftparser.h:196
FT_TOKEN_WORD
@ FT_TOKEN_WORD
Definition: plugin_ftparser.h:88