MySQL 8.0.39
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2014, 2024, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is designed to work with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have either included with
14 the program or referenced in the documentation.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
24
25/* A lexical scanner for optimizer hints pseudo-commentary syntax */
26
27#ifndef SQL_LEX_HINTS_ICLUDED
28#define SQL_LEX_HINTS_ICLUDED
29
30#include <string.h>
31#include <sys/types.h>
32
33#include "lex_string.h"
34#include "m_ctype.h"
35#include "my_dbug.h"
36#include "my_inttypes.h"
37#include "sql/lex_symbol.h"
38#include "sql/lexer_yystype.h"
39#include "sql/sql_class.h"
41#include "sql/sql_lex_hash.h"
42#include "sql_chars.h"
43
44#include "sql/sql_hints.yy.h"
45
46class PT_hint_list;
47union YYSTYPE;
48
50
51/// Lexical scanner for hint comments.
52///
53/// When the main lexical scanner recognizes the "/*+" delimiter, it calls
54/// the hint parser (HINT_PARSER_parse) to consume the rest of hint tokens
55/// including the */ delimiter. The hint parser uses Hint_scanner as its own
56/// lexer to scan hint-specific tokens.
60 const bool is_ansi_quotes;
61 size_t lineno;
63
64 const char *input_buf;
65 const char *input_buf_end;
66
67 const char *ptr;
68
70
71 /**
72 Digest buffer interface to append tokens.
73 */
75
76 public:
77 /**
78 Current token (yytext) origin in the input_buf
79 */
80 const char *raw_yytext;
81 /**
82 Current token pointer (may point to a converted, allocated string outside input_buf)
83 */
84 const char *yytext;
85 /**
86 Length of the current token (see yytext)
87 */
88 size_t yyleng;
89
90 bool has_hints; ///< True if a hint comment is not empty (has any hints).
91
92 public:
93 Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
94 sql_digest_state *digest_state_arg);
95 size_t get_lineno() const { return lineno; }
96 const char *get_ptr() const { return ptr; }
98 void syntax_warning(const char *msg) const;
99
102 prev_token = scan();
104 return prev_token;
105 }
106
107 protected:
108 int scan();
109
110 template <hint_lex_char_classes Quote>
112 assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
113 Quote == HINT_CHR_QUOTE);
114 assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
115
116 const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
118 const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
119
120 skip_byte("\"'`"); // skip opening quote sign
121 adjust_token(); // reset yytext & yyleng
122
123 size_t double_separators = 0;
124
125 for (;;) {
126 hint_lex_char_classes chr_class = peek_class();
127 switch (chr_class) {
128 case HINT_CHR_NL:
129 skip_newline();
130 continue;
131 case HINT_CHR_MB:
132 if (skip_mb()) return HINT_ERROR;
133 continue;
136 return HINT_ERROR; // we don't support "*/" inside quoted
137 // identifiers
138 skip_byte('*');
139 continue;
140 case HINT_CHR_EOF:
141 return HINT_ERROR;
142 case Quote:
143 if (peek_class2() == Quote) {
144 skip_byte("\"'`");
145 skip_byte("\"'`");
146 double_separators++;
147 continue;
148 } else {
149 if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
150
151 ptr++; // skip closing quote
152
153 if (thd->charset_is_system_charset && double_separators == 0) {
154 yytext = thd->strmake(yytext, yyleng); // null-terminate it.
155 return ret;
156 }
157
158 LEX_STRING s;
161 thd->charset()))
162 return HINT_ERROR; // OOM etc.
163 } else {
164 assert(0 < double_separators && double_separators < yyleng);
165 s.length = yyleng - double_separators;
166 s.str = static_cast<char *>(thd->alloc(s.length));
167 if (s.str == nullptr) return HINT_ERROR; // OOM
168 }
169 if (double_separators > 0)
170 compact<Quote>(&s, yytext, yyleng, double_separators);
171
172 yytext = s.str;
173 yyleng = s.length;
174 return ret;
175 }
176 default:
177 skip_byte();
178 }
179 }
180 }
181
183 for (;;) {
184 hint_lex_char_classes chr_class = peek_class();
185 switch (chr_class) {
186 case HINT_CHR_IDENT:
187 case HINT_CHR_DIGIT:
188 skip_byte();
189 continue;
190 case HINT_CHR_MB:
191 if (skip_mb()) return HINT_ERROR;
192 continue;
193 case HINT_CHR_EOF:
194 default:
195 return HINT_ARG_IDENT;
196 }
197 }
198 }
199
201 assert(peek_class() == HINT_CHR_IDENT);
202 switch (peek_byte()) {
203 case 'K':
204 case 'M':
205 case 'G':
206 break;
207 default:
208 return scan_ident();
209 }
210 skip_byte();
211
212 switch (peek_class()) {
213 case HINT_CHR_IDENT:
214 case HINT_CHR_DIGIT:
215 return scan_ident();
216 default:
218 }
219 }
220
222 skip_byte('@');
223 start_token();
224
225 switch (peek_class()) {
226 case HINT_CHR_IDENT:
227 case HINT_CHR_DIGIT:
228 case HINT_CHR_MB:
231 return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
233 : HINT_ERROR;
235 return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
237 : HINT_ERROR;
238 default:
239 return HINT_ERROR;
240 }
241 }
242
244 for (;;) {
245 switch (peek_class()) {
246 case HINT_CHR_IDENT:
247 case HINT_CHR_DIGIT:
248 skip_byte();
249 continue;
250 case HINT_CHR_MB:
251 return scan_ident();
252 case HINT_CHR_EOF:
253 default:
254 const SYMBOL *symbol =
256 if (symbol) // keyword
257 {
258 /*
259 Override the yytext pointer to the short-living buffer with a
260 long-living pointer to the same text (don't need to allocate a
261 keyword string since symbol array is a global constant).
262 */
263 yytext = symbol->name;
264 assert(yyleng == symbol->length);
265
266 return symbol->tok;
267 }
268
270 return HINT_ARG_IDENT;
271 }
272 }
273 }
274
276 assert(peek_class() == HINT_CHR_DIGIT);
277 skip_byte();
278
279 for (;;) {
280 switch (peek_class()) {
281 case HINT_CHR_DIGIT:
282 skip_byte();
283 continue;
284 case HINT_CHR_DOT:
285 return scan_fraction_digits();
286 case HINT_CHR_IDENT:
288 case HINT_CHR_MB:
289 return scan_ident();
290 case HINT_CHR_EOF:
291 default:
292 return HINT_ARG_NUMBER;
293 }
294 }
295 }
296
298 skip_byte('.');
299
300 if (peek_class() == HINT_CHR_DIGIT)
301 skip_byte();
302 else
303 return HINT_ERROR;
304
305 for (;;) {
306 switch (peek_class()) {
307 case HINT_CHR_DIGIT:
308 skip_byte();
309 continue;
310 case HINT_CHR_IDENT:
311 case HINT_CHR_MB:
312 return HINT_ERROR;
313 default:
315 }
316 }
317 }
318
319 bool eof() const {
320 assert(ptr <= input_buf_end);
321 return ptr >= input_buf_end;
322 }
323
324 char peek_byte() const {
325 assert(!eof());
326 return *ptr;
327 }
328
330 return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
331 }
332
334 assert(ptr + 1 <= input_buf_end);
335 return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
336 : char_classes[static_cast<uchar>(ptr[1])];
337 }
338
340 assert(!eof() && peek_byte() == '\n');
341 skip_byte();
342 lineno++;
343 }
344
346 assert(!eof());
347 char ret = *ptr;
348 yyleng++;
349 ptr++;
350 return ret;
351 }
352
353 void skip_byte() {
354 assert(!eof());
355 yyleng++;
356 ptr++;
357 }
358
359 /**
360 Skips the next byte. In the debug mode, abort if it's not found in @p byte.
361
362 @param byte A byte to compare with the byte we skip.
363 Unused in non-debug builds.
364 */
365 void skip_byte(char byte [[maybe_unused]]) {
366 assert(peek_byte() == byte);
367 skip_byte();
368 }
369
370 /**
371 Skips the next byte. In the debug mode, abort if it's not found in @p str.
372
373 @param str A string of characters to compare with the next byte.
374 Unused in non-debug builds.
375 */
376 void skip_byte(const char *str [[maybe_unused]]) {
377 assert(strchr(str, peek_byte()));
378 skip_byte();
379 }
380
381 bool skip_mb() {
382 size_t len = my_ismbchar(cs, ptr, input_buf_end);
383 if (len == 0) {
384 ptr++;
385 yyleng++;
386 return true;
387 }
388 ptr += len;
389 yyleng += len;
390 return false;
391 }
392
394 yytext = ptr;
395 yyleng = 0;
396 }
397
398 void start_token() {
399 adjust_token();
400 raw_yytext = ptr;
401 }
402
403 template <hint_lex_char_classes Separator>
404 void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
405 assert(doubles > 0);
406
407 size_t d = doubles;
408 char *t = to->str;
409 for (const char *s = from, *end = from + len; s < end;) {
410 switch (char_classes[(uchar)*s]) {
411 case HINT_CHR_MB: {
412 size_t hint_len = my_ismbchar(cs, s, end);
413 assert(hint_len > 1);
414 memcpy(t, s, hint_len);
415 t += hint_len;
416 s += hint_len;
417 }
418 continue;
419 case Separator:
420 assert(char_classes[(uchar)*s] == Separator);
421 *t++ = *s++;
422 s++; // skip the 2nd separator
423 d--;
424 if (d == 0) {
425 memcpy(t, s, end - s);
426 to->length = len - doubles;
427 return;
428 }
429 continue;
430 case HINT_CHR_EOF:
431 assert(0);
432 to->length = 0;
433 return;
434 default:
435 *t++ = *s++;
436 }
437 }
438 assert(0);
439 to->length = 0;
440 return;
441 }
442
444
445 private:
446 /**
447 Helper function to check digest buffer for overflow before adding tokens.
448
449 @param token A token number to add.
450 */
451 void add_digest(uint token) {
452 if (digest_state == nullptr) return; // Digest buffer is full.
453
454 Lexer_yystype fake_yylvalue;
455 /*
456 YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
457 a consideration, that the lexer returns MEM_ROOT-allocated string values
458 there, and the rest of server is welcome to modify that strings inplace
459 (ind it does that in a few rare cases).
460 The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
461 it is not practical to add extra memory allocation there: const_cast is
462 enough.
463 */
464 fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
465 fake_yylvalue.lex_str.length = yyleng;
466
467 digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
468 }
469};
470
471inline int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner) {
472 auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
473 int ret = scanner->get_next_token();
474 yylval->hint_string.str = scanner->yytext;
475 yylval->hint_string.length = scanner->yyleng;
476 return ret;
477}
478
479void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg);
480
481#endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:57
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:74
void adjust_token()
Definition: sql_lex_hints.h:393
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:53
int scan_ident()
Definition: sql_lex_hints.h:182
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:80
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:200
size_t lineno
Definition: sql_lex_hints.h:61
int scan()
Definition: sql_lex_hints.cc:70
const char * input_buf
Definition: sql_lex_hints.h:64
bool eof() const
Definition: sql_lex_hints.h:319
int scan_quoted()
Definition: sql_lex_hints.h:111
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:404
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:62
int scan_query_block_name()
Definition: sql_lex_hints.h:221
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:59
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:329
void skip_byte()
Definition: sql_lex_hints.h:353
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:88
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:275
size_t get_lineno() const
Definition: sql_lex_hints.h:95
void skip_byte(char byte)
Skips the next byte.
Definition: sql_lex_hints.h:365
uchar get_byte()
Definition: sql_lex_hints.h:345
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:451
const char * ptr
Definition: sql_lex_hints.h:67
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:90
const char * yytext
Current token pointer (may be converted allocated string outside input_buf.
Definition: sql_lex_hints.h:84
const char * get_ptr() const
Definition: sql_lex_hints.h:96
void skip_newline()
Definition: sql_lex_hints.h:339
const char * input_buf_end
Definition: sql_lex_hints.h:65
bool skip_mb()
Definition: sql_lex_hints.h:381
void skip_byte(const char *str)
Skips the next byte.
Definition: sql_lex_hints.h:376
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:333
int scan_fraction_digits()
Definition: sql_lex_hints.h:297
THD * thd
Definition: sql_lex_hints.h:58
char peek_byte() const
Definition: sql_lex_hints.h:324
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:97
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:243
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:136
void start_token()
Definition: sql_lex_hints.h:398
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:149
int prev_token
Definition: sql_lex_hints.h:69
const bool is_ansi_quotes
Definition: sql_lex_hints.h:60
int get_next_token()
Definition: sql_lex_hints.h:100
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:48
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:75
Definition: parse_tree_hints.h:98
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:34
void * alloc(size_t size)
Definition: sql_lexer_thd.h:48
bool convert_string(LEX_STRING *, const CHARSET_INFO *, const char *, size_t, const CHARSET_INFO *, bool=false)
Definition: sql_lexer_thd.h:41
const CHARSET_INFO * charset() const
Definition: sql_lexer_thd.h:40
char * strmake(const char *str, size_t size) const
Definition: sql_lexer_thd.h:50
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2722
static struct wordvalue doubles[]
Definition: ctype-czech.cc:171
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
static uint my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:723
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1545
#define DBUG_TRACE
Definition: my_dbug.h:146
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:52
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1052
Definition: buf0block_hint.cc:30
Definition: commit_order_queue.h:34
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
hint_lex_char_classes
Definition: sql_chars.h:74
@ HINT_CHR_MB
Definition: sql_chars.h:84
@ HINT_CHR_NL
Definition: sql_chars.h:85
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:77
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:81
@ HINT_CHR_QUOTE
Definition: sql_chars.h:86
@ HINT_CHR_DOT
Definition: sql_chars.h:80
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:75
@ HINT_CHR_IDENT
Definition: sql_chars.h:83
@ HINT_CHR_SLASH
Definition: sql_chars.h:87
@ HINT_CHR_EOF
Definition: sql_chars.h:82
@ HINT_CHR_DIGIT
Definition: sql_chars.h:79
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:81
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:80
@ HINT_ERROR
Definition: sql_hints.yy.h:86
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:84
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:83
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:97
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:82
void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:118
int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:471
void hint_lex_init_maps(CHARSET_INFO *cs, hint_lex_char_classes *hint_map)
sql_digest_state * digest_add_token(sql_digest_state *, uint, Lexer_yystype *)
Definition: sql_lexer.cc:54
Definition: m_ctype.h:385
Definition: mysql_lex_string.h:35
char * str
Definition: mysql_lex_string.h:36
size_t length
Definition: mysql_lex_string.h:37
Definition: lex_symbol.h:39
const unsigned int length
Definition: lex_symbol.h:41
const unsigned int tok
Definition: lex_symbol.h:42
const char * name
Definition: lex_symbol.h:40
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:36
unsigned int uint
Definition: uca9-dump.cc:75
Definition: lexer_yystype.h:33
LEX_STRING lex_str
Definition: lexer_yystype.h:34
Definition: parser_yystype.h:340