MySQL 8.3.0
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2014, 2023, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/* A lexical scanner for optimizer hints pseudo-commentary syntax */
25
26#ifndef SQL_LEX_HINTS_ICLUDED
27#define SQL_LEX_HINTS_ICLUDED
28
29#include <string.h>
30#include <sys/types.h>
31
32#include "lex_string.h"
33#include "my_dbug.h"
34#include "my_inttypes.h"
36#include "sql/lex_symbol.h"
37#include "sql/lexer_yystype.h"
38#include "sql/parser_yystype.h"
39#include "sql/sql_class.h"
41#include "sql/sql_lex_hash.h"
42#include "strings/sql_chars.h"
43
44#include "sql/sql_hints.yy.h"
45
46class PT_hint_list;
47
48/// Lexical scanner for hint comments.
49///
50/// When the main lexical scanner recognizes the "/*+" delimiter, it calls
51/// the hint parser (my_hint_parser_parse) to consume the rest of hint tokens
52/// including the */ delimiter. The hint parser uses Hint_scanner as its own
53/// lexer to scan hint-specific tokens.
57 const bool is_ansi_quotes;
58 size_t lineno;
60
61 const char *input_buf;
62 const char *input_buf_end;
63
64 const char *ptr;
65
67
68 /**
69 Digest buffer interface to append tokens.
70 */
72
73 public:
74 /**
75 Current token (yytext) origin in the input_buf
76 */
77 const char *raw_yytext;
78 /**
79 Current token pointer (may point to a converted, allocated string outside input_buf)
80 */
81 const char *yytext;
82 /**
83 Length of the current token (see yytext)
84 */
85 size_t yyleng;
86
87 bool has_hints; ///< True if a hint comment is not empty (has any hints).
88
89 public:
90 Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
91 sql_digest_state *digest_state_arg);
92 size_t get_lineno() const { return lineno; }
93 const char *get_ptr() const { return ptr; }
95 void syntax_warning(const char *msg) const;
96
99 prev_token = scan();
101 return prev_token;
102 }
103
104 protected:
105 int scan();
106
107 template <hint_lex_char_classes Quote>
109 assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
110 Quote == HINT_CHR_QUOTE);
111 assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
112
113 const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
115 const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
116
117 skip_byte("\"'`"); // skip opening quote sign
118 adjust_token(); // reset yytext & yyleng
119
120 size_t double_separators = 0;
121
122 for (;;) {
123 hint_lex_char_classes chr_class = peek_class();
124 switch (chr_class) {
125 case HINT_CHR_NL:
126 skip_newline();
127 continue;
128 case HINT_CHR_MB:
129 if (skip_mb()) return HINT_ERROR;
130 continue;
133 return HINT_ERROR; // we don't support "*/" inside quoted
134 // identifiers
135 skip_byte('*');
136 continue;
137 case HINT_CHR_EOF:
138 return HINT_ERROR;
139 case Quote:
140 if (peek_class2() == Quote) {
141 skip_byte("\"'`");
142 skip_byte("\"'`");
143 double_separators++;
144 continue;
145 } else {
146 if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
147
148 ptr++; // skip closing quote
149
150 if (thd->charset_is_system_charset && double_separators == 0) {
151 yytext = thd->strmake(yytext, yyleng); // null-terminate it.
152 return ret;
153 }
154
155 LEX_STRING s;
158 thd->charset()))
159 return HINT_ERROR; // OOM etc.
160 } else {
161 assert(0 < double_separators && double_separators < yyleng);
162 s.length = yyleng - double_separators;
163 s.str = static_cast<char *>(thd->alloc(s.length));
164 if (s.str == nullptr) return HINT_ERROR; // OOM
165 }
166 if (double_separators > 0)
167 compact<Quote>(&s, yytext, yyleng, double_separators);
168
169 yytext = s.str;
170 yyleng = s.length;
171 return ret;
172 }
173 default:
174 skip_byte();
175 }
176 }
177 }
178
180 for (;;) {
181 const hint_lex_char_classes chr_class = peek_class();
182 switch (chr_class) {
183 case HINT_CHR_IDENT:
184 case HINT_CHR_DIGIT:
185 skip_byte();
186 continue;
187 case HINT_CHR_MB:
188 if (skip_mb()) return HINT_ERROR;
189 continue;
190 case HINT_CHR_EOF:
191 default:
192 return HINT_ARG_IDENT;
193 }
194 }
195 }
196
198 assert(peek_class() == HINT_CHR_IDENT);
199 switch (peek_byte()) {
200 case 'K':
201 case 'M':
202 case 'G':
203 break;
204 default:
205 return scan_ident();
206 }
207 skip_byte();
208
209 switch (peek_class()) {
210 case HINT_CHR_IDENT:
211 case HINT_CHR_DIGIT:
212 return scan_ident();
213 default:
215 }
216 }
217
219 skip_byte('@');
220 start_token();
221
222 switch (peek_class()) {
223 case HINT_CHR_IDENT:
224 case HINT_CHR_DIGIT:
225 case HINT_CHR_MB:
228 return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
230 : HINT_ERROR;
232 return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
234 : HINT_ERROR;
235 default:
236 return HINT_ERROR;
237 }
238 }
239
241 for (;;) {
242 switch (peek_class()) {
243 case HINT_CHR_IDENT:
244 case HINT_CHR_DIGIT:
245 skip_byte();
246 continue;
247 case HINT_CHR_MB:
248 return scan_ident();
249 case HINT_CHR_EOF:
250 default:
251 const SYMBOL *symbol =
253 if (symbol) // keyword
254 {
255 /*
256 Override the yytext pointer to the short-living buffer with a
257 long-living pointer to the same text (don't need to allocate a
258 keyword string since symbol array is a global constant).
259 */
260 yytext = symbol->name;
261 assert(yyleng == symbol->length);
262
263 return symbol->tok;
264 }
265
267 return HINT_ARG_IDENT;
268 }
269 }
270 }
271
273 assert(peek_class() == HINT_CHR_DIGIT);
274 skip_byte();
275
276 for (;;) {
277 switch (peek_class()) {
278 case HINT_CHR_DIGIT:
279 skip_byte();
280 continue;
281 case HINT_CHR_DOT:
282 return scan_fraction_digits();
283 case HINT_CHR_IDENT:
285 case HINT_CHR_MB:
286 return scan_ident();
287 case HINT_CHR_EOF:
288 default:
289 return HINT_ARG_NUMBER;
290 }
291 }
292 }
293
295 skip_byte('.');
296
297 if (peek_class() == HINT_CHR_DIGIT)
298 skip_byte();
299 else
300 return HINT_ERROR;
301
302 for (;;) {
303 switch (peek_class()) {
304 case HINT_CHR_DIGIT:
305 skip_byte();
306 continue;
307 case HINT_CHR_IDENT:
308 case HINT_CHR_MB:
309 return HINT_ERROR;
310 default:
312 }
313 }
314 }
315
316 bool eof() const {
317 assert(ptr <= input_buf_end);
318 return ptr >= input_buf_end;
319 }
320
321 char peek_byte() const {
322 assert(!eof());
323 return *ptr;
324 }
325
327 return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
328 }
329
331 assert(ptr + 1 <= input_buf_end);
332 return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
333 : char_classes[static_cast<uchar>(ptr[1])];
334 }
335
337 assert(!eof() && peek_byte() == '\n');
338 skip_byte();
339 lineno++;
340 }
341
343 assert(!eof());
344 const char ret = *ptr;
345 yyleng++;
346 ptr++;
347 return ret;
348 }
349
350 void skip_byte() {
351 assert(!eof());
352 yyleng++;
353 ptr++;
354 }
355
356 /**
357 Skips the next byte. In the debug mode, abort if it's not found in @p byte.
358
359 @param byte A byte to compare with the byte we skip.
360 Unused in non-debug builds.
361 */
362 void skip_byte(char byte [[maybe_unused]]) {
363 assert(peek_byte() == byte);
364 skip_byte();
365 }
366
367 /**
368 Skips the next byte. In the debug mode, abort if it's not found in @p str.
369
370 @param str A string of characters to compare with the next byte.
371 Unused in non-debug builds.
372 */
373 void skip_byte(const char *str [[maybe_unused]]) {
374 assert(strchr(str, peek_byte()));
375 skip_byte();
376 }
377
378 bool skip_mb() {
379 const size_t len = my_ismbchar(cs, ptr, input_buf_end);
380 if (len == 0) {
381 ptr++;
382 yyleng++;
383 return true;
384 }
385 ptr += len;
386 yyleng += len;
387 return false;
388 }
389
391 yytext = ptr;
392 yyleng = 0;
393 }
394
395 void start_token() {
396 adjust_token();
397 raw_yytext = ptr;
398 }
399
400 template <hint_lex_char_classes Separator>
401 void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
402 assert(doubles > 0);
403
404 size_t d = doubles;
405 char *t = to->str;
406 for (const char *s = from, *end = from + len; s < end;) {
407 switch (char_classes[(uchar)*s]) {
408 case HINT_CHR_MB: {
409 size_t hint_len = my_ismbchar(cs, s, end);
410 assert(hint_len > 1);
411 memcpy(t, s, hint_len);
412 t += hint_len;
413 s += hint_len;
414 }
415 continue;
416 case Separator:
417 assert(char_classes[(uchar)*s] == Separator);
418 *t++ = *s++;
419 s++; // skip the 2nd separator
420 d--;
421 if (d == 0) {
422 memcpy(t, s, end - s);
423 to->length = len - doubles;
424 return;
425 }
426 continue;
427 case HINT_CHR_EOF:
428 assert(0);
429 to->length = 0;
430 return;
431 default:
432 *t++ = *s++;
433 }
434 }
435 assert(0);
436 to->length = 0;
437 return;
438 }
439
441
442 private:
443 /**
444 Helper function to check digest buffer for overflow before adding tokens.
445
446 @param token A token number to add.
447 */
448 void add_digest(uint token) {
449 if (digest_state == nullptr) return; // Digest buffer is full.
450
451 Lexer_yystype fake_yylvalue;
452 /*
453 YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
454 a consideration, that the lexer returns MEM_ROOT-allocated string values
455 there, and the rest of server is welcome to modify that strings inplace
456 (ind it does that in a few rare cases).
457 The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
458 it is not practical to add extra memory allocation there: const_cast is
459 enough.
460 */
461 fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
462 fake_yylvalue.lex_str.length = yyleng;
463
464 digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
465 }
466};
467
469 Hint_scanner *scanner) {
470 auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
471 const int ret = scanner->get_next_token();
472 yylval->hint_string.str = scanner->yytext;
473 yylval->hint_string.length = scanner->yyleng;
474 return ret;
475}
476
478 const char *msg);
479
480#endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:54
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:71
void adjust_token()
Definition: sql_lex_hints.h:390
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:52
int scan_ident()
Definition: sql_lex_hints.h:179
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:77
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:197
size_t lineno
Definition: sql_lex_hints.h:58
int scan()
Definition: sql_lex_hints.cc:69
const char * input_buf
Definition: sql_lex_hints.h:61
bool eof() const
Definition: sql_lex_hints.h:316
int scan_quoted()
Definition: sql_lex_hints.h:108
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:401
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:59
int scan_query_block_name()
Definition: sql_lex_hints.h:218
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:56
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:326
void skip_byte()
Definition: sql_lex_hints.h:350
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:85
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:272
size_t get_lineno() const
Definition: sql_lex_hints.h:92
void skip_byte(char byte)
Skips the next byte.
Definition: sql_lex_hints.h:362
uchar get_byte()
Definition: sql_lex_hints.h:342
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:448
const char * ptr
Definition: sql_lex_hints.h:64
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:87
const char * yytext
Current token pointer (may be converted allocated string outside input_buf.
Definition: sql_lex_hints.h:81
const char * get_ptr() const
Definition: sql_lex_hints.h:93
void skip_newline()
Definition: sql_lex_hints.h:336
const char * input_buf_end
Definition: sql_lex_hints.h:62
bool skip_mb()
Definition: sql_lex_hints.h:378
void skip_byte(const char *str)
Skips the next byte.
Definition: sql_lex_hints.h:373
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:330
int scan_fraction_digits()
Definition: sql_lex_hints.h:294
THD * thd
Definition: sql_lex_hints.h:55
char peek_byte() const
Definition: sql_lex_hints.h:321
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:94
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:240
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:135
void start_token()
Definition: sql_lex_hints.h:395
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:148
int prev_token
Definition: sql_lex_hints.h:66
const bool is_ansi_quotes
Definition: sql_lex_hints.h:57
int get_next_token()
Definition: sql_lex_hints.h:97
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:47
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:74
Definition: parse_tree_hints.h:99
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:35
void * alloc(size_t size)
Definition: sql_lexer_thd.h:49
bool convert_string(LEX_STRING *, const CHARSET_INFO *, const char *, size_t, const CHARSET_INFO *, bool=false)
Definition: sql_lexer_thd.h:42
const CHARSET_INFO * charset() const
Definition: sql_lexer_thd.h:41
char * strmake(const char *str, size_t size) const
Definition: sql_lexer_thd.h:51
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2740
static struct wordvalue doubles[]
Definition: ctype-czech.cc:170
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
unsigned my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:672
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1556
#define DBUG_TRACE
Definition: my_dbug.h:145
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:51
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1065
Definition: buf0block_hint.cc:29
Definition: commit_order_queue.h:33
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:191
hint_lex_char_classes
Definition: sql_chars.h:72
@ HINT_CHR_MB
Definition: sql_chars.h:82
@ HINT_CHR_NL
Definition: sql_chars.h:83
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:75
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:79
@ HINT_CHR_QUOTE
Definition: sql_chars.h:84
@ HINT_CHR_DOT
Definition: sql_chars.h:78
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:73
@ HINT_CHR_IDENT
Definition: sql_chars.h:81
@ HINT_CHR_SLASH
Definition: sql_chars.h:85
@ HINT_CHR_EOF
Definition: sql_chars.h:80
@ HINT_CHR_DIGIT
Definition: sql_chars.h:77
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:89
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:88
@ HINT_ERROR
Definition: sql_hints.yy.h:94
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:92
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:91
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:105
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:90
int my_hint_parser_lex(MY_HINT_PARSER_STYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:468
void my_hint_parser_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:117
sql_digest_state * digest_add_token(sql_digest_state *, uint, Lexer_yystype *)
Definition: sql_lexer.cc:52
Definition: m_ctype.h:422
Definition: mysql_lex_string.h:34
char * str
Definition: mysql_lex_string.h:35
size_t length
Definition: mysql_lex_string.h:36
Definition: lex_symbol.h:38
const unsigned int length
Definition: lex_symbol.h:40
const unsigned int tok
Definition: lex_symbol.h:41
const char * name
Definition: lex_symbol.h:39
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:35
Definition: lexer_yystype.h:32
LEX_STRING lex_str
Definition: lexer_yystype.h:33
Definition: parser_yystype.h:338