MySQL 8.4.2
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2014, 2024, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is designed to work with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have either included with
14 the program or referenced in the documentation.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
24
25/* A lexical scanner for optimizer hints pseudo-commentary syntax */
26
27#ifndef SQL_LEX_HINTS_ICLUDED
28#define SQL_LEX_HINTS_ICLUDED
29
30#include <string.h>
31#include <sys/types.h>
32
33#include "lex_string.h"
34#include "my_dbug.h"
35#include "my_inttypes.h"
37#include "sql/lex_symbol.h"
38#include "sql/lexer_yystype.h"
39#include "sql/parser_yystype.h"
40#include "sql/sql_class.h"
42#include "sql/sql_lex_hash.h"
43#include "strings/sql_chars.h"
44
45#include "sql/sql_hints.yy.h"
46
47class PT_hint_list;
48
49/// Lexical scanner for hint comments.
50///
51/// When the main lexical scanner recognizes the "/*+" delimiter, it calls
52/// the hint parser (my_hint_parser_parse) to consume the rest of hint tokens
53/// including the */ delimiter. The hint parser uses Hint_scanner as its own
54/// lexer to scan hint-specific tokens.
58 const bool is_ansi_quotes;
59 size_t lineno;
61
62 const char *input_buf;
63 const char *input_buf_end;
64
65 const char *ptr;
66
68
69 /**
70 Digest buffer interface to append tokens.
71 */
73
74 public:
75 /**
76 Current token (yytext) origin in the input_buf
77 */
78 const char *raw_yytext;
79 /**
80 Current token pointer (may point to a converted, separately allocated string outside input_buf)
81 */
82 const char *yytext;
83 /**
84 Length of the current token (see yytext)
85 */
86 size_t yyleng;
87
88 bool has_hints; ///< True if a hint comment is not empty (has any hints).
89
90 public:
91 Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
92 sql_digest_state *digest_state_arg);
93 size_t get_lineno() const { return lineno; }
94 const char *get_ptr() const { return ptr; }
96 void syntax_warning(const char *msg) const;
97
100 prev_token = scan();
102 return prev_token;
103 }
104
105 protected:
106 int scan();
107
108 template <hint_lex_char_classes Quote>
110 assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
111 Quote == HINT_CHR_QUOTE);
112 assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
113
114 const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
116 const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
117
118 skip_byte("\"'`"); // skip opening quote sign
119 adjust_token(); // reset yytext & yyleng
120
121 size_t double_separators = 0;
122
123 for (;;) {
124 hint_lex_char_classes chr_class = peek_class();
125 switch (chr_class) {
126 case HINT_CHR_NL:
127 skip_newline();
128 continue;
129 case HINT_CHR_MB:
130 if (skip_mb()) return HINT_ERROR;
131 continue;
134 return HINT_ERROR; // we don't support "*/" inside quoted
135 // identifiers
136 skip_byte('*');
137 continue;
138 case HINT_CHR_EOF:
139 return HINT_ERROR;
140 case Quote:
141 if (peek_class2() == Quote) {
142 skip_byte("\"'`");
143 skip_byte("\"'`");
144 double_separators++;
145 continue;
146 } else {
147 if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
148
149 ptr++; // skip closing quote
150
151 if (thd->charset_is_system_charset && double_separators == 0) {
152 yytext = thd->strmake(yytext, yyleng); // null-terminate it.
153 return ret;
154 }
155
156 LEX_STRING s;
159 thd->charset()))
160 return HINT_ERROR; // OOM etc.
161 } else {
162 assert(0 < double_separators && double_separators < yyleng);
163 s.length = yyleng - double_separators;
164 s.str = static_cast<char *>(thd->alloc(s.length));
165 if (s.str == nullptr) return HINT_ERROR; // OOM
166 }
167 if (double_separators > 0)
168 compact<Quote>(&s, yytext, yyleng, double_separators);
169
170 yytext = s.str;
171 yyleng = s.length;
172 return ret;
173 }
174 default:
175 skip_byte();
176 }
177 }
178 }
179
181 for (;;) {
182 const hint_lex_char_classes chr_class = peek_class();
183 switch (chr_class) {
184 case HINT_CHR_IDENT:
185 case HINT_CHR_DIGIT:
186 skip_byte();
187 continue;
188 case HINT_CHR_MB:
189 if (skip_mb()) return HINT_ERROR;
190 continue;
191 case HINT_CHR_EOF:
192 default:
193 return HINT_ARG_IDENT;
194 }
195 }
196 }
197
199 assert(peek_class() == HINT_CHR_IDENT);
200 switch (peek_byte()) {
201 case 'K':
202 case 'M':
203 case 'G':
204 break;
205 default:
206 return scan_ident();
207 }
208 skip_byte();
209
210 switch (peek_class()) {
211 case HINT_CHR_IDENT:
212 case HINT_CHR_DIGIT:
213 return scan_ident();
214 default:
216 }
217 }
218
220 skip_byte('@');
221 start_token();
222
223 switch (peek_class()) {
224 case HINT_CHR_IDENT:
225 case HINT_CHR_DIGIT:
226 case HINT_CHR_MB:
229 return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
231 : HINT_ERROR;
233 return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
235 : HINT_ERROR;
236 default:
237 return HINT_ERROR;
238 }
239 }
240
242 for (;;) {
243 switch (peek_class()) {
244 case HINT_CHR_IDENT:
245 case HINT_CHR_DIGIT:
246 skip_byte();
247 continue;
248 case HINT_CHR_MB:
249 return scan_ident();
250 case HINT_CHR_EOF:
251 default:
252 const SYMBOL *symbol =
254 if (symbol) // keyword
255 {
256 /*
257 Override the yytext pointer to the short-living buffer with a
258 long-living pointer to the same text (don't need to allocate a
259 keyword string since symbol array is a global constant).
260 */
261 yytext = symbol->name;
262 assert(yyleng == symbol->length);
263
264 return symbol->tok;
265 }
266
268 return HINT_ARG_IDENT;
269 }
270 }
271 }
272
274 assert(peek_class() == HINT_CHR_DIGIT);
275 skip_byte();
276
277 for (;;) {
278 switch (peek_class()) {
279 case HINT_CHR_DIGIT:
280 skip_byte();
281 continue;
282 case HINT_CHR_DOT:
283 return scan_fraction_digits();
284 case HINT_CHR_IDENT:
286 case HINT_CHR_MB:
287 return scan_ident();
288 case HINT_CHR_EOF:
289 default:
290 return HINT_ARG_NUMBER;
291 }
292 }
293 }
294
296 skip_byte('.');
297
298 if (peek_class() == HINT_CHR_DIGIT)
299 skip_byte();
300 else
301 return HINT_ERROR;
302
303 for (;;) {
304 switch (peek_class()) {
305 case HINT_CHR_DIGIT:
306 skip_byte();
307 continue;
308 case HINT_CHR_IDENT:
309 case HINT_CHR_MB:
310 return HINT_ERROR;
311 default:
313 }
314 }
315 }
316
317 bool eof() const {
318 assert(ptr <= input_buf_end);
319 return ptr >= input_buf_end;
320 }
321
322 char peek_byte() const {
323 assert(!eof());
324 return *ptr;
325 }
326
328 return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
329 }
330
332 assert(ptr + 1 <= input_buf_end);
333 return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
334 : char_classes[static_cast<uchar>(ptr[1])];
335 }
336
338 assert(!eof() && peek_byte() == '\n');
339 skip_byte();
340 lineno++;
341 }
342
344 assert(!eof());
345 const char ret = *ptr;
346 yyleng++;
347 ptr++;
348 return ret;
349 }
350
351 void skip_byte() {
352 assert(!eof());
353 yyleng++;
354 ptr++;
355 }
356
357 /**
358 Skips the next byte. In the debug mode, abort if it's not found in @p byte.
359
360 @param byte A byte to compare with the byte we skip.
361 Unused in non-debug builds.
362 */
363 void skip_byte(char byte [[maybe_unused]]) {
364 assert(peek_byte() == byte);
365 skip_byte();
366 }
367
368 /**
369 Skips the next byte. In the debug mode, abort if it's not found in @p str.
370
371 @param str A string of characters to compare with the next byte.
372 Unused in non-debug builds.
373 */
374 void skip_byte(const char *str [[maybe_unused]]) {
375 assert(strchr(str, peek_byte()));
376 skip_byte();
377 }
378
379 bool skip_mb() {
380 const size_t len = my_ismbchar(cs, ptr, input_buf_end);
381 if (len == 0) {
382 ptr++;
383 yyleng++;
384 return true;
385 }
386 ptr += len;
387 yyleng += len;
388 return false;
389 }
390
392 yytext = ptr;
393 yyleng = 0;
394 }
395
396 void start_token() {
397 adjust_token();
398 raw_yytext = ptr;
399 }
400
401 template <hint_lex_char_classes Separator>
402 void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
403 assert(doubles > 0);
404
405 size_t d = doubles;
406 char *t = to->str;
407 for (const char *s = from, *end = from + len; s < end;) {
408 switch (char_classes[(uchar)*s]) {
409 case HINT_CHR_MB: {
410 size_t hint_len = my_ismbchar(cs, s, end);
411 assert(hint_len > 1);
412 memcpy(t, s, hint_len);
413 t += hint_len;
414 s += hint_len;
415 }
416 continue;
417 case Separator:
418 assert(char_classes[(uchar)*s] == Separator);
419 *t++ = *s++;
420 s++; // skip the 2nd separator
421 d--;
422 if (d == 0) {
423 memcpy(t, s, end - s);
424 to->length = len - doubles;
425 return;
426 }
427 continue;
428 case HINT_CHR_EOF:
429 assert(0);
430 to->length = 0;
431 return;
432 default:
433 *t++ = *s++;
434 }
435 }
436 assert(0);
437 to->length = 0;
438 return;
439 }
440
442
443 private:
444 /**
445 Helper function to check digest buffer for overflow before adding tokens.
446
447 @param token A token number to add.
448 */
449 void add_digest(uint token) {
450 if (digest_state == nullptr) return; // Digest buffer is full.
451
452 Lexer_yystype fake_yylvalue;
453 /*
454 YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
455 a consideration, that the lexer returns MEM_ROOT-allocated string values
456 there, and the rest of server is welcome to modify that strings inplace
457 (ind it does that in a few rare cases).
458 The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
459 it is not practical to add extra memory allocation there: const_cast is
460 enough.
461 */
462 fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
463 fake_yylvalue.lex_str.length = yyleng;
464
465 digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
466 }
467};
468
470 Hint_scanner *scanner) {
471 auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
472 const int ret = scanner->get_next_token();
473 yylval->hint_string.str = scanner->yytext;
474 yylval->hint_string.length = scanner->yyleng;
475 return ret;
476}
477
479 const char *msg);
480
481#endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:55
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:72
void adjust_token()
Definition: sql_lex_hints.h:391
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:53
int scan_ident()
Definition: sql_lex_hints.h:180
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:78
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:198
size_t lineno
Definition: sql_lex_hints.h:59
int scan()
Definition: sql_lex_hints.cc:70
const char * input_buf
Definition: sql_lex_hints.h:62
bool eof() const
Definition: sql_lex_hints.h:317
int scan_quoted()
Definition: sql_lex_hints.h:109
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:402
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:60
int scan_query_block_name()
Definition: sql_lex_hints.h:219
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:57
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:327
void skip_byte()
Definition: sql_lex_hints.h:351
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:86
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:273
size_t get_lineno() const
Definition: sql_lex_hints.h:93
void skip_byte(char byte)
Skips the next byte.
Definition: sql_lex_hints.h:363
uchar get_byte()
Definition: sql_lex_hints.h:343
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:449
const char * ptr
Definition: sql_lex_hints.h:65
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:88
const char * yytext
Current token pointer (may be converted allocated string outside input_buf.
Definition: sql_lex_hints.h:82
const char * get_ptr() const
Definition: sql_lex_hints.h:94
void skip_newline()
Definition: sql_lex_hints.h:337
const char * input_buf_end
Definition: sql_lex_hints.h:63
bool skip_mb()
Definition: sql_lex_hints.h:379
void skip_byte(const char *str)
Skips the next byte.
Definition: sql_lex_hints.h:374
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:331
int scan_fraction_digits()
Definition: sql_lex_hints.h:295
THD * thd
Definition: sql_lex_hints.h:56
char peek_byte() const
Definition: sql_lex_hints.h:322
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:95
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:241
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:136
void start_token()
Definition: sql_lex_hints.h:396
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:149
int prev_token
Definition: sql_lex_hints.h:67
const bool is_ansi_quotes
Definition: sql_lex_hints.h:58
int get_next_token()
Definition: sql_lex_hints.h:98
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:48
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:75
Definition: parse_tree_hints.h:100
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
void * alloc(size_t size)
Definition: sql_lexer_thd.h:50
bool convert_string(LEX_STRING *, const CHARSET_INFO *, const char *, size_t, const CHARSET_INFO *, bool=false)
Definition: sql_lexer_thd.h:43
const CHARSET_INFO * charset() const
Definition: sql_lexer_thd.h:42
char * strmake(const char *str, size_t size) const
Definition: sql_lexer_thd.h:52
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2794
static struct wordvalue doubles[]
Definition: ctype-czech.cc:171
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
unsigned my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:673
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1542
#define DBUG_TRACE
Definition: my_dbug.h:146
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:52
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1081
Definition: buf0block_hint.cc:30
Definition: commit_order_queue.h:34
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
hint_lex_char_classes
Definition: sql_chars.h:73
@ HINT_CHR_MB
Definition: sql_chars.h:83
@ HINT_CHR_NL
Definition: sql_chars.h:84
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:76
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:80
@ HINT_CHR_QUOTE
Definition: sql_chars.h:85
@ HINT_CHR_DOT
Definition: sql_chars.h:79
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:74
@ HINT_CHR_IDENT
Definition: sql_chars.h:82
@ HINT_CHR_SLASH
Definition: sql_chars.h:86
@ HINT_CHR_EOF
Definition: sql_chars.h:81
@ HINT_CHR_DIGIT
Definition: sql_chars.h:78
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:89
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:88
@ HINT_ERROR
Definition: sql_hints.yy.h:94
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:92
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:91
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:105
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:90
int my_hint_parser_lex(MY_HINT_PARSER_STYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:469
void my_hint_parser_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:118
sql_digest_state * digest_add_token(sql_digest_state *, uint, Lexer_yystype *)
Definition: sql_lexer.cc:53
Definition: m_ctype.h:423
Definition: mysql_lex_string.h:35
char * str
Definition: mysql_lex_string.h:36
size_t length
Definition: mysql_lex_string.h:37
Definition: lex_symbol.h:39
const unsigned int length
Definition: lex_symbol.h:41
const unsigned int tok
Definition: lex_symbol.h:42
const char * name
Definition: lex_symbol.h:40
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:36
Definition: lexer_yystype.h:33
LEX_STRING lex_str
Definition: lexer_yystype.h:34
Definition: parser_yystype.h:341