MySQL 8.0.40
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2014, 2024, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is designed to work with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have either included with
14 the program or referenced in the documentation.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
24
25/* A lexical scanner for optimizer hints pseudo-commentary syntax */
26
27#ifndef SQL_LEX_HINTS_ICLUDED
28#define SQL_LEX_HINTS_ICLUDED
29
30#include <string.h>
31#include <sys/types.h>
32
33#include "lex_string.h"
34#include "m_ctype.h"
35#include "my_dbug.h"
36#include "my_inttypes.h"
37#include "sql/lex_symbol.h"
38#include "sql/lexer_yystype.h"
39#include "sql/sql_class.h"
41#include "sql/sql_lex_hash.h"
42#include "sql_chars.h"
43
44#include "sql/sql_hints.yy.h"
45
46class PT_hint_list;
47union YYSTYPE;
48
50
51/// Lexical scanner for hint comments.
52///
53/// When the main lexical scanner recognizes the "/*+" delimiter, it calls
54/// the hint parser (HINT_PARSER_parse) to consume the rest of hint tokens
55/// including the */ delimiter. The hint parser uses Hint_scanner as its own
56/// lexer to scan hint-specific tokens.
60 const bool is_ansi_quotes;
61 size_t lineno;
63
64 const char *input_buf;
65 const char *input_buf_end;
66
67 const char *ptr;
68
70
71 /**
72 Digest buffer interface to append tokens.
73 */
75
76 public:
77 /**
78 Current token (yytext) origin in the input_buf
79 */
80 const char *raw_yytext;
81 /**
82 Current token pointer (may point to a converted, allocated string outside input_buf)
83 */
84 const char *yytext;
85 /**
86 Length of the current token (see yytext)
87 */
88 size_t yyleng;
89
90 bool has_hints; ///< True if a hint comment is not empty (has any hints).
91
92 public:
93 Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
94 sql_digest_state *digest_state_arg);
95 size_t get_lineno() const { return lineno; }
96 const char *get_ptr() const { return ptr; }
98 void syntax_warning(const char *msg) const;
99
102 prev_token = scan();
104 return prev_token;
105 }
106
107 protected:
108 int scan();
109
110 template <hint_lex_char_classes Quote>
112 assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
113 Quote == HINT_CHR_QUOTE);
114 assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
115
116 const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
118 const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
119
120 skip_byte("\"'`"); // skip opening quote sign
121 adjust_token(); // reset yytext & yyleng
122
123 size_t double_separators = 0;
124
125 for (;;) {
126 hint_lex_char_classes chr_class = peek_class();
127 switch (chr_class) {
128 case HINT_CHR_NL:
129 skip_newline();
130 continue;
131 case HINT_CHR_MB:
132 if (skip_mb()) return HINT_ERROR;
133 continue;
136 return HINT_ERROR; // we don't support "*/" inside quoted
137 // identifiers
138 skip_byte('*');
139 continue;
140 case HINT_CHR_EOF:
141 return HINT_ERROR;
142 case Quote:
143 if (peek_class2() == Quote) {
144 skip_byte("\"'`");
145 skip_byte("\"'`");
146 double_separators++;
147 continue;
148 } else {
149 if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
150
151 ptr++; // skip closing quote
152
153 if (thd->charset_is_system_charset && double_separators == 0) {
154 yytext = thd->strmake(yytext, yyleng); // null-terminate it.
155 return ret;
156 }
157
158 LEX_STRING s;
161 thd->charset()))
162 return HINT_ERROR; // OOM etc.
163 } else {
164 assert(0 < double_separators && double_separators < yyleng);
165 s.length = yyleng - double_separators;
166 s.str = static_cast<char *>(thd->alloc(s.length));
167 if (s.str == nullptr) return HINT_ERROR; // OOM
168 }
169 if (double_separators > 0)
170 compact<Quote>(&s, yytext, yyleng, double_separators);
171
172 yytext = s.str;
173 yyleng = s.length;
174 return ret;
175 }
176 default:
177 skip_byte();
178 }
179 }
180 }
181
183 for (;;) {
184 hint_lex_char_classes chr_class = peek_class();
185 switch (chr_class) {
186 case HINT_CHR_IDENT:
187 case HINT_CHR_DIGIT:
188 skip_byte();
189 continue;
190 case HINT_CHR_MB:
191 if (skip_mb()) return HINT_ERROR;
192 continue;
193 case HINT_CHR_EOF:
194 default:
195 return HINT_ARG_IDENT;
196 }
197 }
198 }
199
201 assert(peek_class() == HINT_CHR_IDENT);
202 switch (peek_byte()) {
203 case 'K':
204 case 'M':
205 case 'G':
206 break;
207 default:
208 return scan_ident();
209 }
210 skip_byte();
211
212 switch (peek_class()) {
213 case HINT_CHR_IDENT:
214 case HINT_CHR_DIGIT:
215 return scan_ident();
216 default:
218 }
219 }
220
222 skip_byte('@');
223 start_token();
224
225 switch (peek_class()) {
226 case HINT_CHR_IDENT:
227 case HINT_CHR_DIGIT:
228 case HINT_CHR_MB:
229 if (scan_ident() == HINT_ARG_IDENT) {
231 return HINT_ARG_QB_NAME;
232 } else
233 return HINT_ERROR;
235 return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
237 : HINT_ERROR;
239 return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
241 : HINT_ERROR;
242 default:
243 return HINT_ERROR;
244 }
245 }
246
248 for (;;) {
249 switch (peek_class()) {
250 case HINT_CHR_IDENT:
251 case HINT_CHR_DIGIT:
252 skip_byte();
253 continue;
254 case HINT_CHR_MB:
255 return scan_ident();
256 case HINT_CHR_EOF:
257 default:
258 const SYMBOL *symbol =
260 if (symbol) // keyword
261 {
262 /*
263 Override the yytext pointer to the short-living buffer with a
264 long-living pointer to the same text (don't need to allocate a
265 keyword string since symbol array is a global constant).
266 */
267 yytext = symbol->name;
268 assert(yyleng == symbol->length);
269
270 return symbol->tok;
271 }
272
274 return HINT_ARG_IDENT;
275 }
276 }
277 }
278
280 assert(peek_class() == HINT_CHR_DIGIT);
281 skip_byte();
282
283 for (;;) {
284 switch (peek_class()) {
285 case HINT_CHR_DIGIT:
286 skip_byte();
287 continue;
288 case HINT_CHR_DOT:
289 return scan_fraction_digits();
290 case HINT_CHR_IDENT:
292 case HINT_CHR_MB:
293 return scan_ident();
294 case HINT_CHR_EOF:
295 default:
296 return HINT_ARG_NUMBER;
297 }
298 }
299 }
300
302 skip_byte('.');
303
304 if (peek_class() == HINT_CHR_DIGIT)
305 skip_byte();
306 else
307 return HINT_ERROR;
308
309 for (;;) {
310 switch (peek_class()) {
311 case HINT_CHR_DIGIT:
312 skip_byte();
313 continue;
314 case HINT_CHR_IDENT:
315 case HINT_CHR_MB:
316 return HINT_ERROR;
317 default:
319 }
320 }
321 }
322
323 bool eof() const {
324 assert(ptr <= input_buf_end);
325 return ptr >= input_buf_end;
326 }
327
328 char peek_byte() const {
329 assert(!eof());
330 return *ptr;
331 }
332
334 return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
335 }
336
338 assert(ptr + 1 <= input_buf_end);
339 return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
340 : char_classes[static_cast<uchar>(ptr[1])];
341 }
342
344 assert(!eof() && peek_byte() == '\n');
345 skip_byte();
346 lineno++;
347 }
348
350 assert(!eof());
351 char ret = *ptr;
352 yyleng++;
353 ptr++;
354 return ret;
355 }
356
357 void skip_byte() {
358 assert(!eof());
359 yyleng++;
360 ptr++;
361 }
362
363 /**
364 Skips the next byte. In the debug mode, abort if it's not found in @p byte.
365
366 @param byte A byte to compare with the byte we skip.
367 Unused in non-debug builds.
368 */
369 void skip_byte(char byte [[maybe_unused]]) {
370 assert(peek_byte() == byte);
371 skip_byte();
372 }
373
374 /**
375 Skips the next byte. In the debug mode, abort if it's not found in @p str.
376
377 @param str A string of characters to compare with the next byte.
378 Unused in non-debug builds.
379 */
380 void skip_byte(const char *str [[maybe_unused]]) {
381 assert(strchr(str, peek_byte()));
382 skip_byte();
383 }
384
385 bool skip_mb() {
386 size_t len = my_ismbchar(cs, ptr, input_buf_end);
387 if (len == 0) {
388 ptr++;
389 yyleng++;
390 return true;
391 }
392 ptr += len;
393 yyleng += len;
394 return false;
395 }
396
398 yytext = ptr;
399 yyleng = 0;
400 }
401
402 void start_token() {
403 adjust_token();
404 raw_yytext = ptr;
405 }
406
407 template <hint_lex_char_classes Separator>
408 void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
409 assert(doubles > 0);
410
411 size_t d = doubles;
412 char *t = to->str;
413 for (const char *s = from, *end = from + len; s < end;) {
414 switch (char_classes[(uchar)*s]) {
415 case HINT_CHR_MB: {
416 size_t hint_len = my_ismbchar(cs, s, end);
417 assert(hint_len > 1);
418 memcpy(t, s, hint_len);
419 t += hint_len;
420 s += hint_len;
421 }
422 continue;
423 case Separator:
424 assert(char_classes[(uchar)*s] == Separator);
425 *t++ = *s++;
426 s++; // skip the 2nd separator
427 d--;
428 if (d == 0) {
429 memcpy(t, s, end - s);
430 to->length = len - doubles;
431 return;
432 }
433 continue;
434 case HINT_CHR_EOF:
435 assert(0);
436 to->length = 0;
437 return;
438 default:
439 *t++ = *s++;
440 }
441 }
442 assert(0);
443 to->length = 0;
444 return;
445 }
446
448
449 private:
450 /**
451 Helper function to check digest buffer for overflow before adding tokens.
452
453 @param token A token number to add.
454 */
455 void add_digest(uint token) {
456 if (digest_state == nullptr) return; // Digest buffer is full.
457
458 Lexer_yystype fake_yylvalue;
459 /*
460 YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
461 a consideration, that the lexer returns MEM_ROOT-allocated string values
462 there, and the rest of server is welcome to modify that strings inplace
463 (ind it does that in a few rare cases).
464 The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
465 it is not practical to add extra memory allocation there: const_cast is
466 enough.
467 */
468 fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
469 fake_yylvalue.lex_str.length = yyleng;
470
471 digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
472 }
473};
474
475inline int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner) {
476 auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
477 int ret = scanner->get_next_token();
478 yylval->hint_string.str = scanner->yytext;
479 yylval->hint_string.length = scanner->yyleng;
480 return ret;
481}
482
483void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg);
484
485#endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:57
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:74
void adjust_token()
Definition: sql_lex_hints.h:397
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:53
int scan_ident()
Definition: sql_lex_hints.h:182
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:80
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:200
size_t lineno
Definition: sql_lex_hints.h:61
int scan()
Definition: sql_lex_hints.cc:70
const char * input_buf
Definition: sql_lex_hints.h:64
bool eof() const
Definition: sql_lex_hints.h:323
int scan_quoted()
Definition: sql_lex_hints.h:111
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:408
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:62
int scan_query_block_name()
Definition: sql_lex_hints.h:221
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:59
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:333
void skip_byte()
Definition: sql_lex_hints.h:357
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:88
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:279
size_t get_lineno() const
Definition: sql_lex_hints.h:95
void skip_byte(char byte)
Skips the next byte.
Definition: sql_lex_hints.h:369
uchar get_byte()
Definition: sql_lex_hints.h:349
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:455
const char * ptr
Definition: sql_lex_hints.h:67
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:90
const char * yytext
Current token pointer (may point to a converted, allocated string outside input_buf).
Definition: sql_lex_hints.h:84
const char * get_ptr() const
Definition: sql_lex_hints.h:96
void skip_newline()
Definition: sql_lex_hints.h:343
const char * input_buf_end
Definition: sql_lex_hints.h:65
bool skip_mb()
Definition: sql_lex_hints.h:385
void skip_byte(const char *str)
Skips the next byte.
Definition: sql_lex_hints.h:380
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:337
int scan_fraction_digits()
Definition: sql_lex_hints.h:301
THD * thd
Definition: sql_lex_hints.h:58
char peek_byte() const
Definition: sql_lex_hints.h:328
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:97
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:247
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:136
void start_token()
Definition: sql_lex_hints.h:402
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:149
int prev_token
Definition: sql_lex_hints.h:69
const bool is_ansi_quotes
Definition: sql_lex_hints.h:60
int get_next_token()
Definition: sql_lex_hints.h:100
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:48
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:75
Definition: parse_tree_hints.h:98
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:34
void * alloc(size_t size)
Definition: sql_lexer_thd.h:48
bool convert_string(LEX_STRING *, const CHARSET_INFO *, const char *, size_t, const CHARSET_INFO *, bool=false)
Definition: sql_lexer_thd.h:41
const CHARSET_INFO * charset() const
Definition: sql_lexer_thd.h:40
char * strmake(const char *str, size_t size) const
Definition: sql_lexer_thd.h:50
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2740
static struct wordvalue doubles[]
Definition: ctype-czech.cc:171
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
static uint my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:723
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1545
#define DBUG_TRACE
Definition: my_dbug.h:146
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:52
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1052
Definition: buf0block_hint.cc:30
Definition: commit_order_queue.h:34
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
hint_lex_char_classes
Definition: sql_chars.h:74
@ HINT_CHR_MB
Definition: sql_chars.h:84
@ HINT_CHR_NL
Definition: sql_chars.h:85
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:77
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:81
@ HINT_CHR_QUOTE
Definition: sql_chars.h:86
@ HINT_CHR_DOT
Definition: sql_chars.h:80
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:75
@ HINT_CHR_IDENT
Definition: sql_chars.h:83
@ HINT_CHR_SLASH
Definition: sql_chars.h:87
@ HINT_CHR_EOF
Definition: sql_chars.h:82
@ HINT_CHR_DIGIT
Definition: sql_chars.h:79
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:81
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:80
@ HINT_ERROR
Definition: sql_hints.yy.h:86
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:84
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:83
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:97
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:82
void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:118
int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:475
void hint_lex_init_maps(CHARSET_INFO *cs, hint_lex_char_classes *hint_map)
sql_digest_state * digest_add_token(sql_digest_state *, uint, Lexer_yystype *)
Definition: sql_lexer.cc:54
Definition: m_ctype.h:385
Definition: mysql_lex_string.h:35
char * str
Definition: mysql_lex_string.h:36
size_t length
Definition: mysql_lex_string.h:37
Definition: lex_symbol.h:39
const unsigned int length
Definition: lex_symbol.h:41
const unsigned int tok
Definition: lex_symbol.h:42
const char * name
Definition: lex_symbol.h:40
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:36
unsigned int uint
Definition: uca9-dump.cc:75
Definition: lexer_yystype.h:33
LEX_STRING lex_str
Definition: lexer_yystype.h:34
Definition: parser_yystype.h:340