MySQL 8.0.33
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2014, 2023, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/* A lexical scanner for optimizer hints pseudo-commentary syntax */
25
26#ifndef SQL_LEX_HINTS_ICLUDED
27#define SQL_LEX_HINTS_ICLUDED
28
29#include <string.h>
30#include <sys/types.h>
31
32#include "lex_string.h"
33#include "m_ctype.h"
34#include "my_dbug.h"
35#include "my_inttypes.h"
36#include "sql/lex_symbol.h"
37#include "sql/lexer_yystype.h"
38#include "sql/sql_class.h"
40#include "sql/sql_lex_hash.h"
41#include "sql_chars.h"
42
43// This must be last, due to bison 2.3 on OS X
44#ifndef YYSTYPE_IS_DECLARED
45#define YYSTYPE_IS_DECLARED 1
46#endif // YYSTYPE_IS_DECLARED
47#include "sql/sql_hints.yy.h"
48
49class PT_hint_list;
50union YYSTYPE;
51
53
54/// Lexical scanner for hint comments.
55///
56/// When the main lexical scanner recognizes the "/*+" delimiter, it calls
57/// the hint parser (HINT_PARSER_parse) to consume the rest of hint tokens
58/// including the */ delimiter. The hint parser uses Hint_scanner as its own
59/// lexer to scan hint-specific tokens.
63 const bool is_ansi_quotes;
64 size_t lineno;
66
67 const char *input_buf;
68 const char *input_buf_end;
69
70 const char *ptr;
71
73
74 /**
75 Digest buffer interface to append tokens.
76 */
78
79 public:
80 /**
81 Current token (yytext) origin in the input_buf
82 */
83 const char *raw_yytext;
84 /**
85 Current token pointer (may be a converted, allocated string outside input_buf)
86 */
87 const char *yytext;
88 /**
89 Length of the current token (see yytext)
90 */
91 size_t yyleng;
92
93 bool has_hints; ///< True if a hint comment is not empty (has any hints).
94
95 public:
96 Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
97 sql_digest_state *digest_state_arg);
98 size_t get_lineno() const { return lineno; }
99 const char *get_ptr() const { return ptr; }
101 void syntax_warning(const char *msg) const;
102
105 prev_token = scan();
107 return prev_token;
108 }
109
110 protected:
111 int scan();
112
113 template <hint_lex_char_classes Quote>
115 assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
116 Quote == HINT_CHR_QUOTE);
117 assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
118
119 const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
121 const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
122
123 skip_byte("\"'`"); // skip opening quote sign
124 adjust_token(); // reset yytext & yyleng
125
126 size_t double_separators = 0;
127
128 for (;;) {
129 hint_lex_char_classes chr_class = peek_class();
130 switch (chr_class) {
131 case HINT_CHR_NL:
132 skip_newline();
133 continue;
134 case HINT_CHR_MB:
135 if (skip_mb()) return HINT_ERROR;
136 continue;
139 return HINT_ERROR; // we don't support "*/" inside quoted
140 // identifiers
141 skip_byte('*');
142 continue;
143 case HINT_CHR_EOF:
144 return HINT_ERROR;
145 case Quote:
146 if (peek_class2() == Quote) {
147 skip_byte("\"'`");
148 skip_byte("\"'`");
149 double_separators++;
150 continue;
151 } else {
152 if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
153
154 ptr++; // skip closing quote
155
156 if (thd->charset_is_system_charset && double_separators == 0) {
157 yytext = thd->strmake(yytext, yyleng); // null-terminate it.
158 return ret;
159 }
160
161 LEX_STRING s;
164 thd->charset()))
165 return HINT_ERROR; // OOM etc.
166 } else {
167 assert(0 < double_separators && double_separators < yyleng);
168 s.length = yyleng - double_separators;
169 s.str = static_cast<char *>(thd->alloc(s.length));
170 if (s.str == nullptr) return HINT_ERROR; // OOM
171 }
172 if (double_separators > 0)
173 compact<Quote>(&s, yytext, yyleng, double_separators);
174
175 yytext = s.str;
176 yyleng = s.length;
177 return ret;
178 }
179 default:
180 skip_byte();
181 }
182 }
183 }
184
186 for (;;) {
187 hint_lex_char_classes chr_class = peek_class();
188 switch (chr_class) {
189 case HINT_CHR_IDENT:
190 case HINT_CHR_DIGIT:
191 skip_byte();
192 continue;
193 case HINT_CHR_MB:
194 if (skip_mb()) return HINT_ERROR;
195 continue;
196 case HINT_CHR_EOF:
197 default:
198 return HINT_ARG_IDENT;
199 }
200 }
201 }
202
204 assert(peek_class() == HINT_CHR_IDENT);
205 switch (peek_byte()) {
206 case 'K':
207 case 'M':
208 case 'G':
209 break;
210 default:
211 return scan_ident();
212 }
213 skip_byte();
214
215 switch (peek_class()) {
216 case HINT_CHR_IDENT:
217 case HINT_CHR_DIGIT:
218 return scan_ident();
219 default:
221 }
222 }
223
225 skip_byte('@');
226 start_token();
227
228 switch (peek_class()) {
229 case HINT_CHR_IDENT:
230 case HINT_CHR_DIGIT:
231 case HINT_CHR_MB:
234 return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
236 : HINT_ERROR;
238 return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
240 : HINT_ERROR;
241 default:
242 return HINT_ERROR;
243 }
244 }
245
247 for (;;) {
248 switch (peek_class()) {
249 case HINT_CHR_IDENT:
250 case HINT_CHR_DIGIT:
251 skip_byte();
252 continue;
253 case HINT_CHR_MB:
254 return scan_ident();
255 case HINT_CHR_EOF:
256 default:
257 const SYMBOL *symbol =
259 if (symbol) // keyword
260 {
261 /*
262 Override the yytext pointer to the short-living buffer with a
263 long-living pointer to the same text (don't need to allocate a
264 keyword string since symbol array is a global constant).
265 */
266 yytext = symbol->name;
267 assert(yyleng == symbol->length);
268
269 return symbol->tok;
270 }
271
273 return HINT_ARG_IDENT;
274 }
275 }
276 }
277
279 assert(peek_class() == HINT_CHR_DIGIT);
280 skip_byte();
281
282 for (;;) {
283 switch (peek_class()) {
284 case HINT_CHR_DIGIT:
285 skip_byte();
286 continue;
287 case HINT_CHR_DOT:
288 return scan_fraction_digits();
289 case HINT_CHR_IDENT:
291 case HINT_CHR_MB:
292 return scan_ident();
293 case HINT_CHR_EOF:
294 default:
295 return HINT_ARG_NUMBER;
296 }
297 }
298 }
299
301 skip_byte('.');
302
303 if (peek_class() == HINT_CHR_DIGIT)
304 skip_byte();
305 else
306 return HINT_ERROR;
307
308 for (;;) {
309 switch (peek_class()) {
310 case HINT_CHR_DIGIT:
311 skip_byte();
312 continue;
313 case HINT_CHR_IDENT:
314 case HINT_CHR_MB:
315 return HINT_ERROR;
316 default:
318 }
319 }
320 }
321
322 bool eof() const {
323 assert(ptr <= input_buf_end);
324 return ptr >= input_buf_end;
325 }
326
327 char peek_byte() const {
328 assert(!eof());
329 return *ptr;
330 }
331
333 return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
334 }
335
337 assert(ptr + 1 <= input_buf_end);
338 return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
339 : char_classes[static_cast<uchar>(ptr[1])];
340 }
341
343 assert(!eof() && peek_byte() == '\n');
344 skip_byte();
345 lineno++;
346 }
347
349 assert(!eof());
350 char ret = *ptr;
351 yyleng++;
352 ptr++;
353 return ret;
354 }
355
356 void skip_byte() {
357 assert(!eof());
358 yyleng++;
359 ptr++;
360 }
361
362 /**
363 Skips the next byte. In the debug mode, abort if it's not found in @p byte.
364
365 @param byte A byte to compare with the byte we skip.
366 Unused in non-debug builds.
367 */
368 void skip_byte(char byte [[maybe_unused]]) {
369 assert(peek_byte() == byte);
370 skip_byte();
371 }
372
373 /**
374 Skips the next byte. In the debug mode, abort if it's not found in @p str.
375
376 @param str A string of characters to compare with the next byte.
377 Unused in non-debug builds.
378 */
379 void skip_byte(const char *str [[maybe_unused]]) {
380 assert(strchr(str, peek_byte()));
381 skip_byte();
382 }
383
384 bool skip_mb() {
385 size_t len = my_ismbchar(cs, ptr, input_buf_end);
386 if (len == 0) {
387 ptr++;
388 yyleng++;
389 return true;
390 }
391 ptr += len;
392 yyleng += len;
393 return false;
394 }
395
397 yytext = ptr;
398 yyleng = 0;
399 }
400
401 void start_token() {
402 adjust_token();
403 raw_yytext = ptr;
404 }
405
406 template <hint_lex_char_classes Separator>
407 void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
408 assert(doubles > 0);
409
410 size_t d = doubles;
411 char *t = to->str;
412 for (const char *s = from, *end = from + len; s < end;) {
413 switch (char_classes[(uchar)*s]) {
414 case HINT_CHR_MB: {
415 size_t hint_len = my_ismbchar(cs, s, end);
416 assert(hint_len > 1);
417 memcpy(t, s, hint_len);
418 t += hint_len;
419 s += hint_len;
420 }
421 continue;
422 case Separator:
423 assert(char_classes[(uchar)*s] == Separator);
424 *t++ = *s++;
425 s++; // skip the 2nd separator
426 d--;
427 if (d == 0) {
428 memcpy(t, s, end - s);
429 to->length = len - doubles;
430 return;
431 }
432 continue;
433 case HINT_CHR_EOF:
434 assert(0);
435 to->length = 0;
436 return;
437 default:
438 *t++ = *s++;
439 }
440 }
441 assert(0);
442 to->length = 0;
443 return;
444 }
445
447
448 private:
449 /**
450 Helper function to check digest buffer for overflow before adding tokens.
451
452 @param token A token number to add.
453 */
454 void add_digest(uint token) {
455 if (digest_state == nullptr) return; // Digest buffer is full.
456
457 Lexer_yystype fake_yylvalue;
458 /*
459 YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
460 a consideration, that the lexer returns MEM_ROOT-allocated string values
461 there, and the rest of server is welcome to modify that strings inplace
462 (ind it does that in a few rare cases).
463 The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
464 it is not practical to add extra memory allocation there: const_cast is
465 enough.
466 */
467 fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
468 fake_yylvalue.lex_str.length = yyleng;
469
470 digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
471 }
472};
473
474inline int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner) {
475 auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
476 int ret = scanner->get_next_token();
477 yylval->hint_string.str = scanner->yytext;
478 yylval->hint_string.length = scanner->yyleng;
479 return ret;
480}
481
482void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg);
483
484#endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:60
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:77
void adjust_token()
Definition: sql_lex_hints.h:396
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:52
int scan_ident()
Definition: sql_lex_hints.h:185
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:83
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:203
size_t lineno
Definition: sql_lex_hints.h:64
int scan()
Definition: sql_lex_hints.cc:69
const char * input_buf
Definition: sql_lex_hints.h:67
bool eof() const
Definition: sql_lex_hints.h:322
int scan_quoted()
Definition: sql_lex_hints.h:114
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:407
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:65
int scan_query_block_name()
Definition: sql_lex_hints.h:224
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:62
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:332
void skip_byte()
Definition: sql_lex_hints.h:356
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:91
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:278
size_t get_lineno() const
Definition: sql_lex_hints.h:98
void skip_byte(char byte)
Skips the next byte.
Definition: sql_lex_hints.h:368
uchar get_byte()
Definition: sql_lex_hints.h:348
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:454
const char * ptr
Definition: sql_lex_hints.h:70
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:93
const char * yytext
Current token pointer (may be converted allocated string outside input_buf.
Definition: sql_lex_hints.h:87
const char * get_ptr() const
Definition: sql_lex_hints.h:99
void skip_newline()
Definition: sql_lex_hints.h:342
const char * input_buf_end
Definition: sql_lex_hints.h:68
bool skip_mb()
Definition: sql_lex_hints.h:384
void skip_byte(const char *str)
Skips the next byte.
Definition: sql_lex_hints.h:379
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:336
int scan_fraction_digits()
Definition: sql_lex_hints.h:300
THD * thd
Definition: sql_lex_hints.h:61
char peek_byte() const
Definition: sql_lex_hints.h:327
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:100
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:246
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:135
void start_token()
Definition: sql_lex_hints.h:401
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:148
int prev_token
Definition: sql_lex_hints.h:72
const bool is_ansi_quotes
Definition: sql_lex_hints.h:63
int get_next_token()
Definition: sql_lex_hints.h:103
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:47
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:74
Definition: parse_tree_hints.h:97
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:33
void * alloc(size_t size)
Definition: sql_lexer_thd.h:47
bool convert_string(LEX_STRING *, const CHARSET_INFO *, const char *, size_t, const CHARSET_INFO *, bool=false)
Definition: sql_lexer_thd.h:40
const CHARSET_INFO * charset() const
Definition: sql_lexer_thd.h:39
char * strmake(const char *str, size_t size) const
Definition: sql_lexer_thd.h:49
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2716
static struct wordvalue doubles[]
Definition: ctype-czech.cc:170
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
static uint my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:714
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1541
#define DBUG_TRACE
Definition: my_dbug.h:145
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:51
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1054
Definition: buf0block_hint.cc:29
Definition: commit_order_queue.h:33
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:191
hint_lex_char_classes
Definition: sql_chars.h:73
@ HINT_CHR_MB
Definition: sql_chars.h:83
@ HINT_CHR_NL
Definition: sql_chars.h:84
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:76
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:80
@ HINT_CHR_QUOTE
Definition: sql_chars.h:85
@ HINT_CHR_DOT
Definition: sql_chars.h:79
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:74
@ HINT_CHR_IDENT
Definition: sql_chars.h:82
@ HINT_CHR_SLASH
Definition: sql_chars.h:86
@ HINT_CHR_EOF
Definition: sql_chars.h:81
@ HINT_CHR_DIGIT
Definition: sql_chars.h:78
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:81
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:80
@ HINT_ERROR
Definition: sql_hints.yy.h:86
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:84
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:83
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:97
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:82
void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:117
int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:474
void hint_lex_init_maps(CHARSET_INFO *cs, hint_lex_char_classes *hint_map)
sql_digest_state * digest_add_token(sql_digest_state *, uint, Lexer_yystype *)
Definition: sql_lexer.cc:53
Definition: m_ctype.h:382
Definition: mysql_lex_string.h:34
char * str
Definition: mysql_lex_string.h:35
size_t length
Definition: mysql_lex_string.h:36
Definition: lex_symbol.h:38
const unsigned int length
Definition: lex_symbol.h:40
const unsigned int tok
Definition: lex_symbol.h:41
const char * name
Definition: lex_symbol.h:39
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:35
unsigned int uint
Definition: uca9-dump.cc:74
Definition: lexer_yystype.h:32
LEX_STRING lex_str
Definition: lexer_yystype.h:33
Definition: parser_yystype.h:344