MySQL 8.0.29
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2014, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23
24/* A lexical scanner for optimizer hints pseudo-commentary syntax */
25
26#ifndef SQL_LEX_HINTS_ICLUDED
27#define SQL_LEX_HINTS_ICLUDED
28
29#include <string.h>
30#include <sys/types.h>
31
32#include "lex_string.h"
33#include "m_ctype.h"
34#include "my_dbug.h"
35#include "my_inttypes.h"
36#include "sql/lex_symbol.h"
37#include "sql/lexer_yystype.h"
38#include "sql/sql_class.h"
40#include "sql/sql_lex_hash.h"
41#include "sql_chars.h"
42
43// This must be last, due to bison 2.3 on OsX
44#ifndef YYSTYPE_IS_DECLARED
45#define YYSTYPE_IS_DECLARED 1
46#endif // YYSTYPE_IS_DECLARED
47#include "sql/sql_hints.yy.h"
48
49class PT_hint_list;
50union YYSTYPE;
51
53
54/// Lexical scanner for hint comments.
55///
56/// When the main lexical scanner recognizes the "/*+" delimiter, it calls
57/// the hint parser (HINT_PARSER_parse) to consume the rest of hint tokens
58/// including the */ delimiter. The hint parser uses Hint_scanner as its own
59/// lexer to scan hint-specific tokens.
63 const bool is_ansi_quotes;
64 size_t lineno;
66
67 const char *input_buf;
68 const char *input_buf_end;
69
70 const char *ptr;
71
73
74 /**
75 Digest buffer interface to append tokens.
76 */
78
79 public:
80 /**
81 Current token (yytext) origin in the input_buf
82 */
83 const char *raw_yytext;
84 /**
85 Current token pointer (may be a converted, allocated string outside input_buf)
86 */
87 const char *yytext;
88 /**
89 Length of the current token (see yytext)
90 */
91 size_t yyleng;
92
93 bool has_hints; ///< True if a hint comment is not empty (has any hints).
94
95 public:
96 Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
97 sql_digest_state *digest_state_arg);
98 size_t get_lineno() const { return lineno; }
99 const char *get_ptr() const { return ptr; }
101 void syntax_warning(const char *msg) const;
102
105 prev_token = scan();
107 return prev_token;
108 }
109
110 protected:
111 int scan();
112
113 template <hint_lex_char_classes Quote>
115 assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
116 Quote == HINT_CHR_QUOTE);
117 assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
118
119 const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
121 const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
122
123 skip_byte("\"'`"); // skip opening quote sign
124 adjust_token(); // reset yytext & yyleng
125
126 size_t double_separators = 0;
127
128 for (;;) {
129 hint_lex_char_classes chr_class = peek_class();
130 switch (chr_class) {
131 case HINT_CHR_NL:
132 skip_newline();
133 continue;
134 case HINT_CHR_MB:
135 if (skip_mb()) return HINT_ERROR;
136 continue;
139 return HINT_ERROR; // we don't support "*/" inside quoted
140 // identifiers
141 skip_byte('*');
142 continue;
143 case HINT_CHR_EOF:
144 return HINT_ERROR;
145 case Quote:
146 if (peek_class2() == Quote) {
147 skip_byte("\"'`");
148 skip_byte("\"'`");
149 double_separators++;
150 continue;
151 } else {
152 if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
153
154 ptr++; // skip closing quote
155
156 if (thd->charset_is_system_charset && double_separators == 0)
157 return ret;
158
159 LEX_STRING s;
162 thd->charset()))
163 return HINT_ERROR; // OOM etc.
164 } else {
165 assert(0 < double_separators && double_separators < yyleng);
166 s.length = yyleng - double_separators;
167 s.str = static_cast<char *>(thd->alloc(s.length));
168 if (s.str == nullptr) return HINT_ERROR; // OOM
169 }
170 if (double_separators > 0)
171 compact<Quote>(&s, yytext, yyleng, double_separators);
172
173 yytext = s.str;
174 yyleng = s.length;
175 return ret;
176 }
177 default:
178 skip_byte();
179 }
180 }
181 }
182
184 for (;;) {
185 hint_lex_char_classes chr_class = peek_class();
186 switch (chr_class) {
187 case HINT_CHR_IDENT:
188 case HINT_CHR_DIGIT:
189 skip_byte();
190 continue;
191 case HINT_CHR_MB:
192 if (skip_mb()) return HINT_ERROR;
193 continue;
194 case HINT_CHR_EOF:
195 default:
196 return HINT_ARG_IDENT;
197 }
198 }
199 }
200
202 assert(peek_class() == HINT_CHR_IDENT);
203 switch (peek_byte()) {
204 case 'K':
205 case 'M':
206 case 'G':
207 break;
208 default:
209 return scan_ident();
210 }
211 skip_byte();
212
213 switch (peek_class()) {
214 case HINT_CHR_IDENT:
215 case HINT_CHR_DIGIT:
216 return scan_ident();
217 default:
219 }
220 }
221
223 skip_byte('@');
224 start_token();
225
226 switch (peek_class()) {
227 case HINT_CHR_IDENT:
228 case HINT_CHR_DIGIT:
229 case HINT_CHR_MB:
232 return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
234 : HINT_ERROR;
236 return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
238 : HINT_ERROR;
239 default:
240 return HINT_ERROR;
241 }
242 }
243
245 for (;;) {
246 switch (peek_class()) {
247 case HINT_CHR_IDENT:
248 case HINT_CHR_DIGIT:
249 skip_byte();
250 continue;
251 case HINT_CHR_MB:
252 return scan_ident();
253 case HINT_CHR_EOF:
254 default:
255 const SYMBOL *symbol =
257 if (symbol) // keyword
258 {
259 /*
260 Override the yytext pointer to the short-living buffer with a
261 long-living pointer to the same text (don't need to allocate a
262 keyword string since symbol array is a global constant).
263 */
264 yytext = symbol->name;
265 assert(yyleng == symbol->length);
266
267 return symbol->tok;
268 }
269
271 return HINT_ARG_IDENT;
272 }
273 }
274 }
275
277 assert(peek_class() == HINT_CHR_DIGIT);
278 skip_byte();
279
280 for (;;) {
281 switch (peek_class()) {
282 case HINT_CHR_DIGIT:
283 skip_byte();
284 continue;
285 case HINT_CHR_DOT:
286 return scan_fraction_digits();
287 case HINT_CHR_IDENT:
289 case HINT_CHR_MB:
290 return scan_ident();
291 case HINT_CHR_EOF:
292 default:
293 return HINT_ARG_NUMBER;
294 }
295 }
296 }
297
299 skip_byte('.');
300
301 if (peek_class() == HINT_CHR_DIGIT)
302 skip_byte();
303 else
304 return HINT_ERROR;
305
306 for (;;) {
307 switch (peek_class()) {
308 case HINT_CHR_DIGIT:
309 skip_byte();
310 continue;
311 case HINT_CHR_IDENT:
312 case HINT_CHR_MB:
313 return HINT_ERROR;
314 default:
316 }
317 }
318 }
319
320 bool eof() const {
321 assert(ptr <= input_buf_end);
322 return ptr >= input_buf_end;
323 }
324
325 char peek_byte() const {
326 assert(!eof());
327 return *ptr;
328 }
329
331 return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
332 }
333
335 assert(ptr + 1 <= input_buf_end);
336 return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
337 : char_classes[static_cast<uchar>(ptr[1])];
338 }
339
341 assert(!eof() && peek_byte() == '\n');
342 skip_byte();
343 lineno++;
344 }
345
347 assert(!eof());
348 char ret = *ptr;
349 yyleng++;
350 ptr++;
351 return ret;
352 }
353
354 void skip_byte() {
355 assert(!eof());
356 yyleng++;
357 ptr++;
358 }
359
360 /**
361 Skips the next byte. In the debug mode, abort if it's not found in @p byte.
362
363 @param byte A byte to compare with the byte we skip.
364 Unused in non-debug builds.
365 */
366 void skip_byte(char byte [[maybe_unused]]) {
367 assert(peek_byte() == byte);
368 skip_byte();
369 }
370
371 /**
372 Skips the next byte. In the debug mode, abort if it's not found in @p str.
373
374 @param str A string of characters to compare with the next byte.
375 Unused in non-debug builds.
376 */
377 void skip_byte(const char *str [[maybe_unused]]) {
378 assert(strchr(str, peek_byte()));
379 skip_byte();
380 }
381
382 bool skip_mb() {
383 size_t len = my_ismbchar(cs, ptr, input_buf_end);
384 if (len == 0) {
385 ptr++;
386 yyleng++;
387 return true;
388 }
389 ptr += len;
390 yyleng += len;
391 return false;
392 }
393
395 yytext = ptr;
396 yyleng = 0;
397 }
398
399 void start_token() {
400 adjust_token();
401 raw_yytext = ptr;
402 }
403
404 template <hint_lex_char_classes Separator>
405 void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
406 assert(doubles > 0);
407
408 size_t d = doubles;
409 char *t = to->str;
410 for (const char *s = from, *end = from + len; s < end;) {
411 switch (char_classes[(uchar)*s]) {
412 case HINT_CHR_MB: {
413 size_t hint_len = my_ismbchar(cs, s, end);
414 assert(hint_len > 1);
415 memcpy(t, s, hint_len);
416 t += hint_len;
417 s += hint_len;
418 }
419 continue;
420 case Separator:
421 assert(char_classes[(uchar)*s] == Separator);
422 *t++ = *s++;
423 s++; // skip the 2nd separator
424 d--;
425 if (d == 0) {
426 memcpy(t, s, end - s);
427 to->length = len - doubles;
428 return;
429 }
430 continue;
431 case HINT_CHR_EOF:
432 assert(0);
433 to->length = 0;
434 return;
435 default:
436 *t++ = *s++;
437 }
438 }
439 assert(0);
440 to->length = 0;
441 return;
442 }
443
445
446 private:
447 /**
448 Helper function to check digest buffer for overflow before adding tokens.
449
450 @param token A token number to add.
451 */
452 void add_digest(uint token) {
453 if (digest_state == nullptr) return; // Digest buffer is full.
454
455 Lexer_yystype fake_yylvalue;
456 /*
457 YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
458 a consideration, that the lexer returns MEM_ROOT-allocated string values
459 there, and the rest of server is welcome to modify that strings inplace
460 (ind it does that in a few rare cases).
461 The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
462 it is not practical to add extra memory allocation there: const_cast is
463 enough.
464 */
465 fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
466 fake_yylvalue.lex_str.length = yyleng;
467
468 digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
469 }
470};
471
472inline int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner) {
473 auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
474 int ret = scanner->get_next_token();
475 yylval->hint_string.str = scanner->yytext;
476 yylval->hint_string.length = scanner->yyleng;
477 return ret;
478}
479
480void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg);
481
482#endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:60
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:77
void adjust_token()
Definition: sql_lex_hints.h:394
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:52
int scan_ident()
Definition: sql_lex_hints.h:183
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:83
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:201
size_t lineno
Definition: sql_lex_hints.h:64
int scan()
Definition: sql_lex_hints.cc:69
const char * input_buf
Definition: sql_lex_hints.h:67
bool eof() const
Definition: sql_lex_hints.h:320
int scan_quoted()
Definition: sql_lex_hints.h:114
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:405
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:65
int scan_query_block_name()
Definition: sql_lex_hints.h:222
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:62
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:330
void skip_byte()
Definition: sql_lex_hints.h:354
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:91
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:276
size_t get_lineno() const
Definition: sql_lex_hints.h:98
void skip_byte(char byte)
Skips the next byte.
Definition: sql_lex_hints.h:366
uchar get_byte()
Definition: sql_lex_hints.h:346
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:452
const char * ptr
Definition: sql_lex_hints.h:70
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:93
const char * yytext
Current token pointer (may be a converted, allocated string outside input_buf).
Definition: sql_lex_hints.h:87
const char * get_ptr() const
Definition: sql_lex_hints.h:99
void skip_newline()
Definition: sql_lex_hints.h:340
const char * input_buf_end
Definition: sql_lex_hints.h:68
bool skip_mb()
Definition: sql_lex_hints.h:382
void skip_byte(const char *str)
Skips the next byte.
Definition: sql_lex_hints.h:377
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:334
int scan_fraction_digits()
Definition: sql_lex_hints.h:298
THD * thd
Definition: sql_lex_hints.h:61
char peek_byte() const
Definition: sql_lex_hints.h:325
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:100
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:244
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:135
void start_token()
Definition: sql_lex_hints.h:399
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:148
int prev_token
Definition: sql_lex_hints.h:72
const bool is_ansi_quotes
Definition: sql_lex_hints.h:63
int get_next_token()
Definition: sql_lex_hints.h:103
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:47
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:74
Definition: parse_tree_hints.h:97
void * alloc(size_t size)
Definition: sql_class.h:432
char * strmake(const char *str, size_t size) const
Definition: sql_class.h:448
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_class.h:945
const CHARSET_INFO * charset() const
Definition: sql_class.h:3238
bool convert_string(LEX_STRING *to, const CHARSET_INFO *to_cs, const char *from, size_t from_length, const CHARSET_INFO *from_cs, bool report_error=false)
Definition: sql_class.cc:1819
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2666
static struct wordvalue doubles[]
Definition: ctype-czech.cc:170
DBUG_TRACE
Definition: do_ctype.cc:46
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
static uint my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:682
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1523
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:51
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1055
Definition: buf0block_hint.cc:29
Definition: commit_order_queue.h:33
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:191
hint_lex_char_classes
Definition: sql_chars.h:72
@ HINT_CHR_MB
Definition: sql_chars.h:82
@ HINT_CHR_NL
Definition: sql_chars.h:83
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:75
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:79
@ HINT_CHR_QUOTE
Definition: sql_chars.h:84
@ HINT_CHR_DOT
Definition: sql_chars.h:78
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:73
@ HINT_CHR_IDENT
Definition: sql_chars.h:81
@ HINT_CHR_SLASH
Definition: sql_chars.h:85
@ HINT_CHR_EOF
Definition: sql_chars.h:80
@ HINT_CHR_DIGIT
Definition: sql_chars.h:77
sql_digest_state * digest_add_token(sql_digest_state *state, uint token, Lexer_yystype *yylval)
Definition: sql_digest.cc:379
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:81
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:80
@ HINT_ERROR
Definition: sql_hints.yy.h:86
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:84
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:83
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:97
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:82
void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:117
int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:472
void hint_lex_init_maps(CHARSET_INFO *cs, hint_lex_char_classes *hint_map)
Definition: m_ctype.h:354
Definition: mysql_lex_string.h:34
char * str
Definition: mysql_lex_string.h:35
size_t length
Definition: mysql_lex_string.h:36
Definition: lex_symbol.h:38
const unsigned int length
Definition: lex_symbol.h:40
const unsigned int tok
Definition: lex_symbol.h:41
const char * name
Definition: lex_symbol.h:39
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:35
unsigned int uint
Definition: uca-dump.cc:29
Definition: lexer_yystype.h:32
LEX_STRING lex_str
Definition: lexer_yystype.h:33
Definition: parser_yystype.h:345