MySQL 9.1.0
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1/*
2 Copyright (c) 2014, 2024, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is designed to work with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have either included with
14 the program or referenced in the documentation.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
24
25/* A lexical scanner for optimizer hints pseudo-commentary syntax */
26
27#ifndef SQL_LEX_HINTS_ICLUDED
28#define SQL_LEX_HINTS_ICLUDED
29
30#include <string.h>
31#include <sys/types.h>
32
33#include "lex_string.h"
34#include "my_dbug.h"
35#include "my_inttypes.h"
37#include "sql/lex_symbol.h"
38#include "sql/lexer_yystype.h"
39#include "sql/parser_yystype.h"
40#include "sql/sql_class.h"
42#include "sql/sql_lex_hash.h"
43#include "strings/sql_chars.h"
44
45#include "sql/sql_hints.yy.h"
46
47class PT_hint_list;
48
49/// Lexical scanner for hint comments.
50///
51/// When the main lexical scanner recognizes the "/*+" delimiter, it calls
52/// the hint parser (my_hint_parser_parse) to consume the rest of hint tokens
53/// including the */ delimiter. The hint parser uses Hint_scanner as its own
54/// lexer to scan hint-specific tokens.
58 const bool is_ansi_quotes;
59 size_t lineno;
61
62 const char *input_buf;
63 const char *input_buf_end;
64
65 const char *ptr;
66
68
69 /**
70 Digest buffer interface to append tokens.
71 */
73
74 public:
75 /**
76 Current token (yytext) origin in the input_buf
77 */
78 const char *raw_yytext;
79 /**
80 Current token pointer (may point to a converted, allocated string outside input_buf)
81 */
82 const char *yytext;
83 /**
84 Length of the current token (see yytext)
85 */
86 size_t yyleng;
87
88 bool has_hints; ///< True if a hint comment is not empty (has any hints).
89
90 public:
91 Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
92 sql_digest_state *digest_state_arg);
93 size_t get_lineno() const { return lineno; }
94 const char *get_ptr() const { return ptr; }
96 void syntax_warning(const char *msg) const;
97
100 prev_token = scan();
102 return prev_token;
103 }
104
105 protected:
106 int scan();
107
108 template <hint_lex_char_classes Quote>
110 assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
111 Quote == HINT_CHR_QUOTE);
112 assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
113
114 const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
116 const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
117
118 skip_byte("\"'`"); // skip opening quote sign
119 adjust_token(); // reset yytext & yyleng
120
121 size_t double_separators = 0;
122
123 for (;;) {
124 hint_lex_char_classes chr_class = peek_class();
125 switch (chr_class) {
126 case HINT_CHR_NL:
127 skip_newline();
128 continue;
129 case HINT_CHR_MB:
130 if (skip_mb()) return HINT_ERROR;
131 continue;
134 return HINT_ERROR; // we don't support "*/" inside quoted
135 // identifiers
136 skip_byte('*');
137 continue;
138 case HINT_CHR_EOF:
139 return HINT_ERROR;
140 case Quote:
141 if (peek_class2() == Quote) {
142 skip_byte("\"'`");
143 skip_byte("\"'`");
144 double_separators++;
145 continue;
146 } else {
147 if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
148
149 ptr++; // skip closing quote
150
151 if (thd->charset_is_system_charset && double_separators == 0) {
152 yytext = thd->strmake(yytext, yyleng); // null-terminate it.
153 return ret;
154 }
155
156 LEX_STRING s;
159 thd->charset()))
160 return HINT_ERROR; // OOM etc.
161 } else {
162 assert(0 < double_separators && double_separators < yyleng);
163 s.length = yyleng - double_separators;
164 s.str = static_cast<char *>(thd->alloc(s.length));
165 if (s.str == nullptr) return HINT_ERROR; // OOM
166 }
167 if (double_separators > 0)
168 compact<Quote>(&s, yytext, yyleng, double_separators);
169
170 yytext = s.str;
171 yyleng = s.length;
172 return ret;
173 }
174 default:
175 skip_byte();
176 }
177 }
178 }
179
181 for (;;) {
182 const hint_lex_char_classes chr_class = peek_class();
183 switch (chr_class) {
184 case HINT_CHR_IDENT:
185 case HINT_CHR_DIGIT:
186 skip_byte();
187 continue;
188 case HINT_CHR_MB:
189 if (skip_mb()) return HINT_ERROR;
190 continue;
191 case HINT_CHR_EOF:
192 default:
193 return HINT_ARG_IDENT;
194 }
195 }
196 }
197
199 assert(peek_class() == HINT_CHR_IDENT);
200 switch (peek_byte()) {
201 case 'K':
202 case 'M':
203 case 'G':
204 break;
205 default:
206 return scan_ident();
207 }
208 skip_byte();
209
210 switch (peek_class()) {
211 case HINT_CHR_IDENT:
212 case HINT_CHR_DIGIT:
213 return scan_ident();
214 default:
216 }
217 }
218
220 skip_byte('@');
221 start_token();
222
223 switch (peek_class()) {
224 case HINT_CHR_IDENT:
225 case HINT_CHR_DIGIT:
226 case HINT_CHR_MB:
227 if (scan_ident() == HINT_ARG_IDENT) {
229 return HINT_ARG_QB_NAME;
230 } else
231 return HINT_ERROR;
233 return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
235 : HINT_ERROR;
237 return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
239 : HINT_ERROR;
240 default:
241 return HINT_ERROR;
242 }
243 }
244
246 for (;;) {
247 switch (peek_class()) {
248 case HINT_CHR_IDENT:
249 case HINT_CHR_DIGIT:
250 skip_byte();
251 continue;
252 case HINT_CHR_MB:
253 return scan_ident();
254 case HINT_CHR_EOF:
255 default:
256 const SYMBOL *symbol =
258 if (symbol) // keyword
259 {
260 /*
261 Override the yytext pointer to the short-living buffer with a
262 long-living pointer to the same text (don't need to allocate a
263 keyword string since symbol array is a global constant).
264 */
265 yytext = symbol->name;
266 assert(yyleng == symbol->length);
267
268 return symbol->tok;
269 }
270
272 return HINT_ARG_IDENT;
273 }
274 }
275 }
276
278 assert(peek_class() == HINT_CHR_DIGIT);
279 skip_byte();
280
281 for (;;) {
282 switch (peek_class()) {
283 case HINT_CHR_DIGIT:
284 skip_byte();
285 continue;
286 case HINT_CHR_DOT:
287 return scan_fraction_digits();
288 case HINT_CHR_IDENT:
290 case HINT_CHR_MB:
291 return scan_ident();
292 case HINT_CHR_EOF:
293 default:
294 return HINT_ARG_NUMBER;
295 }
296 }
297 }
298
300 skip_byte('.');
301
302 if (peek_class() == HINT_CHR_DIGIT)
303 skip_byte();
304 else
305 return HINT_ERROR;
306
307 for (;;) {
308 switch (peek_class()) {
309 case HINT_CHR_DIGIT:
310 skip_byte();
311 continue;
312 case HINT_CHR_IDENT:
313 case HINT_CHR_MB:
314 return HINT_ERROR;
315 default:
317 }
318 }
319 }
320
321 bool eof() const {
322 assert(ptr <= input_buf_end);
323 return ptr >= input_buf_end;
324 }
325
326 char peek_byte() const {
327 assert(!eof());
328 return *ptr;
329 }
330
332 return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
333 }
334
336 assert(ptr + 1 <= input_buf_end);
337 return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
338 : char_classes[static_cast<uchar>(ptr[1])];
339 }
340
342 assert(!eof() && peek_byte() == '\n');
343 skip_byte();
344 lineno++;
345 }
346
348 assert(!eof());
349 const char ret = *ptr;
350 yyleng++;
351 ptr++;
352 return ret;
353 }
354
355 void skip_byte() {
356 assert(!eof());
357 yyleng++;
358 ptr++;
359 }
360
361 /**
362 Skips the next byte. In the debug mode, abort if it's not found in @p byte.
363
364 @param byte A byte to compare with the byte we skip.
365 Unused in non-debug builds.
366 */
367 void skip_byte(char byte [[maybe_unused]]) {
368 assert(peek_byte() == byte);
369 skip_byte();
370 }
371
372 /**
373 Skips the next byte. In the debug mode, abort if it's not found in @p str.
374
375 @param str A string of characters to compare with the next byte.
376 Unused in non-debug builds.
377 */
378 void skip_byte(const char *str [[maybe_unused]]) {
379 assert(strchr(str, peek_byte()));
380 skip_byte();
381 }
382
383 bool skip_mb() {
384 const size_t len = my_ismbchar(cs, ptr, input_buf_end);
385 if (len == 0) {
386 ptr++;
387 yyleng++;
388 return true;
389 }
390 ptr += len;
391 yyleng += len;
392 return false;
393 }
394
396 yytext = ptr;
397 yyleng = 0;
398 }
399
400 void start_token() {
401 adjust_token();
402 raw_yytext = ptr;
403 }
404
405 template <hint_lex_char_classes Separator>
406 void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
407 assert(doubles > 0);
408
409 size_t d = doubles;
410 char *t = to->str;
411 for (const char *s = from, *end = from + len; s < end;) {
412 switch (char_classes[(uchar)*s]) {
413 case HINT_CHR_MB: {
414 size_t hint_len = my_ismbchar(cs, s, end);
415 assert(hint_len > 1);
416 memcpy(t, s, hint_len);
417 t += hint_len;
418 s += hint_len;
419 }
420 continue;
421 case Separator:
422 assert(char_classes[(uchar)*s] == Separator);
423 *t++ = *s++;
424 s++; // skip the 2nd separator
425 d--;
426 if (d == 0) {
427 memcpy(t, s, end - s);
428 to->length = len - doubles;
429 return;
430 }
431 continue;
432 case HINT_CHR_EOF:
433 assert(0);
434 to->length = 0;
435 return;
436 default:
437 *t++ = *s++;
438 }
439 }
440 assert(0);
441 to->length = 0;
442 return;
443 }
444
446
447 private:
448 /**
449 Helper function to check digest buffer for overflow before adding tokens.
450
451 @param token A token number to add.
452 */
453 void add_digest(uint token) {
454 if (digest_state == nullptr) return; // Digest buffer is full.
455
456 Lexer_yystype fake_yylvalue;
457 /*
458 YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
459 a consideration, that the lexer returns MEM_ROOT-allocated string values
460 there, and the rest of server is welcome to modify that strings inplace
461 (ind it does that in a few rare cases).
462 The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
463 it is not practical to add extra memory allocation there: const_cast is
464 enough.
465 */
466 fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
467 fake_yylvalue.lex_str.length = yyleng;
468
469 digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
470 }
471};
472
474 Hint_scanner *scanner) {
475 auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
476 const int ret = scanner->get_next_token();
477 yylval->hint_string.str = scanner->yytext;
478 yylval->hint_string.length = scanner->yyleng;
479 return ret;
480}
481
483 const char *msg);
484
485#endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:55
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:72
void adjust_token()
Definition: sql_lex_hints.h:395
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:53
int scan_ident()
Definition: sql_lex_hints.h:180
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:78
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:198
size_t lineno
Definition: sql_lex_hints.h:59
int scan()
Definition: sql_lex_hints.cc:70
const char * input_buf
Definition: sql_lex_hints.h:62
bool eof() const
Definition: sql_lex_hints.h:321
int scan_quoted()
Definition: sql_lex_hints.h:109
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:406
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:60
int scan_query_block_name()
Definition: sql_lex_hints.h:219
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:57
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:331
void skip_byte()
Definition: sql_lex_hints.h:355
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:86
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:277
size_t get_lineno() const
Definition: sql_lex_hints.h:93
void skip_byte(char byte)
Skips the next byte.
Definition: sql_lex_hints.h:367
uchar get_byte()
Definition: sql_lex_hints.h:347
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:453
const char * ptr
Definition: sql_lex_hints.h:65
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:88
const char * yytext
Current token pointer (may be converted allocated string outside input_buf.
Definition: sql_lex_hints.h:82
const char * get_ptr() const
Definition: sql_lex_hints.h:94
void skip_newline()
Definition: sql_lex_hints.h:341
const char * input_buf_end
Definition: sql_lex_hints.h:63
bool skip_mb()
Definition: sql_lex_hints.h:383
void skip_byte(const char *str)
Skips the next byte.
Definition: sql_lex_hints.h:378
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:335
int scan_fraction_digits()
Definition: sql_lex_hints.h:299
THD * thd
Definition: sql_lex_hints.h:56
char peek_byte() const
Definition: sql_lex_hints.h:326
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:95
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:245
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:136
void start_token()
Definition: sql_lex_hints.h:400
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:149
int prev_token
Definition: sql_lex_hints.h:67
const bool is_ansi_quotes
Definition: sql_lex_hints.h:58
int get_next_token()
Definition: sql_lex_hints.h:98
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:48
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:75
Definition: parse_tree_hints.h:100
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_lexer_thd.h:36
void * alloc(size_t size)
Definition: sql_lexer_thd.h:50
bool convert_string(LEX_STRING *, const CHARSET_INFO *, const char *, size_t, const CHARSET_INFO *, bool=false)
Definition: sql_lexer_thd.h:43
const CHARSET_INFO * charset() const
Definition: sql_lexer_thd.h:42
char * strmake(const char *str, size_t size) const
Definition: sql_lexer_thd.h:52
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2818
static struct wordvalue doubles[]
Definition: ctype-czech.cc:171
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
unsigned my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:671
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1554
#define DBUG_TRACE
Definition: my_dbug.h:146
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:52
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1105
Definition: buf0block_hint.cc:30
Definition: commit_order_queue.h:34
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:192
hint_lex_char_classes
Definition: sql_chars.h:73
@ HINT_CHR_MB
Definition: sql_chars.h:83
@ HINT_CHR_NL
Definition: sql_chars.h:84
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:76
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:80
@ HINT_CHR_QUOTE
Definition: sql_chars.h:85
@ HINT_CHR_DOT
Definition: sql_chars.h:79
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:74
@ HINT_CHR_IDENT
Definition: sql_chars.h:82
@ HINT_CHR_SLASH
Definition: sql_chars.h:86
@ HINT_CHR_EOF
Definition: sql_chars.h:81
@ HINT_CHR_DIGIT
Definition: sql_chars.h:78
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:89
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:88
@ HINT_ERROR
Definition: sql_hints.yy.h:94
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:92
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:91
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:105
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:90
int my_hint_parser_lex(MY_HINT_PARSER_STYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:473
void my_hint_parser_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:118
sql_digest_state * digest_add_token(sql_digest_state *, uint, Lexer_yystype *)
Definition: sql_lexer.cc:53
Definition: m_ctype.h:421
Definition: mysql_lex_string.h:35
char * str
Definition: mysql_lex_string.h:36
size_t length
Definition: mysql_lex_string.h:37
Definition: lex_symbol.h:39
const unsigned int length
Definition: lex_symbol.h:41
const unsigned int tok
Definition: lex_symbol.h:42
const char * name
Definition: lex_symbol.h:40
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:36
Definition: lexer_yystype.h:33
LEX_STRING lex_str
Definition: lexer_yystype.h:34
Definition: parser_yystype.h:341