MySQL  8.0.27
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2014, 2021, Oracle and/or its affiliates.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License, version 2.0,
6  as published by the Free Software Foundation.
7 
8  This program is also distributed with certain software (including
9  but not limited to OpenSSL) that is licensed under separate terms,
10  as designated in a particular file or component or in included license
11  documentation. The authors of MySQL hereby grant you an additional
12  permission to link the program and your derivative works with the
13  separately licensed software that they have included with MySQL.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License, version 2.0, for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23 
24 /* A lexical scanner for optimizer hints pseudo-commentary syntax */
25 
26 #ifndef SQL_LEX_HINTS_ICLUDED
27 #define SQL_LEX_HINTS_ICLUDED
28 
29 #include <string.h>
30 #include <sys/types.h>
31 
32 #include "lex_string.h"
33 #include "m_ctype.h"
34 #include "my_dbug.h"
35 #include "my_inttypes.h"
36 #include "sql/lex_symbol.h"
37 #include "sql/lexer_yystype.h"
38 #include "sql/sql_class.h"
39 #include "sql/sql_digest_stream.h"
40 #include "sql/sql_lex_hash.h"
41 #include "sql_chars.h"
42 
43 // This must be last, due to bison 2.3 on OsX
44 #ifndef YYSTYPE_IS_DECLARED
45 #define YYSTYPE_IS_DECLARED 1
46 #endif // YYSTYPE_IS_DECLARED
47 #include "sql/sql_hints.yy.h"
48 
49 class PT_hint_list;
50 union YYSTYPE;
51 
53 
54 /// Lexical scanner for hint comments.
55 ///
56 /// When the main lexical scanner recognizes the "/*+" delimiter, it calls
57 /// the hint parser (HINT_PARSER_parse) to consume the rest of hint tokens
58 /// including the */ delimiter. The hint parser uses Hint_scanner as its own
59 /// lexer to scan hint-specific tokens.
60 class Hint_scanner {
61  THD *thd;
62  const CHARSET_INFO *cs;
63  const bool is_ansi_quotes;
64  size_t lineno;
66 
67  const char *input_buf;
68  const char *input_buf_end;
69 
70  const char *ptr;
71 
73 
74  /**
75  Digest buffer interface to append tokens.
76  */
78 
79  public:
80  /**
81  Current token (yytext) origin in the input_buf
82  */
83  const char *raw_yytext;
84  /**
85  Current token pointer (may point to a converted, separately allocated string outside input_buf)
86  */
87  const char *yytext;
88  /**
89  Length of the current token (see yytext)
90  */
91  size_t yyleng;
92 
93  bool has_hints; ///< True if a hint comment is not empty (has any hints).
94 
95  public:
96  Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
97  sql_digest_state *digest_state_arg);
98  size_t get_lineno() const { return lineno; }
99  const char *get_ptr() const { return ptr; }
101  void syntax_warning(const char *msg) const;
102 
104  DBUG_TRACE;
105  prev_token = scan();
107  return prev_token;
108  }
109 
110  protected:
111  int scan();
112 
113  template <hint_lex_char_classes Quote>
114  int scan_quoted() {
115  assert(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
116  Quote == HINT_CHR_QUOTE);
117  assert(*ptr == '`' || *ptr == '"' || *ptr == '\'');
118 
119  const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
120  (is_ansi_quotes && Quote == HINT_CHR_DOUBLEQUOTE);
121  const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
122 
123  skip_byte("\"'`"); // skip opening quote sign
124  adjust_token(); // reset yytext & yyleng
125 
126  size_t double_separators = 0;
127 
128  for (;;) {
129  hint_lex_char_classes chr_class = peek_class();
130  switch (chr_class) {
131  case HINT_CHR_NL:
132  skip_newline();
133  continue;
134  case HINT_CHR_MB:
135  if (skip_mb()) return HINT_ERROR;
136  continue;
137  case HINT_CHR_ASTERISK:
138  if (peek_class2() == HINT_CHR_SLASH)
139  return HINT_ERROR; // we don't support "*/" inside quoted
140  // identifiers
141  skip_byte('*');
142  continue;
143  case HINT_CHR_EOF:
144  return HINT_ERROR;
145  case Quote:
146  if (peek_class2() == Quote) {
147  skip_byte("\"'`");
148  skip_byte("\"'`");
149  double_separators++;
150  continue;
151  } else {
152  if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
153 
154  ptr++; // skip closing quote
155 
156  if (thd->charset_is_system_charset && double_separators == 0)
157  return ret;
158 
159  LEX_STRING s;
162  thd->charset()))
163  return HINT_ERROR; // OOM etc.
164  } else {
165  assert(0 < double_separators && double_separators < yyleng);
166  s.length = yyleng - double_separators;
167  s.str = static_cast<char *>(thd->alloc(s.length));
168  if (s.str == nullptr) return HINT_ERROR; // OOM
169  }
170  if (double_separators > 0)
171  compact<Quote>(&s, yytext, yyleng, double_separators);
172 
173  yytext = s.str;
174  yyleng = s.length;
175  return ret;
176  }
177  default:
178  skip_byte();
179  }
180  }
181  }
182 
183  int scan_ident() {
184  for (;;) {
185  hint_lex_char_classes chr_class = peek_class();
186  switch (chr_class) {
187  case HINT_CHR_IDENT:
188  case HINT_CHR_DIGIT:
189  skip_byte();
190  continue;
191  case HINT_CHR_MB:
192  if (skip_mb()) return HINT_ERROR;
193  continue;
194  case HINT_CHR_EOF:
195  default:
196  return HINT_ARG_IDENT;
197  }
198  }
199  }
200 
202  assert(peek_class() == HINT_CHR_IDENT);
203  switch (peek_byte()) {
204  case 'K':
205  case 'M':
206  case 'G':
207  break;
208  default:
209  return scan_ident();
210  }
211  skip_byte();
212 
213  switch (peek_class()) {
214  case HINT_CHR_IDENT:
215  case HINT_CHR_DIGIT:
216  return scan_ident();
217  default:
219  }
220  }
221 
223  skip_byte('@');
224  start_token();
225 
226  switch (peek_class()) {
227  case HINT_CHR_IDENT:
228  case HINT_CHR_DIGIT:
229  case HINT_CHR_MB:
231  case HINT_CHR_BACKQUOTE:
232  return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
234  : HINT_ERROR;
236  return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
238  : HINT_ERROR;
239  default:
240  return HINT_ERROR;
241  }
242  }
243 
245  for (;;) {
246  switch (peek_class()) {
247  case HINT_CHR_IDENT:
248  case HINT_CHR_DIGIT:
249  skip_byte();
250  continue;
251  case HINT_CHR_MB:
252  return scan_ident();
253  case HINT_CHR_EOF:
254  default:
255  const SYMBOL *symbol =
257  if (symbol) // keyword
258  {
259  /*
260  Override the yytext pointer to the short-living buffer with a
261  long-living pointer to the same text (don't need to allocate a
262  keyword string since symbol array is a global constant).
263  */
264  yytext = symbol->name;
265  assert(yyleng == symbol->length);
266 
267  return symbol->tok;
268  }
269 
271  return HINT_ARG_IDENT;
272  }
273  }
274  }
275 
277  assert(peek_class() == HINT_CHR_DIGIT);
278  skip_byte();
279 
280  for (;;) {
281  switch (peek_class()) {
282  case HINT_CHR_DIGIT:
283  skip_byte();
284  continue;
285  case HINT_CHR_DOT:
286  return scan_fraction_digits();
287  case HINT_CHR_IDENT:
288  return scan_multiplier_or_ident();
289  case HINT_CHR_MB:
290  return scan_ident();
291  case HINT_CHR_EOF:
292  default:
293  return HINT_ARG_NUMBER;
294  }
295  }
296  }
297 
299  skip_byte('.');
300 
301  if (peek_class() == HINT_CHR_DIGIT)
302  skip_byte();
303  else
304  return HINT_ERROR;
305 
306  for (;;) {
307  switch (peek_class()) {
308  case HINT_CHR_DIGIT:
309  skip_byte();
310  continue;
311  case HINT_CHR_IDENT:
312  case HINT_CHR_MB:
313  return HINT_ERROR;
314  default:
316  }
317  }
318  }
319 
320  bool eof() const {
321  assert(ptr <= input_buf_end);
322  return ptr >= input_buf_end;
323  }
324 
325  char peek_byte() const {
326  assert(!eof());
327  return *ptr;
328  }
329 
331  return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
332  }
333 
335  assert(ptr + 1 <= input_buf_end);
336  return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
337  : char_classes[static_cast<uchar>(ptr[1])];
338  }
339 
340  void skip_newline() {
341  assert(!eof() && peek_byte() == '\n');
342  skip_byte();
343  lineno++;
344  }
345 
347  assert(!eof());
348  char ret = *ptr;
349  yyleng++;
350  ptr++;
351  return ret;
352  }
353 
354  void skip_byte() {
355  assert(!eof());
356  yyleng++;
357  ptr++;
358  }
359 
360  /**
361  Skips the next byte. In the debug mode, abort if it's not found in @p byte.
362 
363  @param byte A byte to compare with the byte we skip.
364  Unused in non-debug builds.
365  */
366  void skip_byte(char byte [[maybe_unused]]) {
367  assert(peek_byte() == byte);
368  skip_byte();
369  }
370 
371  /**
372  Skips the next byte. In the debug mode, abort if it's not found in @p str.
373 
374  @param str A string of characters to compare with the next byte.
375  Unused in non-debug builds.
376  */
377  void skip_byte(const char *str [[maybe_unused]]) {
378  assert(strchr(str, peek_byte()));
379  skip_byte();
380  }
381 
382  bool skip_mb() {
383  size_t len = my_ismbchar(cs, ptr, input_buf_end);
384  if (len == 0) {
385  ptr++;
386  yyleng++;
387  return true;
388  }
389  ptr += len;
390  yyleng += len;
391  return false;
392  }
393 
394  void adjust_token() {
395  yytext = ptr;
396  yyleng = 0;
397  }
398 
399  void start_token() {
400  adjust_token();
401  raw_yytext = ptr;
402  }
403 
404  template <hint_lex_char_classes Separator>
405  void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
406  assert(doubles > 0);
407 
408  size_t d = doubles;
409  char *t = to->str;
410  for (const char *s = from, *end = from + len; s < end;) {
411  switch (char_classes[(uchar)*s]) {
412  case HINT_CHR_MB: {
413  size_t hint_len = my_ismbchar(cs, s, end);
414  assert(hint_len > 1);
415  memcpy(t, s, hint_len);
416  t += hint_len;
417  s += hint_len;
418  }
419  continue;
420  case Separator:
421  assert(char_classes[(uchar)*s] == Separator);
422  *t++ = *s++;
423  s++; // skip the 2nd separator
424  d--;
425  if (d == 0) {
426  memcpy(t, s, end - s);
427  to->length = len - doubles;
428  return;
429  }
430  continue;
431  case HINT_CHR_EOF:
432  assert(0);
433  to->length = 0;
434  return;
435  default:
436  *t++ = *s++;
437  }
438  }
439  assert(0);
440  to->length = 0;
441  return;
442  }
443 
444  void add_hint_token_digest();
445 
446  private:
447  /**
448  Helper function to check digest buffer for overflow before adding tokens.
449 
450  @param token A token number to add.
451  */
452  void add_digest(uint token) {
453  if (digest_state == nullptr) return; // Digest buffer is full.
454 
455  Lexer_yystype fake_yylvalue;
456  /*
457  YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
458  a consideration, that the lexer returns MEM_ROOT-allocated string values
459  there, and the rest of server is welcome to modify that strings inplace
460  (ind it does that in a few rare cases).
461  The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
462  it is not practical to add extra memory allocation there: const_cast is
463  enough.
464  */
465  fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
466  fake_yylvalue.lex_str.length = yyleng;
467 
468  digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
469  }
470 };
471 
472 inline int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner) {
473  auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
474  int ret = scanner->get_next_token();
475  yylval->hint_string.str = scanner->yytext;
476  yylval->hint_string.length = scanner->yyleng;
477  return ret;
478 }
479 
480 void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg);
481 
482 #endif /* SQL_LEX_HINTS_ICLUDED */
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:60
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:77
void adjust_token()
Definition: sql_lex_hints.h:394
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:52
const char * get_ptr() const
Definition: sql_lex_hints.h:99
int scan_ident()
Definition: sql_lex_hints.h:183
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:83
int scan_multiplier_or_ident()
Definition: sql_lex_hints.h:201
size_t lineno
Definition: sql_lex_hints.h:64
int scan()
Definition: sql_lex_hints.cc:69
const char * input_buf
Definition: sql_lex_hints.h:67
bool eof() const
Definition: sql_lex_hints.h:320
int scan_quoted()
Definition: sql_lex_hints.h:114
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:405
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:65
int scan_query_block_name()
Definition: sql_lex_hints.h:222
void skip_byte(char byte[[maybe_unused]])
Skips the next byte.
Definition: sql_lex_hints.h:366
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:62
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:330
void skip_byte()
Definition: sql_lex_hints.h:354
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:91
int scan_number_or_multiplier_or_ident()
Definition: sql_lex_hints.h:276
size_t get_lineno() const
Definition: sql_lex_hints.h:98
uchar get_byte()
Definition: sql_lex_hints.h:346
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:452
const char * ptr
Definition: sql_lex_hints.h:70
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:93
const char * yytext
Current token pointer (may point to a converted, separately allocated string outside input_buf).
Definition: sql_lex_hints.h:87
void skip_newline()
Definition: sql_lex_hints.h:340
const char * input_buf_end
Definition: sql_lex_hints.h:68
bool skip_mb()
Definition: sql_lex_hints.h:382
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:334
int scan_fraction_digits()
Definition: sql_lex_hints.h:298
THD * thd
Definition: sql_lex_hints.h:61
char peek_byte() const
Definition: sql_lex_hints.h:325
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:244
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:135
void start_token()
Definition: sql_lex_hints.h:399
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:100
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:148
int prev_token
Definition: sql_lex_hints.h:72
const bool is_ansi_quotes
Definition: sql_lex_hints.h:63
int get_next_token()
Definition: sql_lex_hints.h:103
void skip_byte(const char *str[[maybe_unused]])
Skips the next byte.
Definition: sql_lex_hints.h:377
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:47
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:74
Definition: parse_tree_hints.h:97
char * strmake(const char *str, size_t size) const
Definition: sql_class.h:324
void * alloc(size_t size)
Definition: sql_class.h:308
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_class.h:821
bool convert_string(LEX_STRING *to, const CHARSET_INFO *to_cs, const char *from, size_t from_length, const CHARSET_INFO *from_cs, bool report_error=false)
Definition: sql_class.cc:1587
const CHARSET_INFO * charset() const
Definition: sql_class.h:3086
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2513
static struct wordvalue doubles[]
Definition: ctype-czech.cc:170
DBUG_TRACE
Definition: do_ctype.cc:46
#define yylval
Definition: fts0pars.cc:68
A better implementation of the UNIX ctype(3) library.
static uint my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:682
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1512
Some integer typedefs for easier portability.
unsigned char uchar
Definition: my_inttypes.h:51
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1056
Definition: buf0block_hint.cc:29
Definition: commit_order_queue.h:33
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:191
hint_lex_char_classes
Definition: sql_chars.h:72
@ HINT_CHR_MB
Definition: sql_chars.h:82
@ HINT_CHR_NL
Definition: sql_chars.h:83
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:75
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:79
@ HINT_CHR_QUOTE
Definition: sql_chars.h:84
@ HINT_CHR_DOT
Definition: sql_chars.h:78
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:73
@ HINT_CHR_IDENT
Definition: sql_chars.h:81
@ HINT_CHR_SLASH
Definition: sql_chars.h:85
@ HINT_CHR_EOF
Definition: sql_chars.h:80
@ HINT_CHR_DIGIT
Definition: sql_chars.h:77
sql_digest_state * digest_add_token(sql_digest_state *state, uint token, Lexer_yystype *yylval)
Definition: sql_digest.cc:379
@ HINT_ARG_IDENT
Definition: sql_hints.yy.h:81
@ HINT_ARG_NUMBER
Definition: sql_hints.yy.h:80
@ HINT_ERROR
Definition: sql_hints.yy.h:86
@ HINT_IDENT_OR_NUMBER_WITH_SCALE
Definition: sql_hints.yy.h:84
@ HINT_ARG_TEXT
Definition: sql_hints.yy.h:83
@ HINT_ARG_FLOATING_POINT_NUMBER
Definition: sql_hints.yy.h:97
@ HINT_ARG_QB_NAME
Definition: sql_hints.yy.h:82
void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:472
void hint_lex_init_maps(CHARSET_INFO *cs, hint_lex_char_classes *hint_map)
Definition: m_ctype.h:354
Definition: mysql_lex_string.h:34
char * str
Definition: mysql_lex_string.h:35
size_t length
Definition: mysql_lex_string.h:36
Definition: lex_symbol.h:38
const unsigned int length
Definition: lex_symbol.h:40
const unsigned int tok
Definition: lex_symbol.h:41
const char * name
Definition: lex_symbol.h:39
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:35
unsigned int uint
Definition: uca-dump.cc:29
Definition: lexer_yystype.h:32
LEX_STRING lex_str
Definition: lexer_yystype.h:33
Definition: parser_yystype.h:341