MySQL  8.0.19
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License, version 2.0,
6  as published by the Free Software Foundation.
7 
8  This program is also distributed with certain software (including
9  but not limited to OpenSSL) that is licensed under separate terms,
10  as designated in a particular file or component or in included license
11  documentation. The authors of MySQL hereby grant you an additional
12  permission to link the program and your derivative works with the
13  separately licensed software that they have included with MySQL.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License, version 2.0, for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23 
24 /* A lexical scanner for optimizer hints pseudo-commentary syntax */
25 
26 #ifndef SQL_LEX_HINTS_ICLUDED
27 #define SQL_LEX_HINTS_ICLUDED
28 
29 #include <string.h>
30 #include <sys/types.h>
31 
32 #include "lex_string.h"
33 #include "m_ctype.h"
34 #include "my_dbug.h"
35 #include "my_inttypes.h"
36 #include "sql/lex_symbol.h"
37 #include "sql/lexer_yystype.h"
38 #include "sql/sql_class.h"
39 #include "sql/sql_digest_stream.h"
40 #include "sql/sql_lex_hash.h"
41 #include "sql_chars.h"
42 
43 // This must be last, due to bison 2.3 on OsX
44 #ifndef YYSTYPE_IS_DECLARED
45 #define YYSTYPE_IS_DECLARED 1
46 #endif // YYSTYPE_IS_DECLARED
47 #include "sql/sql_hints.yy.h"
48 
49 class PT_hint_list;
50 union YYSTYPE;
51 
53 
54 /// Lexical scanner for hint comments.
55 ///
56 /// When the main lexical scanner recognizes the "/*+" delimiter, it calls
57 /// the hint parser (HINT_PARSER_parse) to consume the rest of hint tokens
58 /// including the */ delimiter. The hint parser uses Hint_scanner as its own
59 /// lexer to scan hint-specific tokens.
60 class Hint_scanner {
61  THD *thd;
62  const CHARSET_INFO *cs;
63  const bool is_ansi_quotes;
64  const bool backslash_escapes;
65  size_t lineno;
67 
68  const char *input_buf;
69  const char *input_buf_end;
70 
71  const char *ptr;
72 
74 
75  /**
76  Digest buffer interface to append tokens.
77  */
79 
80  public:
81  /**
82  Current token (yytext) origin in the input_buf
83  */
84  const char *raw_yytext;
85  /**
86  Current token pointer (may be a converted, allocated string outside input_buf)
87  */
88  const char *yytext;
89  /**
90  Length of the current token (see yytext)
91  */
92  size_t yyleng;
93 
94  bool has_hints; ///< True if a hint comment is not empty (has any hints).
95 
96  public:
97  Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
98  sql_digest_state *digest_state_arg);
99  size_t get_lineno() const { return lineno; }
100  const char *get_ptr() const { return ptr; }
102  void syntax_warning(const char *msg) const;
103 
105  DBUG_TRACE;
106  prev_token = scan();
108  return prev_token;
109  }
110 
111  protected:
/// Scan the next token starting at ptr.
///
/// Newline and space characters are consumed first; every loop iteration
/// calls start_token(), so the token that is finally returned begins after
/// them. The number of skipped characters is kept in `whitespaces`, which the
/// '@' case uses to require that a query block name follows an identifier
/// with nothing in between.
///
/// @return the value produced by the sub-scanner selected by the class of the
///         first character, HINT_CLOSE for the closing delimiter, 0 at end of
///         input, or the result of get_byte() for any other character.
112  int scan() {
113  int whitespaces = 0;
114  for (;;) {
115  start_token();
116  switch (peek_class()) {
117  case HINT_CHR_NL:
118  skip_newline();
119  whitespaces++;
120  continue;
121  case HINT_CHR_SPACE:
122  skip_byte();
123  whitespaces++;
124  continue;
125  case HINT_CHR_DIGIT:
126  return scan_number_or_ident();
127  case HINT_CHR_IDENT:
128  return scan_ident_or_keyword();
129  case HINT_CHR_MB:
130  return scan_ident();
131  case HINT_CHR_QUOTE:
132  return scan_quoted<HINT_CHR_QUOTE>();
133  case HINT_CHR_BACKQUOTE:
134  return scan_quoted<HINT_CHR_BACKQUOTE>();
     // NOTE(review): listing line 135 is absent from this view; presumably it
     // is the HINT_CHR_DOUBLEQUOTE case label -- confirm against the header.
136  return scan_quoted<HINT_CHR_DOUBLEQUOTE>();
137  case HINT_CHR_ASTERISK:
138  if (peek_class2() == HINT_CHR_SLASH) {
139  ptr += 2; // skip "*/"
     // Move the end of input to the current position, so eof() holds from
     // here on.
140  input_buf_end = ptr;
141  return HINT_CLOSE;
142  } else
143  return get_byte();
144  case HINT_CHR_AT:
     // '@' starts a query block name only right after '(' or directly after
     // an identifier (no skipped whitespace); otherwise it is returned as a
     // plain byte.
145  if (prev_token == '(' ||
146  (prev_token == HINT_ARG_IDENT && whitespaces == 0))
147  return scan_query_block_name();
148  else
149  return get_byte();
150  case HINT_CHR_EOF:
151  return 0;
152  default:
153  return get_byte();
154  }
155  }
156  }
157 
/// Scan a quoted identifier or quoted text; ptr must be at the opening quote.
///
/// The token text is everything between the quotes. A quote character that
/// is immediately followed by another one of the same class is taken as an
/// escaped quote: both bytes are consumed and the pair is counted in
/// `double_separators`, to be collapsed later by compact().
///
/// @tparam Quote  character class of the quote sign (HINT_CHR_QUOTE,
///                HINT_CHR_BACKQUOTE or HINT_CHR_DOUBLEQUOTE).
///
/// @return HINT_ARG_IDENT for a backquoted token, or a double-quoted one when
///         is_ansi_quotes is set; HINT_ARG_TEXT for the other quote kinds;
///         HINT_ERROR when skip_mb() fails, when the hint-closing delimiter
///         or end of input is met before the closing quote, when nothing
///         lies between the quotes, or when conversion/allocation fails.
158  template <hint_lex_char_classes Quote>
159  int scan_quoted() {
160  DBUG_ASSERT(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
161  Quote == HINT_CHR_QUOTE);
162  DBUG_ASSERT(*ptr == '`' || *ptr == '"' || *ptr == '\'');
163 
164  const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
165  (is_ansi_quotes && Quote == HINT_CHR_DOUBLEQUOTE);
166  const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
167 
168  skip_byte(); // skip opening quote sign
169  adjust_token(); // reset yytext & yyleng
170 
171  size_t double_separators = 0;
172 
173  for (;;) {
174  hint_lex_char_classes chr_class = peek_class();
175  switch (chr_class) {
176  case HINT_CHR_NL:
177  skip_newline();
178  continue;
179  case HINT_CHR_MB:
180  if (skip_mb()) return HINT_ERROR;
181  continue;
182  case HINT_CHR_ASTERISK:
183  if (peek_class2() == HINT_CHR_SLASH)
184  return HINT_ERROR; // we don't support "*/" inside quoted
185  // identifiers
186  skip_byte();
187  continue;
188  case HINT_CHR_EOF:
189  return HINT_ERROR;
190  case Quote:
191  if (peek_class2() == Quote) {
192  skip_byte(); // skip quote
193  skip_byte(); // skip quote
194  double_separators++;
195  continue;
196  } else {
197  if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
198 
     // ptr is advanced directly rather than through skip_byte(), so the
     // closing quote is not counted in yyleng.
199  ptr++; // skip closing quote
200 
     // Nothing to convert and nothing to collapse: yytext keeps pointing at
     // the text where it was scanned.
201  if (thd->charset_is_system_charset && double_separators == 0)
202  return ret;
203 
204  LEX_STRING s;
     // NOTE(review): listing lines 205-206 are absent from this view;
     // presumably they open an if-branch that converts the token with
     // thd->convert_string() -- confirm against the header.
207  thd->charset()))
208  return HINT_ERROR; // OOM etc.
209  } else {
210  DBUG_ASSERT(0 < double_separators && double_separators < yyleng);
     // Each doubled quote becomes a single byte in the result.
211  s.length = yyleng - double_separators;
212  s.str = (char *)thd->alloc(s.length);
213  if (s.str == NULL) return HINT_ERROR; // OOM
214  }
215  if (double_separators > 0)
216  compact<Quote>(&s, yytext, yyleng, double_separators);
217 
     // From here on the token refers to the buffer in s.
218  yytext = s.str;
219  yyleng = s.length;
220  return ret;
221  }
222  default:
223  skip_byte();
224  }
225  }
226  }
227 
/// Consume the remainder of an unquoted identifier.
///
/// Advances over identifier characters, digits and multibyte characters and
/// stops, without consuming it, at the first character of any other class
/// (end of input included). The token is not restarted here, so yytext and
/// yyleng keep whatever has already been scanned for the current token.
///
/// @retval HINT_ARG_IDENT  the identifier ended at a non-identifier character
/// @retval HINT_ERROR      skip_mb() reported a failure
228  int scan_ident() {
229  for (;;) {
230  hint_lex_char_classes chr_class = peek_class();
231  switch (chr_class) {
232  case HINT_CHR_IDENT:
233  case HINT_CHR_DIGIT:
234  skip_byte();
235  continue;
236  case HINT_CHR_MB:
237  if (skip_mb()) return HINT_ERROR;
238  continue;
239  case HINT_CHR_EOF:
240  default:
241  return HINT_ARG_IDENT;
242  }
243  }
244  }
245 
248  switch (peek_byte()) {
249  case 'K':
250  case 'M':
251  case 'G':
252  break;
253  default:
254  return scan_ident();
255  }
256  skip_byte();
257 
258  switch (peek_class()) {
259  case HINT_CHR_IDENT:
260  case HINT_CHR_DIGIT:
261  return scan_ident();
262  default:
263  return HINT_IDENT_OR_NUMBER_WITH_SCALE;
264  }
265  }
266 
268  DBUG_ASSERT(*ptr == '@');
269 
270  skip_byte(); // skip '@'
271  start_token();
272 
273  switch (peek_class()) {
274  case HINT_CHR_IDENT:
275  case HINT_CHR_DIGIT:
276  case HINT_CHR_MB:
277  return scan_ident() == HINT_ARG_IDENT ? HINT_ARG_QB_NAME : HINT_ERROR;
278  case HINT_CHR_BACKQUOTE:
279  return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
280  ? HINT_ARG_QB_NAME
281  : HINT_ERROR;
283  return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
284  ? HINT_ARG_QB_NAME
285  : HINT_ERROR;
286  default:
287  return HINT_ERROR;
288  }
289  }
290 
292  for (;;) {
293  switch (peek_class()) {
294  case HINT_CHR_IDENT:
295  case HINT_CHR_DIGIT:
296  skip_byte();
297  continue;
298  case HINT_CHR_MB:
299  return scan_ident();
300  case HINT_CHR_EOF:
301  default:
302  const SYMBOL *symbol =
304  if (symbol) // keyword
305  {
306  /*
307  Override the yytext pointer to the short-living buffer with a
308  long-living pointer to the same text (don't need to allocate a
309  keyword string since symbol array is a global constant).
310  */
311  yytext = symbol->name;
312  DBUG_ASSERT(yyleng == symbol->length);
313 
314  return symbol->tok;
315  }
316 
318  return HINT_ARG_IDENT;
319  }
320  }
321  }
322 
324  for (;;) {
325  switch (peek_class()) {
326  case HINT_CHR_DIGIT:
327  skip_byte();
328  continue;
329  case HINT_CHR_IDENT:
330  return scan_scale_or_ident();
331  case HINT_CHR_MB:
332  return scan_ident();
333  case HINT_CHR_EOF:
334  default:
335  return HINT_ARG_NUMBER;
336  }
337  }
338  }
339 
/// @return true when ptr has reached (or passed) input_buf_end.
340  bool eof() const {
342  return ptr >= input_buf_end;
343  }
344 
/// Look at the byte under ptr without consuming it.
///
/// The scanner must not be at end of input (checked with DBUG_ASSERT).
///
/// @return the byte at ptr.
345  char peek_byte() const {
346  DBUG_ASSERT(!eof());
347  return *ptr;
348  }
349 
351  return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
352  }
353 
355  DBUG_ASSERT(ptr + 1 <= input_buf_end);
356  return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
357  : char_classes[static_cast<uchar>(ptr[1])];
358  }
359 
/// Consume a '\n' byte and increment the line counter.
///
/// The current byte must be '\n' (checked with DBUG_ASSERT). The byte is
/// consumed through skip_byte(), so it is counted in yyleng as well.
360  void skip_newline() {
361  DBUG_ASSERT(!eof() && peek_byte() == '\n');
362  skip_byte();
363  lineno++;
364  }
365 
367  DBUG_ASSERT(!eof());
368  char ret = *ptr;
369  yyleng++;
370  ptr++;
371  return ret;
372  }
373 
/// Consume one byte: advance ptr and extend the current token length by one.
///
/// The scanner must not be at end of input (checked with DBUG_ASSERT).
374  void skip_byte() {
375  DBUG_ASSERT(!eof());
376  yyleng++;
377  ptr++;
378  }
379 
/// Consume one multibyte character at ptr.
///
/// The character length is obtained from my_ismbchar() for charset `cs`,
/// bounded by input_buf_end. A length of 0 is treated as a failure: a single
/// byte is still consumed, so the scanner always makes progress.
///
/// @retval false  `len` bytes were consumed as one character
/// @retval true   my_ismbchar() returned 0; one byte was consumed
380  bool skip_mb() {
381  size_t len = my_ismbchar(cs, ptr, input_buf_end);
382  if (len == 0) {
383  ptr++;
384  yyleng++;
385  return true;
386  }
387  ptr += len;
388  yyleng += len;
389  return false;
390  }
391 
/// Restart the current token text at ptr: yytext is set to ptr and yyleng to
/// 0. raw_yytext is left untouched (see start_token() for the full reset).
392  void adjust_token() {
393  yytext = ptr;
394  yyleng = 0;
395  }
396 
/// Begin a new token at ptr: resets yytext and yyleng through adjust_token()
/// and records ptr in raw_yytext as well.
397  void start_token() {
398  adjust_token();
399  raw_yytext = ptr;
400  }
401 
/// Copy a quoted token body while collapsing doubled separators.
///
/// Bytes of `from` are copied to to->str one character at a time; every
/// occurrence of a Separator-class byte is written once and the byte after
/// it is skipped. Multibyte characters are copied whole, so their bytes are
/// not examined individually. As soon as `doubles` pairs have been
/// collapsed, the rest of the input is copied with a single memcpy.
///
/// @tparam Separator  character class of the quote sign that may be doubled.
/// @param[out] to     destination; to->str is written and to->length is set
///                    to `len - doubles` on success. The function writes
///                    `len - doubles` bytes and does not allocate, so the
///                    buffer has to be provided by the caller.
/// @param from        source bytes.
/// @param len         number of source bytes.
/// @param doubles     number of doubled separators in the source; must be
///                    greater than 0 (checked with DBUG_ASSERT).
///
/// If an HINT_CHR_EOF-class byte is met, or the input ends before `doubles`
/// pairs were found, DBUG_ASSERT(0) is hit and to->length is set to 0.
402  template <hint_lex_char_classes Separator>
403  void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
404  DBUG_ASSERT(doubles > 0);
405 
406  size_t d = doubles;
407  char *t = to->str;
408  for (const char *s = from, *end = from + len; s < end;) {
409  switch (char_classes[(uchar)*s]) {
410  case HINT_CHR_MB: {
411  size_t hint_len = my_ismbchar(cs, s, end);
412  DBUG_ASSERT(hint_len > 1);
413  memcpy(t, s, hint_len);
414  t += hint_len;
415  s += hint_len;
416  }
417  continue;
418  case Separator:
419  DBUG_ASSERT(char_classes[(uchar)*s] == Separator);
420  *t++ = *s++;
421  s++; // skip the 2nd separator
422  d--;
423  if (d == 0) {
     // No doubled separators remain: copy the tail verbatim and finish.
424  memcpy(t, s, end - s);
425  to->length = len - doubles;
426  return;
427  }
428  continue;
429  case HINT_CHR_EOF:
430  DBUG_ASSERT(0);
431  to->length = 0;
432  return;
433  default:
434  *t++ = *s++;
435  }
436  }
     // Reached only if fewer than `doubles` pairs were found in the input.
437  DBUG_ASSERT(0);
438  to->length = 0;
439  return;
440  }
441 
442  void add_hint_token_digest();
443 
444  private:
445  /**
446  Helper function to check digest buffer for overflow before adding tokens.
447  The current yytext/yyleng pair is passed along as the token's string value.
448  @param token A token number to add.
449  */
450  void add_digest(uint token) {
451  if (digest_state == NULL) return; // Digest buffer is full.
452 
453  Lexer_yystype fake_yylvalue;
454  /*
455  YYSTYPE::LEX_STRING is designed to accept a non-constant "char *": the
456  consideration is that the lexer returns MEM_ROOT-allocated string values
457  there, and the rest of the server is welcome to modify those strings in
458  place (and it does that in a few rare cases).
459  The digest calculator never modifies the YYSTYPE::LEX_STRING::str data, so
460  it is not practical to add an extra memory allocation there: const_cast is
461  enough.
462  */
463  fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
464  fake_yylvalue.lex_str.length = yyleng;
465 
     // digest_add_token() hands back the state pointer to use for later calls.
466  digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
467  }
468 };
469 
/// Lexer entry point taking the hint parser's semantic-value pointer.
///
/// Fetches the next token from `scanner` and stores the token text and
/// length (scanner->yytext / scanner->yyleng) in the hint_string member of
/// the semantic value.
///
/// @param[out] yacc_yylval  semantic value to fill; it is accessed as a
///                          Lexer_yystype through reinterpret_cast.
/// @param scanner           scanner that supplies the token.
///
/// @return the token code returned by scanner->get_next_token().
470 inline int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner) {
471  auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
472  int ret = scanner->get_next_token();
473  yylval->hint_string.str = scanner->yytext;
474  yylval->hint_string.length = scanner->yyleng;
475  return ret;
476 }
477 
478 void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg);
479 
480 #endif /* SQL_LEX_HINTS_ICLUDED */
Hint_scanner::digest_state
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:78
sql_class.h
THD::charset
const CHARSET_INFO * charset() const
Definition: sql_class.h:2908
THD
Definition: sql_class.h:764
hint_lex_init_maps
void hint_lex_init_maps(CHARSET_INFO *cs, hint_lex_char_classes *hint_map)
DBUG_TRACE
DBUG_TRACE
Definition: do_ctype.cc:46
lexer_yystype.h
msg
char msg[1024]
Definition: test_sql_9_sessions.cc:281
HINT_CHR_SLASH
@ HINT_CHR_SLASH
Definition: sql_chars.h:84
SYMBOL::name
const char * name
Definition: lex_symbol.h:39
NULL
#define NULL
Definition: types.h:55
Lex_hash::hint_keywords
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:47
Hint_scanner::scan
int scan()
Definition: sql_lex_hints.h:112
Hint_scanner::yyleng
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:92
CHARSET_INFO
Definition: m_ctype.h:354
string.h
Hint_scanner::peek_byte
char peek_byte() const
Definition: sql_lex_hints.h:345
Hint_scanner::skip_byte
void skip_byte()
Definition: sql_lex_hints.h:374
my_dbug.h
Query_arena::strmake
char * strmake(const char *str, size_t size) const
Definition: sql_class.h:314
HINT_CHR_SPACE
@ HINT_CHR_SPACE
Definition: sql_chars.h:85
Hint_scanner::scan_quoted
int scan_quoted()
Definition: sql_lex_hints.h:159
Hint_scanner::adjust_token
void adjust_token()
Definition: sql_lex_hints.h:392
HINT_CHR_EOF
@ HINT_CHR_EOF
Definition: sql_chars.h:79
Hint_scanner::get_digest
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:101
hint_lex_char_classes
hint_lex_char_classes
Definition: sql_chars.h:72
Hint_scanner::thd
THD * thd
Definition: sql_lex_hints.h:61
Hint_scanner::has_hints
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:94
Hint_scanner::cs
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:62
Hint_scanner::input_buf_end
const char * input_buf_end
Definition: sql_lex_hints.h:69
Hint_scanner::add_digest
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:450
SYMBOL::length
const unsigned int length
Definition: lex_symbol.h:40
Hint_scanner::scan_ident
int scan_ident()
Definition: sql_lex_hints.h:228
Hint_scanner::peek_class2
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:354
Hint_scanner::start_token
void start_token()
Definition: sql_lex_hints.h:397
HINT_CHR_DIGIT
@ HINT_CHR_DIGIT
Definition: sql_chars.h:77
Hint_scanner::char_classes
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:66
Hint_scanner::get_ptr
const char * get_ptr() const
Definition: sql_lex_hints.h:100
sql_digest_stream.h
Hint_scanner::add_hint_token_digest
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:102
Lex_hash::get_hash_symbol
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:75
doubles
static struct wordvalue doubles[]
Definition: ctype-czech.cc:170
Hint_scanner::ptr
const char * ptr
Definition: sql_lex_hints.h:71
lex_symbol.h
my_inttypes.h
my_ismbchar
static uint my_ismbchar(const CHARSET_INFO *cs, const char *str, const char *strend)
Definition: m_ctype.h:683
m_ctype.h
HINT_PARSER_error
void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:68
Hint_scanner::lineno
size_t lineno
Definition: sql_lex_hints.h:65
Hint_scanner::peek_class
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:350
Hint_scanner::skip_newline
void skip_newline()
Definition: sql_lex_hints.h:360
Hint_scanner::scan_query_block_name
int scan_query_block_name()
Definition: sql_lex_hints.h:267
lex_string.h
Hint_scanner::Hint_scanner
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:50
Hint_scanner::scan_number_or_ident
int scan_number_or_ident()
Definition: sql_lex_hints.h:323
uint
unsigned int uint
Definition: uca-dump.cc:29
YYSTYPE
Definition: parser_yystype.h:238
Hint_scanner::scan_ident_or_keyword
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:291
Hint_scanner::backslash_escapes
const bool backslash_escapes
Definition: sql_lex_hints.h:64
system_charset_info
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1316
uchar
unsigned char uchar
Definition: my_inttypes.h:51
Hint_scanner::compact
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:403
sql_chars.h
rules_table_service::end
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:188
SYMBOL::tok
const unsigned int tok
Definition: lex_symbol.h:41
Hint_scanner::get_byte
uchar get_byte()
Definition: sql_lex_hints.h:366
THD::convert_string
bool convert_string(LEX_STRING *to, const CHARSET_INFO *to_cs, const char *from, size_t from_length, const CHARSET_INFO *from_cs, bool report_error=false)
Definition: sql_class.cc:1494
Hint_scanner::syntax_warning
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information.
Definition: sql_lex_hints.cc:86
Hint_scanner::prev_token
int prev_token
Definition: sql_lex_hints.h:73
HINT_CHR_AT
@ HINT_CHR_AT
Definition: sql_chars.h:74
digest_add_token
sql_digest_state * digest_add_token(sql_digest_state *state, uint token, Lexer_yystype *yylval)
Definition: sql_digest.cc:380
Hint_scanner::is_ansi_quotes
const bool is_ansi_quotes
Definition: sql_lex_hints.h:63
HINT_CHR_QUOTE
@ HINT_CHR_QUOTE
Definition: sql_chars.h:83
Query_arena::alloc
void * alloc(size_t size)
Definition: sql_class.h:298
MYSQL_LEX_STRING
Definition: mysql_lex_string.h:34
Lexer_yystype::lex_str
LEX_STRING lex_str
Definition: lexer_yystype.h:33
Hint_scanner::yytext
const char * yytext
Current token pointer (may be a converted, allocated string outside input_buf).
Definition: sql_lex_hints.h:88
HINT_PARSER_lex
int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:470
Hint_scanner::skip_mb
bool skip_mb()
Definition: sql_lex_hints.h:380
DBUG_ASSERT
#define DBUG_ASSERT(A)
Definition: my_dbug.h:197
THD::charset_is_system_charset
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE.
Definition: sql_class.h:2395
HINT_CHR_MB
@ HINT_CHR_MB
Definition: sql_chars.h:81
PT_hint_list
Definition: parse_tree_hints.h:90
sql_digest_state
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:35
HINT_CHR_BACKQUOTE
@ HINT_CHR_BACKQUOTE
Definition: sql_chars.h:75
HINT_CHR_ASTERISK
@ HINT_CHR_ASTERISK
Definition: sql_chars.h:73
HINT_CHR_IDENT
@ HINT_CHR_IDENT
Definition: sql_chars.h:80
Hint_scanner::raw_yytext
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:84
Lexer_yystype
Definition: lexer_yystype.h:32
Hint_scanner::get_lineno
size_t get_lineno() const
Definition: sql_lex_hints.h:99
Hint_scanner::get_next_token
int get_next_token()
Definition: sql_lex_hints.h:104
MYSQL_LEX_STRING::str
char * str
Definition: mysql_lex_string.h:35
HINT_CHR_DOUBLEQUOTE
@ HINT_CHR_DOUBLEQUOTE
Definition: sql_chars.h:78
Hint_scanner::scan_scale_or_ident
int scan_scale_or_ident()
Definition: sql_lex_hints.h:246
Hint_scanner::input_buf
const char * input_buf
Definition: sql_lex_hints.h:68
MYSQL_LEX_STRING::length
size_t length
Definition: mysql_lex_string.h:36
Hint_scanner
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:60
yylval
#define yylval
Definition: fts0pars.cc:68
Hint_scanner::eof
bool eof() const
Definition: sql_lex_hints.h:340
HINT_CHR_NL
@ HINT_CHR_NL
Definition: sql_chars.h:82
sql_lex_hash.h
SYMBOL
Definition: lex_symbol.h:38