MySQL  8.0.16
Source Code Documentation
sql_lex_hints.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2014, 2019, Oracle and/or its affiliates. All rights reserved.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License, version 2.0,
6  as published by the Free Software Foundation.
7 
8  This program is also distributed with certain software (including
9  but not limited to OpenSSL) that is licensed under separate terms,
10  as designated in a particular file or component or in included license
11  documentation. The authors of MySQL hereby grant you an additional
12  permission to link the program and your derivative works with the
13  separately licensed software that they have included with MySQL.
14 
15  This program is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License, version 2.0, for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with this program; if not, write to the Free Software
22  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
23 
24 /* A lexical scanner for optimizer hints pseudo-commentary syntax */
25 
26 #ifndef SQL_LEX_HINTS_ICLUDED
27 #define SQL_LEX_HINTS_ICLUDED
28 
29 #include <string.h>
30 #include <sys/types.h>
31 
32 #include "lex_string.h"
33 #include "m_ctype.h"
34 #include "my_dbug.h"
35 #include "my_inttypes.h"
36 #include "sql/lex_symbol.h"
37 #include "sql/lexer_yystype.h"
38 #include "sql/sql_class.h"
39 #include "sql/sql_digest_stream.h"
40 #include "sql/sql_lex_hash.h"
41 #include "sql_chars.h"
42 
43 // This must be last, due to bison 2.3 on OsX
44 #ifndef YYSTYPE_IS_DECLARED
45 #define YYSTYPE_IS_DECLARED 1
46 #endif // YYSTYPE_IS_DECLARED
47 #include "sql/sql_hints.yy.h"
48 
49 class PT_hint_list;
50 union YYSTYPE;
51 
53 
54 /// Lexical scanner for hint comments.
55 ///
56 /// When the main lexical scanner recognizes the "/*+" delimiter, it calls
57 /// the hint parser (HINT_PARSER_parse) to consume the rest of hint tokens
58 /// including the */ delimiter. The hint parser uses Hint_scanner as its own
59 /// lexer to scan hint-specific tokens.
60 class Hint_scanner {
61  THD *thd;
62  const CHARSET_INFO *cs;
63  const bool is_ansi_quotes;
64  const bool backslash_escapes;
65  size_t lineno;
67 
68  const char *input_buf;
69  const char *input_buf_end;
70 
71  const char *ptr;
72 
74 
75  /**
76  Digest buffer interface to append tokens.
77  */
79 
80  public:
81  /**
82  Current token (yytext) origin in the input_buf
83  */
84  const char *raw_yytext;
85  /**
86  Current token pointer (may be a converted, allocated string outside input_buf)
87  */
88  const char *yytext;
89  /**
90  Length of the current token (see yytext)
91  */
92  size_t yyleng;
93 
94  bool has_hints; ///< True if a hint comment is not empty (has any hints).
95 
96  public:
97  Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len,
98  sql_digest_state *digest_state_arg);
99  size_t get_lineno() const { return lineno; }
100  const char *get_ptr() const { return ptr; }
102  void syntax_warning(const char *msg) const;
103 
105  DBUG_ENTER("Hint_scanner::get_next_token");
106  prev_token = scan();
109  }
110 
111  protected:
112  int scan() {
113  int whitespaces = 0;
114  for (;;) {
115  start_token();
116  switch (peek_class()) {
117  case HINT_CHR_NL:
118  skip_newline();
119  whitespaces++;
120  continue;
121  case HINT_CHR_SPACE:
122  skip_byte();
123  whitespaces++;
124  continue;
125  case HINT_CHR_DIGIT:
126  return scan_number_or_ident();
127  case HINT_CHR_IDENT:
128  return scan_ident_or_keyword();
129  case HINT_CHR_MB:
130  return scan_ident();
131  case HINT_CHR_QUOTE:
132  return scan_quoted<HINT_CHR_QUOTE>();
133  case HINT_CHR_BACKQUOTE:
134  return scan_quoted<HINT_CHR_BACKQUOTE>();
136  return scan_quoted<HINT_CHR_DOUBLEQUOTE>();
137  case HINT_CHR_ASTERISK:
138  if (peek_class2() == HINT_CHR_SLASH) {
139  ptr += 2; // skip "*/"
140  input_buf_end = ptr;
141  return HINT_CLOSE;
142  } else
143  return get_byte();
144  case HINT_CHR_AT:
145  if (prev_token == '(' ||
146  (prev_token == HINT_ARG_IDENT && whitespaces == 0))
147  return scan_query_block_name();
148  else
149  return get_byte();
150  case HINT_CHR_EOF:
151  return 0;
152  default:
153  return get_byte();
154  }
155  }
156  }
157 
158  template <hint_lex_char_classes Quote>
159  int scan_quoted() {
160  DBUG_ASSERT(Quote == HINT_CHR_BACKQUOTE || Quote == HINT_CHR_DOUBLEQUOTE ||
161  Quote == HINT_CHR_QUOTE);
162  DBUG_ASSERT(*ptr == '`' || *ptr == '"' || *ptr == '\'');
163 
164  const bool is_ident = (Quote == HINT_CHR_BACKQUOTE) ||
165  (is_ansi_quotes && Quote == HINT_CHR_DOUBLEQUOTE);
166  const int ret = is_ident ? HINT_ARG_IDENT : HINT_ARG_TEXT;
167 
168  skip_byte(); // skip opening quote sign
169  adjust_token(); // reset yytext & yyleng
170 
171  size_t double_separators = 0;
172 
173  for (;;) {
174  hint_lex_char_classes chr_class = peek_class();
175  switch (chr_class) {
176  case HINT_CHR_NL:
177  skip_newline();
178  continue;
179  case HINT_CHR_MB:
180  if (skip_mb()) return HINT_ERROR;
181  continue;
182  case HINT_CHR_ASTERISK:
183  if (peek_class2() == HINT_CHR_SLASH)
184  return HINT_ERROR; // we don't support "*/" inside quoted
185  // identifiers
186  skip_byte();
187  continue;
188  case HINT_CHR_EOF:
189  return HINT_ERROR;
190  case Quote:
191  if (peek_class2() == Quote) {
192  skip_byte(); // skip quote
193  skip_byte(); // skip quote
194  double_separators++;
195  continue;
196  } else {
197  if (yyleng == 0) return HINT_ERROR; // empty quoted identifier
198 
199  ptr++; // skip closing quote
200 
201  if (thd->charset_is_system_charset && double_separators == 0)
202  return ret;
203 
204  LEX_STRING s;
207  thd->charset()))
208  return HINT_ERROR; // OOM etc.
209  } else {
210  DBUG_ASSERT(0 < double_separators && double_separators < yyleng);
211  s.length = yyleng - double_separators;
212  s.str = (char *)thd->alloc(s.length);
213  if (s.str == NULL) return HINT_ERROR; // OOM
214  }
215  if (double_separators > 0)
216  compact<Quote>(&s, yytext, yyleng, double_separators);
217 
218  yytext = s.str;
219  yyleng = s.length;
220  return ret;
221  }
222  default:
223  skip_byte();
224  }
225  }
226  }
227 
228  int scan_ident() {
229  for (;;) {
230  hint_lex_char_classes chr_class = peek_class();
231  switch (chr_class) {
232  case HINT_CHR_IDENT:
233  case HINT_CHR_DIGIT:
234  skip_byte();
235  continue;
236  case HINT_CHR_MB:
237  if (skip_mb()) return HINT_ERROR;
238  continue;
239  case HINT_CHR_EOF:
240  default:
241  return HINT_ARG_IDENT;
242  }
243  }
244  }
245 
248  switch (peek_byte()) {
249  case 'K':
250  case 'M':
251  case 'G':
252  break;
253  default:
254  return scan_ident();
255  }
256  skip_byte();
257 
258  switch (peek_class()) {
259  case HINT_CHR_IDENT:
260  case HINT_CHR_DIGIT:
261  return scan_ident();
262  default:
263  return HINT_IDENT_OR_NUMBER_WITH_SCALE;
264  }
265  }
266 
268  DBUG_ASSERT(*ptr == '@');
269 
270  skip_byte(); // skip '@'
271  start_token();
272 
273  switch (peek_class()) {
274  case HINT_CHR_IDENT:
275  case HINT_CHR_DIGIT:
276  case HINT_CHR_MB:
277  return scan_ident() == HINT_ARG_IDENT ? HINT_ARG_QB_NAME : HINT_ERROR;
278  case HINT_CHR_BACKQUOTE:
279  return scan_quoted<HINT_CHR_BACKQUOTE>() == HINT_ARG_IDENT
280  ? HINT_ARG_QB_NAME
281  : HINT_ERROR;
283  return scan_quoted<HINT_CHR_DOUBLEQUOTE>() == HINT_ARG_IDENT
284  ? HINT_ARG_QB_NAME
285  : HINT_ERROR;
286  default:
287  return HINT_ERROR;
288  }
289  }
290 
292  for (;;) {
293  switch (peek_class()) {
294  case HINT_CHR_IDENT:
295  case HINT_CHR_DIGIT:
296  skip_byte();
297  continue;
298  case HINT_CHR_MB:
299  return scan_ident();
300  case HINT_CHR_EOF:
301  default:
302  const SYMBOL *symbol =
304  if (symbol) // keyword
305  {
306  /*
307  Override the yytext pointer to the short-living buffer with a
308  long-living pointer to the same text (don't need to allocate a
309  keyword string since symbol array is a global constant).
310  */
311  yytext = symbol->name;
312  DBUG_ASSERT(yyleng == symbol->length);
313 
314  return symbol->tok;
315  }
316 
318  return HINT_ARG_IDENT;
319  }
320  }
321  }
322 
324  for (;;) {
325  switch (peek_class()) {
326  case HINT_CHR_DIGIT:
327  skip_byte();
328  continue;
329  case HINT_CHR_IDENT:
330  return scan_scale_or_ident();
331  case HINT_CHR_MB:
332  return scan_ident();
333  case HINT_CHR_EOF:
334  default:
335  return HINT_ARG_NUMBER;
336  }
337  }
338  }
339 
340  bool eof() const {
342  return ptr >= input_buf_end;
343  }
344 
345  char peek_byte() const {
346  DBUG_ASSERT(!eof());
347  return *ptr;
348  }
349 
351  return eof() ? HINT_CHR_EOF : char_classes[static_cast<uchar>(peek_byte())];
352  }
353 
355  DBUG_ASSERT(ptr + 1 <= input_buf_end);
356  return ptr + 1 >= input_buf_end ? HINT_CHR_EOF
357  : char_classes[static_cast<uchar>(ptr[1])];
358  }
359 
360  void skip_newline() {
361  DBUG_ASSERT(!eof() && peek_byte() == '\n');
362  skip_byte();
363  lineno++;
364  }
365 
367  DBUG_ASSERT(!eof());
368  char ret = *ptr;
369  yyleng++;
370  ptr++;
371  return ret;
372  }
373 
374  void skip_byte() {
375  DBUG_ASSERT(!eof());
376  yyleng++;
377  ptr++;
378  }
379 
380  bool skip_mb() {
381  size_t len = my_ismbchar(cs, ptr, input_buf_end);
382  if (len == 0) {
383  ptr++;
384  yyleng++;
385  return true;
386  }
387  ptr += len;
388  yyleng += len;
389  return false;
390  }
391 
392  void adjust_token() {
393  yytext = ptr;
394  yyleng = 0;
395  }
396 
397  void start_token() {
398  adjust_token();
399  raw_yytext = ptr;
400  }
401 
402  template <hint_lex_char_classes Separator>
403  void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles) {
404  DBUG_ASSERT(doubles > 0);
405 
406  size_t d = doubles;
407  char *t = to->str;
408  for (const char *s = from, *end = from + len; s < end;) {
409  switch (char_classes[(uchar)*s]) {
410  case HINT_CHR_MB: {
411  size_t len = my_ismbchar(cs, s, end);
412  DBUG_ASSERT(len > 1);
413  memcpy(t, s, len);
414  t += len;
415  s += len;
416  }
417  continue;
418  case Separator:
419  DBUG_ASSERT(char_classes[(uchar)*s] == Separator);
420  *t++ = *s++;
421  s++; // skip the 2nd separator
422  d--;
423  if (d == 0) {
424  memcpy(t, s, end - s);
425  to->length = len - doubles;
426  return;
427  }
428  continue;
429  case HINT_CHR_EOF:
430  DBUG_ASSERT(0);
431  to->length = 0;
432  return;
433  default:
434  *t++ = *s++;
435  }
436  }
437  DBUG_ASSERT(0);
438  to->length = 0;
439  return;
440  }
441 
442  void add_hint_token_digest();
443 
444  private:
445  /**
446  Helper function to check digest buffer for overflow before adding tokens.
447 
448  @param token A token number to add.
449  */
450  void add_digest(uint token) {
451  if (digest_state == NULL) return; // Digest buffer is full.
452 
453  Lexer_yystype fake_yylvalue;
454  /*
455  YYSTYPE::LEX_STRING is designed to accept non-constant "char *": there is
456  a consideration, that the lexer returns MEM_ROOT-allocated string values
457  there, and the rest of server is welcome to modify that strings inplace
458  (ind it does that in a few rare cases).
459  The digest calculator never modify YYSTYPE::LEX_STRING::str data, so
460  it is not practical to add extra memory allocation there: const_cast is
461  enough.
462  */
463  fake_yylvalue.lex_str.str = const_cast<char *>(yytext);
464  fake_yylvalue.lex_str.length = yyleng;
465 
466  digest_state = digest_add_token(digest_state, token, &fake_yylvalue);
467  }
468 };
469 
470 inline int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner) {
471  auto yylval = reinterpret_cast<Lexer_yystype *>(yacc_yylval);
472  int ret = scanner->get_next_token();
473  yylval->hint_string.str = scanner->yytext;
474  yylval->hint_string.length = scanner->yyleng;
475  return ret;
476 }
477 
478 void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg);
479 
480 #endif /* SQL_LEX_HINTS_ICLUDED */
#define DBUG_RETURN(a1)
Definition: my_dbug.h:84
const char * input_buf_end
Definition: sql_lex_hints.h:69
unsigned char uchar
Definition: my_inttypes.h:49
char * str
Definition: mysql_lex_string.h:35
void hint_lex_init_maps(CHARSET_INFO *cs, hint_lex_char_classes *hint_map)
Definition: mysql_lex_string.h:34
MYSQL_PLUGIN_IMPORT CHARSET_INFO * system_charset_info
Definition: mysqld.cc:1274
void skip_newline()
Definition: sql_lex_hints.h:360
void syntax_warning(const char *msg) const
Push a warning message into MySQL error stack with line and position information. ...
Definition: sql_lex_hints.cc:86
Some integer typedefs for easier portability.
size_t get_lineno() const
Definition: sql_lex_hints.h:99
Definition: sql_chars.h:84
int scan_number_or_ident()
Definition: sql_lex_hints.h:323
sql_digest_state * digest_state
Digest buffer interface to append tokens.
Definition: sql_lex_hints.h:78
const bool backslash_escapes
Definition: sql_lex_hints.h:64
Hint_scanner(THD *thd, size_t lineno_arg, const char *buf, size_t len, sql_digest_state *digest_state_arg)
Constructor.
Definition: sql_lex_hints.cc:50
const char * ptr
Definition: sql_lex_hints.h:71
Lexical scanner for hint comments.
Definition: sql_lex_hints.h:60
Sergei Dialog Client Authentication NULL
Definition: dialog.cc:352
Definition: parse_tree_hints.h:90
Definition: lex_symbol.h:38
void HINT_PARSER_error(THD *, Hint_scanner *, PT_hint_list **, const char *msg)
Definition: sql_lex_hints.cc:68
Definition: sql_chars.h:85
const struct SYMBOL * get_hash_symbol(const char *s, unsigned int len) const
Definition: sql_lex_hash.cc:75
Definition: sql_chars.h:77
uchar get_byte()
Definition: sql_lex_hints.h:366
char peek_byte() const
Definition: sql_lex_hints.h:345
bool skip_mb()
Definition: sql_lex_hints.h:380
void compact(LEX_STRING *to, const char *from, size_t len, size_t doubles)
Definition: sql_lex_hints.h:403
size_t yyleng
Length of the current token (see yytext)
Definition: sql_lex_hints.h:92
LEX_STRING lex_str
Definition: lexer_yystype.h:33
Definition: sql_chars.h:79
const unsigned int tok
Definition: lex_symbol.h:41
bool convert_string(LEX_STRING *to, const CHARSET_INFO *to_cs, const char *from, size_t from_length, const CHARSET_INFO *from_cs)
Definition: sql_class.cc:1475
char * strmake(const char *str, size_t size) const
Definition: sql_class.h:315
#define DBUG_ASSERT(A)
Definition: my_dbug.h:128
const char * yytext
Current token pointer (may be a converted, allocated string outside input_buf).
Definition: sql_lex_hints.h:88
hint_lex_char_classes
Definition: sql_chars.h:72
int scan_ident_or_keyword()
Definition: sql_lex_hints.h:291
int scan_query_block_name()
Definition: sql_lex_hints.h:267
#define DBUG_ENTER(a)
Definition: my_dbug.h:80
int scan_ident()
Definition: sql_lex_hints.h:228
int get_next_token()
Definition: sql_lex_hints.h:104
Definition: sql_chars.h:74
hint_lex_char_classes peek_class() const
Definition: sql_lex_hints.h:350
static struct wordvalue doubles[]
Definition: ctype-czech.cc:170
Definition: sql_chars.h:83
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:191
void add_hint_token_digest()
Add hint tokens to main lexer's digest calculation buffer.
Definition: sql_lex_hints.cc:101
const hint_lex_char_classes * char_classes
Definition: sql_lex_hints.h:66
unsigned int len
Definition: dbug_analyze.cc:216
unsigned int uint
Definition: uca-dump.cc:29
const char * name
Definition: lex_symbol.h:39
THD * thd
Definition: sql_lex_hints.h:61
t
Definition: dbug_analyze.cc:147
const CHARSET_INFO * cs
Definition: sql_lex_hints.h:62
int scan_quoted()
Definition: sql_lex_hints.h:159
char msg[1024]
Definition: test_sql_9_sessions.cc:282
Definition: m_ctype.h:358
const char * raw_yytext
Current token (yytext) origin in the input_buf.
Definition: sql_lex_hints.h:84
size_t length
Definition: mysql_lex_string.h:36
hint_lex_char_classes peek_class2() const
Definition: sql_lex_hints.h:354
bool charset_is_system_charset
is set if a statement accesses a temporary table created through CREATE TEMPORARY TABLE...
Definition: sql_class.h:2348
Definition: sql_chars.h:78
int HINT_PARSER_lex(YYSTYPE *yacc_yylval, Hint_scanner *scanner)
Definition: sql_lex_hints.h:470
Definition: sql_chars.h:75
void skip_byte()
Definition: sql_lex_hints.h:374
int prev_token
Definition: sql_lex_hints.h:73
const char * get_ptr() const
Definition: sql_lex_hints.h:100
Definition: sql_lex.h:2066
bool eof() const
Definition: sql_lex_hints.h:340
bool has_hints
True if a hint comment is not empty (has any hints).
Definition: sql_lex_hints.h:94
State data storage for digest_start, digest_add_token.
Definition: sql_digest_stream.h:35
#define my_ismbchar(s, a, b)
Definition: m_ctype.h:688
void adjust_token()
Definition: sql_lex_hints.h:392
A better implementation of the UNIX ctype(3) library.
void * alloc(size_t size)
Definition: sql_class.h:299
const char * input_buf
Definition: sql_lex_hints.h:68
sql_digest_state * digest_add_token(sql_digest_state *state, uint token, Lexer_yystype *yylval)
Definition: sql_digest.cc:379
int scan()
Definition: sql_lex_hints.h:112
void start_token()
Definition: sql_lex_hints.h:397
Definition: sql_chars.h:82
sql_digest_state * get_digest()
Definition: sql_lex_hints.h:101
#define yylval
Definition: fts0pars.cc:68
Definition: lexer_yystype.h:32
Definition: sql_chars.h:81
size_t lineno
Definition: sql_lex_hints.h:65
Definition: sql_chars.h:80
Definition: sql_chars.h:73
const CHARSET_INFO * charset() const
Definition: sql_class.h:2896
int scan_scale_or_ident()
Definition: sql_lex_hints.h:246
void add_digest(uint token)
Helper function to check digest buffer for overflow before adding tokens.
Definition: sql_lex_hints.h:450
static const Lex_hash hint_keywords
Definition: sql_lex_hash.h:47
const bool is_ansi_quotes
Definition: sql_lex_hints.h:63
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_class.h:776
const unsigned int length
Definition: lex_symbol.h:40