MySQL  8.0.27
Source Code Documentation
item_regexp_func.h
Go to the documentation of this file.
1 #ifndef SQL_ITEM_REGEXP_FUNC_H_
2 #define SQL_ITEM_REGEXP_FUNC_H_
3 
4 /* Copyright (c) 2017, 2021, Oracle and/or its affiliates.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
9 
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
16 
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25 
26 /**
27  @file item_regexp_func.h
28 
29  The function classes for regular expression functions. They have a common
30  base class Item_func_regexp, which is also the prefix of their class
31  names. After the %Item_func prefix comes the name of the SQL function,
32  e.g. Item_func_regexp_instr represents the SQL function `REGEXP_INSTR`.
33 
34  Type resolution
35  ===============
36 
37  The type and name resolution procedure is hooked into by the
38  Item_func_regexp class, which implements both
39  Item_result_field::resolve_type() and Item::fix_fields().
40 
41  Collations
42  ==========
43 
44  The regular expression library doesn't deal with collations at all, but we
45  need them because the 'winning' collation of the pattern and the subject
46  strings dictates case-sensitivity. The winning collation is defined by
47  coercion rules, and we don't delve into that here. See
48  Item_func::agg_arg_charsets_for_comparison(). The call to this function is
49  done in resolve_type() as this appears to be an unwritten convention.
50 
51  Implementation
52  ==============
53 
54  All communication with the regular expression library is done through a
55  Regexp_facade object, instantiated in Item_func_regexp::fix_fields().
56 
57  @todo We now clean up ICU heap memory in Item_func_regexp::cleanup. Should
58  it be done more rarely? On session close?
59 */
60 
61 #include <assert.h>
62 #include <unicode/uregex.h>
63 
64 #include <optional>
65 #include <string>
66 
67 // assert
68 #include "my_inttypes.h" // MY_INT32_NUM_DECIMAL_DIGITS
69 #include "sql/item_cmpfunc.h"
70 #include "sql/item_strfunc.h"
71 #include "sql/mysqld.h" // make_unique_destroy_only
73 #include "sql_string.h" // String
74 
75 // GCC bug 80635.
76 #if defined(__GNUC__) && !defined(__clang__)
77 #pragma GCC diagnostic push
78 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
79 #endif
80 
81 /**
82  Base class for all regular expression function classes. Is responsible for
83  creating the Regexp_facade object.
84 */
85 class Item_func_regexp : public Item_func {
86  public:
87  Item_func_regexp(const POS &pos, PT_item_list *opt_list)
88  : Item_func(pos, opt_list) {}
89 
90  /**
91  Resolves the collation to use for comparison. The type resolution is done
92  in the subclass constructors.
93 
94  For all regular expression functions, i.e. REGEXP_INSTR, REGEXP_LIKE,
95  REGEXP_REPLACE and REGEXP_SUBSTR, it goes that the first two arguments
96  have to agree on a common collation. This collation is used to control
97  case-sensitivity.
98 
99  @see fix_fields()
100  */
101  bool resolve_type(THD *) override;
102 
103  /// Decides on the mode for matching, case sensitivity etc.
104  bool fix_fields(THD *thd, Item **) override;
105 
106  /// The expression for the subject string.
107  Item *subject() const { return args[0]; }
108 
109  /// The expression for the pattern string.
110  Item *pattern() const { return args[1]; }
111 
112  /// The `position` argument: 1 if absent, an empty optional if it is NULL.
113  std::optional<int> position() const {
114  int the_index = pos_arg_pos();
115  if (the_index != -1 && arg_count >= static_cast<uint>(the_index) + 1) {
116  int value = args[the_index]->val_int();
117  /*
118  Note: Item::null_value can't be trusted alone here; there are cases
119  (for the DATE data type in particular) where we can have it set
120  without Item::m_nullable being set! This really should be cleaned up,
121  but until that happens, we need to have a more conservative check.
122  */
123  if (args[the_index]->is_nullable() && args[the_index]->null_value)
124  return {};
125  else
126  return value;
127  }
128  return 1;
129  }
130 
131  /// The `occurrence` argument: 0 if absent, an empty optional if it is NULL.
132  std::optional<int> occurrence() const {
133  int the_index = occ_arg_pos();
134  if (the_index != -1 && arg_count >= static_cast<uint>(the_index) + 1) {
135  int value = args[the_index]->val_int();
136  /*
137  Note: Item::null_value can't be trusted alone here; there are cases
138  (for the DATE data type in particular) where we can have it set
139  without Item::m_nullable being set! This really should be cleaned up,
140  but until that happens, we need to have a more conservative check.
141  */
142  if (args[the_index]->is_nullable() && args[the_index]->null_value)
143  return {};
144  else
145  return value;
146  }
147  return 0;
148  }
149 
150  /// The value of the `match_parameter` argument, or an empty string if absent.
151  std::optional<std::string> match_parameter() const {
152  int the_index = match_arg_pos();
153  if (the_index != -1 && arg_count >= static_cast<uint>(the_index) + 1) {
154  StringBuffer<5> buf; // Longer match_parameter doesn't make sense.
155  String *s = args[the_index]->val_str(&buf);
156  if (s != nullptr)
157  return to_string(*s);
158  else
159  return {};
160  }
161  return std::string{};
162  }
163 
164  void cleanup() override;
165 
166  protected:
168  assert(fixed == 1);
169  longlong nr = val_int();
170  if (null_value) return nullptr;
171  str->set_int(nr, unsigned_flag, collation.collation);
172  return str;
173  }
174 
176  assert(fixed == 1);
177  longlong nr = val_int();
178  if (null_value) return nullptr; /* purecov: inspected */
180  return value;
181  }
182 
184  assert(fixed == 1);
185  return val_int();
186  }
187 
189  assert(fixed == 1);
190  int err_not_used;
191  const char *end_not_used;
192  String *res = val_str(&str_value);
193  if (res == nullptr) return 0.0;
194  return my_strntod(res->charset(), res->ptr(), res->length(), &end_not_used,
195  &err_not_used);
196  }
197 
199  assert(fixed == 1);
200  int err;
201  String *res = val_str(&str_value);
202  if (res == nullptr) return 0;
203  return my_strntoll(res->charset(), res->ptr(), res->length(), 10, nullptr,
204  &err);
205  }
206 
207  /**
208  The position in the argument list of 'position'. -1 means that the default
209  should be used.
210  */
211  virtual int pos_arg_pos() const = 0;
212 
213  /**
214  The position in the argument list of 'occurrence'. -1 means that the default
215  should be used.
216  */
217  virtual int occ_arg_pos() const = 0;
218 
219  /// The position in the argument list of match_parameter.
220  virtual int match_arg_pos() const = 0;
221 
222  bool set_pattern();
223 
225 };
226 
228  public:
230  : Item_func_regexp(pos, opt_list) {
232  }
233 
234  Item_result result_type() const override { return INT_RESULT; }
235 
236  bool fix_fields(THD *thd, Item **arguments) override;
237 
238  String *val_str(String *str) override { return convert_int_to_str(str); }
239 
240  double val_real() override { return convert_int_to_real(); }
241 
242  longlong val_int() override;
243 
244  const char *func_name() const override { return "regexp_instr"; }
245 
246  /// The value of the `return_option` argument, or its default if absent.
247  std::optional<int> return_option() const {
248  int the_index = retopt_arg_pos();
249  if (the_index != -1 && arg_count >= static_cast<uint>(the_index) + 1) {
250  int value = args[the_index]->val_int();
251  if (args[the_index]->null_value)
252  return std::optional<int>();
253  else
254  return value;
255  }
256  return 0;
257  }
258 
259  /**
260  @{
261 
262  Copy-pasted from Item_int_func. Usually, an SQL function returning INTEGER
263  just inherits Item_int_func and thus the implementation, but these classes
264  need to have Item_func_regexp as base class because of fix_fields().
265  */
266  bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override {
267  return get_date_from_int(ltime, fuzzydate);
268  }
269 
270  bool get_time(MYSQL_TIME *t) override { return get_time_from_int(t); }
271  /// @}
272 
273  protected:
274  int pos_arg_pos() const override { return 2; }
275  int occ_arg_pos() const override { return 3; }
276  /// The position in the argument list of `return_option`.
277  int retopt_arg_pos() const { return 4; }
278  int match_arg_pos() const override { return 5; }
279 
280  private:
281  bool resolve_type(THD *) final;
282 };
283 
285  public:
287  : Item_func_regexp(pos, opt_list) {
289  }
290 
291  Item_result result_type() const override { return INT_RESULT; }
292 
293  String *val_str(String *str) override { return convert_int_to_str(str); }
294 
295  double val_real() override { return convert_int_to_real(); }
296 
297  longlong val_int() override;
298 
299  const char *func_name() const override { return "regexp_like"; }
300 
301  bool is_bool_func() const override { return true; }
302 
303  /**
304  @{
305 
306  Copy-pasted from Item_int_func. Usually, an SQL function returning INTEGER
307  just inherits Item_int_func and thus the implementation, but these classes
308  need to have Item_func_regexp as base class because of fix_fields().
309  */
310  bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override {
311  return get_date_from_int(ltime, fuzzydate);
312  }
313 
314  bool get_time(MYSQL_TIME *t) override { return get_time_from_int(t); }
315  /// @}
316 
317  protected:
318  int pos_arg_pos() const override { return -1; }
319  int occ_arg_pos() const override { return -1; }
320  int match_arg_pos() const override { return 2; }
321 
322  private:
323  bool resolve_type(THD *) final;
324 };
325 
327  public:
329  : Item_func_regexp(pos, item_list) {}
330 
331  Item_result result_type() const override { return STRING_RESULT; }
332 
333  bool resolve_type(THD *) final;
334 
335  Item *replacement() { return args[2]; }
336 
337  longlong val_int() override { return convert_str_to_int(); }
338 
339  String *val_str(String *result) override;
340 
341  double val_real() override { return convert_str_to_real(); }
342 
343  const char *func_name() const override { return "regexp_replace"; }
344 
345  /**
346  @{
347 
348  Copy-pasted from Item_str_func. Usually, an SQL function returning a string
349  just inherits Item_str_func and thus the implementation, but these classes
350  need to have Item_func_regexp as base class because of fix_fields().
351  */
352  bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override {
353  return get_date_from_string(ltime, fuzzydate);
354  }
355 
356  bool get_time(MYSQL_TIME *t) override { return get_time_from_string(t); }
357  /// @}
358 
359  protected:
360  int pos_arg_pos() const override { return 3; }
361  int occ_arg_pos() const override { return 4; }
362  int match_arg_pos() const override { return 5; }
363 };
364 
366  public:
368  : Item_func_regexp(pos, item_list) {}
369 
370  Item_result result_type() const override { return STRING_RESULT; }
371 
372  bool resolve_type(THD *) final;
373 
374  longlong val_int() override { return convert_str_to_int(); }
375 
376  String *val_str(String *result) override;
377 
378  double val_real() override { return convert_str_to_real(); }
379 
380  const char *func_name() const override { return "regexp_substr"; }
381 
382  /**
383  @{
384 
385  Copy-pasted from Item_str_func. Usually, an SQL function returning a string
386  just inherits Item_str_func and thus the implementation, but these classes
387  need to have Item_func_regexp as base class because of fix_fields().
388  */
389  bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override {
390  return get_date_from_string(ltime, fuzzydate);
391  }
392 
393  bool get_time(MYSQL_TIME *t) override { return get_time_from_string(t); }
394  /// @}
395 
396  protected:
397  int pos_arg_pos() const override { return 2; }
398  int occ_arg_pos() const override { return 3; }
399  int match_arg_pos() const override { return 4; }
400 };
401 
404 
405  public:
406  explicit Item_func_icu_version(const POS &pos);
407 
408  bool itemize(Parse_context *pc, Item **res) override;
409 };
410 
411 #if defined(__GNUC__) && !defined(__clang__)
412 #pragma GCC diagnostic pop
413 #endif
414 
415 #endif // SQL_ITEM_REGEXP_FUNC_H_
const CHARSET_INFO * collation
Definition: item.h:174
Definition: item_regexp_func.h:402
Item_func_icu_version(const POS &pos)
Definition: item_regexp_func.cc:316
bool itemize(Parse_context *pc, Item **res) override
The same as contextualize() but with additional parameter.
Definition: item_regexp_func.cc:322
Definition: item_regexp_func.h:227
bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override
Copy-pasted from Item_int_func.
Definition: item_regexp_func.h:266
int retopt_arg_pos() const
The position in the argument list of return_option.
Definition: item_regexp_func.h:277
const char * func_name() const override
Definition: item_regexp_func.h:244
bool fix_fields(THD *thd, Item **arguments) override
Decides on the mode for matching, case sensitivity etc.
Definition: item_regexp_func.cc:157
Item_func_regexp_instr(const POS &pos, PT_item_list *opt_list)
Definition: item_regexp_func.h:229
String * val_str(String *str) override
Definition: item_regexp_func.h:238
double val_real() override
Definition: item_regexp_func.h:240
bool get_time(MYSQL_TIME *t) override
Definition: item_regexp_func.h:270
int occ_arg_pos() const override
The position in the argument list of 'occurrence'.
Definition: item_regexp_func.h:275
int match_arg_pos() const override
The position in the argument list of match_parameter.
Definition: item_regexp_func.h:278
std::optional< int > return_option() const
The value of the return_option argument, or its default if absent.
Definition: item_regexp_func.h:247
bool resolve_type(THD *) final
Resolves the collation to use for comparison.
Definition: item_regexp_func.cc:170
Item_result result_type() const override
Definition: item_regexp_func.h:234
longlong val_int() override
Definition: item_regexp_func.cc:178
int pos_arg_pos() const override
The position in the argument list of 'position'.
Definition: item_regexp_func.h:274
Definition: item_regexp_func.h:284
String * val_str(String *str) override
Definition: item_regexp_func.h:293
bool is_bool_func() const override
Definition: item_regexp_func.h:301
int occ_arg_pos() const override
The position in the argument list of 'occurrence'.
Definition: item_regexp_func.h:319
bool resolve_type(THD *) final
Resolves the collation to use for comparison.
Definition: item_regexp_func.cc:226
int pos_arg_pos() const override
The position in the argument list of 'position'.
Definition: item_regexp_func.h:318
longlong val_int() override
Definition: item_regexp_func.cc:202
Item_func_regexp_like(const POS &pos, PT_item_list *opt_list)
Definition: item_regexp_func.h:286
bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override
Copy-pasted from Item_int_func.
Definition: item_regexp_func.h:310
const char * func_name() const override
Definition: item_regexp_func.h:299
int match_arg_pos() const override
The position in the argument list of match_parameter.
Definition: item_regexp_func.h:320
double val_real() override
Definition: item_regexp_func.h:295
Item_result result_type() const override
Definition: item_regexp_func.h:291
bool get_time(MYSQL_TIME *t) override
Definition: item_regexp_func.h:314
Definition: item_regexp_func.h:326
const char * func_name() const override
Definition: item_regexp_func.h:343
double val_real() override
Definition: item_regexp_func.h:341
Item_func_regexp_replace(const POS &pos, PT_item_list *item_list)
Definition: item_regexp_func.h:328
bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override
Copy-pasted from Item_str_func.
Definition: item_regexp_func.h:352
longlong val_int() override
Definition: item_regexp_func.h:337
bool get_time(MYSQL_TIME *t) override
Definition: item_regexp_func.h:356
int pos_arg_pos() const override
The position in the argument list of 'position'.
Definition: item_regexp_func.h:360
int occ_arg_pos() const override
The position in the argument list of 'occurrence'.
Definition: item_regexp_func.h:361
bool resolve_type(THD *) final
Resolves the collation to use for comparison.
Definition: item_regexp_func.cc:233
Item * replacement()
Definition: item_regexp_func.h:335
Item_result result_type() const override
Definition: item_regexp_func.h:331
String * val_str(String *result) override
Definition: item_regexp_func.cc:260
int match_arg_pos() const override
The position in the argument list of match_parameter.
Definition: item_regexp_func.h:362
Definition: item_regexp_func.h:365
longlong val_int() override
Definition: item_regexp_func.h:374
int pos_arg_pos() const override
The position in the argument list of 'position'.
Definition: item_regexp_func.h:397
bool resolve_type(THD *) final
Resolves the collation to use for comparison.
Definition: item_regexp_func.cc:284
bool get_date(MYSQL_TIME *ltime, my_time_flags_t fuzzydate) override
Copy-pasted from Item_str_func.
Definition: item_regexp_func.h:389
double val_real() override
Definition: item_regexp_func.h:378
const char * func_name() const override
Definition: item_regexp_func.h:380
int match_arg_pos() const override
The position in the argument list of match_parameter.
Definition: item_regexp_func.h:399
int occ_arg_pos() const override
The position in the argument list of 'occurrence'.
Definition: item_regexp_func.h:398
String * val_str(String *result) override
Definition: item_regexp_func.cc:294
Item_func_regexp_substr(const POS &pos, PT_item_list *item_list)
Definition: item_regexp_func.h:367
bool get_time(MYSQL_TIME *t) override
Definition: item_regexp_func.h:393
Item_result result_type() const override
Definition: item_regexp_func.h:370
Base class for all regular expression function classes.
Definition: item_regexp_func.h:85
void cleanup() override
Called for every Item after use (preparation and execution).
Definition: item_regexp_func.cc:135
my_decimal * convert_int_to_decimal(my_decimal *value)
Definition: item_regexp_func.h:175
bool fix_fields(THD *thd, Item **) override
Decides on the mode for matching, case sensitivity etc.
Definition: item_regexp_func.cc:124
bool set_pattern()
Definition: item_regexp_func.cc:140
virtual int pos_arg_pos() const =0
The position in the argument list of 'position'.
unique_ptr_destroy_only< regexp::Regexp_facade > m_facade
Definition: item_regexp_func.h:224
std::optional< std::string > match_parameter() const
The value of the match_parameter argument, or an empty string if absent.
Definition: item_regexp_func.h:151
virtual int match_arg_pos() const =0
The position in the argument list of match_parameter.
Item * pattern() const
The expression for the pattern string.
Definition: item_regexp_func.h:110
longlong convert_str_to_int()
Definition: item_regexp_func.h:198
virtual int occ_arg_pos() const =0
The position in the argument list of 'occurrence'.
bool resolve_type(THD *) override
Resolves the collation to use for comparison.
Definition: item_regexp_func.cc:108
std::optional< int > occurrence() const
The value of the occurrence argument, or its default if absent.
Definition: item_regexp_func.h:132
String * convert_int_to_str(String *str)
Definition: item_regexp_func.h:167
std::optional< int > position() const
The value of the position argument, or its default if absent.
Definition: item_regexp_func.h:113
double convert_int_to_real()
Definition: item_regexp_func.h:183
Item_func_regexp(const POS &pos, PT_item_list *opt_list)
Definition: item_regexp_func.h:87
double convert_str_to_real()
Definition: item_regexp_func.h:188
Item * subject() const
The expression for the subject string.
Definition: item_regexp_func.h:107
Definition: item_func.h:93
Item ** args
Array of pointers to arguments.
Definition: item_func.h:100
Item ** arguments() const
Definition: item_func.h:127
uint arg_count
How many arguments in 'args'.
Definition: item_func.h:123
Definition: item.h:5273
Item_static_string_func(const Name_string &name_par, const char *str, size_t length, const CHARSET_INFO *cs, Derivation dv=DERIVATION_COERCIBLE)
Definition: item.h:5277
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:802
String str_value
str_values's main purpose is to cache the value in save_in_field
Definition: item.h:3245
DTCollation collation
Character set and collation properties assigned for this Item.
Definition: item.h:3252
void set_data_type_bool()
Definition: item.h:1333
bool is_nullable() const
Definition: item.h:3327
bool get_time_from_string(MYSQL_TIME *ltime)
Convert val_str() to time in MYSQL_TIME.
Definition: item.cc:1456
virtual longlong val_int()=0
bool fixed
True if item has been resolved.
Definition: item.h:3316
bool null_value
True if item is null.
Definition: item.h:3353
virtual String * val_str(String *str)=0
bool unsigned_flag
Definition: item.h:3354
bool get_date_from_string(MYSQL_TIME *ltime, my_time_flags_t flags)
Convert val_str() to date in MYSQL_TIME.
Definition: item.cc:1369
bool get_date_from_int(MYSQL_TIME *ltime, my_time_flags_t flags)
Convert val_int() to date in MYSQL_TIME.
Definition: item.cc:1397
void set_data_type_longlong()
Set the data type of the Item to be longlong.
Definition: item.h:1345
bool get_time_from_int(MYSQL_TIME *ltime)
Convert val_int() to time in MYSQL_TIME.
Definition: item.cc:1484
Wrapper class for an Item list head, used to allocate Item lists in the parser in a context-independe...
Definition: parse_tree_helpers.h:101
Base class for parse tree nodes (excluding the Parse_tree_root hierarchy)
Definition: parse_tree_node_base.h:102
String class wrapper with a preallocated buffer of size buff_sz.
Definition: sql_string.h:628
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:165
const char * ptr() const
Definition: sql_string.h:247
size_t length() const
Definition: sql_string.h:239
const CHARSET_INFO * charset() const
Definition: sql_string.h:238
For each client connection we create a separate thread with THD serving as a thread/connection descri...
Definition: sql_class.h:821
my_decimal class limits 'decimal_t' type to what we need in MySQL.
Definition: my_decimal.h:92
#define E_DEC_FATAL_ERROR
Definition: decimal.h:148
char * pos
Definition: do_ctype.cc:76
static std::string to_string(const LEX_STRING &str)
Definition: lex_string.h:48
#define my_strntoll(s, a, b, c, d, e)
Definition: m_ctype.h:734
#define my_strntod(s, a, b, c, d)
Definition: m_ctype.h:738
std::unique_ptr< T, Destroy_only< T > > unique_ptr_destroy_only
std::unique_ptr, but only destroying.
Definition: my_alloc.h:464
int int2my_decimal(uint mask, longlong i, bool unsigned_flag, my_decimal *d)
Definition: my_decimal.h:355
Some integer typedefs for easier portability.
long long int longlong
Definition: my_inttypes.h:54
unsigned int my_time_flags_t
Flags to str_to_datetime and number_to_datetime.
Definition: my_time.h:82
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1056
Definition: buf0block_hint.cc:29
static Value err()
Create a Value object that represents an error condition.
Definition: json_binary.cc:908
const string value("\"Value\"")
This file hides most of ICU from the Item_func_regexp subclasses.
Our own string classes, used pervasively throughout the executor.
Definition: mysql_time.h:81
Environment data for the contextualization phase.
Definition: parse_tree_node_base.h:90
Bison "location" class.
Definition: parse_location.h:42
Definition: result.h:29
unsigned int uint
Definition: uca-dump.cc:29
Item_result
Type of the user defined function return slot and arguments.
Definition: udf_registration_types.h:38
@ STRING_RESULT
not valid for UDFs
Definition: udf_registration_types.h:40
@ INT_RESULT
double
Definition: udf_registration_types.h:42