MySQL 8.0.40
Source Code Documentation
regexp_facade.h
Go to the documentation of this file.
1#ifndef SQL_REGEXP_REGEXP_FACADE_H_
2#define SQL_REGEXP_REGEXP_FACADE_H_
3
4/* Copyright (c) 2017, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file regexp_facade.h
29
30 This file hides most of ICU from the Item_func_regexp subclasses.
31*/
32
33#include <stdint.h>
34
35#include <optional>
36#include <string>
37
38#include "sql/item.h"
40#include "sql_string.h"
41
42extern int32_t opt_regexp_time_limit;
43extern int32_t opt_regexp_stack_limit;
44
45namespace regexp {
46
47/**
48 This class handles
49
50 - Conversion to the regexp library's character set, and buffers the
51 converted strings during matching.
52
53 - Re-compilation of the regular expression in case the pattern is a field
54 reference or otherwise non-constant.
55
56 - `NULL` handling.
57
58 - Conversion between indexing conventions. Clients of this class can use
59 one-based indexing, while the classes used by this class use zero-based
60 indexing.
61*/
63 public:
64 /**
65 Sets the pattern if called for the first time or the pattern_expr is
66 non-constant. This function is meant to be called for every row in a
67 command such as
68
69 SELECT regexp_like( column, 'a+' ) FROM table;
70
71 In this case, the client of this class may call SetPattern() for every
72 row without paying any penalty, as this becomes a no-op for all
73 subsequent calls. In cases such as
74
75 SELECT regexp_like( column, regexp_column ) FROM table;
76
77 the `regexp_column` expression is non-constant and hence we have to
78 recompile the regular expression for each row.
79 */
80 bool SetPattern(Item *pattern_expr, uint32_t flags);
81
82 /**
83 Tries to match the subject against the compiled regular expression.
84
85 @param subject_expr Is evaluated into a string to search.
86 @param start Start position, 1-based.
87 @param occurrence Which occurrence of the pattern should be searched for.
88
89 @retval true A match was found.
90 @retval false A match was not found.
91
92 @retval std::nullopt Either the engine was not compiled, or subject_expr
93 evaluates to NULL. This is useful for the Item_func_regexp object, since it
94 doesn't have to make a special case for when the regular expression is
95 NULL. Instead, the case is handled here in the facade.
96 */
97 std::optional<bool> Matches(Item *subject_expr, int start, int occurrence);
98
99 /**
100 Searches the subject for a match of the compiled regular expression and
101 returns a position.
102
103 @param subject_expr The string to search.
104 @param start Start position, 1-based.
105 @param occurrence Which occurrence of the pattern should be searched for.
106 @param after_match If true, the position following the end of the match
107 is returned. If false, the position before the match is returned.
108
109 @return The position selected by after_match, or an empty optional if no match is found.
110 */
111 std::optional<int> Find(Item *subject_expr, int start, int occurrence,
112 bool after_match);
113
114 /**
115 @param subject_expr The string to search.
116 @param replacement_expr The string to replace the match with.
117 @param start Start position, 1-based.
118 @param occurrence Which occurrence of the pattern should be searched for.
119 @param[in,out] result Holds the buffer for writing the result.
120 */
121 String *Replace(Item *subject_expr, Item *replacement_expr, int start,
122 int occurrence, String *result);
123
124 String *Substr(Item *subject_expr, int start, int occurrence, String *result);
125
126 /// Delete the "engine" data structure after execution.
127 void cleanup() { m_engine = nullptr; }
128
129 /// Did any operation return a warning? For unit testing.
130 bool EngineHasWarning() const {
131 return m_engine != nullptr && m_engine->HasWarning();
132 }
133
134 private:
135 /**
136 Resets the compiled regular expression with a new string.
137
138 @param subject_expr The new string to search.
139 @param start If present, start on this code point.
140
141 @retval false OK.
142 @retval true Either there is no compiled regular expression, or the
143 expression evaluated to `NULL`.
144 */
145 bool Reset(Item *subject_expr, int start = 1);
146
147 /**
148 Actually compiles the regular expression.
149 */
150 bool SetupEngine(Item *pattern_expr, uint flags);
151
152 /**
153 Converts a string position in m_current_subject.
154 @param position One-based code point position.
155 @return Zero-based UTF-16 position.
156 */
157 int ConvertCodePointToLibPosition(int position) const;
158
159 /**
160 Converts a string position in m_current_subject.
161 @param position Zero-based UTF-16 position.
162 @return Zero-based code point position.
163 */
164 int ConvertLibPositionToCodePoint(int position) const;
165
166 /**
167 Helper function for setting the result from SQL regular expression
168 functions that return a string value. Depending on character sets used by
169 arguments and result, this function may copy, convert or just set the
170 result. In particular, it handles the special case of the BINARY character
171 set being interpreted as CP-1252.
172
173 @param str The result string from the regexp function.
174 @param length Length in bytes.
175 @param[out] result The result string.
176 @return A pointer to the same string as the argument, or nullptr in case of
177 failure.
178 */
179 String *AssignResult(const char *str, size_t length, String *result);
180
181 /**
182 Used for all the actual regular expression matching, search-and-replace,
183 and positional and string information. If either the regular expression
184 pattern or the subject is `NULL`, this pointer is empty.
185 */
187
188 /**
189 ICU does not copy the subject string, so we keep the subject buffer
190 here. A call to Reset() causes it to be overwritten.
191
192 @see Regexp_engine::reset()
193 */
194 std::u16string m_current_subject;
195};
196
197} // namespace regexp
198
199#endif // SQL_REGEXP_REGEXP_FACADE_H_
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:853
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:168
This class handles.
Definition: regexp_facade.h:62
bool SetPattern(Item *pattern_expr, uint32_t flags)
Sets the pattern if called for the first time or the pattern_expr is non-constant.
Definition: regexp_facade.cc:127
int ConvertCodePointToLibPosition(int position) const
Converts a string position in m_current_subject.
Definition: regexp_facade.cc:155
String * Replace(Item *subject_expr, Item *replacement_expr, int start, int occurrence, String *result)
Definition: regexp_facade.cc:193
bool Reset(Item *subject_expr, int start=1)
Resets the compiled regular expression with a new string.
Definition: regexp_facade.cc:144
int ConvertLibPositionToCodePoint(int position) const
Converts a string position in m_current_subject.
Definition: regexp_facade.cc:163
std::optional< bool > Matches(Item *subject_expr, int start, int occurrence)
Tries to match the subject against the compiled regular expression.
Definition: regexp_facade.cc:170
bool EngineHasWarning() const
Did any operation return a warning? For unit testing.
Definition: regexp_facade.h:130
String * AssignResult(const char *str, size_t length, String *result)
Helper function for setting the result from SQL regular expression functions that return a string val...
Definition: regexp_facade.cc:210
void cleanup()
Delete the "engine" data structure after execution.
Definition: regexp_facade.h:127
String * Substr(Item *subject_expr, int start, int occurrence, String *result)
Definition: regexp_facade.cc:234
std::u16string m_current_subject
ICU does not copy the subject string, so we keep the subject buffer here.
Definition: regexp_facade.h:194
unique_ptr_destroy_only< Regexp_engine > m_engine
Used for all the actual regular expression matching, search-and-replace, and positional and string in...
Definition: regexp_facade.h:186
bool SetupEngine(Item *pattern_expr, uint flags)
Actually compiles the regular expression.
Definition: regexp_facade.cc:252
std::optional< int > Find(Item *subject_expr, int start, int occurrence, bool after_match)
Searches the subject for a match of the compiled regular expression and returns a position.
Definition: regexp_facade.cc:183
static int flags[50]
Definition: hp_test1.cc:40
static void start(mysql_harness::PluginFuncEnv *env)
Definition: http_auth_backend_plugin.cc:177
std::unique_ptr< T, Destroy_only< T > > unique_ptr_destroy_only
std::unique_ptr, but only destroying.
Definition: my_alloc.h:489
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1052
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76
Definition: errors.cc:45
int32_t opt_regexp_stack_limit
Definition: mysqld.cc:1298
int32_t opt_regexp_time_limit
Definition: mysqld.cc:1297
Our own string classes, used pervasively throughout the executor.
Definition: result.h:30
unsigned int uint
Definition: uca9-dump.cc:75