MySQL 9.1.0
Source Code Documentation
regexp_facade.h
Go to the documentation of this file.
1#ifndef SQL_REGEXP_REGEXP_FACADE_H_
2#define SQL_REGEXP_REGEXP_FACADE_H_
3
4/* Copyright (c) 2017, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file regexp_facade.h
29
30 This file hides most of ICU from the Item_func_regexp subclasses.
31*/
32
33#include <stdint.h>
34
35#include <optional>
36#include <string>
37
38#include "sql/item.h"
40#include "sql_string.h"
41
42namespace regexp {
43
45
46/**
47 This class handles
48
49 - Conversion to the regexp library's character set, and buffers the
50 converted strings during matching.
51
52 - Re-compilation of the regular expression in case the pattern is a field
53 reference or otherwise non-constant.
54
55 - `NULL` handling.
56
57 - Conversion between indexing conventions. Clients of this class can use
58 one-based indexing, while the classes used by this class use zero-based
59 indexing.
60*/
61class Regexp_facade {
62 public:
63 /**
64 Sets the pattern if called for the first time or the pattern_expr is
65 non-constant. This function is meant to be called for every row in a
66 command such as
67
68 SELECT regexp_like( column, 'a+' ) FROM table;
69
70 In this case, the client of this class may call SetPattern() for every
71 row without paying any penalty, as this becomes a no-op for all
72 consecutive calls. In cases such as
73
74 SELECT regexp_like( column, regexp_column ) FROM table;
75
76 The `regexp_column` expression is non-constant and hence we have to
77 recompile the regular expression for each row.
78 */
79 bool SetPattern(Item *pattern_expr, uint32_t flags);
80
81 /**
82 Tries to match the subject against the compiled regular expression.
83
84 @param subject_expr Is evaluated into a string to search.
85 @param start Start position, 1-based.
86 @param occurrence Which occurrence of the pattern should be searched for.
87
88 @retval true A match was found.
89 @retval false A match was not found.
90
91 @retval std::nullopt Either the engine was not compiled, or subject_expr
92 evaluates to NULL. This is useful for the Item_func_regexp object, since it
93 doesn't have to make a special case for when the regular expression is
94 NULL. Instead, the case is handled here in the facade.
95 */
96 std::optional<bool> Matches(Item *subject_expr, int start, int occurrence);
97
98 /**
99 Searches the subject for a match of the compiled regular expression and
100 returns a position.
101
102 @param subject_expr The string to search.
103 @param start Start position, 1-based.
104 @param occurrence Which occurrence of the pattern should be searched for.
105 @param after_match If true, the position following the end of the match
106 is returned. If false, the position before the match is returned.
107
108 @return The position of the match (before the match, or following its end if after_match is true), or an empty optional if not found.
109 */
110 std::optional<int> Find(Item *subject_expr, int start, int occurrence,
111 bool after_match);
112
113 /**
114 @param subject_expr The string to search.
115 @param replacement_expr The string to replace the match with.
116 @param start Start position, 1-based.
117 @param occurrence Which occurrence of the pattern should be searched for.
118 @param[in,out] result Holds the buffer for writing the result.
119 */
120 String *Replace(Item *subject_expr, Item *replacement_expr, int start,
121 int occurrence, String *result);
122
123 String *Substr(Item *subject_expr, int start, int occurrence, String *result);
124
125 /// Delete the "engine" data structure after execution.
126 void cleanup() { m_engine = nullptr; }
127
128 /// Did any operation return a warning? For unit testing.
129 bool EngineHasWarning() const {
130 return m_engine != nullptr && m_engine->HasWarning();
131 }
132
133 private:
134 /**
135 Resets the compiled regular expression with a new string.
136
137 @param subject_expr The new string to search.
138 @param start If present, start on this code point.
139
140 @retval false OK.
141 @retval true Either there is no compiled regular expression, or the
142 expression evaluated to `NULL`.
143 */
144 bool Reset(Item *subject_expr, int start = 1);
145
146 /**
147 Actually compiles the regular expression.
148 */
149 bool SetupEngine(Item *pattern_expr, uint flags);
150
151 /**
152 Converts a string position in m_current_subject.
153 @param position One-based code point position.
154 @return Zero-based UTF-16 position, i.e. the indexing used by the regexp library.
155 */
156 int ConvertCodePointToLibPosition(int position) const;
157
158 /**
159 Converts a string position in m_current_subject.
160 @param position Zero-based UTF-16 position.
161 @return Zero-based code point position.
162 */
163 int ConvertLibPositionToCodePoint(int position) const;
164
165 /**
166 Helper function for setting the result from SQL regular expression
167 functions that return a string value. Depending on character sets used by
168 arguments and result, this function may copy, convert or just set the
169 result. In particular, it handles the special case of the BINARY character
170 set being interpreted as CP-1252.
171
172 @param str The result string from the regexp function.
173 @param length Length in bytes.
174 @param[out] result The result string.
175 @return A pointer to the same string as the argument, or nullptr in case of
176 failure.
177 */
178 String *AssignResult(const char *str, size_t length, String *result);
179
180 /**
181 Used for all the actual regular expression matching, search-and-replace,
182 and positional and string information. If either the regular expression
183 pattern or the subject is `NULL`, this pointer is empty.
184 */
185 unique_ptr_destroy_only<Regexp_engine> m_engine;
186
187 /**
188 ICU does not copy the subject string, so we keep the subject buffer
189 here. A call to Reset() causes it to be overwritten.
190
191 @see Regexp_engine::reset()
192 */
193 std::u16string m_current_subject;
194};
195
196} // namespace regexp
197
198#endif // SQL_REGEXP_REGEXP_FACADE_H_
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:930
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:167
This class handles.
Definition: regexp_facade.h:61
bool SetPattern(Item *pattern_expr, uint32_t flags)
Sets the pattern if called for the first time or the pattern_expr is non-constant.
Definition: regexp_facade.cc:130
int ConvertCodePointToLibPosition(int position) const
Converts a string position in m_current_subject.
Definition: regexp_facade.cc:158
String * Replace(Item *subject_expr, Item *replacement_expr, int start, int occurrence, String *result)
Definition: regexp_facade.cc:196
bool Reset(Item *subject_expr, int start=1)
Resets the compiled regular expression with a new string.
Definition: regexp_facade.cc:147
int ConvertLibPositionToCodePoint(int position) const
Converts a string position in m_current_subject.
Definition: regexp_facade.cc:166
std::optional< bool > Matches(Item *subject_expr, int start, int occurrence)
Tries to match the subject against the compiled regular expression.
Definition: regexp_facade.cc:173
bool EngineHasWarning() const
Did any operation return a warning? For unit testing.
Definition: regexp_facade.h:129
String * AssignResult(const char *str, size_t length, String *result)
Helper function for setting the result from SQL regular expression functions that return a string val...
Definition: regexp_facade.cc:213
void cleanup()
Delete the "engine" data structure after execution.
Definition: regexp_facade.h:126
String * Substr(Item *subject_expr, int start, int occurrence, String *result)
Definition: regexp_facade.cc:237
std::u16string m_current_subject
ICU does not copy the subject string, so we keep the subject buffer here.
Definition: regexp_facade.h:193
unique_ptr_destroy_only< Regexp_engine > m_engine
Used for all the actual regular expression matching, search-and-replace, and positional and string in...
Definition: regexp_facade.h:185
bool SetupEngine(Item *pattern_expr, uint flags)
Actually compiles the regular expression.
Definition: regexp_facade.cc:255
std::optional< int > Find(Item *subject_expr, int start, int occurrence, bool after_match)
Searches the subject for a match of the compiled regular expression and returns a position.
Definition: regexp_facade.cc:186
static int flags[50]
Definition: hp_test1.cc:40
static void start(mysql_harness::PluginFuncEnv *env)
Definition: http_auth_backend_plugin.cc:180
std::unique_ptr< T, Destroy_only< T > > unique_ptr_destroy_only
std::unique_ptr, but only destroying.
Definition: my_alloc.h:480
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1105
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76
Definition: errors.cc:45
CHARSET_INFO * regexp_lib_charset
Definition: regexp_facade.cc:40
Our own string classes, used pervasively throughout the executor.
Definition: m_ctype.h:421
Definition: result.h:30