MySQL 8.3.0
Source Code Documentation
regexp_facade.h
Go to the documentation of this file.
1#ifndef SQL_REGEXP_REGEXP_FACADE_H_
2#define SQL_REGEXP_REGEXP_FACADE_H_
3
4/* Copyright (c) 2017, 2023, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file regexp_facade.h
28
29 This file hides most of ICU from the Item_func_regexp subclasses.
30*/
31
32#include <stdint.h>
33
34#include <optional>
35#include <string>
36
37#include "sql/item.h"
39#include "sql_string.h"
40
41namespace regexp {
42
44
45/**
46 This class handles
47
48 - Conversion to the regexp library's character set, and buffers the
49 converted strings during matching.
50
51 - Re-compilation of the regular expression in case the pattern is a field
52 reference or otherwise non-constant.
53
54 - `NULL` handling.
55
56 - Conversion between indexing conventions. Clients of this class can use
57 one-based indexing, while the classes used by this class use zero-based
58 indexing.
59*/
61 public:
62 /**
63 Sets the pattern if called for the first time or the pattern_expr is
64 non-constant. This function is meant to be called for every row in a
65 command such as
66
67 SELECT regexp_like( column, 'a+' ) FROM table;
68
69 In this case, the client of this class may call SetPattern() for every
70 row without paying any penalty, as this becomes a no-op for all
71 subsequent calls. In cases such as
72
73 SELECT regexp_like( column, regexp_column ) FROM table;
74
75 the `regexp_column` expression is non-constant and hence we have to
76 recompile the regular expression for each row.
77 */
78 bool SetPattern(Item *pattern_expr, uint32_t flags);
79
80 /**
81 Tries to match the subject against the compiled regular expression.
82
83 @param subject_expr Is evaluated into a string to search.
84 @param start Start position, 1-based.
85 @param occurrence Which occurrence of the pattern should be searched for.
86
87 @retval true A match was found.
88 @retval false A match was not found.
89
90 @retval std::nullopt Either the engine was not compiled, or subject_expr
91 evaluates to NULL. This is useful for the Item_func_regexp object, since it
92 doesn't have to make a special case for when the regular expression is
93 NULL. Instead, the case is handled here in the facade.
94 */
95 std::optional<bool> Matches(Item *subject_expr, int start, int occurrence);
96
97 /**
98 Searches the subject for a match of the compiled regular expression and
99 returns a position.
100
101 @param subject_expr The string to search.
102 @param start Start position, 1-based.
103 @param occurrence Which occurrence of the pattern should be searched for.
104 @param after_match If true, the position following the end of the match
105 is returned. If false, the position before the match is returned.
106
107 @return The position selected by after_match, or an empty optional if no match was found.
108 */
109 std::optional<int> Find(Item *subject_expr, int start, int occurrence,
110 bool after_match);
111
112 /**
113 @param subject_expr The string to search.
114 @param replacement_expr The string to replace the match with.
115 @param start Start position, 1-based.
116 @param occurrence Which occurrence of the pattern should be searched for.
117 @param[in,out] result Holds the buffer for writing the result.
118 */
119 String *Replace(Item *subject_expr, Item *replacement_expr, int start,
120 int occurrence, String *result);
121
122 String *Substr(Item *subject_expr, int start, int occurrence, String *result);
123
124 /// Delete the "engine" data structure after execution by resetting m_engine to null.
125 void cleanup() { m_engine = nullptr; }
126
127 /// Did any operation return a warning? Always false when there is no engine. For unit testing.
128 bool EngineHasWarning() const {
129 return m_engine != nullptr && m_engine->HasWarning();
130 }
131
132 private:
133 /**
134 Resets the compiled regular expression with a new string.
135
136 @param subject_expr The new string to search.
137 @param start If present, start on this code point.
138
139 @retval false OK.
140 @retval true Either there is no compiled regular expression, or the
141 expression evaluated to `NULL`.
142 */
143 bool Reset(Item *subject_expr, int start = 1);
144
145 /**
146 Actually compiles the regular expression.
147 */
148 bool SetupEngine(Item *pattern_expr, uint flags);
149
150 /**
151 Converts a string position in m_current_subject.
152 @param position One-based code point position.
153 @return Zero-based byte position.
154 */
155 int ConvertCodePointToLibPosition(int position) const;
156
157 /**
158 Converts a string position in m_current_subject.
159 @param position Zero-based UTF-16 position.
160 @return Zero-based code point position.
161 */
162 int ConvertLibPositionToCodePoint(int position) const;
163
164 /**
165 Helper function for setting the result from SQL regular expression
166 functions that return a string value. Depending on character sets used by
167 arguments and result, this function may copy, convert or just set the
168 result. In particular, it handles the special case of the BINARY character
169 set being interpreted as CP-1252.
170
171 @param str The result string from the regexp function.
172 @param length Length in bytes.
173 @param[out] result The result string.
174 @return A pointer to the same string as the `result` argument, or nullptr in
175 case of failure.
176 */
177 String *AssignResult(const char *str, size_t length, String *result);
178
179 /**
180 Used for all the actual regular expression matching, search-and-replace,
181 and positional and string information. If either the regular expression
182 pattern or the subject is `NULL`, this pointer is empty.
183 */
185
186 /**
187 ICU does not copy the subject string, so we keep the subject buffer
188 here. A call to Reset() causes it to be overwritten.
189
190 @see Regexp_engine::reset()
191 */
192 std::u16string m_current_subject;
193};
194
195} // namespace regexp
196
197#endif // SQL_REGEXP_REGEXP_FACADE_H_
Base class that is used to represent any kind of expression in a relational query.
Definition: item.h:933
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:166
This class handles.
Definition: regexp_facade.h:60
bool SetPattern(Item *pattern_expr, uint32_t flags)
Sets the pattern if called for the first time or the pattern_expr is non-constant.
Definition: regexp_facade.cc:129
int ConvertCodePointToLibPosition(int position) const
Converts a string position in m_current_subject.
Definition: regexp_facade.cc:157
String * Replace(Item *subject_expr, Item *replacement_expr, int start, int occurrence, String *result)
Definition: regexp_facade.cc:195
bool Reset(Item *subject_expr, int start=1)
Resets the compiled regular expression with a new string.
Definition: regexp_facade.cc:146
int ConvertLibPositionToCodePoint(int position) const
Converts a string position in m_current_subject.
Definition: regexp_facade.cc:165
std::optional< bool > Matches(Item *subject_expr, int start, int occurrence)
Tries to match the subject against the compiled regular expression.
Definition: regexp_facade.cc:172
bool EngineHasWarning() const
Did any operation return a warning? For unit testing.
Definition: regexp_facade.h:128
String * AssignResult(const char *str, size_t length, String *result)
Helper function for setting the result from SQL regular expression functions that return a string val...
Definition: regexp_facade.cc:212
void cleanup()
Delete the "engine" data structure after execution.
Definition: regexp_facade.h:125
String * Substr(Item *subject_expr, int start, int occurrence, String *result)
Definition: regexp_facade.cc:236
std::u16string m_current_subject
ICU does not copy the subject string, so we keep the subject buffer here.
Definition: regexp_facade.h:192
unique_ptr_destroy_only< Regexp_engine > m_engine
Used for all the actual regular expression matching, search-and-replace, and positional and string in...
Definition: regexp_facade.h:184
bool SetupEngine(Item *pattern_expr, uint flags)
Actually compiles the regular expression.
Definition: regexp_facade.cc:254
std::optional< int > Find(Item *subject_expr, int start, int occurrence, bool after_match)
Searches the subject for a match of the compiled regular expression and returns a position.
Definition: regexp_facade.cc:185
static int flags[50]
Definition: hp_test1.cc:39
static void start(mysql_harness::PluginFuncEnv *env)
Definition: http_auth_backend_plugin.cc:176
std::unique_ptr< T, Destroy_only< T > > unique_ptr_destroy_only
std::unique_ptr, but only destroying.
Definition: my_alloc.h:476
std::string str(const mysqlrouter::ConfigGenerator::Options::Endpoint &ep)
Definition: config_generator.cc:1065
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
Definition: errors.cc:44
CHARSET_INFO * regexp_lib_charset
Definition: regexp_facade.cc:39
Our own string classes, used pervasively throughout the executor.
Definition: m_ctype.h:422
Definition: result.h:29