MySQL 9.6.0
Source Code Documentation
parser.h
Go to the documentation of this file.
1// Copyright (c) 2024, 2025, Oracle and/or its affiliates.
2//
3// This program is free software; you can redistribute it and/or modify
4// it under the terms of the GNU General Public License, version 2.0,
5// as published by the Free Software Foundation.
6//
7// This program is designed to work with certain software (including
8// but not limited to OpenSSL) that is licensed under separate terms,
9// as designated in a particular file or component or in included license
10// documentation. The authors of MySQL hereby grant you an additional
11// permission to link the program and your derivative works with the
12// separately licensed software that they have either included with
13// the program or referenced in the documentation.
14//
15// This program is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18// GNU General Public License, version 2.0, for more details.
19//
20// You should have received a copy of the GNU General Public License
21// along with this program; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23
24#ifndef MYSQL_STRCONV_DECODE_PARSER_H
25#define MYSQL_STRCONV_DECODE_PARSER_H
26
27/// @file
28/// Experimental API header
29
30#include <string_view> // string_view
31#include <type_traits> // remove_cvref_t
32#include "mysql/strconv/decode/parse_options.h" // Is_parse_options
33#include "mysql/strconv/decode/parse_position.h" // Parse_position
34#include "mysql/strconv/decode/parse_result.h" // Parse_result
35#include "mysql/strconv/encode/out_str_write.h" // out_str_write
36#include "mysql/strconv/encode/string_target.h" // Is_string_target
37#include "mysql/strconv/formats/resolve_format.h" // resolve_format
38#include "mysql/utils/return_status.h" // Return_status
39
40/// @addtogroup GroupLibsMysqlStrconv
41/// @{
42
43namespace mysql::strconv {
44
45// Forward declarations
46class Parser;
47
48template <Is_format Format_t>
49class Fluent_parser;
50
51namespace detail {
52/// True if `decode_impl` can be invoked with the given format type and
53/// object type.
54template <class Format_t, class Object_t>
56 requires(Format_t format, Parser parser, Object_t obj) {
58 };
59} // namespace detail
60
61/// Object used to parse strings. This holds state, including the parsed string,
62/// position, success/failure status, and error messages. It is used and
63/// manipulated by `decode_impl` functions, and returned by `decode`.
64///
65/// It aggregates a `Parse_position` and a `Parse_result`. We extend it with
66/// higher-level functions that read from the string, advance the position, and
67/// store the success/failure status.
68class Parser : public detail::Parse_position<Parser>,
71
72 public:
73 /// Be default-constructible. The resulting Parser is a singular object,
74 /// which may only be used as the target of an assignment operation.
75 Parser() = default;
76
77 /// Construct a new Parser object.
78 ///
79 /// @param source Source string.
80 ///
81 /// @param pos_arg Current position. Defaults to 0, i.e., the beginning.
82 explicit Parser(const std::string_view &source, std::size_t pos_arg = 0)
 // Both arguments are forwarded unchanged to the Parse_position base; this
 // constructor has no other initializers and an empty body.
83 : Parse_position(source, pos_arg) {}
84
85 // ==== Report parse_error at current position ====
86
87 /// Store a result representing that the requested object could not be parsed
88 /// because the string is wrong at the current position.
89 ///
90 /// @param message Error message, in the form of a full sentence with leading
91 /// capital but no period. The full message will be constructed by
92 /// concatenating a string like " after N characters [...]" to the given
93 /// message.
94 void set_parse_error(const std::string_view &message) {
96 }
97
98 /// Store a result representing that the requested object could not be parsed
99 /// because the string is wrong at the current position.
100 ///
101 /// @param expected_string A string that was expected but not found at the
102 /// current position. For example, if a comma was expected, this should be
103 /// ",". The full message will be constructed by escaping any special
104 /// characters in `expected_string` and inserting it into a message like
105 /// "Expected "expected_string" after N characters [...]".
107 const std::string_view &expected_string) {
109 int_pos());
110 }
111
112 // ==== Skip a string ====
113
114 /// Skip occurrences of the literal string `sv`, if found.
115 ///
116 /// This overload is for parse options where the Repeat component allows for 0
117 /// matches. Thus the function cannot fail and can return void.
118 ///
119 /// @param opt Parse options that describe how to parse. The Repeat option is
120 /// used to determine the number of repetitions. Any `before_token` and
121 /// `after_token` members of the Format are invoked before and after skipping
122 /// `sv`.
123 ///
124 /// @param sv String to skip.
125 void skip(const Is_parse_options_optional auto &opt,
126 const std::string_view &sv) {
 // Delegate to do_skip. The returned status is only inspected by the assert
 // below, hence [[maybe_unused]] for builds where assert expands to nothing.
127 [[maybe_unused]] auto ret = do_skip(opt, sv);
 // This overload returns void, so a non-ok status here would be lost; the
 // assert documents that it is not expected for these parse options.
128 assert(ret == mysql::utils::Return_status::ok);
129 }
130
131 /// Skip occurrences of the literal string `sv`, if found.
132 ///
133 /// This overload is for parse options where the Repeat component may require
134 /// 1 or more matches. Therefore, the function can fail and is declared
135 /// `[[nodiscard]]`.
136 ///
137 /// @param opt Parse options that describe how to parse. The Repeat option is
138 /// used to determine the number of repetitions. Any `before_token` and
139 /// `after_token` members of the Format are invoked before and after skipping
140 /// `sv`.
141 ///
142 /// @param sv String to skip.
143 ///
144 /// @return `Return_status::ok` or `Return_status::error`.
145 [[nodiscard]] Return_status_t skip(const Is_parse_options auto &opt,
146 const std::string_view &sv) {
 // Thin public wrapper: forward to the private helper and hand its status
 // back to the caller unchanged.
147 return do_skip(opt, sv);
148 }
149
150 /// Determine if a prefix of @c remaining_str() matches @c str and return the
151 /// length.
152 ///
153 /// If the format has @c before_token and/or @c after_token members, they will
154 /// be invoked before/after attempting the match.
155 ///
156 /// This function does not alter the position or status of this object.
157 ///
158 /// @param format The format: in case this has a `before_token` and/or an
159 /// `after_token` member, those functions will be invoked before and after
160 /// matching `str`. Otherwise, the format is unused (in which case you may use
161 /// the overload that does not take a format parameter).
162 ///
163 /// @param str String to match at the current position.
164 ///
165 /// @return If there is a match, return its length. This is at least
166 /// `str.size()`, but may be longer in case `before_token` and/or
167 /// `after_token` advanced the position. If there is no match, returns 0.
168 [[nodiscard]] std::size_t match_length(const Is_format auto &format,
169 std::string_view str) {
170 assert(!str.empty());
171 // If there is a match, advance the position to the end of the match and
172 // return `ok`. If there is no match, return `error`; in this case the
173 // position may or may not have advanced and the caller needs to restore it.
174 auto worker = [&]() -> Return_status_t {
175 this->before_token(format);
176 if (this->remaining_size() < str.size()) return Return_status_t::error;
177 if (std::memcmp(this->pos(), str.data(), str.size()) != 0)
179 this->advance(str.size());
180 this->after_token(format);
181 return Return_status_t::ok;
182 };
183 std::size_t ret{0};
184 auto before_pos = int_pos();
185 if (worker() == Return_status_t::ok) ret = int_pos() - before_pos;
186 set_int_pos(before_pos);
187 return ret;
188 }
189
190 private:
191 /// Helper to implement the two overloads of @c skip.
192 ///
193 /// @param opt Parse options
194 ///
195 /// @param sv String to skip
196 ///
197 /// @return Return_status::ok or `Return_status::error`.
198 [[nodiscard]] Return_status_t do_skip(const Is_parse_options auto &opt,
199 const std::string_view &sv) {
200 return call(opt, [&] {
201 std::size_t length = this->match_length(get_format(opt), sv);
202 if (length == 0) {
204 return;
205 }
207 });
208 }
209
210 // ==== Parse into an object ====
211
212 public:
213 /// Parse into the given object.
214 ///
215 /// @param opt Parse options that describe how to parse. The Repeat component
216 /// is used to determine the number of repetitions. The Format component is
217 /// used to invoke the correct decode_impl function, and also the
218 /// `before_token` and `after_token` members are invoked, if present. The
219 /// Checker component is used to validate the object after parsing.
220 ///
221 /// @param obj Target object to parse into.
222 template <class Object_t>
223 requires(!Is_out_str<Object_t>)
224 [[nodiscard]] Return_status_t
225 read(const Is_parse_options auto &opt, Object_t &obj) {
 // Types satisfying Is_out_str are excluded by the requires-clause above;
 // any other target object is forwarded to do_read unchanged.
226 return do_read(opt, obj);
227 }
228
229 private:
230 /// If @c format has a member function @c before_token, call it with *this
231 /// as argument.
232 void before_token(const Is_format auto &format) {
 // Compile-time detection: the call is only instantiated when
 // `format.before_token(pos)` is a valid expression for a `Parser &`.
 // For formats without such a member this function does nothing.
233 if constexpr (requires(Parser & pos) { format.before_token(pos); }) {
234 format.before_token(*this);
235 }
236 }
237
238 /// If @c format has a member function @c after_token, call it with *this
239 /// as argument.
240 void after_token(const Is_format auto &format) {
 // Compile-time detection: the call is only instantiated when
 // `format.after_token(pos)` is a valid expression for a `Parser &`.
 // For formats without such a member this function does nothing.
241 if constexpr (requires(Parser & pos) { format.after_token(pos); }) {
242 format.after_token(*this);
243 }
244 }
245
246 /// Helper type predicate used by detail::resolve_format. It has the static
247 /// constexpr bool member variable `value` which is true if `decode_impl`
248 /// has been defined for the `Format_t` and `Object_t` types given by the
249 /// template arguments.
250 ///
251 /// @tparam Format_t Format to test.
252 ///
253 /// @tparam Object_t Object type to test.
254 template <class Format_t, class Object_t>
256 : public std::bool_constant<
257 detail::Can_invoke_decode_impl<Format_t, Object_t>> {};
258
259 /// Resolve the format, using the rules to deduce format based on default
260 /// format and parent format, and parse the given object using the resolved
261 /// format.
262 ///
263 /// This invokes `before_token` and `after_token` for the format given by
264 /// `opt`, not for the resolved format.
265 ///
266 /// @param opt Parse options
267 ///
268 /// @param[in,out] out Object to parse.
269 ///
270 /// @return Return_status::ok on success; Return_status::error if an error
271 /// occurred.
272 [[nodiscard]] Return_status_t do_read(const Is_parse_options auto &opt,
273 auto &out) {
274 // clang-tidy complains that resolve_format may return void. That's true
275 // only in cases where we expect a compilation error so this is valid.
276 // NOLINTBEGIN
277 auto format = get_format(opt);
278 auto resolved_format =
281 return call(opt, [&] {
283 decode_impl(resolved_format, *this, out);
284 if (!is_ok()) return;
286 });
287 // NOLINTEND
288 }
289
290 // ==== Parse into an out_str ====
291
292 public:
293 /// Read from this object to the given Output String Wrapper, using a
294 /// decode_impl function that takes an Is_string_target output object.
295 ///
296 /// This function will create String Target objects wrapping the Output String
297 /// Wrapper and pass them to `decode_impl`.
298 ///
299 /// @param opt Parse Options that describe how to parse. The Repeat component
300 /// is used to determine the number of repetitions. The Format component is
301 /// used to invoke the correct decode_impl function, and also the
302 /// `before_token` and `after_token` members are invoked, if present.
303 ///
304 /// For this function, the Parse Options must not have a Checker component. If
305 /// validation is needed for the object type, perform the validation in
306 /// decode_impl when passed a String_counter. (This pattern prevents
307 /// allocation in case the string cannot be parsed. Such an allocation would
308 /// be unnecessary, e.g. when this is invoked from a higher-level object
309 /// which is invoked using Repeat::any.)
310 ///
311 /// @param out_str Output String Wrapper to parse into.
312 ///
313 /// @return Return_status_t::error if `is_ok` returns false after the call,
314 /// i.e., either the string could not be parsed, or an out-of-memory condition
315 /// occurred when growing the output buffer. Otherwise, returns
316 /// `Return_status::ok`.
318 const Is_parse_options_nocheck auto &opt,
319 const Is_out_str auto &out_str) {
320 auto ret = read_to_out_str(
321 [&](Is_string_target auto &target) {
322 return this->do_read(opt, target);
323 },
324 out_str);
325 assert((ret == Return_status_t::ok) == is_ok());
326 return ret;
327 }
328
329 /// Invoke the given string producer function, which is assumed to read from
330 /// the current Parser object and report errors to this object.
331 ///
332 /// The output will be written to the given `out_str` object.
333 ///
334 /// @param producer String producer to invoke. This must accept a single
335 /// argument whose type is either String_writer or String_counter.
336 ///
337 /// @param out_str Output string wrapper used as back-end by the
338 /// String_writer. This may be resized according to its resize policy.
339 ///
340 /// @return Return_status_t::error if `is_ok` returns false after the call,
341 /// i.e., either the string could not be parsed, or an out-of-memory condition
342 /// occurred when growing the output buffer. Otherwise, returns
343 /// `Return_status::ok`.
345 const Is_string_producer auto &producer, const Is_out_str auto &out_str) {
346 return out_str_write(
347 out_str,
348 // Wrapper around the producer that rewinds the parse position after
349 // computing the size.
350 [&](Is_string_target auto &target) {
351 using Target_t = std::remove_cvref_t<decltype(target)>;
352 [[maybe_unused]] auto before = this->int_pos();
353 auto ret = mysql::utils::void_to_ok([&] { producer(target); });
354 // The string producer may only return error if it first sets an
355 // error status in this object.
356 if (ret == Return_status_t::error) assert(!this->is_ok());
357 if constexpr (Target_t::target_type == Target_type::counter) {
358 // For counters: restore the position, since counters must not have
359 // side effects (semantic requirements for Is_string_producer), and
360 // because we must start the parser at the same position when
361 // invoking the producer on the writer.
362 this->set_int_pos(before);
363 }
365 },
366 // Handle OOM by setting the status in parser.
367 [&] { this->set_oom(); });
368 }
369
370 // ==== Parse using an arbitrary function ====
371
372 /// Parse using the given function, using empty parse options.
373 ///
374 /// @param func Function that parses a single token. This must be invocable
375 /// without arguments, and update the status of this object in case an error
376 /// occurs.
377 [[nodiscard]] Return_status_t call(const std::invocable auto &func) {
 // Delegate to the two-argument overload, passing a default-constructed
 // Empty_parse_options object as the options.
378 return call(Empty_parse_options{}, func);
379 }
380
381 /// Parse using the given function, using the given parse options.
382 ///
383 /// @param opt Parse options that describe how to parse. The Repeat component
384 /// is used to determine the number of repetitions. The Format component is
385 /// unused. The Checker component is used to validate the string after
386 /// parsing (once per repeated element).
387 ///
388 /// @param func Function that parses a single token. This must be invocable
389 /// without arguments, and update the status of this object in case an error
390 /// occurs.
391 [[nodiscard]] Return_status_t call(const Is_parse_options auto &opt,
392 const std::invocable auto &func) {
393 assert(is_ok());
394
395 // The position to rewind to in case parsing/checking fails
396 auto before_pos = int_pos();
397
398 // Invoke `func`. On success, invoke the checker. If either the parser or
399 // the checker fails, restore the position to `before_pos`. If the checker
400 // fails with a parse error, also move the error position to `before_pos`.
401 auto invoke_and_check = [&] {
402 func();
403 if (is_ok()) {
404 invoke_checker(opt);
405 if (!is_ok()) {
406 // Validation error refers to what we parsed at `before_pos`
407 if (is_parse_error()) update_parse_error_pos(before_pos);
408 // After error, rewind to `before_pos`
409 set_int_pos(before_pos);
410 }
411 } else {
412 // After error, rewind to `before_pos`
413 set_int_pos(before_pos);
414 }
415 };
416
417 // Mandatorily parse `min` times
418 std::size_t count = 0;
419 if (get_repeat(opt).min() != 0) {
420 for (; count != get_repeat(opt).min(); ++count) {
421 invoke_and_check();
422 if (!is_ok()) return Return_status_t::error;
423 }
424 }
425
426 // Optionally parse `max-min` times
427 for (; count != get_repeat(opt).max(); ++count) {
428 // If last iteration didn't advance the position or produce a parse error,
429 // next iteration won't either. So we know we would loop the maximum
430 // repetitions if we tried. No need to actually loop, just return instead.
431 // This optimization is essential in order for Repeat::any() to terminate
432 // in case each iteration may match a zero-length string.
433 if (count > 0 && int_pos() == before_pos) return Return_status_t::ok;
434
435 before_pos = int_pos();
436 invoke_and_check();
437 if (!is_ok()) {
438 if (is_parse_error()) {
439 // Replace parse error by ok status.
442 return Return_status_t::ok;
443 }
444 // Propagate non-parse-errors to caller.
445 assert(is_store_error());
447 }
448 }
450 return Return_status_t::ok;
451 }
452
453 template <class Format_t>
455 return {*this, format};
456 }
457}; // class Parser
458
// Compile-time check that Parser satisfies the std::contiguous_iterator
// concept; the build fails here if a change to the class breaks that.
459static_assert(std::contiguous_iterator<Parser>);
460
461} // namespace mysql::strconv
462
463// addtogroup GroupLibsMysqlStrconv
464/// @}
465
466#endif // ifndef MYSQL_STRCONV_DECODE_PARSER_H
Fluent API for parsing strings.
Definition: fluent_parser.h:116
Object used to parse strings.
Definition: parser.h:69
std::size_t match_length(const Is_format auto &format, std::string_view str)
Determine if a prefix of remaining_str() matches str and return the length.
Definition: parser.h:168
Return_status_t call(const std::invocable auto &func)
Parse using the given function, using empty parse options.
Definition: parser.h:377
Return_status_t read(const Is_parse_options auto &opt, Object_t &obj)
Parse into the given object.
Definition: parser.h:225
mysql::utils::Return_status Return_status_t
Definition: parser.h:70
Fluent_parser< Format_t > fluent(const Format_t &format)
Definition: parser.h:454
void after_token(const Is_format auto &format)
If format has a member function after_token, call it with *this as argument.
Definition: parser.h:240
Parser()=default
Be default-constructible.
void before_token(const Is_format auto &format)
If format has a member function before_token, call it with *this as argument.
Definition: parser.h:232
Return_status_t call(const Is_parse_options auto &opt, const std::invocable auto &func)
Parse using the given function, using the given parse options.
Definition: parser.h:391
void set_parse_error(const std::string_view &message)
Store a result representing that the requested object could not be parsed because the string is wrong...
Definition: parser.h:94
Parser(const std::string_view &source, std::size_t pos_arg=0)
Construct a new Parser object.
Definition: parser.h:82
Return_status_t skip(const Is_parse_options auto &opt, const std::string_view &sv)
Skip occurrences of the literal string sv, if found.
Definition: parser.h:145
Return_status_t do_read(const Is_parse_options auto &opt, auto &out)
Resolve the format, using the rules to deduce format based on default format and parent format,...
Definition: parser.h:272
void set_parse_error_expected_string(const std::string_view &expected_string)
Store a result representing that the requested object could not be parsed because the string is wrong...
Definition: parser.h:106
Return_status_t do_skip(const Is_parse_options auto &opt, const std::string_view &sv)
Helper to implement the two overloads of skip.
Definition: parser.h:198
Return_status_t read_to_out_str(const Is_parse_options_nocheck auto &opt, const Is_out_str auto &out_str)
Read from this object to the given Output String Wrapper, using a decode_impl function that takes an ...
Definition: parser.h:317
Return_status_t read_to_out_str(const Is_string_producer auto &producer, const Is_out_str auto &out_str)
Invoke the given string producer function, which is assumed to read from the current Parser object an...
Definition: parser.h:344
void skip(const Is_parse_options_optional auto &opt, const std::string_view &sv)
Skip occurrences of the literal string sv, if found.
Definition: parser.h:125
Base class for the current position of a string parser, holding both the parsed string and the positi...
Definition: parse_position.h:48
void advance(std::ptrdiff_t delta)
Move the iterator delta steps.
Definition: parse_position.h:79
std::size_t remaining_size() const
Return the remaining size.
Definition: parse_position.h:153
Parse_position()=default
Construct a new object.
const char * pos() const
Return the current position as a char pointer.
Definition: parse_position.h:110
std::string_view str() const
Return a string_view over the underlying string.
Definition: parse_position.h:167
std::size_t int_pos() const
Return the current position as an integer.
Definition: parse_position.h:107
void set_int_pos(std::size_t int_pos_arg)
Set the position to the given one.
Definition: parse_position.h:101
Class holding the result from parsing a string, in the form of a status and a message.
Definition: parse_result.h:45
void set_match_count(std::size_t count) noexcept
Store a result representing that the requested object was successfully parsed, overriding a previous ...
Definition: parse_result.h:175
void revert_parse_error_to_ok() noexcept
Definition: parse_result.h:47
@ sentence
The message is expressed as a full sentence, for example "Value out of range".
@ expected_string
The message is a string that was expected but not found at the current position.
void update_parse_error_pos(std::size_t position)
Update the position of a parse error to the given position.
Definition: parse_result.h:58
bool is_parse_error() const
Return true if a parse error occurred.
Definition: parse_result.h:126
void set_oom() noexcept
Store a result representing that the requested object could not be parsed because an out-of-memory c...
Definition: parse_result.h:88
bool is_ok() const
Return true if the last operation succeeded, i.e., either a full match was requested and an object wa...
Definition: parse_result.h:97
std::string_view message() const
Return the message.
Definition: parse_result.h:186
void do_set_parse_error(const std::string_view &string, Message_form message_form, std::size_t position)
Common implementation of set_parse_error and set_parse_error_expected_string.
Definition: parse_result.h:157
bool is_store_error() const
Return true if an environment error occurred.
Definition: parse_result.h:121
True if Test is a format.
Definition: format.h:42
True if Test is an Output String Wrapper, i.e., derived from Out_str_base.
Definition: out_str.h:219
True for any kind of parse options: Format, Repeat, Checker, or Compound_parse_options.
Definition: parse_options.h:133
True for any kind of parse options for which get_repeat() returns Repeat_optional,...
Definition: parse_options.h:143
True for any kind of parse options: Format, Repeat, Checker, or Compound_parse_options.
Definition: parse_options.h:126
True for invocables that can be used with out_str_write, i.e., which accept either a String_writer & ...
Definition: out_str_write.h:88
Concept that holds for String_counter and String_writer.
Definition: string_target.h:111
True if decode_impl can be invoked with the given format type and object type.
Definition: parser.h:55
static int count
Definition: myisam_ftdump.cc:45
struct Parser parser
std::string format(const routing_guidelines::Session_info &session_info, bool extended_session_info)
Definition: dest_metadata_cache.cc:170
Definition: fts0fts.cc:236
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76
constexpr auto resolve_format(const Format_t &format, const Object_t &object)
Return the format to pass to the implementation function, given the format and object type passed by ...
Definition: resolve_format.h:165
Definition: gtid_binary_format.h:41
void invoke_checker(const Is_parse_options auto &)
Invoke the Checker member of any parse options, if it exists; otherwise do nothing.
Definition: parse_options.h:172
auto get_repeat(const Is_parse_options auto &)
Return the Repeat component of any parse options, if it exists; otherwise a default-constructed Repea...
Definition: parse_options.h:162
mysql::utils::Return_status out_str_write(const Out_str_t &out_str, const Producer_counter_t &producer_counter, const Producer_writer_t &producer_writer, const Oom_action_t &oom_action=detail::nop)
Given an Is_out_str object, a String_producer_counter, and a String_producer_writer,...
Definition: out_str_write.h:178
auto get_format(const Is_parse_options auto &)
Return the Format component of any parse options that has one.
Definition: parse_options.h:152
void decode_impl(const Gtid_binary_format &format, Parser &parser, mysql::gtids::Is_tag auto &tag)
Definition: gtid_binary_format_conv.h:63
Return_status
Simple, strongly-typed enumeration to indicate internal status: ok, error.
Definition: return_status.h:40
@ ok
operation succeeded
@ error
operation failed
Return_t void_to_ok(const Func_t &func, Args_t &&...args)
Helper that calls the given function and returns its result, or returns Return_status::ok if the func...
Definition: return_status.h:113
Experimental API header.
Experimental API header.
Experimental API header.
Experimental API header.
repeated Source source
Definition: replication_asynchronous_connection_failover.proto:42
Experimental API header.
Experimental API header.
Experimental API header.
Definition: mysqltest.cc:395
Represents parse options consisting of a tuple where each of the following elements occurs optionally...
Definition: parse_options.h:94
Helper type predicate used by detail::resolve_format.
Definition: parser.h:257