MySQL 8.0.37
Source Code Documentation
json_path.h
Go to the documentation of this file.
1#ifndef SQL_JSON_PATH_INCLUDED
2#define SQL_JSON_PATH_INCLUDED
3
4/* Copyright (c) 2015, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file json_path.h
29
30 This file contains interface support for the JSON path abstraction.
31 The path abstraction is described by the functional spec
32 attached to WL#7909.
33*/
34
35#include <assert.h>
36#include <stddef.h>
37#include <algorithm>
38#include <functional>
39#include <new>
40#include <string>
41#include <utility>
42
43#include "my_alloc.h" // MEM_ROOT
44 // assert
45#include "my_inttypes.h"
46#include "my_sys.h"
47#include "prealloced_array.h" // Prealloced_array
49
50class String;
51
/**
  The type of a Json_path_leg.

  The enumerators are declared without explicit values, in the order
  given by the declaration lines of the original header.
*/
enum enum_json_path_leg_type {
  /**
    A path leg that represents a JSON object member (such as `.name`).
    This path leg matches a single member in a JSON object.
  */
  jpl_member,

  /**
    A path leg that represents a JSON array cell (such as `[10]`).
    This path leg matches a single element in a JSON array.
  */
  jpl_array_cell,

  /**
    A path leg that represents a range in a JSON array
    (such as `[2 to 7]`).
  */
  jpl_array_range,

  /**
    @brief A path leg that represents the member wildcard.

    A path leg that represents the member wildcard (`.*`), which
    matches all the members of a JSON object.
  */
  jpl_member_wildcard,

  /**
    A path leg that represents the array wildcard (`[*]`), which
    matches all the elements of a JSON array.
  */
  jpl_array_cell_wildcard,

  /**
    A path leg that represents the ellipsis (`**`), which matches any
    JSON value and recursively all the JSON values nested within it if
    it is an object or an array.
  */
  jpl_ellipsis
};
93
/**
  A class that represents the index of an element in a JSON array. The
  index is 0-based and relative to the beginning of the array.
*/
class Json_array_index final {
  /**
    The array index. It is 0 if the specified index was before the
    first element of the array, or equal to the array length if the
    specified index was after the last element of the array.
  */
  size_t m_index;

  /** True if the array index is within the bounds of the array. */
  bool m_within_bounds;

 public:
  /**
    Construct a new Json_array_index object representing the specified
    position in an array of the given length.

    @param index         the array index
    @param from_end      true if @a index is relative to the end of the array
    @param array_length  the length of the array
  */
  Json_array_index(size_t index, bool from_end, size_t array_length)
      // For an index counted from the end, the "index < array_length"
      // guard keeps the unsigned subtraction from wrapping around; an
      // out-of-range index is clamped to 0. For an index counted from the
      // start, an out-of-range index is clamped to array_length.
      : m_index(from_end ? (index < array_length ? array_length - index - 1 : 0)
                         : std::min(index, array_length)),
        // The same comparison decides whether the index is in range,
        // whichever end of the array it is counted from.
        m_within_bounds(index < array_length) {}

  /**
    Is the array index within the bounds of the array?

    @retval true if the array index is within bounds
    @retval false otherwise
  */
  bool within_bounds() const { return m_within_bounds; }

  /**
    Get the position in the array pointed to by this array index.

    If the index is out of bounds, 0 will be returned if the array
    index is before the first element in the array, or a value equal
    to the length of the array if the index is after the last element.

    @return the position in the array (0-based index relative to the
    start of the array)
  */
  size_t position() const { return m_index; }
};
143
144/**
145 One path leg in a JSON path expression.
146
147 A path leg describes a key/value pair in an object, a 0-based index or
148 range in an array, a member or array cell wildcard, or an ellipsis.
149*/
150class Json_path_leg final {
151 /// The type of this path leg.
153
154 /// The index of an array cell, or the start of an array range.
156
157 /// Is #m_first_array_index relative to the end of the array?
159
160 /// The end (inclusive) of an array range.
162
163 /// Is #m_last_array_index relative to the end of the array?
165
166 /// The member name of a member path leg.
167 std::string m_member_name;
168
169 public:
170 /**
171 Construct a wildcard or ellipsis path leg.
172
173 @param leg_type the type of wildcard (#jpl_ellipsis,
174 #jpl_member_wildcard or #jpl_array_cell_wildcard)
175 */
177 : m_leg_type(leg_type) {
178 assert(leg_type == jpl_ellipsis || leg_type == jpl_member_wildcard ||
179 leg_type == jpl_array_cell_wildcard);
180 }
181
182 /**
183 Construct an array cell path leg.
184
185 @param index the 0-based index in the array,
186 relative to the beginning of the array
187 */
188 explicit Json_path_leg(size_t index) : Json_path_leg(index, false) {}
189
190 /**
191 Construct an array cell path leg.
192
193 @param index the 0-based index in the array
194 @param from_end true if @a index is relative to the end of the array
195 */
196 Json_path_leg(size_t index, bool from_end)
198 m_first_array_index(index),
200
201 /**
202 Construct an array range path leg.
203
204 @param idx1 the start index of the range, inclusive
205 @param idx1_from_end true if the start index is relative
206 to the end of the array
207 @param idx2 the last index of the range, inclusive
208 @param idx2_from_end true if the last index is relative
209 to the end of the array
210 */
211 Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2,
212 bool idx2_from_end)
215 m_first_array_index_from_end(idx1_from_end),
216 m_last_array_index(idx2),
217 m_last_array_index_from_end(idx2_from_end) {}
218
219 /**
220 Construct an object member path leg.
221
222 @param member_name the name of the object member
223 @param length the length of the member name
224 */
225 Json_path_leg(const char *member_name, size_t length)
226 : m_leg_type(jpl_member), m_member_name(member_name, length) {}
227
228 /** Construct an object member path leg. */
229 Json_path_leg(const std::string &member_name)
230 : Json_path_leg(member_name.c_str(), member_name.length()) {}
231
232 /** Get the type of the path leg. */
234
235 /** Get the member name of a ::jpl_member path leg. */
236 const std::string &get_member_name() const { return m_member_name; }
237
238 /** Turn into a human-readable string. */
239 bool to_string(String *buf) const;
240
241 /**
242 Is this path leg an auto-wrapping array accessor?
243
244 An auto-wrapping array accessor is an array accessor that matches
245 non-arrays by auto-wrapping them in a single-element array before doing
246 the matching.
247
248 This function returns true for any ::jpl_array_cell or ::jpl_array_range
249 path leg that would match the element contained in a single-element
250 array, and which therefore would also match non-arrays that have been
251 auto-wrapped in single-element arrays.
252 */
253 bool is_autowrap() const;
254
255 /**
256 Get the first array cell pointed to by an array range, or the
257 array cell pointed to by an array cell index.
258
259 @param array_length the length of the array
260 */
261 Json_array_index first_array_index(size_t array_length) const {
264 array_length);
265 }
266
267 /**
268 Get the last array cell pointed to by an array range. The range
269 includes this cell.
270
271 @param array_length the length of the array
272 */
273 Json_array_index last_array_index(size_t array_length) const {
274 assert(m_leg_type == jpl_array_range);
276 array_length);
277 }
278
279 /**
280 A structure that represents an array range.
281 */
282 struct Array_range {
283 size_t m_begin; ///< Beginning of the range, inclusive.
284 size_t m_end; ///< End of the range, exclusive.
285 };
286
287 /**
288 Get the array range pointed to by a path leg of type
289 ::jpl_array_range or ::jpl_array_cell_wildcard.
290 @param array_length the length of the array
291 */
292 Array_range get_array_range(size_t array_length) const;
293};
294
297
298/**
299 A path expression which can be used to seek to
300 a position inside a JSON value.
301*/
303 protected:
304 /** An array of pointers to the legs of the JSON path. */
306
308
309 public:
310 /** Return the number of legs in this seekable path. */
311 size_t leg_count() const { return m_path_legs.size(); }
312
313 /** Get an iterator pointing to the first path leg. */
315
316 /** Get an iterator pointing just past the last path leg. */
317 Json_path_iterator end() const { return m_path_legs.end(); }
318
319 /** Get a pointer to the last path leg. The path must not be empty. */
320 const Json_path_leg *last_leg() const { return m_path_legs.back(); }
321};
322
323/**
324 A JSON path expression.
325
326 From the user's point of view, a path expression is a string literal
327 with the following structure. We parse this structure into a
328 Json_path object:
329
330 pathExpression ::= scope pathLeg (pathLeg)*
331
332 scope ::= dollarSign
333
334 pathLeg ::= member | arrayLocation | doubleAsterisk
335
336 member ::= period (keyName | asterisk)
337
338 arrayLocation ::=
339 leftBracket
340 (arrayIndex | arrayRange | asterisk)
341 rightBracket
342
343 arrayIndex ::=
344 non-negative-integer |
345 last [ minus non-negative-integer ]
346
347 arrayRange ::= arrayIndex to arrayIndex
348
349 keyName ::= ECMAScript-identifier | ECMAScript-string-literal
350
351 doubleAsterisk ::= **
352
353 to ::= "to"
354
355 last ::= "last"
356*/
357class Json_path final : public Json_seekable_path {
358 private:
359 /**
360 A MEM_ROOT in which the Json_path_leg objects pointed to by
361 #Json_seekable_path::m_path_legs are allocated.
362 */
364 /**
365 Key used to instrument memory usage.
366 */
368
369 public:
370 explicit Json_path(PSI_memory_key key);
371
373 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
374 }
375
376 /** Move constructor. */
379 m_mem_root(std::move(other.m_mem_root)),
380 m_psi_key(other.m_psi_key) {
381 // Move the contents of m_path_legs from other into this.
382 m_path_legs = std::move(other.m_path_legs);
383
384 /*
385 Must also make sure that other.m_path_legs is empty, so that we
386 don't end up destroying the same objects twice; once from this's
387 destructor and once from other's destructor.
388
389 Move-constructing a vector would usually leave "other" empty,
390 but it is not guaranteed. Furthermore, m_path_legs is a
391 Prealloced_array, not a std::vector, so often moving will mean
392 copying from one prealloced area to another instead of simply
393 swapping pointers to the backing array. (And at the time of
394 writing Prealloced_array doesn't even have a move-assignment
395 operator, so the above assignment will always copy and leave
396 "other" unchanged.)
397 */
398 other.m_path_legs.clear();
399 }
400
401 /** Move assignment. */
403 if (&other != this) {
404 this->~Json_path();
405 new (this) Json_path(std::move(other));
406 }
407 return *this;
408 }
409
410 /**
411 Add a path leg to the end of this path.
412 @param[in] leg the leg to add
413 @return false on success, true on error
414 */
415 bool append(const Json_path_leg &leg) {
416 auto ptr = new (&m_mem_root) Json_path_leg(leg);
417 return ptr == nullptr || m_path_legs.push_back(ptr);
418 }
419
420 /**
421 Resets this to an empty path with no legs.
422 */
423 void clear() {
424 // Destruct all the Json_path_leg objects, and clear the pointers to them.
425 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
427 // Mark the memory as ready for reuse.
429 }
430
431 /**
432 Return true if the path can match more than one value in a JSON document.
433
434 @retval true if the path contains a path leg which is a wildcard,
435 ellipsis or array range
436 @retval false otherwise
437 */
438 bool can_match_many() const;
439
440 /** Turn into a human-readable string. */
441 bool to_string(String *buf) const;
442};
443
444/**
445 A lightweight path expression. This exists so that paths can be cloned
446 from the path legs of other paths without allocating heap memory
447 to copy those legs into. This class does not own the memory of the
448 Json_path_leg objects pointed to by #Json_seekable_path::m_path_legs, it
449 just points to Json_path_leg objects that belong to a Json_path instance.
450*/
452 public:
454 /**
455 Add a path leg to the end of this cloned path.
456 @param[in] leg the leg to add
457 @return false on success, true on error
458 */
459 bool append(const Json_path_leg *leg) { return m_path_legs.push_back(leg); }
460
461 /**
462 Resets this to an empty path with no legs.
463 */
464 void clear() { m_path_legs.clear(); }
465};
466
467/**
468 Initialize a Json_path from a path expression.
469
470 Stops parsing on the first error. It initializes the Json_path and
471 returns false if the path is parsed successfully. Otherwise, it
472 returns true. In that case, the output bad_index argument will
473 contain an index into the path expression. The parsing failed near
474 that index.
475
476 @param[in] path_length The length of the path expression.
477 @param[in] path_expression The string form of the path expression.
478 @param[out] path The Json_path object to be initialized.
479 @param[out] bad_index If true is returned, the parsing failed around here.
480 @param[in] depth_handler Function that is called to handle the error
481 that occurs when the depth is exceeded.
482 @return false on success, true on error
483*/
484bool parse_path(size_t path_length, const char *path_expression,
485 Json_path *path, size_t *bad_index,
486 const JsonDocumentDepthHandler &depth_handler);
487
488/**
489 A helper function that uses the above one as workhorse. Entry point
490 for JSON_TABLE (Table_function_json class) and Json_path_cache. Raises an
491 error if the path expression is syntactically incorrect. Raises an
492 error if the path expression contains wildcard tokens but is not
493 supposed to. Otherwise updates the supplied Json_path object with
494 the parsed path.
495
496 @param[in] path_value A String to be interpreted as a path.
497 @param[in] forbid_wildcards True if the path shouldn't contain * or **
498 @param[out] json_path The object that will hold the parsed path
499
500 @returns false on success (valid path or NULL), true on error
501*/
502bool parse_path(const String &path_value, bool forbid_wildcards,
503 Json_path *json_path);
504#endif /* SQL_JSON_PATH_INCLUDED */
A class that represents the index of an element in a JSON array.
Definition: json_path.h:98
size_t m_index
The array index.
Definition: json_path.h:104
size_t position() const
Get the position in the array pointed to by this array index.
Definition: json_path.h:141
Json_array_index(size_t index, bool from_end, size_t array_length)
Construct a new Json_array_index object representing the specified position in an array of the given ...
Definition: json_path.h:118
bool m_within_bounds
True if the array index is within the bounds of the array.
Definition: json_path.h:107
bool within_bounds() const
Is the array index within the bounds of the array?
Definition: json_path.h:129
A lightweight path expression.
Definition: json_path.h:451
bool append(const Json_path_leg *leg)
Add a path leg to the end of this cloned path.
Definition: json_path.h:459
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:464
Json_path_clone(PSI_memory_key key)
Definition: json_path.h:453
One path leg in a JSON path expression.
Definition: json_path.h:150
size_t m_first_array_index
The index of an array cell, or the start of an array range.
Definition: json_path.h:155
size_t m_last_array_index
The end (inclusive) of an array range.
Definition: json_path.h:161
Json_path_leg(enum_json_path_leg_type leg_type)
Construct a wildcard or ellipsis path leg.
Definition: json_path.h:176
Json_array_index first_array_index(size_t array_length) const
Get the first array cell pointed to by an array range, or the array cell pointed to by an array cell ...
Definition: json_path.h:261
Array_range get_array_range(size_t array_length) const
Get the array range pointed to by a path leg of type jpl_array_range or jpl_array_cell_wildcard.
Definition: json_path.cc:149
bool m_last_array_index_from_end
Is m_last_array_index relative to the end of the array?
Definition: json_path.h:164
std::string m_member_name
The member name of a member path leg.
Definition: json_path.h:167
enum_json_path_leg_type m_leg_type
The type of this path leg.
Definition: json_path.h:152
bool is_autowrap() const
Is this path leg an auto-wrapping array accessor?
Definition: json_path.cc:126
bool m_first_array_index_from_end
Is m_first_array_index relative to the end of the array?
Definition: json_path.h:158
Json_path_leg(const char *member_name, size_t length)
Construct an object member path leg.
Definition: json_path.h:225
Json_path_leg(size_t index)
Construct an array cell path leg.
Definition: json_path.h:188
Json_path_leg(const std::string &member_name)
Construct an object member path leg.
Definition: json_path.h:229
Json_path_leg(size_t index, bool from_end)
Construct an array cell path leg.
Definition: json_path.h:196
enum_json_path_leg_type get_type() const
Get the type of the path leg.
Definition: json_path.h:233
const std::string & get_member_name() const
Get the member name of a jpl_member path leg.
Definition: json_path.h:236
Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2, bool idx2_from_end)
Construct an array range path leg.
Definition: json_path.h:211
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:91
Json_array_index last_array_index(size_t array_length) const
Get the last array cell pointed to by an array range.
Definition: json_path.h:273
A JSON path expression.
Definition: json_path.h:357
Json_path(Json_path &&other)
Move constructor.
Definition: json_path.h:377
~Json_path()
Definition: json_path.h:372
Json_path(PSI_memory_key key)
Definition: json_path.cc:168
bool append(const Json_path_leg &leg)
Add a path leg to the end of this path.
Definition: json_path.h:415
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:423
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:171
PSI_memory_key m_psi_key
Key used to instrument memory usage.
Definition: json_path.h:367
Json_path & operator=(Json_path &&other)
Move assignment.
Definition: json_path.h:402
bool can_match_many() const
Return true if the path can match more than one value in a JSON document.
Definition: json_path.cc:181
MEM_ROOT m_mem_root
A MEM_ROOT in which the Json_path_leg objects pointed to by Json_seekable_path::m_path_legs are alloc...
Definition: json_path.h:363
A path expression which can be used to seek to a position inside a JSON value.
Definition: json_path.h:302
Json_path_iterator end() const
Get an iterator pointing just past the last path leg.
Definition: json_path.h:317
Json_seekable_path(PSI_memory_key key)
Definition: json_path.cc:165
size_t leg_count() const
Return the number of legs in this searchable path.
Definition: json_path.h:311
const Json_path_leg * last_leg() const
Get a pointer to the last path leg.
Definition: json_path.h:320
Json_path_iterator begin() const
Get an iterator pointing to the first path leg.
Definition: json_path.h:314
Json_path_leg_pointers m_path_legs
An array of pointers to the legs of the JSON path.
Definition: json_path.h:305
bool push_back(const Element_type &element)
Copies an element into the back of the array.
Definition: prealloced_array.h:318
void clear()
Removes (and destroys) all elements.
Definition: prealloced_array.h:601
size_t size() const
Definition: prealloced_array.h:227
iterator begin()
begin : Returns a pointer to the first element in the array.
Definition: prealloced_array.h:254
Element_type & back()
Definition: prealloced_array.h:244
iterator end()
Definition: prealloced_array.h:255
const const Json_path_leg * * const_iterator
Definition: prealloced_array.h:117
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:168
unsigned int PSI_memory_key
Instrumented memory key.
Definition: psi_memory_bits.h:49
std::function< void()> JsonDocumentDepthHandler
Definition: json_error_handler.h:32
enum_json_path_leg_type
The type of a Json_path_leg.
Definition: json_path.h:53
@ jpl_array_range
A path leg that represents a range in a JSON array (such as [2 to 7]).
Definition: json_path.h:70
@ jpl_array_cell_wildcard
A path leg that represents the array wildcard ([*]), which matches all the elements of a JSON array.
Definition: json_path.h:84
@ jpl_ellipsis
A path leg that represents the ellipsis (**), which matches any JSON value and recursively all the JS...
Definition: json_path.h:91
@ jpl_member
A path leg that represents a JSON object member (such as .name).
Definition: json_path.h:58
@ jpl_array_cell
A path leg that represents a JSON array cell (such as [10]).
Definition: json_path.h:64
@ jpl_member_wildcard
A path leg that represents the member wildcard.
Definition: json_path.h:78
bool parse_path(size_t path_length, const char *path_expression, Json_path *path, size_t *bad_index, const JsonDocumentDepthHandler &depth_handler)
Initialize a Json_path from a path expression.
Definition: json_path.cc:258
Json_path_leg_pointers::const_iterator Json_path_iterator
Definition: json_path.h:296
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
Some integer typedefs for easier portability.
Common header for many mysys elements.
static char * path
Definition: mysqldump.cc:137
Definition: buf0block_hint.cc:30
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:76
Definition: gcs_xcom_synode.h:64
required string key
Definition: replication_asynchronous_connection_failover.proto:60
A structure that represents an array range.
Definition: json_path.h:282
size_t m_end
End of the range, exclusive.
Definition: json_path.h:284
size_t m_begin
Beginning of the range, inclusive.
Definition: json_path.h:283
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83
void ClearForReuse()
Similar to Clear(), but anticipates that the block will be reused for further allocations.
Definition: my_alloc.cc:189