MySQL 8.0.30
Source Code Documentation
json_path.h
Go to the documentation of this file.
1#ifndef SQL_JSON_PATH_INCLUDED
2#define SQL_JSON_PATH_INCLUDED
3
4/* Copyright (c) 2015, 2022, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file json_path.h
28
29 This file contains interface support for the JSON path abstraction.
30 The path abstraction is described by the functional spec
31 attached to WL#7909.
32*/
33
34#include <assert.h>
35#include <stddef.h>
36#include <algorithm>
37#include <functional>
38#include <new>
39#include <string>
40#include <utility>
41
42#include "my_alloc.h" // MEM_ROOT
43 // assert
44#include "my_inttypes.h"
45#include "my_sys.h"
46#include "prealloced_array.h" // Prealloced_array
48
49class String;
50
51/** The type of a Json_path_leg. */
53 /**
54 A path leg that represents a JSON object member (such as `.name`).
55 This path leg matches a single member in a JSON object.
56 */
58
59 /**
60 A path leg that represents a JSON array cell (such as `[10]`).
61 This path leg matches a single element in a JSON array.
62 */
64
65 /**
66 A path leg that represents a range in a JSON array
67 (such as `[2 to 7]`).
68 */
70
71 /**
72 @brief A path leg that represents the member wildcard.
73
74 A path leg that represents the member wildcard (`.*`), which
75 matches all the members of a JSON object.
76 */
78
79 /**
80 A path leg that represents the array wildcard (`[*]`), which
81 matches all the elements of a JSON array.
82 */
84
85 /**
86 A path leg that represents the ellipsis (`**`), which matches any
87 JSON value and recursively all the JSON values nested within it if
88 it is an object or an array.
89 */
91};
92
93/**
94 A class that represents the index of an element in a JSON array. The
95 index is 0-based and relative to the beginning of the array.
96*/
97class Json_array_index final {
98 /**
99 The array index. It is 0 if the specified index was before the
100 first element of the array, or equal to the array length if the
101 specified index was after the last element of the array.
102 */
103 size_t m_index;
104
105 /** True if the array index is within the bounds of the array. */
107
108 public:
109 /**
110 Construct a new Json_array_index object representing the specified
111 position in an array of the given length.
112
113 @param index the array index
114 @param from_end true if @a index is relative to the end of the array
115 @param array_length the length of the array
116 */
117 Json_array_index(size_t index, bool from_end, size_t array_length)
118 : m_index(from_end ? (index < array_length ? array_length - index - 1 : 0)
119 : std::min(index, array_length)),
120 m_within_bounds(index < array_length) {}
121
122 /**
123 Is the array index within the bounds of the array?
124
125 @retval true if the array index is within bounds
126 @retval false otherwise
127 */
128 bool within_bounds() const { return m_within_bounds; }
129
130 /**
131 Get the position in the array pointed to by this array index.
132
133 If the index is out of bounds, 0 will be returned if the array
134 index is before the first element in the array, or a value equal
135 to the length of the array if the index is after the last element.
136
137 @return the position in the array (0-based index relative to the
138 start of the array)
139 */
140 size_t position() const { return m_index; }
141};
142
143/**
144 One path leg in a JSON path expression.
145
146 A path leg describes either a key/value pair in an object
147 or a 0-based index into an array.
148*/
149class Json_path_leg final {
150 /// The type of this path leg.
152
153 /// The index of an array cell, or the start of an array range.
155
156 /// Is #m_first_array_index relative to the end of the array?
158
159 /// The end (inclusive) of an array range.
161
162 /// Is #m_last_array_index relative to the end of the array?
164
165 /// The member name of a member path leg.
166 std::string m_member_name;
167
168 public:
169 /**
170 Construct a wildcard or ellipsis path leg.
171
172 @param leg_type the type of wildcard (#jpl_ellipsis,
173 #jpl_member_wildcard or #jpl_array_cell_wildcard)
174 */
176 : m_leg_type(leg_type) {
177 assert(leg_type == jpl_ellipsis || leg_type == jpl_member_wildcard ||
178 leg_type == jpl_array_cell_wildcard);
179 }
180
181 /**
182 Construct an array cell path leg.
183
184 @param index the 0-based index in the array,
185 relative to the beginning of the array
186 */
187 explicit Json_path_leg(size_t index) : Json_path_leg(index, false) {}
188
189 /**
190 Construct an array cell path leg.
191
192 @param index the 0-based index in the array
193 @param from_end true if @a index is relative to the end of the array
194 */
195 Json_path_leg(size_t index, bool from_end)
197 m_first_array_index(index),
199
200 /**
201 Construct an array range path leg.
202
203 @param idx1 the start index of the range, inclusive
204 @param idx1_from_end true if the start index is relative
205 to the end of the array
206 @param idx2 the last index of the range, inclusive
207 @param idx2_from_end true if the last index is relative
208 to the end of the array
209 */
210 Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2,
211 bool idx2_from_end)
214 m_first_array_index_from_end(idx1_from_end),
215 m_last_array_index(idx2),
216 m_last_array_index_from_end(idx2_from_end) {}
217
218 /**
219 Construct an object member path leg.
220
221 @param member_name the name of the object member
222 @param length the length of the member name
223 */
224 Json_path_leg(const char *member_name, size_t length)
225 : m_leg_type(jpl_member), m_member_name(member_name, length) {}
226
227 /** Construct an object member path leg. */
228 Json_path_leg(const std::string &member_name)
229 : Json_path_leg(member_name.c_str(), member_name.length()) {}
230
231 /** Get the type of the path leg. */
233
234 /** Get the member name of a ::jpl_member path leg. */
235 const std::string &get_member_name() const { return m_member_name; }
236
237 /** Turn into a human-readable string. */
238 bool to_string(String *buf) const;
239
240 /**
241 Is this path leg an auto-wrapping array accessor?
242
243 An auto-wrapping array accessor is an array accessor that matches
244 non-arrays by auto-wrapping them in a single-element array before doing
245 the matching.
246
247 This function returns true for any ::jpl_array_cell or ::jpl_array_range
248 path leg that would match the element contained in a single-element
249 array, and which therefore would also match non-arrays that have been
250 auto-wrapped in single-element arrays.
251 */
252 bool is_autowrap() const;
253
254 /**
255 Get the first array cell pointed to by an array range, or the
256 array cell pointed to by an array cell index.
257
258 @param array_length the length of the array
259 */
260 Json_array_index first_array_index(size_t array_length) const {
263 array_length);
264 }
265
266 /**
267 Get the last array cell pointed to by an array range. The range
268 includes this cell.
269
270 @param array_length the length of the array
271 */
272 Json_array_index last_array_index(size_t array_length) const {
273 assert(m_leg_type == jpl_array_range);
275 array_length);
276 }
277
278 /**
279 A structure that represents an array range.
280 */
281 struct Array_range {
282 size_t m_begin; ///< Beginning of the range, inclusive.
283 size_t m_end; ///< End of the range, exclusive.
284 };
285
286 /**
287 Get the array range pointed to by a path leg of type
288 ::jpl_array_range or ::jpl_array_cell_wildcard.
289 @param array_length the length of the array
290 */
291 Array_range get_array_range(size_t array_length) const;
292};
293
296
297/**
298 A path expression which can be used to seek to
299 a position inside a JSON value.
300*/
302 protected:
303 /** An array of pointers to the legs of the JSON path. */
305
307
308 public:
309 /** Return the number of legs in this seekable path. */
310 size_t leg_count() const { return m_path_legs.size(); }
311
312 /** Get an iterator pointing to the first path leg. */
314
315 /** Get an iterator pointing just past the last path leg. */
316 Json_path_iterator end() const { return m_path_legs.end(); }
317
318 /** Get a pointer to the last path leg. The path must not be empty. */
319 const Json_path_leg *last_leg() const { return m_path_legs.back(); }
320};
321
322/**
323 A JSON path expression.
324
325 From the user's point of view, a path expression is a string literal
326 with the following structure. We parse this structure into a
327 Json_path object:
328
329 pathExpression ::= scope pathLeg (pathLeg)*
330
331 scope ::= dollarSign
332
333 pathLeg ::= member | arrayLocation | doubleAsterisk
334
335 member ::= period (keyName | asterisk)
336
337 arrayLocation ::=
338 leftBracket
339 (arrayIndex | arrayRange | asterisk)
340 rightBracket
341
342 arrayIndex ::=
343 non-negative-integer |
344 last [ minus non-negative-integer ]
345
346 arrayRange ::= arrayIndex to arrayIndex
347
348 keyName ::= ECMAScript-identifier | ECMAScript-string-literal
349
350 doubleAsterisk ::= **
351
352 to ::= "to"
353
354 last ::= "last"
355*/
356class Json_path final : public Json_seekable_path {
357 private:
358 /**
359 A MEM_ROOT in which the Json_path_leg objects pointed to by
360 #Json_seekable_path::m_path_legs are allocated.
361 */
363 /**
364 Key used to instrument memory usage.
365 */
367
368 public:
369 explicit Json_path(PSI_memory_key key);
370
372 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
373 }
374
375 /** Move constructor. */
378 m_mem_root(std::move(other.m_mem_root)),
379 m_psi_key(other.m_psi_key) {
380 // Move the contents of m_path_legs from other into this.
381 m_path_legs = std::move(other.m_path_legs);
382
383 /*
384 Must also make sure that other.m_path_legs is empty, so that we
385 don't end up destroying the same objects twice; once from this's
386 destructor and once from other's destructor.
387
388 Move-constructing a vector would usually leave "other" empty,
389 but it is not guaranteed. Furthermore, m_path_legs is a
390 Prealloced_array, not a std::vector, so often moving will mean
391 copying from one prealloced area to another instead of simply
392 swapping pointers to the backing array. (And at the time of
393 writing Prealloced_array doesn't even have a move-assignment
394 operator, so the above assignment will always copy and leave
395 "other" unchanged.)
396 */
397 other.m_path_legs.clear();
398 }
399
400 /** Move assignment. */
402 if (&other != this) {
403 this->~Json_path();
404 new (this) Json_path(std::move(other));
405 }
406 return *this;
407 }
408
409 /**
410 Add a path leg to the end of this path.
411 @param[in] leg the leg to add
412 @return false on success, true on error
413 */
414 bool append(const Json_path_leg &leg) {
415 auto ptr = new (&m_mem_root) Json_path_leg(leg);
416 return ptr == nullptr || m_path_legs.push_back(ptr);
417 }
418
419 /**
420 Resets this to an empty path with no legs.
421 */
422 void clear() {
423 // Destruct all the Json_path_leg objects, and clear the pointers to them.
424 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
426 // Mark the memory as ready for reuse.
428 }
429
430 /**
431 Return true if the path can match more than one value in a JSON document.
432
433 @retval true if the path contains a path leg which is a wildcard,
434 ellipsis or array range
435 @retval false otherwise
436 */
437 bool can_match_many() const;
438
439 /** Turn into a human-readable string. */
440 bool to_string(String *buf) const;
441};
442
443/**
444 A lightweight path expression. This exists so that paths can be cloned
445 from the path legs of other paths without allocating heap memory
446 to copy those legs into. This class does not own the memory of the
447 Json_path_leg objects pointed to by #Json_seekable_path::m_path_legs, it
448 just points to Json_path_leg objects that belong to a Json_path instance.
449*/
451 public:
453 /**
454 Add a path leg to the end of this cloned path.
455 @param[in] leg the leg to add
456 @return false on success, true on error
457 */
458 bool append(const Json_path_leg *leg) { return m_path_legs.push_back(leg); }
459
460 /**
461 Resets this to an empty path with no legs.
462 */
463 void clear() { m_path_legs.clear(); }
464};
465
466/**
467 Initialize a Json_path from a path expression.
468
469 Stops parsing on the first error. It initializes the Json_path and
470 returns false if the path is parsed successfully. Otherwise, it
471 returns true. In that case, the output bad_index argument will
472 contain an index into the path expression. The parsing failed near
473 that index.
474
475 @param[in] path_length The length of the path expression.
476 @param[in] path_expression The string form of the path expression.
477 @param[out] path The Json_path object to be initialized.
478 @param[out] bad_index If true is returned, the parsing failed around this index.
479 @param[in] depth_handler Callback that is invoked to handle the error
480 that occurs when the allowed depth is exceeded.
481 @return false on success, true on error
482*/
483bool parse_path(size_t path_length, const char *path_expression,
484 Json_path *path, size_t *bad_index,
485 const JsonDocumentDepthHandler &depth_handler);
486
487/**
488 A helper function that uses the above one as workhorse. Entry point for
489 JSON_TABLE (Table_function_json class) and Json_path_cache. Raises an
490 error if the path expression is syntactically incorrect. Raises an
491 error if the path expression contains wildcard tokens but is not
492 supposed to. Otherwise updates the supplied Json_path object with
493 the parsed path.
494
495 @param[in] path_value A String to be interpreted as a path.
496 @param[in] forbid_wildcards True if the path shouldn't contain * or **
497 @param[out] json_path The object that will hold the parsed path
498
499 @returns false on success (valid path or NULL), true on error
500*/
501bool parse_path(const String &path_value, bool forbid_wildcards,
502 Json_path *json_path);
503#endif /* SQL_JSON_PATH_INCLUDED */
A class that represents the index of an element in a JSON array.
Definition: json_path.h:97
size_t m_index
The array index.
Definition: json_path.h:103
size_t position() const
Get the position in the array pointed to by this array index.
Definition: json_path.h:140
Json_array_index(size_t index, bool from_end, size_t array_length)
Construct a new Json_array_index object representing the specified position in an array of the given ...
Definition: json_path.h:117
bool m_within_bounds
True if the array index is within the bounds of the array.
Definition: json_path.h:106
bool within_bounds() const
Is the array index within the bounds of the array?
Definition: json_path.h:128
A lightweight path expression.
Definition: json_path.h:450
bool append(const Json_path_leg *leg)
Add a path leg to the end of this cloned path.
Definition: json_path.h:458
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:463
Json_path_clone(PSI_memory_key key)
Definition: json_path.h:452
One path leg in a JSON path expression.
Definition: json_path.h:149
size_t m_first_array_index
The index of an array cell, or the start of an array range.
Definition: json_path.h:154
size_t m_last_array_index
The end (inclusive) of an array range.
Definition: json_path.h:160
Json_path_leg(enum_json_path_leg_type leg_type)
Construct a wildcard or ellipsis path leg.
Definition: json_path.h:175
Json_array_index first_array_index(size_t array_length) const
Get the first array cell pointed to by an array range, or the array cell pointed to by an array cell ...
Definition: json_path.h:260
Array_range get_array_range(size_t array_length) const
Get the array range pointed to by a path leg of type jpl_array_range or jpl_array_cell_wildcard.
Definition: json_path.cc:148
bool m_last_array_index_from_end
Is m_last_array_index relative to the end of the array?
Definition: json_path.h:163
std::string m_member_name
The member name of a member path leg.
Definition: json_path.h:166
enum_json_path_leg_type m_leg_type
The type of this path leg.
Definition: json_path.h:151
bool is_autowrap() const
Is this path leg an auto-wrapping array accessor?
Definition: json_path.cc:125
bool m_first_array_index_from_end
Is m_first_array_index relative to the end of the array?
Definition: json_path.h:157
Json_path_leg(const char *member_name, size_t length)
Construct an object member path leg.
Definition: json_path.h:224
Json_path_leg(size_t index)
Construct an array cell path leg.
Definition: json_path.h:187
Json_path_leg(const std::string &member_name)
Construct an object member path leg.
Definition: json_path.h:228
Json_path_leg(size_t index, bool from_end)
Construct an array cell path leg.
Definition: json_path.h:195
enum_json_path_leg_type get_type() const
Get the type of the path leg.
Definition: json_path.h:232
const std::string & get_member_name() const
Get the member name of a jpl_member path leg.
Definition: json_path.h:235
Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2, bool idx2_from_end)
Construct an array range path leg.
Definition: json_path.h:210
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:90
Json_array_index last_array_index(size_t array_length) const
Get the last array cell pointed to by an array range.
Definition: json_path.h:272
A JSON path expression.
Definition: json_path.h:356
Json_path(Json_path &&other)
Move constructor.
Definition: json_path.h:376
~Json_path()
Definition: json_path.h:371
Json_path(PSI_memory_key key)
Definition: json_path.cc:167
bool append(const Json_path_leg &leg)
Add a path leg to the end of this path.
Definition: json_path.h:414
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:422
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:170
PSI_memory_key m_psi_key
Key used to instrument memory usage.
Definition: json_path.h:366
Json_path & operator=(Json_path &&other)
Move assignment.
Definition: json_path.h:401
bool can_match_many() const
Return true if the path can match more than one value in a JSON document.
Definition: json_path.cc:180
MEM_ROOT m_mem_root
A MEM_ROOT in which the Json_path_leg objects pointed to by Json_seekable_path::m_path_legs are alloc...
Definition: json_path.h:362
A path expression which can be used to seek to a position inside a JSON value.
Definition: json_path.h:301
Json_path_iterator end() const
Get an iterator pointing just past the last path leg.
Definition: json_path.h:316
Json_seekable_path(PSI_memory_key key)
Definition: json_path.cc:164
size_t leg_count() const
Return the number of legs in this seekable path.
Definition: json_path.h:310
const Json_path_leg * last_leg() const
Get a pointer to the last path leg.
Definition: json_path.h:319
Json_path_iterator begin() const
Get an iterator pointing to the first path leg.
Definition: json_path.h:313
Json_path_leg_pointers m_path_legs
An array of pointers to the legs of the JSON path.
Definition: json_path.h:304
bool push_back(const Element_type &element)
Copies an element into the back of the array.
Definition: prealloced_array.h:317
void clear()
Removes (and destroys) all elements.
Definition: prealloced_array.h:600
size_t size() const
Definition: prealloced_array.h:226
iterator begin()
begin : Returns a pointer to the first element in the array.
Definition: prealloced_array.h:253
Element_type & back()
Definition: prealloced_array.h:243
iterator end()
Definition: prealloced_array.h:254
const Json_path_leg *const * const_iterator
Definition: prealloced_array.h:116
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:166
unsigned int PSI_memory_key
Instrumented memory key.
Definition: psi_memory_bits.h:48
std::function< void()> JsonDocumentDepthHandler
Definition: json_error_handler.h:31
enum_json_path_leg_type
The type of a Json_path_leg.
Definition: json_path.h:52
@ jpl_array_range
A path leg that represents a range in a JSON array (such as [2 to 7]).
Definition: json_path.h:69
@ jpl_array_cell_wildcard
A path leg that represents the array wildcard ([*]), which matches all the elements of a JSON array.
Definition: json_path.h:83
@ jpl_ellipsis
A path leg that represents the ellipsis (**), which matches any JSON value and recursively all the JS...
Definition: json_path.h:90
@ jpl_member
A path leg that represents a JSON object member (such as .name).
Definition: json_path.h:57
@ jpl_array_cell
A path leg that represents a JSON array cell (such as [10]).
Definition: json_path.h:63
@ jpl_member_wildcard
A path leg that represents the member wildcard.
Definition: json_path.h:77
bool parse_path(size_t path_length, const char *path_expression, Json_path *path, size_t *bad_index, const JsonDocumentDepthHandler &depth_handler)
Initialize a Json_path from a path expression.
Definition: json_path.cc:257
Json_path_leg_pointers::const_iterator Json_path_iterator
Definition: json_path.h:295
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
Some integer typedefs for easier portability.
Common header for many mysys elements.
static char * path
Definition: mysqldump.cc:133
Definition: buf0block_hint.cc:29
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
Definition: varlen_sort.h:183
required string key
Definition: replication_asynchronous_connection_failover.proto:59
A structure that represents an array range.
Definition: json_path.h:281
size_t m_end
End of the range, exclusive.
Definition: json_path.h:283
size_t m_begin
Beginning of the range, inclusive.
Definition: json_path.h:282
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:82
void ClearForReuse()
Similar to Clear(), but anticipates that the block will be reused for further allocations.
Definition: my_alloc.cc:186