MySQL 9.1.0
Source Code Documentation
json_path.h
Go to the documentation of this file.
1#ifndef SQL_JSON_PATH_INCLUDED
2#define SQL_JSON_PATH_INCLUDED
3
4/* Copyright (c) 2015, 2024, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is designed to work with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have either included with
16 the program or referenced in the documentation.
17
18 This program is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License, version 2.0, for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
26
27/**
28 @file json_path.h
29
30 This file contains interface support for the JSON path abstraction.
31 The path abstraction is described by the functional spec
32 attached to WL#7909.
33*/
34
35#include <assert.h>
36#include <stddef.h>
37#include <algorithm>
38#include <new>
39#include <string>
40#include <string_view>
41#include <utility>
42
43#include "my_alloc.h" // MEM_ROOT
45#include "prealloced_array.h" // Prealloced_array
46
47class String;
48class Json_wrapper;
49
50/** The type of a Json_path_leg. */
52 /**
53 A path leg that represents a JSON object member (such as `.name`).
54 This path leg matches a single member in a JSON object.
55 */
57
58 /**
59 A path leg that represents a JSON array cell (such as `[10]`).
60 This path leg matches a single element in a JSON array.
61 */
63
64 /**
65 A path leg that represents a range in a JSON array
66 (such as `[2 to 7]`).
67 */
69
70 /**
71 @brief A path leg that represents the member wildcard.
72
73 A path leg that represents the member wildcard (`.*`), which
74 matches all the members of a JSON object.
75 */
77
78 /**
79 A path leg that represents the array wildcard (`[*]`), which
80 matches all the elements of a JSON array.
81 */
83
84 /**
85 A path leg that represents the ellipsis (`**`), which matches any
86 JSON value and recursively all the JSON values nested within it if
87 it is an object or an array.
88 */
90};
91
92/**
93 A class that represents the index of an element in a JSON array. The
94 index is 0-based and relative to the beginning of the array.
95*/
96class Json_array_index final {
97 /**
98 The array index. It is 0 if the specified index was before the
99 first element of the array, or equal to the array length if the
100 specified index was after the last element of the array.
101 */
102 size_t m_index;
103
104 /** True if the array index is within the bounds of the array. */
106
107 public:
108 /**
109 Construct a new Json_array_index object representing the specified
110 position in an array of the given length. If @a from_end is true, an
111 @a index of 0 denotes the last element. Out-of-bounds indexes are clamped.
112 @param index the array index
113 @param from_end true if @a index is relative to the end of the array
114 @param array_length the length of the array
115 */
116 Json_array_index(size_t index, bool from_end, size_t array_length)
117 : m_index(from_end ? (index < array_length ? array_length - index - 1 : 0)
118 : std::min(index, array_length)),
119 m_within_bounds(index < array_length) {}
120
121 /**
122 Is the array index within the bounds of the array?
123
124 @retval true if the array index is within bounds
125 @retval false otherwise
126 */
127 bool within_bounds() const { return m_within_bounds; }
128
129 /**
130 Get the position in the array pointed to by this array index.
131
132 If the index is out of bounds, 0 will be returned if the array
133 index is before the first element in the array, or a value equal
134 to the length of the array if the index is after the last element.
135
136 @return the position in the array (0-based index relative to the
137 start of the array)
138 */
139 size_t position() const { return m_index; }
140};
141
142/**
143 One path leg in a JSON path expression.
144
145 A path leg describes an object member, an array cell or an array range
146 (indexed from either end of the array), a wildcard, or an ellipsis.
147*/
148class Json_path_leg final {
149 /// The type of this path leg.
151
152 /// The index of an array cell, or the start of an array range.
154
155 /// Is #m_first_array_index relative to the end of the array?
157
158 /// The end (inclusive) of an array range.
160
161 /// Is #m_last_array_index relative to the end of the array?
163
164 /// The member name of a member path leg.
165 std::string m_member_name;
166
167 public:
168 /**
169 Construct a wildcard or ellipsis path leg.
170
171 @param leg_type the type of wildcard (#jpl_ellipsis,
172 #jpl_member_wildcard or #jpl_array_cell_wildcard)
173 */
175 : m_leg_type(leg_type) {
176 assert(leg_type == jpl_ellipsis || leg_type == jpl_member_wildcard ||
177 leg_type == jpl_array_cell_wildcard);
178 }
179
180 /**
181 Construct an array cell path leg.
182
183 @param index the 0-based index in the array,
184 relative to the beginning of the array
185 */
186 explicit Json_path_leg(size_t index) : Json_path_leg(index, false) {}
187
188 /**
189 Construct an array cell path leg.
190
191 @param index the 0-based index in the array
192 @param from_end true if @a index is relative to the end of the array
193 */
194 Json_path_leg(size_t index, bool from_end)
196 m_first_array_index(index),
198
199 /**
200 Construct an array range path leg.
201
202 @param idx1 the start index of the range, inclusive
203 @param idx1_from_end true if the start index is relative
204 to the end of the array
205 @param idx2 the last index of the range, inclusive
206 @param idx2_from_end true if the last index is relative
207 to the end of the array
208 */
209 Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2,
210 bool idx2_from_end)
213 m_first_array_index_from_end(idx1_from_end),
214 m_last_array_index(idx2),
215 m_last_array_index_from_end(idx2_from_end) {}
216
217 /**
218 Construct an object member path leg.
219
220 @param member_name the name of the object member
221 */
222 explicit Json_path_leg(std::string_view member_name)
223 : m_leg_type(jpl_member), m_member_name(member_name) {}
224
225 /** Get the type of the path leg. */
227
228 /** Get the member name of a ::jpl_member path leg. */
229 const std::string &get_member_name() const { return m_member_name; }
230
231 /** Turn into a human-readable string. */
232 bool to_string(String *buf) const;
233
234 /**
235 Is this path leg an auto-wrapping array accessor?
236
237 An auto-wrapping array accessor is an array accessor that matches
238 non-arrays by auto-wrapping them in a single-element array before doing
239 the matching.
240
241 This function returns true for any ::jpl_array_cell or ::jpl_array_range
242 path leg that would match the element contained in a single-element
243 array, and which therefore would also match non-arrays that have been
244 auto-wrapped in single-element arrays.
245 */
246 bool is_autowrap() const;
247
248 /**
249 Get the first array cell pointed to by an array range, or the
250 array cell pointed to by an array cell index.
251
252 @param array_length the length of the array
253 */
254 Json_array_index first_array_index(size_t array_length) const {
257 array_length);
258 }
259
260 /**
261 Get the last array cell pointed to by an array range. The range
262 includes this cell. Must only be called on a ::jpl_array_range leg.
263
264 @param array_length the length of the array
265 */
266 Json_array_index last_array_index(size_t array_length) const {
267 assert(m_leg_type == jpl_array_range);
269 array_length);
270 }
271
272 /**
273 A structure that represents an array range.
274 */
275 struct Array_range {
276 size_t m_begin; ///< Beginning of the range, inclusive.
277 size_t m_end; ///< End of the range, exclusive.
278 };
279
280 /**
281 Get the array range pointed to by a path leg of type
282 ::jpl_array_range or ::jpl_array_cell_wildcard.
283 @param array_length the length of the array
284 */
285 Array_range get_array_range(size_t array_length) const;
286};
287
290
291/**
292 A path expression which can be used to seek to
293 a position inside a JSON value.
294*/
296 protected:
297 /** An array of pointers to the legs of the JSON path. */
299
301
302 public:
303 /** Return the number of legs in this seekable path. */
304 size_t leg_count() const { return m_path_legs.size(); }
305
306 /** Get an iterator pointing to the first path leg. */
308
309 /** Get an iterator pointing just past the last path leg. */
310 Json_path_iterator end() const { return m_path_legs.end(); }
311
312 /** Get a pointer to the last path leg. The path must not be empty. */
313 const Json_path_leg *last_leg() const { return m_path_legs.back(); }
314};
315
316/**
317 A JSON path expression.
318
319 From the user's point of view, a path expression is a string literal
320 with the following structure. We parse this structure into a
321 Json_path object:
322
323 pathExpression ::= scope pathLeg (pathLeg)*
324
325 scope ::= dollarSign
326
327 pathLeg ::= member | arrayLocation | doubleAsterisk
328
329 member ::= period (keyName | asterisk)
330
331 arrayLocation ::=
332 leftBracket
333 (arrayIndex | arrayRange | asterisk)
334 rightBracket
335
336 arrayIndex ::=
337 non-negative-integer |
338 last [ minus non-negative-integer ]
339
340 arrayRange ::= arrayIndex to arrayIndex
341
342 keyName ::= ECMAScript-identifier | ECMAScript-string-literal
343
344 doubleAsterisk ::= **
345
346 to ::= "to"
347
348 last ::= "last"
349*/
350class Json_path final : public Json_seekable_path {
351 private:
352 /**
353 A MEM_ROOT in which the Json_path_leg objects pointed to by
354 #Json_seekable_path::m_path_legs are allocated.
355 */
357 /**
358 Key used to instrument memory usage.
359 */
361
362 public:
363 explicit Json_path(PSI_memory_key key);
364
366 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
367 }
368
369 /** Move constructor. */
372 m_mem_root(std::move(other.m_mem_root)),
373 m_psi_key(other.m_psi_key) {
374 // Move the contents of m_path_legs from other into this.
375 m_path_legs = std::move(other.m_path_legs);
376
377 /*
378 Must also make sure that other.m_path_legs is empty, so that we
379 don't end up destroying the same objects twice; once from this's
380 destructor and once from other's destructor.
381
382 Move-constructing a vector would usually leave "other" empty,
383 but it is not guaranteed. Furthermore, m_path_legs is a
384 Prealloced_array, not a std::vector, so often moving will mean
385 copying from one prealloced area to another instead of simply
386 swapping pointers to the backing array. (And at the time of
387 writing Prealloced_array doesn't even have a move-assignment
388 operator, so the above assignment will always copy and leave
389 "other" unchanged.)
390 */
391 other.m_path_legs.clear();
392 }
393
394 /** Move assignment. Destroys this object and move-constructs it anew, in place, from @a other. */
396 if (&other != this) {
397 this->~Json_path();
398 new (this) Json_path(std::move(other));
399 }
400 return *this;
401 }
402
403 /**
404 Add a path leg to the end of this path. A copy of @a leg is allocated in #m_mem_root.
405 @param[in] leg the leg to add
406 @return false on success, true on error
407 */
408 bool append(const Json_path_leg &leg) {
409 auto ptr = new (&m_mem_root) Json_path_leg(leg);
 // NOTE(review): if push_back() fails, the leg constructed above is never
 // destructed, since only legs in m_path_legs are destructed. Confirm intended.
410 return ptr == nullptr || m_path_legs.push_back(ptr);
411 }
412
413 /**
414 Resets this to an empty path with no legs.
415 */
416 void clear() {
417 // Destruct all the Json_path_leg objects, and clear the pointers to them.
418 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
420 // Mark the memory as ready for reuse.
422 }
423
424 /**
425 Return true if the path can match more than one value in a JSON document.
426
427 @retval true if the path contains a path leg which is a wildcard,
428 ellipsis or array range
429 @retval false otherwise
430 */
431 bool can_match_many() const;
432
433 /** Turn into a human-readable string. */
434 bool to_string(String *buf) const;
435};
436
437/**
438 A lightweight path expression. This exists so that paths can be cloned
439 from the path legs of other paths without allocating heap memory
440 to copy those legs into. This class does not own the memory of the
441 Json_path_leg objects pointed to by #Json_seekable_path::m_path_legs, it
442 just points to Json_path_leg objects that belong to a Json_path instance.
443*/
445 public:
447 /**
448 Add a path leg to the end of this cloned path.
449 @param[in] leg the leg to add
450 @return false on success, true on error
451 */
452 bool append(const Json_path_leg *leg) { return m_path_legs.push_back(leg); }
453
454 /**
455 Resets this to an empty path with no legs.
456 */
457 void clear() { m_path_legs.clear(); }
458};
459
460/**
461 Initialize a Json_path from a path expression.
462
463 Stops parsing on the first error. It initializes the Json_path and
464 returns false if the path is parsed successfully. Otherwise, it
465 returns true. In that case, the output bad_index argument will
466 contain an index into the path expression. The parsing failed near
467 that index.
468
469 @param[in] path_length The length of the path expression.
470 @param[in] path_expression The string form of the path expression.
471 @param[out] path The Json_path object to be initialized.
472 @param[out] bad_index If true is returned, the parsing failed around here.
473 @return false on success, true on error
474*/
475bool parse_path(size_t path_length, const char *path_expression,
476 Json_path *path, size_t *bad_index);
477
478/**
479 A helper function that uses the above one as workhorse. Entry point
480 for JSON_TABLE (Table_function_json class) and Json_path_cache. Raises an
481 error if the path expression is syntactically incorrect. Raises an
482 error if the path expression contains wildcard tokens but is not
483 supposed to. Otherwise updates the supplied Json_path object with
484 the parsed path.
485
486 @param[in] path_value A String to be interpreted as a path.
487 @param[in] forbid_wildcards True if the path shouldn't contain * or **
488 @param[out] json_path The object that will hold the parsed path
489
490 @returns false on success (valid path or NULL), true on error
491*/
492bool parse_path(const String &path_value, bool forbid_wildcards,
493 Json_path *json_path);
494
495/**
496 Clone a source path to a target path, stripping out legs which are made
497 redundant by the auto-wrapping rule from the WL#7909 spec and further
498 extended in the WL#9831 spec:
499
500 "If an array cell path leg or an array range path leg is evaluated against a
501 non-array value, the result of the evaluation is the same as if the non-array
502 value had been wrapped in a single-element array."
503
504 @see Json_path_leg::is_autowrap
505
506 @param[in] source_path The original path.
507 @param[in,out] target_path The clone to be filled in.
508 @param[in] doc The document to seek through.
509 @param[in] key Instrumented memory key
510 @returns True if an error occurred. False otherwise.
511*/
512bool clone_without_autowrapping(const Json_path *source_path,
513 Json_path_clone *target_path, Json_wrapper *doc,
515
516#endif /* SQL_JSON_PATH_INCLUDED */
A class that represents the index of an element in a JSON array.
Definition: json_path.h:96
size_t m_index
The array index.
Definition: json_path.h:102
size_t position() const
Get the position in the array pointed to by this array index.
Definition: json_path.h:139
Json_array_index(size_t index, bool from_end, size_t array_length)
Construct a new Json_array_index object representing the specified position in an array of the given ...
Definition: json_path.h:116
bool m_within_bounds
True if the array index is within the bounds of the array.
Definition: json_path.h:105
bool within_bounds() const
Is the array index within the bounds of the array?
Definition: json_path.h:127
A lightweight path expression.
Definition: json_path.h:444
bool append(const Json_path_leg *leg)
Add a path leg to the end of this cloned path.
Definition: json_path.h:452
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:457
Json_path_clone(PSI_memory_key key)
Definition: json_path.h:446
One path leg in a JSON path expression.
Definition: json_path.h:148
size_t m_first_array_index
The index of an array cell, or the start of an array range.
Definition: json_path.h:153
size_t m_last_array_index
The end (inclusive) of an array range.
Definition: json_path.h:159
Json_path_leg(enum_json_path_leg_type leg_type)
Construct a wildcard or ellipsis path leg.
Definition: json_path.h:174
Json_array_index first_array_index(size_t array_length) const
Get the first array cell pointed to by an array range, or the array cell pointed to by an array cell ...
Definition: json_path.h:254
Array_range get_array_range(size_t array_length) const
Get the array range pointed to by a path leg of type jpl_array_range or jpl_array_cell_wildcard.
Definition: json_path.cc:148
bool m_last_array_index_from_end
Is m_last_array_index relative to the end of the array?
Definition: json_path.h:162
std::string m_member_name
The member name of a member path leg.
Definition: json_path.h:165
enum_json_path_leg_type m_leg_type
The type of this path leg.
Definition: json_path.h:150
bool is_autowrap() const
Is this path leg an auto-wrapping array accessor?
Definition: json_path.cc:125
bool m_first_array_index_from_end
Is m_first_array_index relative to the end of the array?
Definition: json_path.h:156
Json_path_leg(std::string_view member_name)
Construct an object member path leg.
Definition: json_path.h:222
Json_path_leg(size_t index)
Construct an array cell path leg.
Definition: json_path.h:186
Json_path_leg(size_t index, bool from_end)
Construct an array cell path leg.
Definition: json_path.h:194
enum_json_path_leg_type get_type() const
Get the type of the path leg.
Definition: json_path.h:226
const std::string & get_member_name() const
Get the member name of a jpl_member path leg.
Definition: json_path.h:229
Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2, bool idx2_from_end)
Construct an array range path leg.
Definition: json_path.h:209
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:90
Json_array_index last_array_index(size_t array_length) const
Get the last array cell pointed to by an array range.
Definition: json_path.h:266
A JSON path expression.
Definition: json_path.h:350
Json_path(Json_path &&other)
Move constructor.
Definition: json_path.h:370
~Json_path()
Definition: json_path.h:365
Json_path(PSI_memory_key key)
Definition: json_path.cc:168
bool append(const Json_path_leg &leg)
Add a path leg to the end of this path.
Definition: json_path.h:408
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:416
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:171
PSI_memory_key m_psi_key
Key used to instrument memory usage.
Definition: json_path.h:360
Json_path & operator=(Json_path &&other)
Move assignment.
Definition: json_path.h:395
bool can_match_many() const
Return true if the path can match more than one value in a JSON document.
Definition: json_path.cc:181
MEM_ROOT m_mem_root
A MEM_ROOT in which the Json_path_leg objects pointed to by Json_seekable_path::m_path_legs are alloc...
Definition: json_path.h:356
A path expression which can be used to seek to a position inside a JSON value.
Definition: json_path.h:295
Json_path_iterator end() const
Get an iterator pointing just past the last path leg.
Definition: json_path.h:310
Json_seekable_path(PSI_memory_key key)
Definition: json_path.cc:165
size_t leg_count() const
Return the number of legs in this searchable path.
Definition: json_path.h:304
const Json_path_leg * last_leg() const
Get a pointer to the last path leg.
Definition: json_path.h:313
Json_path_iterator begin() const
Get an iterator pointing to the first path leg.
Definition: json_path.h:307
Json_path_leg_pointers m_path_legs
An array of pointers to the legs of the JSON path.
Definition: json_path.h:298
Abstraction for accessing JSON values irrespective of whether they are (started out as) binary JSON v...
Definition: json_dom.h:1150
bool push_back(const Element_type &element)
Copies an element into the back of the array.
Definition: prealloced_array.h:327
void clear()
Removes (and destroys) all elements.
Definition: prealloced_array.h:610
size_t size() const
Definition: prealloced_array.h:227
iterator begin()
begin : Returns a pointer to the first element in the array.
Definition: prealloced_array.h:254
Element_type & back()
Definition: prealloced_array.h:244
iterator end()
Definition: prealloced_array.h:255
const const Json_path_leg * * const_iterator
Definition: prealloced_array.h:117
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:167
unsigned int PSI_memory_key
Instrumented memory key.
Definition: psi_memory_bits.h:49
bool clone_without_autowrapping(const Json_path *source_path, Json_path_clone *target_path, Json_wrapper *doc, PSI_memory_key key)
Clone a source path to a target path, stripping out legs which are made redundant by the auto-wrappin...
Definition: json_path.cc:775
enum_json_path_leg_type
The type of a Json_path_leg.
Definition: json_path.h:51
@ jpl_array_range
A path leg that represents a range in a JSON array (such as [2 to 7]).
Definition: json_path.h:68
@ jpl_array_cell_wildcard
A path leg that represents the array wildcard ([*]), which matches all the elements of a JSON array.
Definition: json_path.h:82
@ jpl_ellipsis
A path leg that represents the ellipsis (**), which matches any JSON value and recursively all the JS...
Definition: json_path.h:89
@ jpl_member
A path leg that represents a JSON object member (such as .name).
Definition: json_path.h:56
@ jpl_array_cell
A path leg that represents a JSON array cell (such as [10]).
Definition: json_path.h:62
@ jpl_member_wildcard
A path leg that represents the member wildcard.
Definition: json_path.h:76
Json_path_leg_pointers::const_iterator Json_path_iterator
Definition: json_path.h:289
bool parse_path(size_t path_length, const char *path_expression, Json_path *path, size_t *bad_index)
Initialize a Json_path from a path expression.
Definition: json_path.cc:258
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
static char * path
Definition: mysqldump.cc:149
Definition: buf0block_hint.cc:30
Definition: gcs_xcom_synode.h:64
Performance schema instrumentation interface.
required string key
Definition: replication_asynchronous_connection_failover.proto:60
A structure that represents an array range.
Definition: json_path.h:275
size_t m_end
End of the range, exclusive.
Definition: json_path.h:277
size_t m_begin
Beginning of the range, inclusive.
Definition: json_path.h:276
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:83
void ClearForReuse()
Similar to Clear(), but anticipates that the block will be reused for further allocations.
Definition: my_alloc.cc:190