MySQL 8.3.0
Source Code Documentation
json_path.h
Go to the documentation of this file.
1#ifndef SQL_JSON_PATH_INCLUDED
2#define SQL_JSON_PATH_INCLUDED
3
4/* Copyright (c) 2015, 2023, Oracle and/or its affiliates.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26/**
27 @file json_path.h
28
29 This file contains interface support for the JSON path abstraction.
30 The path abstraction is described by the functional spec
31 attached to WL#7909.
32*/
33
34#include <assert.h>
35#include <stddef.h>
36#include <algorithm>
37#include <new>
38#include <string>
39#include <utility>
40
41#include "my_alloc.h" // MEM_ROOT
43#include "prealloced_array.h" // Prealloced_array
44
45class String;
46
47/** The type of a Json_path_leg. */
49 /**
50 A path leg that represents a JSON object member (such as `.name`).
51 This path leg matches a single member in a JSON object.
52 */
54
55 /**
56 A path leg that represents a JSON array cell (such as `[10]`).
57 This path leg matches a single element in a JSON array.
58 */
60
61 /**
62 A path leg that represents a range in a JSON array
63 (such as `[2 to 7]`).
64 */
66
67 /**
68 @brief A path leg that represents the member wildcard.
69
70 A path leg that represents the member wildcard (`.*`), which
71 matches all the members of a JSON object.
72 */
74
75 /**
76 A path leg that represents the array wildcard (`[*]`), which
77 matches all the elements of a JSON array.
78 */
80
81 /**
82 A path leg that represents the ellipsis (`**`), which matches any
83 JSON value and recursively all the JSON values nested within it if
84 it is an object or an array.
85 */
87};
88
89/**
90 A class that represents the index of an element in a JSON array. The
91 index is 0-based and relative to the beginning of the array.
92*/
93class Json_array_index final {
94 /**
95 The array index. It is 0 if the specified index was before the
96 first element of the array, or equal to the array length if the
97 specified index was after the last element of the array.
98 */
99 size_t m_index;
100
101 /** True if the array index is within the bounds of the array. */
103
104 public:
105 /**
106 Construct a new Json_array_index object representing the specified
107 position in an array of the given length.
108
109 @param index the array index
110 @param from_end true if @a index is relative to the end of the array
111 @param array_length the length of the array
112 */
113 Json_array_index(size_t index, bool from_end, size_t array_length)
114 : m_index(from_end ? (index < array_length ? array_length - index - 1 : 0)
115 : std::min(index, array_length)),
116 m_within_bounds(index < array_length) {}
117
118 /**
119 Is the array index within the bounds of the array?
120
121 @retval true if the array index is within bounds
122 @retval false otherwise
123 */
124 bool within_bounds() const { return m_within_bounds; }
125
126 /**
127 Get the position in the array pointed to by this array index.
128
129 If the index is out of bounds, 0 will be returned if the array
130 index is before the first element in the array, or a value equal
131 to the length of the array if the index is after the last element.
132
133 @return the position in the array (0-based index relative to the
134 start of the array)
135 */
136 size_t position() const { return m_index; }
137};
138
139/**
140 One path leg in a JSON path expression.
141
142 A path leg describes either a key/value pair in an object
143 or a 0-based index into an array.
144*/
145class Json_path_leg final {
146 /// The type of this path leg.
148
149 /// The index of an array cell, or the start of an array range.
151
152 /// Is #m_first_array_index relative to the end of the array?
154
155 /// The end (inclusive) of an array range.
157
158 /// Is #m_last_array_index relative to the end of the array?
160
161 /// The member name of a member path leg.
162 std::string m_member_name;
163
164 public:
165 /**
166 Construct a wildcard or ellipsis path leg.
167
168 @param leg_type the type of wildcard (#jpl_ellipsis,
169 #jpl_member_wildcard or #jpl_array_cell_wildcard)
170 */
172 : m_leg_type(leg_type) {
173 assert(leg_type == jpl_ellipsis || leg_type == jpl_member_wildcard ||
174 leg_type == jpl_array_cell_wildcard);
175 }
176
177 /**
178 Construct an array cell path leg.
179
180 @param index the 0-based index in the array,
181 relative to the beginning of the array
182 */
183 explicit Json_path_leg(size_t index) : Json_path_leg(index, false) {}
184
185 /**
186 Construct an array cell path leg.
187
188 @param index the 0-based index in the array
189 @param from_end true if @a index is relative to the end of the array
190 */
191 Json_path_leg(size_t index, bool from_end)
193 m_first_array_index(index),
195
196 /**
197 Construct an array range path leg.
198
199 @param idx1 the start index of the range, inclusive
200 @param idx1_from_end true if the start index is relative
201 to the end of the array
202 @param idx2 the last index of the range, inclusive
203 @param idx2_from_end true if the last index is relative
204 to the end of the array
205 */
206 Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2,
207 bool idx2_from_end)
210 m_first_array_index_from_end(idx1_from_end),
211 m_last_array_index(idx2),
212 m_last_array_index_from_end(idx2_from_end) {}
213
214 /**
215 Construct an object member path leg.
216
217 @param member_name the name of the object member
218 @param length the length of the member name
219 */
220 Json_path_leg(const char *member_name, size_t length)
221 : m_leg_type(jpl_member), m_member_name(member_name, length) {}
222
223 /** Construct an object member path leg. */
224 Json_path_leg(const std::string &member_name)
225 : Json_path_leg(member_name.c_str(), member_name.length()) {}
226
227 /** Get the type of the path leg. */
229
230 /** Get the member name of a ::jpl_member path leg. */
231 const std::string &get_member_name() const { return m_member_name; }
232
233 /** Turn into a human-readable string. */
234 bool to_string(String *buf) const;
235
236 /**
237 Is this path leg an auto-wrapping array accessor?
238
239 An auto-wrapping array accessor is an array accessor that matches
240 non-arrays by auto-wrapping them in a single-element array before doing
241 the matching.
242
243 This function returns true for any ::jpl_array_cell or ::jpl_array_range
244 path leg that would match the element contained in a single-element
245 array, and which therefore would also match non-arrays that have been
246 auto-wrapped in single-element arrays.
247 */
248 bool is_autowrap() const;
249
250 /**
251 Get the first array cell pointed to by an array range, or the
252 array cell pointed to by an array cell index.
253
254 @param array_length the length of the array
255 */
256 Json_array_index first_array_index(size_t array_length) const {
259 array_length);
260 }
261
262 /**
263 Get the last array cell pointed to by an array range. The range
264 includes this cell.
265
266 @param array_length the length of the array
267 */
268 Json_array_index last_array_index(size_t array_length) const {
269 assert(m_leg_type == jpl_array_range);
271 array_length);
272 }
273
274 /**
275 A structure that represents an array range.
276 */
277 struct Array_range {
278 size_t m_begin; ///< Beginning of the range, inclusive.
279 size_t m_end; ///< End of the range, exclusive.
280 };
281
282 /**
283 Get the array range pointed to by a path leg of type
284 ::jpl_array_range or ::jpl_array_cell_wildcard.
285 @param array_length the length of the array
286 */
287 Array_range get_array_range(size_t array_length) const;
288};
289
292
293/**
294 A path expression which can be used to seek to
295 a position inside a JSON value.
296*/
298 protected:
299 /** An array of pointers to the legs of the JSON path. */
301
303
304 public:
305 /** Return the number of legs in this seekable path. */
306 size_t leg_count() const { return m_path_legs.size(); }
307
308 /** Get an iterator pointing to the first path leg. */
310
311 /** Get an iterator pointing just past the last path leg. */
312 Json_path_iterator end() const { return m_path_legs.end(); }
313
314 /** Get a pointer to the last path leg. The path must not be empty. */
315 const Json_path_leg *last_leg() const { return m_path_legs.back(); }
316};
317
318/**
319 A JSON path expression.
320
321 From the user's point of view, a path expression is a string literal
322 with the following structure. We parse this structure into a
323 Json_path object:
324
325 pathExpression ::= scope pathLeg (pathLeg)*
326
327 scope ::= dollarSign
328
329 pathLeg ::= member | arrayLocation | doubleAsterisk
330
331 member ::= period (keyName | asterisk)
332
333 arrayLocation ::=
334 leftBracket
335 (arrayIndex | arrayRange | asterisk)
336 rightBracket
337
338 arrayIndex ::=
339 non-negative-integer |
340 last [ minus non-negative-integer ]
341
342 arrayRange ::= arrayIndex to arrayIndex
343
344 keyName ::= ECMAScript-identifier | ECMAScript-string-literal
345
346 doubleAsterisk ::= **
347
348 to ::= "to"
349
350 last ::= "last"
351*/
352class Json_path final : public Json_seekable_path {
353 private:
354 /**
355 A MEM_ROOT in which the Json_path_leg objects pointed to by
356 #Json_seekable_path::m_path_legs are allocated.
357 */
359 /**
360 Key used to instrument memory usage.
361 */
363
364 public:
365 explicit Json_path(PSI_memory_key key);
366
368 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
369 }
370
371 /** Move constructor. */
374 m_mem_root(std::move(other.m_mem_root)),
375 m_psi_key(other.m_psi_key) {
376 // Move the contents of m_path_legs from other into this.
377 m_path_legs = std::move(other.m_path_legs);
378
379 /*
380 Must also make sure that other.m_path_legs is empty, so that we
381 don't end up destroying the same objects twice; once from this's
382 destructor and once from other's destructor.
383
384 Move-constructing a vector would usually leave "other" empty,
385 but it is not guaranteed. Furthermore, m_path_legs is a
386 Prealloced_array, not a std::vector, so often moving will mean
387 copying from one prealloced area to another instead of simply
388 swapping pointers to the backing array. (And at the time of
389 writing Prealloced_array doesn't even have a move-assignment
390 operator, so the above assignment will always copy and leave
391 "other" unchanged.)
392 */
393 other.m_path_legs.clear();
394 }
395
396 /** Move assignment. */
398 if (&other != this) {
399 this->~Json_path();
400 new (this) Json_path(std::move(other));
401 }
402 return *this;
403 }
404
405 /**
406 Add a path leg to the end of this path.
407 @param[in] leg the leg to add
408 @return false on success, true on error
409 */
410 bool append(const Json_path_leg &leg) {
411 auto ptr = new (&m_mem_root) Json_path_leg(leg);
412 return ptr == nullptr || m_path_legs.push_back(ptr);
413 }
414
415 /**
416 Resets this to an empty path with no legs.
417 */
418 void clear() {
419 // Destruct all the Json_path_leg objects, and clear the pointers to them.
420 for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
422 // Mark the memory as ready for reuse.
424 }
425
426 /**
427 Return true if the path can match more than one value in a JSON document.
428
429 @retval true if the path contains a path leg which is a wildcard,
430 ellipsis or array range
431 @retval false otherwise
432 */
433 bool can_match_many() const;
434
435 /** Turn into a human-readable string. */
436 bool to_string(String *buf) const;
437};
438
439/**
440 A lightweight path expression. This exists so that paths can be cloned
441 from the path legs of other paths without allocating heap memory
442 to copy those legs into. This class does not own the memory of the
443 Json_path_leg objects pointed to by #Json_seekable_path::m_path_legs, it
444 just points to Json_path_leg objects that belong to a Json_path instance.
445*/
447 public:
449 /**
450 Add a path leg to the end of this cloned path.
451 @param[in] leg the leg to add
452 @return false on success, true on error
453 */
454 bool append(const Json_path_leg *leg) { return m_path_legs.push_back(leg); }
455
456 /**
457 Resets this to an empty path with no legs.
458 */
459 void clear() { m_path_legs.clear(); }
460};
461
462/**
463 Initialize a Json_path from a path expression.
464
465 Stops parsing on the first error. It initializes the Json_path and
466 returns false if the path is parsed successfully. Otherwise, it
467 returns true. In that case, the output bad_index argument will
468 contain an index into the path expression. The parsing failed near
469 that index.
470
471 @param[in] path_length The length of the path expression.
472 @param[in] path_expression The string form of the path expression.
473 @param[out] path The Json_path object to be initialized.
474 @param[out] bad_index If true is returned, the parsing failed around here.
475 @return false on success, true on error
476*/
477bool parse_path(size_t path_length, const char *path_expression,
478 Json_path *path, size_t *bad_index);
479
480/**
481 A helper function that uses the above one as workhorse. Entry point
482 for JSON_TABLE (Table_function_json class) and Json_path_cache. Raises an
483 error if the path expression is syntactically incorrect. Raises an
484 error if the path expression contains wildcard tokens but is not
485 supposed to. Otherwise updates the supplied Json_path object with
486 the parsed path.
487
488 @param[in] path_value A String to be interpreted as a path.
489 @param[in] forbid_wildcards True if the path shouldn't contain * or **
490 @param[out] json_path The object that will hold the parsed path
491
492 @returns false on success (valid path or NULL), true on error
493*/
494bool parse_path(const String &path_value, bool forbid_wildcards,
495 Json_path *json_path);
496#endif /* SQL_JSON_PATH_INCLUDED */
A class that represents the index of an element in a JSON array.
Definition: json_path.h:93
size_t m_index
The array index.
Definition: json_path.h:99
size_t position() const
Get the position in the array pointed to by this array index.
Definition: json_path.h:136
Json_array_index(size_t index, bool from_end, size_t array_length)
Construct a new Json_array_index object representing the specified position in an array of the given length.
Definition: json_path.h:113
bool m_within_bounds
True if the array index is within the bounds of the array.
Definition: json_path.h:102
bool within_bounds() const
Is the array index within the bounds of the array?
Definition: json_path.h:124
A lightweight path expression.
Definition: json_path.h:446
bool append(const Json_path_leg *leg)
Add a path leg to the end of this cloned path.
Definition: json_path.h:454
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:459
Json_path_clone(PSI_memory_key key)
Definition: json_path.h:448
One path leg in a JSON path expression.
Definition: json_path.h:145
size_t m_first_array_index
The index of an array cell, or the start of an array range.
Definition: json_path.h:150
size_t m_last_array_index
The end (inclusive) of an array range.
Definition: json_path.h:156
Json_path_leg(enum_json_path_leg_type leg_type)
Construct a wildcard or ellipsis path leg.
Definition: json_path.h:171
Json_array_index first_array_index(size_t array_length) const
Get the first array cell pointed to by an array range, or the array cell pointed to by an array cell index.
Definition: json_path.h:256
Array_range get_array_range(size_t array_length) const
Get the array range pointed to by a path leg of type jpl_array_range or jpl_array_cell_wildcard.
Definition: json_path.cc:147
bool m_last_array_index_from_end
Is m_last_array_index relative to the end of the array?
Definition: json_path.h:159
std::string m_member_name
The member name of a member path leg.
Definition: json_path.h:162
enum_json_path_leg_type m_leg_type
The type of this path leg.
Definition: json_path.h:147
bool is_autowrap() const
Is this path leg an auto-wrapping array accessor?
Definition: json_path.cc:124
bool m_first_array_index_from_end
Is m_first_array_index relative to the end of the array?
Definition: json_path.h:153
Json_path_leg(const char *member_name, size_t length)
Construct an object member path leg.
Definition: json_path.h:220
Json_path_leg(size_t index)
Construct an array cell path leg.
Definition: json_path.h:183
Json_path_leg(const std::string &member_name)
Construct an object member path leg.
Definition: json_path.h:224
Json_path_leg(size_t index, bool from_end)
Construct an array cell path leg.
Definition: json_path.h:191
enum_json_path_leg_type get_type() const
Get the type of the path leg.
Definition: json_path.h:228
const std::string & get_member_name() const
Get the member name of a jpl_member path leg.
Definition: json_path.h:231
Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2, bool idx2_from_end)
Construct an array range path leg.
Definition: json_path.h:206
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:89
Json_array_index last_array_index(size_t array_length) const
Get the last array cell pointed to by an array range.
Definition: json_path.h:268
A JSON path expression.
Definition: json_path.h:352
Json_path(Json_path &&other)
Move constructor.
Definition: json_path.h:372
~Json_path()
Definition: json_path.h:367
Json_path(PSI_memory_key key)
Definition: json_path.cc:167
bool append(const Json_path_leg &leg)
Add a path leg to the end of this path.
Definition: json_path.h:410
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:418
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:170
PSI_memory_key m_psi_key
Key used to instrument memory usage.
Definition: json_path.h:362
Json_path & operator=(Json_path &&other)
Move assignment.
Definition: json_path.h:397
bool can_match_many() const
Return true if the path can match more than one value in a JSON document.
Definition: json_path.cc:180
MEM_ROOT m_mem_root
A MEM_ROOT in which the Json_path_leg objects pointed to by Json_seekable_path::m_path_legs are allocated.
Definition: json_path.h:358
A path expression which can be used to seek to a position inside a JSON value.
Definition: json_path.h:297
Json_path_iterator end() const
Get an iterator pointing just past the last path leg.
Definition: json_path.h:312
Json_seekable_path(PSI_memory_key key)
Definition: json_path.cc:164
size_t leg_count() const
Return the number of legs in this seekable path.
Definition: json_path.h:306
const Json_path_leg * last_leg() const
Get a pointer to the last path leg.
Definition: json_path.h:315
Json_path_iterator begin() const
Get an iterator pointing to the first path leg.
Definition: json_path.h:309
Json_path_leg_pointers m_path_legs
An array of pointers to the legs of the JSON path.
Definition: json_path.h:300
bool push_back(const Element_type &element)
Copies an element into the back of the array.
Definition: prealloced_array.h:317
void clear()
Removes (and destroys) all elements.
Definition: prealloced_array.h:600
size_t size() const
Definition: prealloced_array.h:226
iterator begin()
begin : Returns a pointer to the first element in the array.
Definition: prealloced_array.h:253
Element_type & back()
Definition: prealloced_array.h:243
iterator end()
Definition: prealloced_array.h:254
const const Json_path_leg * * const_iterator
Definition: prealloced_array.h:116
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:166
unsigned int PSI_memory_key
Instrumented memory key.
Definition: psi_memory_bits.h:48
enum_json_path_leg_type
The type of a Json_path_leg.
Definition: json_path.h:48
@ jpl_array_range
A path leg that represents a range in a JSON array (such as [2 to 7]).
Definition: json_path.h:65
@ jpl_array_cell_wildcard
A path leg that represents the array wildcard ([*]), which matches all the elements of a JSON array.
Definition: json_path.h:79
@ jpl_ellipsis
A path leg that represents the ellipsis (**), which matches any JSON value and recursively all the JSON values nested within it if it is an object or an array.
Definition: json_path.h:86
@ jpl_member
A path leg that represents a JSON object member (such as .name).
Definition: json_path.h:53
@ jpl_array_cell
A path leg that represents a JSON array cell (such as [10]).
Definition: json_path.h:59
@ jpl_member_wildcard
A path leg that represents the member wildcard.
Definition: json_path.h:73
Json_path_leg_pointers::const_iterator Json_path_iterator
Definition: json_path.h:291
bool parse_path(size_t path_length, const char *path_expression, Json_path *path, size_t *bad_index)
Initialize a Json_path from a path expression.
Definition: json_path.cc:257
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
static char * path
Definition: mysqldump.cc:148
Definition: buf0block_hint.cc:29
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
Definition: varlen_sort.h:174
Performance schema instrumentation interface.
required string key
Definition: replication_asynchronous_connection_failover.proto:59
A structure that represents an array range.
Definition: json_path.h:277
size_t m_end
End of the range, exclusive.
Definition: json_path.h:279
size_t m_begin
Beginning of the range, inclusive.
Definition: json_path.h:278
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:82
void ClearForReuse()
Similar to Clear(), but anticipates that the block will be reused for further allocations.
Definition: my_alloc.cc:189