MySQL  8.0.26
Source Code Documentation
json_path.h
Go to the documentation of this file.
1 #ifndef SQL_JSON_PATH_INCLUDED
2 #define SQL_JSON_PATH_INCLUDED
3 
4 /* Copyright (c) 2015, 2021, Oracle and/or its affiliates.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
9 
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
16 
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25 
26 /**
27  @file json_path.h
28 
29  This file contains interface support for the JSON path abstraction.
30  The path abstraction is described by the functional spec
31  attached to WL#7909.
32 */
33 
34 #include <assert.h>
35 #include <stddef.h>
36 #include <algorithm>
37 #include <new>
38 #include <string>
39 #include <utility>
40 
41 #include "my_alloc.h" // MEM_ROOT
42  // assert
43 #include "my_inttypes.h"
44 #include "my_sys.h"
45 #include "prealloced_array.h" // Prealloced_array
46 
47 class String;
48 
49 /** The type of a Json_path_leg. */
51  /**
52  A path leg that represents a JSON object member (such as `.name`).
53  This path leg matches a single member in a JSON object.
54  */
56 
57  /**
58  A path leg that represents a JSON array cell (such as `[10]`).
59  This path leg matches a single element in a JSON array.
60  */
62 
63  /**
64  A path leg that represents a range in a JSON array
65  (such as `[2 to 7]`).
66  */
68 
69  /**
70  @brief A path leg that represents the member wildcard.
71 
72  A path leg that represents the member wildcard (`.*`), which
73  matches all the members of a JSON object.
74  */
76 
77  /**
78  A path leg that represents the array wildcard (`[*]`), which
79  matches all the elements of a JSON array.
80  */
82 
83  /**
84  A path leg that represents the ellipsis (`**`), which matches any
85  JSON value and recursively all the JSON values nested within it if
86  it is an object or an array.
87  */
89 };
90 
91 /**
92  A class that represents the index of an element in a JSON array. The
93  index is 0-based and relative to the beginning of the array.
94 */
95 class Json_array_index final {
96  /**
97  The array index. It is 0 if the specified index was before the
98  first element of the array, or equal to the array length if the
99  specified index was after the last element of the array.
100  */
101  size_t m_index;
102 
103  /** True if the array index is within the bounds of the array. */
105 
106  public:
107  /**
108  Construct a new Json_array_index object representing the specified
109  position in an array of the given length.
110 
111  @param index the 0-based array index, counted from the first element, or from the last element if @a from_end is true
112  @param from_end true if @a index is relative to the end of the array (index 0 then denotes the last element)
113  @param array_length the length of the array
114  */
115  Json_array_index(size_t index, bool from_end, size_t array_length)
116  : m_index(from_end ? (index < array_length ? array_length - index - 1 : 0)
117  : std::min(index, array_length)),
118  m_within_bounds(index < array_length) {}
119 
120  /**
121  Is the array index within the bounds of the array?
122 
123  @retval true if the array index is within bounds
124  @retval false otherwise
125  */
126  bool within_bounds() const { return m_within_bounds; }
127 
128  /**
129  Get the position in the array pointed to by this array index.
130 
131  If the index is out of bounds, 0 will be returned if the array
132  index is before the first element in the array, or a value equal
133  to the length of the array if the index is after the last element.
134 
135  @return the position in the array (0-based index relative to the
136  start of the array)
137  */
138  size_t position() const { return m_index; }
139 };
140 
141 /**
142  One path leg in a JSON path expression.
143 
144  A path leg describes either a key/value pair in an object
145  or a 0-based index into an array.
146 */
147 class Json_path_leg final {
148  /// The type of this path leg.
150 
151  /// The index of an array cell, or the start of an array range.
153 
154  /// Is #m_first_array_index relative to the end of the array?
156 
157  /// The end (inclusive) of an array range.
158  size_t m_last_array_index = 0;
159 
160  /// Is #m_last_array_index relative to the end of the array?
162 
163  /// The member name of a member path leg.
164  std::string m_member_name;
165 
166  public:
167  /**
168  Construct a wildcard or ellipsis path leg.
169 
170  @param leg_type the type of wildcard (#jpl_ellipsis,
171  #jpl_member_wildcard or #jpl_array_cell_wildcard)
172  */
174  : m_leg_type(leg_type) {
175  assert(leg_type == jpl_ellipsis || leg_type == jpl_member_wildcard ||
176  leg_type == jpl_array_cell_wildcard);
177  }
178 
179  /**
180  Construct an array cell path leg.
181 
182  @param index the 0-based index in the array,
183  relative to the beginning of the array
184  */
185  explicit Json_path_leg(size_t index) : Json_path_leg(index, false) {}
186 
187  /**
188  Construct an array cell path leg.
189 
190  @param index the 0-based index in the array
191  @param from_end true if @a index is relative to the end of the array
192  */
193  Json_path_leg(size_t index, bool from_end)
195  m_first_array_index(index),
196  m_first_array_index_from_end(from_end) {}
197 
198  /**
199  Construct an array range path leg.
200 
201  @param idx1 the start index of the range, inclusive
202  @param idx1_from_end true if the start index is relative
203  to the end of the array
204  @param idx2 the last index of the range, inclusive
205  @param idx2_from_end true if the last index is relative
206  to the end of the array
207  */
208  Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2,
209  bool idx2_from_end)
211  m_first_array_index(idx1),
212  m_first_array_index_from_end(idx1_from_end),
213  m_last_array_index(idx2),
214  m_last_array_index_from_end(idx2_from_end) {}
215 
216  /**
217  Construct an object member path leg.
218 
219  @param member_name the name of the object member
220  @param length the length of the member name
221  */
222  Json_path_leg(const char *member_name, size_t length)
223  : m_leg_type(jpl_member), m_member_name(member_name, length) {}
224 
225  /** Construct an object member path leg. */
226  Json_path_leg(const std::string &member_name)
227  : Json_path_leg(member_name.c_str(), member_name.length()) {}
228 
229  /** Get the type of the path leg. */
231 
232  /** Get the member name of a ::jpl_member path leg. */
233  const std::string &get_member_name() const { return m_member_name; }
234 
235  /** Turn into a human-readable string. */
236  bool to_string(String *buf) const;
237 
238  /**
239  Is this path leg an auto-wrapping array accessor?
240 
241  An auto-wrapping array accessor is an array accessor that matches
242  non-arrays by auto-wrapping them in a single-element array before doing
243  the matching.
244 
245  This function returns true for any ::jpl_array_cell or ::jpl_array_range
246  path leg that would match the element contained in a single-element
247  array, and which therefore would also match non-arrays that have been
248  auto-wrapped in single-element arrays.
249  */
250  bool is_autowrap() const;
251 
252  /**
253  Get the first array cell pointed to by an array range, or the
254  array cell pointed to by an array cell index.
255 
256  @param array_length the length of the array
257  */
258  Json_array_index first_array_index(size_t array_length) const {
261  array_length);
262  }
263 
264  /**
265  Get the last array cell pointed to by an array range. The range
266  includes this cell.
267 
268  @param array_length the length of the array
269  */
270  Json_array_index last_array_index(size_t array_length) const {
271  assert(m_leg_type == jpl_array_range);
273  array_length);
274  }
275 
276  /**
277  A structure that represents an array range.
278  */
279  struct Array_range {
280  size_t m_begin; ///< Beginning of the range, inclusive.
281  size_t m_end; ///< End of the range, exclusive.
282  };
283 
284  /**
285  Get the array range pointed to by a path leg of type
286  ::jpl_array_range or ::jpl_array_cell_wildcard.
287  @param array_length the length of the array
288  */
289  Array_range get_array_range(size_t array_length) const;
290 };
291 
294 
295 /**
296  A path expression which can be used to seek to
297  a position inside a JSON value.
298 */
300  protected:
301  /** An array of pointers to the legs of the JSON path. */
303 
305 
306  public:
307  /** Return the number of legs in this searchable path */
308  size_t leg_count() const { return m_path_legs.size(); }
309 
310  /** Get an iterator pointing to the first path leg. */
312 
313  /** Get an iterator pointing just past the last path leg. */
314  Json_path_iterator end() const { return m_path_legs.end(); }
315 
316  /** Get a pointer to the last path leg. The path must not be empty. */
317  const Json_path_leg *last_leg() const { return m_path_legs.back(); }
318 };
319 
320 /**
321  A JSON path expression.
322 
323  From the user's point of view, a path expression is a string literal
324  with the following structure. We parse this structure into a
325  Json_path object:
326 
327  pathExpression ::= scope pathLeg (pathLeg)*
328 
329  scope ::= dollarSign
330 
331  pathLeg ::= member | arrayLocation | doubleAsterisk
332 
333  member ::= period (keyName | asterisk)
334 
335  arrayLocation ::=
336  leftBracket
337  (arrayIndex | arrayRange | asterisk)
338  rightBracket
339 
340  arrayIndex ::=
341  non-negative-integer |
342  last [ minus non-negative-integer ]
343 
344  arrayRange ::= arrayIndex to arrayIndex
345 
346  keyName ::= ECMAScript-identifier | ECMAScript-string-literal
347 
348  doubleAsterisk ::= **
349 
350  to ::= "to"
351 
352  last ::= "last"
353 */
354 class Json_path final : public Json_seekable_path {
355  private:
356  /**
357  A MEM_ROOT in which the Json_path_leg objects pointed to by
358  #Json_seekable_path::m_path_legs are allocated.
359  */
361 
362  public:
363  Json_path();
364 
366  for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
367  }
368 
369  /** Move constructor. */
370  Json_path(Json_path &&other) : m_mem_root(std::move(other.m_mem_root)) {
371  // Move the contents of m_path_legs from other into this.
372  m_path_legs = std::move(other.m_path_legs);
373 
374  /*
375  Must also make sure that other.m_path_legs is empty, so that we
376  don't end up destroying the same objects twice; once from this's
377  destructor and once from other's destructor.
378 
379  Move-constructing a vector would usually leave "other" empty,
380  but it is not guaranteed. Furthermore, m_path_legs is a
381  Prealloced_array, not a std::vector, so often moving will mean
382  copying from one prealloced area to another instead of simply
383  swapping pointers to the backing array. (And at the time of
384  writing Prealloced_array doesn't even have a move-assignment
385  operator, so the above assignment will always copy and leave
386  "other" unchanged.)
387  */
388  other.m_path_legs.clear();
389  }
390 
391  /** Move assignment. */
393  if (&other != this) {
394  this->~Json_path();
395  new (this) Json_path(std::move(other));
396  }
397  return *this;
398  }
399 
400  /**
401  Add a path leg to the end of this path.
402  @param[in] leg the leg to add
403  @return false on success, true on error
404  */
405  bool append(const Json_path_leg &leg) {
406  auto ptr = new (&m_mem_root) Json_path_leg(leg);
407  return ptr == nullptr || m_path_legs.push_back(ptr);
408  }
409 
410  /**
411  Resets this to an empty path with no legs.
412  */
413  void clear() {
414  // Destruct all the Json_path_leg objects, and clear the pointers to them.
415  for (const auto ptr : m_path_legs) ptr->~Json_path_leg();
416  m_path_legs.clear();
417  // Mark the memory as ready for reuse.
419  }
420 
421  /**
422  Return true if the path can match more than one value in a JSON document.
423 
424  @retval true if the path contains a path leg which is a wildcard,
425  ellipsis or array range
426  @retval false otherwise
427  */
428  bool can_match_many() const;
429 
430  /** Turn into a human-readable string. */
431  bool to_string(String *buf) const;
432 };
433 
434 /**
435  A lightweight path expression. This exists so that paths can be cloned
436  from the path legs of other paths without allocating heap memory
437  to copy those legs into. This class does not own the memory of the
438  Json_path_leg objects pointed to by #Json_seekable_path::m_path_legs, it
439  just points to Json_path_leg objects that belong to a Json_path instance.
440 */
441 class Json_path_clone final : public Json_seekable_path {
442  public:
443  /**
444  Add a path leg to the end of this cloned path.
445  @param[in] leg the leg to add; only the pointer is stored, the leg itself is not copied
446  @return false on success, true on error
447  */
448  bool append(const Json_path_leg *leg) { return m_path_legs.push_back(leg); }
449 
450  /**
451  Resets this to an empty path with no legs.
452  */
453  void clear() { m_path_legs.clear(); }
454 };
455 
456 /**
457  Initialize a Json_path from a path expression.
458 
459  Stops parsing on the first error. It initializes the Json_path and
460  returns false if the path is parsed successfully. Otherwise, it
461  returns true. In that case, the output bad_index argument will
462  contain an index into the path expression. The parsing failed near
463  that index.
464 
465  @param[in] path_length The length of the path expression.
466  @param[in] path_expression The string form of the path expression.
467  @param[out] path The Json_path object to be initialized.
468  @param[out] bad_index If true is returned, the parsing failed around here.
469  @return false on success, true on error
470 */
471 bool parse_path(size_t path_length, const char *path_expression,
472  Json_path *path, size_t *bad_index);
473 
474 /**
475  A helper function that uses the above one as workhorse. Entry point
476  for JSON_TABLE (Table_function_json class) and Json_path_cache. Raises an
477  error if the path expression is syntactically incorrect. Raises an
478  error if the path expression contains wildcard tokens but is not
479  supposed to. Otherwise updates the supplied Json_path object with
480  the parsed path.
481 
482  @param[in] path_value A String to be interpreted as a path.
483  @param[in] forbid_wildcards True if the path shouldn't contain * or **
484  @param[out] json_path The object that will hold the parsed path
485 
486  @returns false on success (valid path or NULL), true on error
487 */
488 bool parse_path(const String &path_value, bool forbid_wildcards,
489  Json_path *json_path);
490 #endif /* SQL_JSON_PATH_INCLUDED */
A class that represents the index of an element in a JSON array.
Definition: json_path.h:95
size_t m_index
The array index.
Definition: json_path.h:101
size_t position() const
Get the position in the array pointed to by this array index.
Definition: json_path.h:138
Json_array_index(size_t index, bool from_end, size_t array_length)
Construct a new Json_array_index object representing the specified position in an array of the given ...
Definition: json_path.h:115
bool m_within_bounds
True if the array index is within the bounds of the array.
Definition: json_path.h:104
bool within_bounds() const
Is the array index within the bounds of the array?
Definition: json_path.h:126
A lightweight path expression.
Definition: json_path.h:441
bool append(const Json_path_leg *leg)
Add a path leg to the end of this cloned path.
Definition: json_path.h:448
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:453
One path leg in a JSON path expression.
Definition: json_path.h:147
size_t m_first_array_index
The index of an array cell, or the start of an array range.
Definition: json_path.h:152
size_t m_last_array_index
The end (inclusive) of an array range.
Definition: json_path.h:158
Json_path_leg(enum_json_path_leg_type leg_type)
Construct a wildcard or ellipsis path leg.
Definition: json_path.h:173
Json_array_index first_array_index(size_t array_length) const
Get the first array cell pointed to by an array range, or the array cell pointed to by an array cell ...
Definition: json_path.h:258
Array_range get_array_range(size_t array_length) const
Get the array range pointed to by a path leg of type jpl_array_range or jpl_array_cell_wildcard.
Definition: json_path.cc:146
bool m_last_array_index_from_end
Is m_last_array_index relative to the end of the array?
Definition: json_path.h:161
std::string m_member_name
The member name of a member path leg.
Definition: json_path.h:164
enum_json_path_leg_type m_leg_type
The type of this path leg.
Definition: json_path.h:149
bool is_autowrap() const
Is this path leg an auto-wrapping array accessor?
Definition: json_path.cc:123
bool m_first_array_index_from_end
Is m_first_array_index relative to the end of the array?
Definition: json_path.h:155
Json_path_leg(const char *member_name, size_t length)
Construct an object member path leg.
Definition: json_path.h:222
Json_path_leg(size_t index)
Construct an array cell path leg.
Definition: json_path.h:185
Json_path_leg(const std::string &member_name)
Construct an object member path leg.
Definition: json_path.h:226
Json_path_leg(size_t index, bool from_end)
Construct an array cell path leg.
Definition: json_path.h:193
enum_json_path_leg_type get_type() const
Get the type of the path leg.
Definition: json_path.h:230
Json_path_leg(size_t idx1, bool idx1_from_end, size_t idx2, bool idx2_from_end)
Construct an array range path leg.
Definition: json_path.h:208
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:88
Json_array_index last_array_index(size_t array_length) const
Get the last array cell pointed to by an array range.
Definition: json_path.h:270
const std::string & get_member_name() const
Get the member name of a jpl_member path leg.
Definition: json_path.h:233
A JSON path expression.
Definition: json_path.h:354
Json_path & operator=(Json_path &&other)
Move assignment.
Definition: json_path.h:392
Json_path(Json_path &&other)
Move constructor.
Definition: json_path.h:370
~Json_path()
Definition: json_path.h:365
bool append(const Json_path_leg &leg)
Add a path leg to the end of this path.
Definition: json_path.h:405
Json_path()
Definition: json_path.cc:166
void clear()
Resets this to an empty path with no legs.
Definition: json_path.h:413
bool to_string(String *buf) const
Turn into a human-readable string.
Definition: json_path.cc:168
bool can_match_many() const
Return true if the path can match more than one value in a JSON document.
Definition: json_path.cc:178
MEM_ROOT m_mem_root
A MEM_ROOT in which the Json_path_leg objects pointed to by Json_seekable_path::m_path_legs are alloc...
Definition: json_path.h:360
A path expression which can be used to seek to a position inside a JSON value.
Definition: json_path.h:299
Json_seekable_path()
Definition: json_path.cc:162
Json_path_iterator end() const
Get an iterator pointing just past the last path leg.
Definition: json_path.h:314
size_t leg_count() const
Return the number of legs in this searchable path.
Definition: json_path.h:308
const Json_path_leg * last_leg() const
Get a pointer to the last path leg.
Definition: json_path.h:317
Json_path_iterator begin() const
Get an iterator pointing to the first path leg.
Definition: json_path.h:311
Json_path_leg_pointers m_path_legs
An array of pointers to the legs of the JSON path.
Definition: json_path.h:302
bool push_back(const Element_type &element)
Copies an element into the back of the array.
Definition: prealloced_array.h:317
void clear()
Removes (and destroys) all elements.
Definition: prealloced_array.h:600
size_t size() const
Definition: prealloced_array.h:226
iterator begin()
begin : Returns a pointer to the first element in the array.
Definition: prealloced_array.h:253
Element_type & back()
Definition: prealloced_array.h:243
iterator end()
Definition: prealloced_array.h:254
const const Json_path_leg * * const_iterator
Definition: prealloced_array.h:116
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:165
#define MY_MARK_BLOCKS_FREE
Definition: my_sys.h:177
enum_json_path_leg_type
The type of a Json_path_leg.
Definition: json_path.h:50
@ jpl_array_range
A path leg that represents a range in a JSON array (such as [2 to 7]).
Definition: json_path.h:67
@ jpl_array_cell_wildcard
A path leg that represents the array wildcard ([*]), which matches all the elements of a JSON array.
Definition: json_path.h:81
@ jpl_ellipsis
A path leg that represents the ellipsis (**), which matches any JSON value and recursively all the JS...
Definition: json_path.h:88
@ jpl_member
A path leg that represents a JSON object member (such as .name).
Definition: json_path.h:55
@ jpl_array_cell
A path leg that represents a JSON array cell (such as [10]).
Definition: json_path.h:61
@ jpl_member_wildcard
A path leg that represents the member wildcard.
Definition: json_path.h:75
Json_path_leg_pointers::const_iterator Json_path_iterator
Definition: json_path.h:293
bool parse_path(size_t path_length, const char *path_expression, Json_path *path, size_t *bad_index)
Initialize a Json_path from a path expression.
Definition: json_path.cc:255
This file follows Google coding style, except for the name MEM_ROOT (which is kept for historical rea...
void free_root(MEM_ROOT *root, myf flags)
Definition: my_alloc.cc:278
Some integer typedefs for easier portability.
#define MYF(v)
Definition: my_inttypes.h:96
Common header for many mysys elements.
static char * path
Definition: mysqldump.cc:132
Definition: buf0block_hint.cc:29
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
Definition: varlen_sort.h:183
A structure that represents an array range.
Definition: json_path.h:279
size_t m_end
End of the range, exclusive.
Definition: json_path.h:281
size_t m_begin
Beginning of the range, inclusive.
Definition: json_path.h:280
The MEM_ROOT is a simple arena, where allocations are carved out of larger blocks.
Definition: my_alloc.h:78