MySQL  8.0.19
Source Code Documentation
json_binary.h
Go to the documentation of this file.
1 #ifndef JSON_BINARY_INCLUDED
2 #define JSON_BINARY_INCLUDED
3 
4 /* Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
5 
6  This program is free software; you can redistribute it and/or modify
7  it under the terms of the GNU General Public License, version 2.0,
8  as published by the Free Software Foundation.
9 
10  This program is also distributed with certain software (including
11  but not limited to OpenSSL) that is licensed under separate terms,
12  as designated in a particular file or component or in included license
13  documentation. The authors of MySQL hereby grant you an additional
14  permission to link the program and your derivative works with the
15  separately licensed software that they have included with MySQL.
16 
17  This program is distributed in the hope that it will be useful,
18  but WITHOUT ANY WARRANTY; without even the implied warranty of
19  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  GNU General Public License, version 2.0, for more details.
21 
22  You should have received a copy of the GNU General Public License
23  along with this program; if not, write to the Free Software
24  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25 
26 /**
27  @file
28 
29  This file specifies the interface for serializing JSON values into
30  binary representation, and for reading values back from the binary
31  representation.
32 
33  The binary format is as follows:
34 
35  Each JSON value (scalar, object or array) has a one byte type
36  identifier followed by the actual value.
37 
38  If the value is a JSON object, its binary representation will have a
39  header that contains:
40 
41  - the member count
42  - the size of the binary value in bytes
43  - a list of pointers to each key
44  - a list of pointers to each value
45 
46  The actual keys and values will come after the header, in the same
47  order as in the header.
48 
49  Similarly, if the value is a JSON array, the binary representation
50  will have a header with
51 
52  - the element count
53  - the size of the binary value in bytes
54  - a list of pointers to each value
55 
56  followed by the actual values, in the same order as in the header.
57 
58  @verbatim
59  doc ::= type value
60 
61  type ::=
62  0x00 | // small JSON object
63  0x01 | // large JSON object
64  0x02 | // small JSON array
65  0x03 | // large JSON array
66  0x04 | // literal (true/false/null)
67  0x05 | // int16
68  0x06 | // uint16
69  0x07 | // int32
70  0x08 | // uint32
71  0x09 | // int64
72  0x0a | // uint64
73  0x0b | // double
74  0x0c | // utf8mb4 string
75  0x0f // custom data (any MySQL data type)
76 
77  value ::=
78  object |
79  array |
80  literal |
81  number |
82  string |
83  custom-data
84 
85  object ::= element-count size key-entry* value-entry* key* value*
86 
87  array ::= element-count size value-entry* value*
88 
89  // number of members in object or number of elements in array
90  element-count ::=
91  uint16 | // if used in small JSON object/array
92  uint32 // if used in large JSON object/array
93 
94  // number of bytes in the binary representation of the object or array
95  size ::=
96  uint16 | // if used in small JSON object/array
97  uint32 // if used in large JSON object/array
98 
99  key-entry ::= key-offset key-length
100 
101  key-offset ::=
102  uint16 | // if used in small JSON object
103  uint32 // if used in large JSON object
104 
105  key-length ::= uint16 // key length must be less than 64KB
106 
107  value-entry ::= type offset-or-inlined-value
108 
109  // This field holds either the offset to where the value is stored,
110  // or the value itself if it is small enough to be inlined (that is,
111  // if it is a JSON literal or a small enough [u]int).
112  offset-or-inlined-value ::=
113  uint16 | // if used in small JSON object/array
114  uint32 // if used in large JSON object/array
115 
116  key ::= utf8mb4-data
117 
118  literal ::=
119  0x00 | // JSON null literal
120  0x01 | // JSON true literal
121  0x02 // JSON false literal
122 
123  number ::= .... // little-endian format for [u]int(16|32|64), whereas
124  // double is stored in a platform-independent, eight-byte
125  // format using float8store()
126 
127  string ::= data-length utf8mb4-data
128 
129  custom-data ::= custom-type data-length binary-data
130 
131  custom-type ::= uint8 // type identifier that matches the
132  // internal enum_field_types enum
133 
134  data-length ::= uint8* // If the high bit of a byte is 1, the length
135  // field is continued in the next byte,
136  // otherwise it is the last byte of the length
137  // field. So we need 1 byte to represent
138  // lengths up to 127, 2 bytes to represent
139  // lengths up to 16383, and so on...
140  @endverbatim
141 */
142 
143 #include <stddef.h>
144 #include <string>
145 
146 #include "field_types.h" // enum_field_types
147 #include "my_dbug.h" // DBUG_ASSERT
148 #include "my_inttypes.h"
149 
150 class Field_json;
151 class Json_dom;
152 class Json_wrapper;
153 class String;
154 class THD;
155 
156 namespace json_binary {
157 
158 /**
159  Serialize the JSON document represented by dom to binary format in
160  the destination string, replacing any content already in the
161  destination string.
162 
163  @param[in] thd THD handle
164  @param[in] dom the input DOM tree
165  @param[in,out] dest the destination string
166  @retval false on success
167  @retval true if an error occurred
168 */
169 #ifdef MYSQL_SERVER  // serialize() is declared only when MYSQL_SERVER is defined
170 bool serialize(const THD *thd, const Json_dom *dom, String *dest);
171 #endif  // MYSQL_SERVER
172 
173 /**
174  Class used for reading JSON values that are stored in the binary
175  format. Values are parsed lazily, so only the parts of the value
176  that the caller actually asks for are read. Array elements
177  can be looked up in constant time using the element() function.
178  Object members can be looked up in O(log n) time using the lookup()
179  function.
180 */
181 class Value {
182  public:
183  enum enum_type : uint8 {  // one-byte tag identifying the kind of value held
194  ERROR /* Not really a type. Used to signal that an
195  error was detected. */
196  };
197 
198  /**
199  Does this value, and all of its members, represent a valid JSON
200  value?
201  */
202  bool is_valid() const;
203  enum_type type() const { return m_type; }  // the JSON type of this value
204  /// Does this value use the large storage format?
205  bool large_format() const { return m_large; }
206 
207  /**
208  Get a pointer to the beginning of the STRING or OPAQUE data
209  represented by this instance.
210  */
211  const char *get_data() const {
213  return m_data;
214  }
215 
216  /**
217  Get the length in bytes of the STRING or OPAQUE value represented by
218  this instance.
219  */
222  return m_length;
223  }
224 
225  /** Get the value of an INT. */
226  int64 get_int64() const {
227  DBUG_ASSERT(m_type == INT);
228  return m_int_value;
229  }
230 
231  /** Get the value of a UINT. */
232  uint64 get_uint64() const {
233  DBUG_ASSERT(m_type == UINT);
234  return static_cast<uint64>(m_int_value);  // UINTs share the signed m_int_value storage
235  }
236 
237  /** Get the value of a DOUBLE. */
238  double get_double() const {
240  return m_double_value;
241  }
242 
243  /**
244  Get the number of elements in an array, or the number of members in
245  an object.
246  */
249  return m_element_count;
250  }
251 
252  /**
253  Get the MySQL field type of an opaque value. Identifies the type of
254  the value stored in the data portion of an opaque value.
255  */
258  return m_field_type;
259  }
260 
261  Value element(size_t pos) const;  // get the element at the specified position of a JSON array or object
262  Value key(size_t pos) const;  // get the key of the member stored at the specified position in a JSON object
263  Value lookup(const char *key, size_t length) const;  // get the value associated with the specified key in a JSON object
264  Value lookup(const std::string &key) const {  // convenience overload: forwards the string's buffer and length
265  return lookup(key.c_str(), key.length());
266  }
267  size_t lookup_index(const char *key, size_t length) const;  // get the index of the element with the specified key in a JSON object
268  size_t lookup_index(const std::string &key) const {  // convenience overload: forwards the string's buffer and length
269  return lookup_index(key.c_str(), key.length());
270  }
271  bool is_backed_by(const String *str) const;  // is this value pointing to data contained in the specified string?
272  bool raw_binary(const THD *thd, String *buf) const;  // copy the binary representation of this value into buf
273  bool get_free_space(const THD *thd, size_t *space) const;  // get the amount of unused space in the binary representation
274  bool has_space(size_t pos, size_t needed, size_t *offset) const;  // is there enough space to replace the value at pos?
275  bool update_in_shadow(const Field_json *field, size_t pos,  // update a value in an array or object
276  Json_wrapper *new_value, size_t data_offset,
277  size_t data_length, const char *original,
278  char *destination, bool *changed) const;
279  bool remove_in_shadow(const Field_json *field, size_t pos,  // remove a value from an array or object
280  const char *original, char *destination) const;
281 
282  /** Constructor for values that represent literals or errors. */
283  explicit Value(enum_type t) : m_data(nullptr), m_type(t) {
284  DBUG_ASSERT(t == LITERAL_NULL || t == LITERAL_TRUE || t == LITERAL_FALSE ||
285  t == ERROR);
286  }
287 
288  /** Constructor for values that represent ints or uints. */
289  explicit Value(enum_type t, int64 val) : m_int_value(val), m_type(t) {  // UINT payloads are passed as int64 and cast back in get_uint64()
290  DBUG_ASSERT(t == INT || t == UINT);
291  }
292 
293  /** Constructor for values that represent doubles. */
294  explicit Value(double val) : m_double_value(val), m_type(DOUBLE) {}
295 
296  /** Constructor for values that represent strings. */
297  Value(const char *data, uint32 len)
298  : m_data(data), m_length(len), m_type(STRING) {}
299 
300  /**
301  Constructor for values that represent arrays or objects.
302 
303  @param t type
304  @param data pointer to the start of the binary representation
305  @param bytes the number of bytes in the binary representation of the value
306  @param element_count the number of elements or members in the value
307  @param large true if the value should be stored in the large
308  storage format with 4 byte offsets instead of 2 byte offsets
309  */
310  Value(enum_type t, const char *data, uint32 bytes, uint32 element_count,
311  bool large)
312  : m_data(data),
314  m_length(bytes),
315  m_type(t),
316  m_large(large) {
317  DBUG_ASSERT(t == ARRAY || t == OBJECT);
318  }
319 
320  /** Constructor for values that represent opaque data. */
321  Value(enum_field_types ft, const char *data, uint32 len)
322  : m_data(data), m_length(len), m_field_type(ft), m_type(OPAQUE) {}
323 
324  /** Empty constructor. Produces a value that represents an error condition. */
325  Value() : Value(ERROR) {}
326 
327  /** Is this value an array? */
328  bool is_array() const { return m_type == ARRAY; }
329 
330  /** Is this value an object? */
331  bool is_object() const { return m_type == OBJECT; }
332 
333  /**
334  Compare two Values. Despite the name, the result is a three-way ordering.
335  @note This function is limited to scalars only, for objects/arrays it
336  asserts. The main purpose is to separate old/new scalar values for updates
337  on multi-valued indexes.
338  @returns
339  -1 if this < val
340  0 if this == val
341  1 if this > val
342  */
343  int eq(const Value &val) const;
344 
345  private:
346  /*
347  Instances use only one of m_data, m_int_value and m_double_value,
348  so keep them in a union to save space in memory.
349  */
350  union {
351  /**
352  Pointer to the start of the binary representation of the value. Only
353  used by STRING, OPAQUE, OBJECT and ARRAY.
354 
355  The memory pointed to by this member is not owned by this Value
356  object. Callers that create Value objects must make sure that the
357  memory is not freed as long as the Value object is alive.
358  */
359  const char *m_data;
360  /** The value if the type is INT or UINT. */
362  /** The value if the type is DOUBLE. */
364  };
365 
366  /**
367  Element count for arrays and objects. Unused for other types.
368  */
370 
371  /**
372  The full length (in bytes) of the binary representation of an array or
373  object, or the length of a string or opaque value. Unused for other types.
374  */
376 
377  /**
378  The MySQL field type of the value, in case the type of the value is
379  OPAQUE. Otherwise, it is unused.
380  */
382 
383  /** The JSON type of the value. */
385 
386  /**
387  True if an array or an object uses the large storage format with 4
388  byte offsets instead of 2 byte offsets.
389  */
390  bool m_large;
391 
392  size_t key_entry_offset(size_t pos) const;  // offset of the key entry describing the key of the member at pos
393  size_t value_entry_offset(size_t pos) const;  // offset of the value entry describing the element at pos
394  bool first_value_offset(size_t *offset) const;  // find the lowest possible offset where a value can be located in this array or object
395  bool element_offsets(size_t pos, size_t *start, size_t *end,  // find the start offset and the end offset of the specified element
396  bool *inlined) const;
397 };
398 
399 /**
400  Parse a JSON binary document.
401 
402  @param[in] data a pointer to the binary data
403  @param[in] len the size of the binary document in bytes
404  @return an object that allows access to the contents of the document
405 */
406 Value parse_binary(const char *data, size_t len);  // NOTE(review): the result presumably refers into `data` without copying (cf. the ownership note on Value::m_data) - confirm before freeing the buffer
407 
408 /**
409  How much space is needed for a JSON value when it is stored in the binary
410  format.
411 
412  @param[in] thd THD handle
413  @param[in] value the JSON value to add to a document
414  @param[in] large true if the large storage format is used
415  @param[out] needed gets set to the amount of bytes needed to store
416  the value
417  @retval false if successful
418  @retval true if an error occurred while calculating the needed space
419 */
420 #ifdef MYSQL_SERVER  // space_needed() is declared only when MYSQL_SERVER is defined
421 bool space_needed(const THD *thd, const Json_wrapper *value, bool large,
422  size_t *needed);
423 #endif  // MYSQL_SERVER
424 
425 /**
426  Apply a function to every value in a JSON document. That is, apply
427  the function to the root node of the JSON document, to all its
428  children, grandchildren and so on.
429 
430  @param value the root of the JSON document
431  @param func the function to apply
432  @retval true if the processing was stopped
433  @retval false if the processing was completed
434 
435  @tparam Func a functor type that takes a #json_binary::Value
436  parameter and returns a `bool` which is `true` if the processing
437  should stop or `false` if the processing should continue with the
438  next node
439 */
440 template <typename Func>
441 bool for_each_node(const Value &value, const Func &func) {
442  if (func(value)) return true;  // visit this node before its children; a true result stops the walk
443 
444  if (value.is_array() || value.is_object())  // recurse only into arrays and objects
445  for (size_t i = 0, size = value.element_count(); i < size; ++i)  // element_count() is read once, up front
446  if (for_each_node(value.element(i), func)) return true;  // propagate a stop request to the caller
447 
448  return false;  // func never asked to stop
449 }
450 } // namespace json_binary
451 
452 #endif /* JSON_BINARY_INCLUDED */
json_binary::Value::Value
Value()
Empty constructor.
Definition: json_binary.h:325
json_binary::Value::get_double
double get_double() const
Get the value of a DOUBLE.
Definition: json_binary.h:238
THD
Definition: sql_class.h:764
json_binary::Value::Value
Value(const char *data, uint32 len)
Constructor for values that represent strings.
Definition: json_binary.h:297
Json_dom
JSON DOM abstract base class.
Definition: json_dom.h:169
field_types.h
This file contains the field type.
Json_wrapper
Abstraction for accessing JSON values irrespective of whether they are (started out as) binary JSON v...
Definition: json_dom.h:1141
json_binary::Value::raw_binary
bool raw_binary(const THD *thd, String *buf) const
Copy the binary representation of this value into a buffer, replacing the contents of the receiving b...
Definition: json_binary.cc:1254
json_binary::Value::Value
Value(enum_type t, const char *data, uint32 bytes, uint32 element_count, bool large)
Constructor for values that represent arrays or objects.
Definition: json_binary.h:310
json_binary::Value::lookup
Value lookup(const char *key, size_t length) const
Get the value associated with the specified key in a JSON object.
Definition: json_binary.cc:1167
json_binary::Value::get_data_length
uint32 get_data_length() const
Get the length in bytes of the STRING or OPAQUE value represented by this instance.
Definition: json_binary.h:220
json_binary::Value::update_in_shadow
bool update_in_shadow(const Field_json *field, size_t pos, Json_wrapper *new_value, size_t data_offset, size_t data_length, const char *original, char *destination, bool *changed) const
Update a value in an array or object.
Definition: json_binary.cc:1725
json_binary::Value::element_offsets
bool element_offsets(size_t pos, size_t *start, size_t *end, bool *inlined) const
Find the start offset and the end offset of the specified element.
Definition: json_binary.cc:1319
element_count
uint32 element_count
Definition: my_tree.h:51
uint64
uint64_t uint64
Definition: my_inttypes.h:68
json_binary::Value::large_format
bool large_format() const
Does this value use the large storage format?
Definition: json_binary.h:205
pos
char * pos
Definition: do_ctype.cc:76
my_dbug.h
String
Using this class is fraught with peril, and you need to be very careful when doing so.
Definition: sql_string.h:164
json_binary::Value::Value
Value(enum_type t, int64 val)
Constructor for values that represent ints or uints.
Definition: json_binary.h:289
json_binary::Value::key
Value key(size_t pos) const
Get the key of the member stored at the specified position in a JSON object.
Definition: json_binary.cc:1129
json_binary::Value::lookup_index
size_t lookup_index(const std::string &key) const
Definition: json_binary.h:268
json_binary::Value::STRING
@ STRING
Definition: json_binary.h:186
value
const string value("\"Value\"")
json_binary::Value::get_int64
int64 get_int64() const
Get the value of an INT.
Definition: json_binary.h:226
json_binary::Value::m_double_value
double m_double_value
The value if the type is DOUBLE.
Definition: json_binary.h:363
json_binary::Value::m_data
const char * m_data
Pointer to the start of the binary representation of the value.
Definition: json_binary.h:359
json_binary::Value::get_free_space
bool get_free_space(const THD *thd, size_t *space) const
Get the amount of unused space in the binary representation of this value.
Definition: json_binary.cc:1965
json_binary::Value::value_entry_offset
size_t value_entry_offset(size_t pos) const
Get the offset of the value entry that describes the element at a given position in this array or obj...
Definition: json_binary.cc:1522
Field_json
A field that stores a JSON value.
Definition: field.h:4164
json_binary::for_each_node
bool for_each_node(const Value &value, const Func &func)
Apply a function to every value in a JSON document.
Definition: json_binary.h:441
json_binary::Value::element_count
uint32 element_count() const
Get the number of elements in an array, or the number of members in an object.
Definition: json_binary.h:247
json_binary::Value::field_type
enum_field_types field_type() const
Get the MySQL field type of an opaque value.
Definition: json_binary.h:256
int64
int64_t int64
Definition: my_inttypes.h:67
json_binary::space_needed
bool space_needed(const THD *thd, const Json_wrapper *value, bool large, size_t *needed)
How much space is needed for a JSON value when it is stored in the binary format.
Definition: json_binary.cc:1536
my_inttypes.h
json_binary::Value::Value
Value(enum_field_types ft, const char *data, uint32 len)
Constructor for values that represent opaque data.
Definition: json_binary.h:321
json_binary::Value::get_uint64
uint64 get_uint64() const
Get the value of a UINT.
Definition: json_binary.h:232
json_binary::parse_binary
Value parse_binary(const char *data, size_t len)
Parse a JSON binary document.
Definition: json_binary.cc:1066
json_binary::Value::LITERAL_NULL
@ LITERAL_NULL
Definition: json_binary.h:190
json_binary::Value::is_backed_by
bool is_backed_by(const String *str) const
Is this binary value pointing to data that is contained in the specified string.
Definition: json_binary.cc:1228
enum_field_types
enum_field_types
Column types for MySQL.
Definition: field_types.h:52
json_binary::Value
Class used for reading JSON values that are stored in the binary format.
Definition: json_binary.h:181
json_binary::Value::first_value_offset
bool first_value_offset(size_t *offset) const
Find the lowest possible offset where a value can be located inside this array or object.
Definition: json_binary.cc:1376
json_binary::Value::element
Value element(size_t pos) const
Get the element at the specified position of a JSON array or a JSON object.
Definition: json_binary.cc:1091
json_binary::Value::Value
Value(enum_type t)
Constructor for values that represent literals or errors.
Definition: json_binary.h:283
json_binary::Value::get_data
const char * get_data() const
Get a pointer to the beginning of the STRING or OPAQUE data represented by this instance.
Definition: json_binary.h:211
rules_table_service::end
Cursor end()
A past-the-end Cursor.
Definition: rules_table_service.cc:188
json_binary::Value::ERROR
@ ERROR
Definition: json_binary.h:194
json_binary::Value::lookup
Value lookup(const std::string &key) const
Definition: json_binary.h:264
json_binary::Value::type
enum_type type() const
Definition: json_binary.h:203
uint32
uint32_t uint32
Definition: my_inttypes.h:66
json_binary::Value::lookup_index
size_t lookup_index(const char *key, size_t length) const
Get the index of the element with the specified key in a JSON object.
Definition: json_binary.cc:1181
json_binary::Value::Value
Value(double val)
Constructor for values that represent doubles.
Definition: json_binary.h:294
json_binary::Value::ARRAY
@ ARRAY
Definition: json_binary.h:185
json_binary::Value::OBJECT
@ OBJECT
Definition: json_binary.h:184
json_binary::Value::is_object
bool is_object() const
Is this value an object?
Definition: json_binary.h:331
json_binary::Value::LITERAL_TRUE
@ LITERAL_TRUE
Definition: json_binary.h:191
json_binary::Value::eq
int eq(const Value &val) const
Compare two Values.
Definition: json_binary.cc:2057
json_binary::Value::m_length
uint32 m_length
The full length (in bytes) of the binary representation of an array or object, or the length of a str...
Definition: json_binary.h:375
json_binary::Value::key_entry_offset
size_t key_entry_offset(size_t pos) const
Get the offset of the key entry that describes the key of the member at a given position in this obje...
Definition: json_binary.cc:1509
json_binary::Value::m_type
enum_type m_type
The JSON type of the value.
Definition: json_binary.h:384
json_binary::Value::INT
@ INT
Definition: json_binary.h:187
json_binary
Definition: json_binary.cc:98
uint8
uint8_t uint8
Definition: my_inttypes.h:62
DBUG_ASSERT
#define DBUG_ASSERT(A)
Definition: my_dbug.h:197
json_binary::Value::m_int_value
int64 m_int_value
The value if the type is INT or UINT.
Definition: json_binary.h:361
json_binary::Value::is_array
bool is_array() const
Is this value an array?
Definition: json_binary.h:328
gis::length
bool length(const dd::Spatial_reference_system *srs, const Geometry *g1, double *length, bool *null) noexcept
Computes the length of linestrings and multilinestrings.
Definition: length.cc:75
json_binary::Value::OPAQUE
@ OPAQUE
Definition: json_binary.h:193
json_binary::Value::UINT
@ UINT
Definition: json_binary.h:188
start
static void start(PluginFuncEnv *env)
Definition: http_server_plugin.cc:572
json_binary::Value::DOUBLE
@ DOUBLE
Definition: json_binary.h:189
json_binary::serialize
bool serialize(const THD *thd, const Json_dom *dom, String *dest)
Serialize the JSON document represented by dom to binary format in the destination string,...
Definition: json_binary.cc:130
json_binary::Value::enum_type
enum_type
Definition: json_binary.h:183
json_binary::Value::has_space
bool has_space(size_t pos, size_t needed, size_t *offset) const
Does this array or object have enough space to replace the value at the given position with another v...
Definition: json_binary.cc:1408
json_binary::Value::m_field_type
enum_field_types m_field_type
The MySQL field type of the value, in case the type of the value is OPAQUE.
Definition: json_binary.h:381
json_binary::Value::is_valid
bool is_valid() const
Does this value, and all of its members, represent a valid JSON value?
Definition: json_binary.cc:865
json_binary::Value::m_large
bool m_large
True if an array or an object uses the large storage format with 4 byte offsets instead of 2 byte off...
Definition: json_binary.h:390
json_binary::Value::m_element_count
uint32 m_element_count
Element count for arrays and objects.
Definition: json_binary.h:369
json_binary::Value::LITERAL_FALSE
@ LITERAL_FALSE
Definition: json_binary.h:192
json_binary::Value::remove_in_shadow
bool remove_in_shadow(const Field_json *field, size_t pos, const char *original, char *destination) const
Remove a value from an array or object.
Definition: json_binary.cc:1912