MySQL 9.1.0
Source Code Documentation
json_path.cc File Reference

This file contains implementation support for the JSON path abstraction. More...

#include "sql-common/json_path.h"
#include "my_rapidjson_size_t.h"
#include <assert.h>
#include <stddef.h>
#include <algorithm>
#include <string>
#include <string_view>
#include <rapidjson/encodings.h>
#include <rapidjson/error/error.h>
#include <rapidjson/memorystream.h>
#include <rapidjson/reader.h>
#include "my_inttypes.h"
#include "mysql/strings/m_ctype.h"
#include "sql-common/json_dom.h"
#include "sql/psi_memory_key.h"
#include "sql/sql_const.h"
#include "sql_string.h"
#include "string_with_len.h"

Classes

class  anonymous_namespace{json_path.cc}::Stream
 A simple input stream class for the JSON path parser. More...
 
class  anonymous_namespace{json_path.cc}::MemberNameHandler
 A RapidJSON handler which accepts a scalar string and nothing else. More...
 

Namespaces

namespace  anonymous_namespace{json_path.cc}
 

Functions

static bool is_ecmascript_identifier (const std::string_view &name)
 Returns true if the name is a valid ECMAScript identifier. More...
 
static bool is_digit (unsigned codepoint)
 Return true if the codepoint is a Unicode digit. More...
 
static bool is_whitespace (char ch)
 Is this a whitespace character? More...
 
static bool parse_path (Stream *stream, Json_path *path)
 Fills in a Json_path from a path expression. More...
 
static bool parse_path_leg (Stream *stream, Json_path *path)
 Parses a single path leg and appends it to a Json_path object. More...
 
static bool parse_ellipsis_leg (Stream *stream, Json_path *path)
 Parses a single ellipsis leg and appends it to a Json_path object. More...
 
static bool parse_array_leg (Stream *stream, Json_path *path)
 Parses a single array leg and appends it to a Json_path object. More...
 
static bool parse_member_leg (Stream *stream, Json_path *path)
 Parses a single member leg and appends it to a Json_path object. More...
 
static bool append_array_index (String *buf, size_t index, bool from_end)
 
bool parse_path (size_t path_length, const char *path_expression, Json_path *path, size_t *bad_index)
 Top level parsing factory method. More...
 
static bool parse_array_index (Stream *stream, uint32 *array_index, bool *from_end)
 Parse an array index in an array cell index or array range path leg. More...
 
static const char * find_end_of_member_name (const char *start, const char *end)
 Find the end of a member name in a JSON path. More...
 
bool parse_name_with_rapidjson (const char *str, size_t len, String *name)
 Parse a quoted member name using the rapidjson parser, so that we get the name without the enclosing quotes and with any escape sequences replaced with the actual characters. More...
 
static bool unicode_combining_mark (unsigned codepoint)
 Return true if the character is a unicode combining mark. More...
 
static bool is_letter (unsigned codepoint)
 Return true if the codepoint is a Unicode letter. More...
 
static bool is_connector_punctuation (unsigned codepoint)
 Return true if the codepoint is Unicode connector punctuation. More...
 
bool clone_without_autowrapping (const Json_path *source_path, Json_path_clone *target_path, Json_wrapper *doc, PSI_memory_key key)
 Clone a source path to a target path, stripping out legs which are made redundant by the auto-wrapping rule from the WL#7909 spec and further extended in the WL#9831 spec: More...
 

Variables

constexpr char anonymous_namespace{json_path.cc}::SCOPE = '$'
 
constexpr char anonymous_namespace{json_path.cc}::BEGIN_MEMBER = '.'
 
constexpr char anonymous_namespace{json_path.cc}::BEGIN_ARRAY = '['
 
constexpr char anonymous_namespace{json_path.cc}::END_ARRAY = ']'
 
constexpr char anonymous_namespace{json_path.cc}::DOUBLE_QUOTE = '"'
 
constexpr char anonymous_namespace{json_path.cc}::WILDCARD = '*'
 
constexpr char anonymous_namespace{json_path.cc}::MINUS = '-'
 
constexpr char anonymous_namespace{json_path.cc}::LAST [] = "last"
 

Detailed Description

This file contains implementation support for the JSON path abstraction.

The path abstraction is described by the functional spec attached to WL#7909.

Function Documentation

◆ append_array_index()

static bool append_array_index ( String *  buf,
size_t  index,
bool  from_end 
)
static

◆ clone_without_autowrapping()

bool clone_without_autowrapping ( const Json_path *  source_path,
Json_path_clone *  target_path,
Json_wrapper *  doc,
PSI_memory_key  key 
)

Clone a source path to a target path, stripping out legs which are made redundant by the auto-wrapping rule from the WL#7909 spec and further extended in the WL#9831 spec:

"If an array cell path leg or an array range path leg is evaluated against a non-array value, the result of the evaluation is the same as if the non-array value had been wrapped in a single-element array."

See also
Json_path_leg::is_autowrap
Parameters
[in] source_path: The original path.
[in,out] target_path: The clone to be filled in.
[in] doc: The document to seek through.
[in] key: Instrumented memory key.
Returns
True if an error occurred. False otherwise.

◆ find_end_of_member_name()

static const char * find_end_of_member_name ( const char *  start,
const char *  end 
)
static

Find the end of a member name in a JSON path.

The name could be either a quoted or an unquoted identifier.

Parameters
start: the start of the member name
end: the end of the JSON path expression
Returns
pointer to the position right after the end of the name, or to the position right after the end of the string if the input string is an unterminated quoted identifier

◆ is_connector_punctuation()

static bool is_connector_punctuation ( unsigned  codepoint)
static

Return true if the codepoint is Unicode connector punctuation.

◆ is_digit()

static bool is_digit ( unsigned  codepoint)
static

Return true if the codepoint is a Unicode digit.

This was the best recommendation from the old-timers about how to answer this question.

◆ is_ecmascript_identifier()

static bool is_ecmascript_identifier ( const std::string_view &  name)
static

Returns true if the name is a valid ECMAScript identifier.

The name must be a sequence of UTF8-encoded bytes. All escape sequences have been replaced with UTF8-encoded bytes.

Parameters
[in] name: name to check
Returns
True if the name is a valid ECMAScript identifier. False otherwise.

◆ is_letter()

static bool is_letter ( unsigned  codepoint)
static

Return true if the codepoint is a Unicode letter.

This was the best recommendation from the old-timers about how to answer this question. But as you can see from the need to call unicode_combining_mark(), my_isalpha() isn't good enough. It probably has many other defects.

FIXME

◆ is_whitespace()

static bool is_whitespace ( char  ch)
inline static

Is this a whitespace character?

◆ parse_array_index()

static bool parse_array_index ( Stream *  stream,
uint32 *  array_index,
bool *  from_end 
)
static

Parse an array index in an array cell index or array range path leg.

An array index is either a non-negative integer (a 0-based index relative to the beginning of the array), or the keyword "last" (which means the last element in the array), or the keyword "last" followed by a minus ("-") and a non-negative integer (which is the 0-based index relative to the end of the array).

Parameters
[in,out] stream: the stream to read the path expression from
[out] array_index: gets set to the parsed array index
[out] from_end: gets set to true if the array index is relative to the end of the array
Returns
true on error, false on success

◆ parse_array_leg()

static bool parse_array_leg ( Stream *  stream,
Json_path *  path
)
static

Parses a single array leg and appends it to a Json_path object.

Parameters
[in,out] stream: The stream to read the path expression from.
[in,out] path: The Json_path object to fill.
Returns
true on error, false on success

◆ parse_ellipsis_leg()

static bool parse_ellipsis_leg ( Stream *  stream,
Json_path *  path
)
static

Parses a single ellipsis leg and appends it to a Json_path object.

Parameters
[in,out] stream: The stream to read the path expression from.
[in,out] path: The Json_path object to fill.
Returns
true on error, false on success

◆ parse_member_leg()

static bool parse_member_leg ( Stream *  stream,
Json_path *  path
)
static

Parses a single member leg and appends it to a Json_path object.

Parameters
[in,out] stream: The stream to read the path expression from.
[in,out] path: The Json_path object to fill.
Returns
true on error, false on success

◆ parse_name_with_rapidjson()

bool parse_name_with_rapidjson ( const char *  str,
size_t  len,
String *  name
)

Parse a quoted member name using the rapidjson parser, so that we get the name without the enclosing quotes and with any escape sequences replaced with the actual characters.

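The parsed member name is written to the caller-supplied String pointed to by name; the function itself returns only a success/error flag, so there is no separately returned object for the caller to destroy.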

Parameters
str: the input string
len: the length of the input string
[out] name: the member name
Returns
false on success, true on error

◆ parse_path() [1/2]

bool parse_path ( size_t  path_length,
const char *  path_expression,
Json_path *  path,
size_t *  bad_index 
)

Top level parsing factory method.

Initialize a Json_path from a path expression.

◆ parse_path() [2/2]

static bool parse_path ( Stream *  stream,
Json_path *  path
)
static

Fills in a Json_path from a path expression.

Parameters
[in,out] stream: The stream to read the path expression from.
[in,out] path: The Json_path object to fill.
Returns
true on error, false on success

◆ parse_path_leg()

static bool parse_path_leg ( Stream *  stream,
Json_path *  path
)
static

Parses a single path leg and appends it to a Json_path object.

Parameters
[in,out] stream: The stream to read the path expression from.
[in,out] path: The Json_path object to fill.
Returns
true on error, false on success

◆ unicode_combining_mark()

static bool unicode_combining_mark ( unsigned  codepoint)
inline static

Return true if the character is a unicode combining mark.

Parameters
codepoint: A unicode codepoint.
Returns
True if the codepoint is a unicode combining mark.