This class represents the character input stream consumed during lexical analysis. More...

#include <sql_lexer_input_stream.h>

Public Member Functions
	Lex_input_stream (uint grammar_selector_token_arg)
	Constructor. More...

bool	init (THD *thd, const char *buff, size_t length)
	Object initializer. More...

void	reset (const char *buff, size_t length)
	Prepare Lex_input_stream instance state for use for handling next SQL statement. More...

void	set_echo (bool echo)
	Set the echo mode. More...

void	save_in_comment_state ()

void	restore_in_comment_state ()

void	skip_binary (int n)
	Skip binary from the input stream. More...

unsigned char	yyGet ()
	Get a character, and advance in the stream. More...

unsigned char	yyGetLast () const
	Get the last character accepted. More...

unsigned char	yyPeek () const
	Look at the next character to parse, but do not accept it. More...

unsigned char	yyPeekn (int n) const
	Look ahead at some character to parse. More...

void	yyUnget ()
	Cancel the effect of the last yyGet() or yySkip(). More...

void	yySkip ()
	Accept a character, by advancing the input stream. More...

void	yySkipn (int n)
	Accept multiple characters at once. More...

char *	yyUnput (char ch)
	Puts a character back into the stream, canceling the effect of the last yyGet() or yySkip(). More...

char *	cpp_inject (char ch)
	Inject a character into the pre-processed stream. More...

bool	eof () const
	End of file indicator for the query text to parse. More...

bool	eof (int n) const
	End of file indicator for the query text to parse. More...

const char *	get_buf () const
	Get the raw query buffer. More...

const char *	get_cpp_buf () const
	Get the pre-processed query buffer. More...

const char *	get_end_of_query () const
	Get the end of the raw query buffer. More...

void	start_token ()
	Mark the stream position as the start of a new token. More...

void	restart_token ()
	Adjust the starting position of the current token. More...

const char *	get_tok_start () const
	Get the token start position, in the raw buffer. More...

const char *	get_cpp_tok_start () const
	Get the token start position, in the pre-processed buffer. More...

const char *	get_tok_end () const
	Get the token end position, in the raw buffer. More...

const char *	get_cpp_tok_end () const
	Get the token end position, in the pre-processed buffer. More...

const char *	get_ptr () const
	Get the current stream pointer, in the raw buffer. More...

const char *	get_cpp_ptr () const
	Get the current stream pointer, in the pre-processed buffer. More...

uint	yyLength () const
	Get the length of the current token, in the raw buffer. More...

const char *	get_body_utf8_str () const
	Get the utf8-body string. More...

uint	get_body_utf8_length () const
	Get the utf8-body length. More...

void	body_utf8_start (THD *thd, const char *begin_ptr)
	The operation is called from the parser in order to 1) designate the intention to have utf8 body; 2) Indicate to the lexer that we will need a utf8 representation of this statement; 3) Determine the beginning of the body. More...

void	body_utf8_append (const char *ptr)
	The operation appends unprocessed part of the pre-processed buffer till the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to ptr. More...

void	body_utf8_append (const char *ptr, const char *end_ptr)
	The operation appends unprocessed part of pre-processed buffer till the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to end_ptr. More...

void	body_utf8_append_literal (THD *thd, const LEX_STRING *txt, const CHARSET_INFO *txt_cs, const char *end_ptr)
	The operation converts the specified text literal to the utf8 and appends the result to the utf8-body. More...

uint	get_lineno (const char *raw_ptr) const

void	add_digest_token (uint token, Lexer_yystype *yylval)

void	reduce_digest_token (uint token_left, uint token_right)

bool	is_partial_parser () const
	True if this scanner tokenizes a partial query (partition expression, generated column expression etc.) More...

void	warn_on_deprecated_charset (const CHARSET_INFO *cs, const char *alias) const
	Outputs warnings on deprecated charsets in complete SQL statements. More...

void	warn_on_deprecated_collation (const CHARSET_INFO *collation) const
	Outputs warnings on deprecated collations in complete SQL statements. More...

bool	text_string_is_7bit () const

	Lex_input_stream (uint grammar_selector_token_arg)
	Constructor. More...

bool	init (THD *thd, const char *buff, size_t length)
	Object initializer. More...

void	reset (const char *buff, size_t length)

void	set_echo (bool echo)
	Set the echo mode. More...

void	save_in_comment_state ()

void	restore_in_comment_state ()

void	skip_binary (int n)
	Skip binary from the input stream. More...

unsigned char	yyGet ()
	Get a character, and advance in the stream. More...

unsigned char	yyGetLast () const
	Get the last character accepted. More...

unsigned char	yyPeek () const
	Look at the next character to parse, but do not accept it. More...

unsigned char	yyPeekn (int n) const
	Look ahead at some character to parse. More...

void	yyUnget ()
	Cancel the effect of the last yyGet() or yySkip(). More...

void	yySkip ()
	Accept a character, by advancing the input stream. More...

void	yySkipn (int n)
	Accept multiple characters at once. More...

char *	yyUnput (char ch)
	Puts a character back into the stream, canceling the effect of the last yyGet() or yySkip(). More...

char *	cpp_inject (char ch)
	Inject a character into the pre-processed stream. More...

bool	eof () const
	End of file indicator for the query text to parse. More...

bool	eof (int n) const
	End of file indicator for the query text to parse. More...

const char *	get_buf () const
	Get the raw query buffer. More...

const char *	get_cpp_buf () const
	Get the pre-processed query buffer. More...

const char *	get_end_of_query () const
	Get the end of the raw query buffer. More...

void	start_token ()
	Mark the stream position as the start of a new token. More...

void	restart_token ()
	Adjust the starting position of the current token. More...

const char *	get_tok_start () const
	Get the token start position, in the raw buffer. More...

const char *	get_cpp_tok_start () const
	Get the token start position, in the pre-processed buffer. More...

const char *	get_tok_end () const
	Get the token end position, in the raw buffer. More...

const char *	get_cpp_tok_end () const
	Get the token end position, in the pre-processed buffer. More...

const char *	get_ptr () const
	Get the current stream pointer, in the raw buffer. More...

const char *	get_cpp_ptr () const
	Get the current stream pointer, in the pre-processed buffer. More...

uint	yyLength () const
	Get the length of the current token, in the raw buffer. More...

const char *	get_body_utf8_str () const
	Get the utf8-body string. More...

uint	get_body_utf8_length () const
	Get the utf8-body length. More...

void	body_utf8_start (THD *thd, const char *begin_ptr)

void	body_utf8_append (const char *ptr)

void	body_utf8_append (const char *ptr, const char *end_ptr)

void	body_utf8_append_literal (THD *thd, const LEX_STRING *txt, const CHARSET_INFO *txt_cs, const char *end_ptr)

uint	get_lineno (const char *raw_ptr) const

void	add_digest_token (uint token, Lexer_yystype *yylval)

void	reduce_digest_token (uint token_left, uint token_right)

bool	is_partial_parser () const
	True if this scanner tokenizes a partial query (partition expression, generated column expression etc.) More...

void	warn_on_deprecated_charset (const CHARSET_INFO *cs, const char *alias) const
	Outputs warnings on deprecated charsets in complete SQL statements. More...

void	warn_on_deprecated_collation (const CHARSET_INFO *collation) const
	Outputs warnings on deprecated collations in complete SQL statements. More...

bool	text_string_is_7bit () const

Public Attributes
THD *	m_thd
	Current thread. More...

uint	yylineno
	Current line number. More...

uint	yytoklen
	Length of the last token parsed. More...

Lexer_yystype *	yylval
	Interface with bison, value of the last token parsed. More...

int	lookahead_token
	LALR(2) resolution, look ahead token. More...

Lexer_yystype *	lookahead_yylval
	LALR(2) resolution, value of the look ahead token. More...

bool	skip_digest
	Skip adding of the current token's digest since it is already added. More...

const CHARSET_INFO *	query_charset

enum my_lex_states	next_state
	Current state of the lexical analyser. More...

const char *	found_semicolon
	Position of ';' in the stream, to delimit multiple queries. More...

uchar	tok_bitmap
	Token character bitmaps, to detect 7bit strings. More...

bool	ignore_space
	SQL_MODE = IGNORE_SPACE. More...

bool	stmt_prepare_mode
	true if we're parsing a prepared statement: in this mode we should allow placeholders. More...

bool	multi_statements
	true if we should allow multi-statements. More...

enum_comment_state	in_comment
	State of the lexical analyser for comments. More...

enum_comment_state	in_comment_saved

const char *	m_cpp_text_start
	Starting position of the TEXT_STRING or IDENT in the pre-processed buffer. More...

const char *	m_cpp_text_end
	Ending position of the TEXT_STRING or IDENT in the pre-processed buffer. More...

const CHARSET_INFO *	m_underscore_cs
	Character set specified by the character-set-introducer. More...

sql_digest_state *	m_digest {nullptr}
	Current statement digest instrumentation. More...

const int	grammar_selector_token
	The synthetic 1st token to prepend token stream with. More...

Private Attributes
char *	m_ptr
	Pointer to the current position in the raw input stream. More...

const char *	m_tok_start
	Starting position of the last token parsed, in the raw buffer. More...

const char *	m_tok_end
	Ending position of the previous token parsed, in the raw buffer. More...

const char *	m_end_of_query
	End of the query text in the input stream, in the raw buffer. More...

const char *	m_buf
	Beginning of the query text in the input stream, in the raw buffer. More...

size_t	m_buf_length
	Length of the raw buffer. More...

bool	m_echo
	Echo the parsed stream to the pre-processed buffer. More...

bool	m_echo_saved

char *	m_cpp_buf
	Pre-processed buffer. More...

char *	m_cpp_ptr
	Pointer to the current position in the pre-processed input stream. More...

const char *	m_cpp_tok_start
	Starting position of the last token parsed, in the pre-processed buffer. More...

const char *	m_cpp_tok_end
	Ending position of the previous token parsed, in the pre-processed buffer. More...

char *	m_body_utf8
	UTF8-body buffer created during parsing. More...

char *	m_body_utf8_ptr
	Pointer to the current position in the UTF8-body buffer. More...

const char *	m_cpp_utf8_processed_ptr
	Position in the pre-processed buffer. More...

Detailed Description

This class represents the character input stream consumed during lexical analysis.

In addition to consuming the input stream, this class performs some comment pre-processing, by filtering out out-of-bound special text from the query input stream.

Two buffers, with pointers inside each, are maintained in parallel. The 'raw' buffer is the original query text, which may contain out-of-bound comments. The 'cpp' (for comments pre-processor) buffer is the pre-processed buffer that contains only the query text that should be seen once out-of-bound data is removed.

Constructor & Destructor Documentation

◆ Lex_input_stream() [1/2]

Lex_input_stream::Lex_input_stream ( uint grammar_selector_token_arg )

inline explicit

Constructor.

Parameters

grammar_selector_token_arg See grammar_selector_token.

◆ Lex_input_stream() [2/2]

Lex_input_stream::Lex_input_stream ( uint grammar_selector_token_arg )

inline explicit

Constructor.

Parameters

grammar_selector_token_arg See grammar_selector_token.

Member Function Documentation

◆ add_digest_token() [1/2]

void Lex_input_stream::add_digest_token	(	uint	token,
		Lexer_yystype *	yylval
	)

◆ add_digest_token() [2/2]

void Lex_input_stream::add_digest_token	(	uint	token,
		Lexer_yystype *	yylval
	)

◆ body_utf8_append() [1/4]

void Lex_input_stream::body_utf8_append ( const char * ptr )

The operation appends unprocessed part of the pre-processed buffer till the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to ptr.

Parameters

ptr	Pointer in the pre-processed buffer, which specifies the end of the chunk, which should be appended to the utf8 body.

◆ body_utf8_append() [2/4]

void Lex_input_stream::body_utf8_append ( const char * ptr )

◆ body_utf8_append() [3/4]

void Lex_input_stream::body_utf8_append	(	const char *	ptr,
		const char *	end_ptr
	)

The operation appends unprocessed part of pre-processed buffer till the given pointer (ptr) and sets m_cpp_utf8_processed_ptr to end_ptr.

The idea is that some tokens in the pre-processed buffer (like character set introducers) should be skipped.

Example: CPP buffer: SELECT 'str1', _latin1 'str2'; m_cpp_utf8_processed_ptr – points at the "SELECT ..."; In order to skip "_latin1", the following call should be made: body_utf8_append(<pointer to "_latin1 ...">, <pointer to " 'str2'...">)

Parameters

ptr	Pointer in the pre-processed buffer, which specifies the end of the chunk, which should be appended to the utf8 body.
end_ptr	Pointer in the pre-processed buffer, to which m_cpp_utf8_processed_ptr will be set in the end of the operation.

◆ body_utf8_append() [4/4]

void Lex_input_stream::body_utf8_append	(	const char *	ptr,
		const char *	end_ptr
	)

◆ body_utf8_append_literal() [1/2]

void Lex_input_stream::body_utf8_append_literal	(	THD *	thd,
		const LEX_STRING *	txt,
		const CHARSET_INFO *	txt_cs,
		const char *	end_ptr
	)

The operation converts the specified text literal to the utf8 and appends the result to the utf8-body.

Parameters

thd	Thread context.
txt	Text literal.
txt_cs	Character set of the text literal.
end_ptr	Pointer in the pre-processed buffer, to which m_cpp_utf8_processed_ptr will be set in the end of the operation.

◆ body_utf8_append_literal() [2/2]

void Lex_input_stream::body_utf8_append_literal	(	THD *	thd,
		const LEX_STRING *	txt,
		const CHARSET_INFO *	txt_cs,
		const char *	end_ptr
	)

◆ body_utf8_start() [1/2]

void Lex_input_stream::body_utf8_start	(	THD *	thd,
		const char *	begin_ptr
	)

The operation is called from the parser in order to 1) designate the intention to have utf8 body; 2) Indicate to the lexer that we will need a utf8 representation of this statement; 3) Determine the beginning of the body.

Parameters

thd	Thread context.
begin_ptr	Pointer to the start of the body in the pre-processed buffer.

◆ body_utf8_start() [2/2]

void Lex_input_stream::body_utf8_start	(	THD *	thd,
		const char *	begin_ptr
	)

◆ cpp_inject() [1/2]

char * Lex_input_stream::cpp_inject ( char ch )

inline

Inject a character into the pre-processed stream.

Note, this function is used to inject a space instead of a multi-character C-comment. Thus there are no boundary checks here (basically, we replace N-chars by 1-char here).

◆ cpp_inject() [2/2]

char * Lex_input_stream::cpp_inject ( char ch )

inline

Inject a character into the pre-processed stream.

Note, this function is used to inject a space instead of a multi-character C-comment. Thus there are no boundary checks here (basically, we replace N-chars by 1-char here).

◆ eof() [1/4]

bool Lex_input_stream::eof ( ) const

inline

End of file indicator for the query text to parse.

Returns: true if there are no more characters to parse

◆ eof() [2/4]

bool Lex_input_stream::eof ( ) const

inline

End of file indicator for the query text to parse.

Returns: true if there are no more characters to parse

◆ eof() [3/4]

bool Lex_input_stream::eof ( int n ) const

inline

End of file indicator for the query text to parse.

Parameters

n	number of characters expected

Returns: true if there are fewer than n characters to parse

◆ eof() [4/4]

bool Lex_input_stream::eof ( int n ) const

inline

End of file indicator for the query text to parse.

Parameters

n	number of characters expected

Returns: true if there are fewer than n characters to parse

◆ get_body_utf8_length() [1/2]

uint Lex_input_stream::get_body_utf8_length ( ) const

inline

Get the utf8-body length.

◆ get_body_utf8_length() [2/2]

uint Lex_input_stream::get_body_utf8_length ( ) const

inline

Get the utf8-body length.

◆ get_body_utf8_str() [1/2]

const char * Lex_input_stream::get_body_utf8_str ( ) const

inline

Get the utf8-body string.

◆ get_body_utf8_str() [2/2]

const char * Lex_input_stream::get_body_utf8_str ( ) const

inline

Get the utf8-body string.

◆ get_buf() [1/2]

const char * Lex_input_stream::get_buf ( ) const

inline

Get the raw query buffer.

◆ get_buf() [2/2]

const char * Lex_input_stream::get_buf ( ) const

inline

Get the raw query buffer.

◆ get_cpp_buf() [1/2]

const char * Lex_input_stream::get_cpp_buf ( ) const

inline

Get the pre-processed query buffer.

◆ get_cpp_buf() [2/2]

const char * Lex_input_stream::get_cpp_buf ( ) const

inline

Get the pre-processed query buffer.

◆ get_cpp_ptr() [1/2]

const char * Lex_input_stream::get_cpp_ptr ( ) const

inline

Get the current stream pointer, in the pre-processed buffer.

◆ get_cpp_ptr() [2/2]

const char * Lex_input_stream::get_cpp_ptr ( ) const

inline

Get the current stream pointer, in the pre-processed buffer.

◆ get_cpp_tok_end() [1/2]

const char * Lex_input_stream::get_cpp_tok_end ( ) const

inline

Get the token end position, in the pre-processed buffer.

◆ get_cpp_tok_end() [2/2]

const char * Lex_input_stream::get_cpp_tok_end ( ) const

inline

Get the token end position, in the pre-processed buffer.

◆ get_cpp_tok_start() [1/2]

const char * Lex_input_stream::get_cpp_tok_start ( ) const

inline

Get the token start position, in the pre-processed buffer.

◆ get_cpp_tok_start() [2/2]

const char * Lex_input_stream::get_cpp_tok_start ( ) const

inline

Get the token start position, in the pre-processed buffer.

◆ get_end_of_query() [1/2]

const char * Lex_input_stream::get_end_of_query ( ) const

inline

Get the end of the raw query buffer.

◆ get_end_of_query() [2/2]

const char * Lex_input_stream::get_end_of_query ( ) const

inline

Get the end of the raw query buffer.

◆ get_lineno() [1/2]

uint Lex_input_stream::get_lineno ( const char * raw_ptr ) const

◆ get_lineno() [2/2]

uint Lex_input_stream::get_lineno ( const char * raw_ptr ) const

◆ get_ptr() [1/2]

const char * Lex_input_stream::get_ptr ( ) const

inline

Get the current stream pointer, in the raw buffer.

◆ get_ptr() [2/2]

const char * Lex_input_stream::get_ptr ( ) const

inline

Get the current stream pointer, in the raw buffer.

◆ get_tok_end() [1/2]

const char * Lex_input_stream::get_tok_end ( ) const

inline

Get the token end position, in the raw buffer.

◆ get_tok_end() [2/2]

const char * Lex_input_stream::get_tok_end ( ) const

inline

Get the token end position, in the raw buffer.

◆ get_tok_start() [1/2]

const char * Lex_input_stream::get_tok_start ( ) const

inline

Get the token start position, in the raw buffer.

◆ get_tok_start() [2/2]

const char * Lex_input_stream::get_tok_start ( ) const

inline

Get the token start position, in the raw buffer.

◆ init() [1/2]

bool Lex_input_stream::init	(	THD *	thd,
		const char *	buff,
		size_t	length
	)

Object initializer.

Perform initialization of Lex_input_stream instance.

Must be called before usage.

Return values

false	OK
true	Error

Basically, a buffer for a pre-processed query. This buffer should be large enough to keep a multi-statement query. The allocation is done once in Lex_input_stream::init() in order to prevent memory pollution when the server is processing large multi-statement queries.

◆ init() [2/2]

bool Lex_input_stream::init	(	THD *	thd,
		const char *	buff,
		size_t	length
	)

Object initializer.

Must be called before usage.

Return values

false	OK
true	Error

◆ is_partial_parser() [1/2]

bool Lex_input_stream::is_partial_parser ( ) const

inline

True if this scanner tokenizes a partial query (partition expression, generated column expression etc.)

Returns: true if parsing a partial query, otherwise false.

◆ is_partial_parser() [2/2]

bool Lex_input_stream::is_partial_parser ( ) const

inline

True if this scanner tokenizes a partial query (partition expression, generated column expression etc.)

Returns: true if parsing a partial query, otherwise false.

◆ reduce_digest_token() [1/2]

void Lex_input_stream::reduce_digest_token	(	uint	token_left,
		uint	token_right
	)

◆ reduce_digest_token() [2/2]

void Lex_input_stream::reduce_digest_token	(	uint	token_left,
		uint	token_right
	)

◆ reset() [1/2]

void Lex_input_stream::reset	(	const char *	buffer,
		size_t	length
	)

Prepare Lex_input_stream instance state for use for handling next SQL statement.

It should be called between two statements in a multi-statement query. The operation resets the input stream to the beginning-of-parse state, but does not reallocate m_cpp_buf.

◆ reset() [2/2]

void Lex_input_stream::reset	(	const char *	buff,
		size_t	length
	)

◆ restart_token() [1/2]

void Lex_input_stream::restart_token ( )

inline

Adjust the starting position of the current token.

This is used to compensate for starting whitespace.

◆ restart_token() [2/2]

void Lex_input_stream::restart_token ( )

inline

Adjust the starting position of the current token.

This is used to compensate for starting whitespace.

◆ restore_in_comment_state() [1/2]

void Lex_input_stream::restore_in_comment_state ( )

inline

◆ restore_in_comment_state() [2/2]

void Lex_input_stream::restore_in_comment_state ( )

inline

◆ save_in_comment_state() [1/2]

void Lex_input_stream::save_in_comment_state ( )

inline

◆ save_in_comment_state() [2/2]

void Lex_input_stream::save_in_comment_state ( )

inline

◆ set_echo() [1/2]

void Lex_input_stream::set_echo ( bool echo )

inline

Set the echo mode.

When echo is true, characters parsed from the raw input stream are preserved. When false, characters parsed are silently ignored.

Parameters

echo	the echo mode.

◆ set_echo() [2/2]

void Lex_input_stream::set_echo ( bool echo )

inline

Set the echo mode.

When echo is true, characters parsed from the raw input stream are preserved. When false, characters parsed are silently ignored.

Parameters

echo	the echo mode.

◆ skip_binary() [1/2]

void Lex_input_stream::skip_binary ( int n )

inline

Skip binary from the input stream.

Parameters

n	number of bytes to accept.

◆ skip_binary() [2/2]

void Lex_input_stream::skip_binary ( int n )

inline

Skip binary from the input stream.

Parameters

n	number of bytes to accept.

◆ start_token() [1/2]

void Lex_input_stream::start_token ( )

inline

Mark the stream position as the start of a new token.

◆ start_token() [2/2]

void Lex_input_stream::start_token ( )

inline

Mark the stream position as the start of a new token.

◆ text_string_is_7bit() [1/2]

bool Lex_input_stream::text_string_is_7bit ( ) const

inline

◆ text_string_is_7bit() [2/2]

bool Lex_input_stream::text_string_is_7bit ( ) const

inline

◆ warn_on_deprecated_charset() [1/2]

void Lex_input_stream::warn_on_deprecated_charset	(	const CHARSET_INFO *	cs,
		const char *	alias
	)		const

inline

Outputs warnings on deprecated charsets in complete SQL statements.

Parameters

[in]	cs	The character set/collation to check for a deprecation.
[in]	alias	The name/alias of `cs`.

◆ warn_on_deprecated_charset() [2/2]

void Lex_input_stream::warn_on_deprecated_charset	(	const CHARSET_INFO *	cs,
		const char *	alias
	)		const

inline

Outputs warnings on deprecated charsets in complete SQL statements.

Parameters

[in]	cs	The character set/collation to check for a deprecation.
[in]	alias	The name/alias of `cs`.

◆ warn_on_deprecated_collation() [1/2]

void Lex_input_stream::warn_on_deprecated_collation ( const CHARSET_INFO * collation ) const

inline

Outputs warnings on deprecated collations in complete SQL statements.

Parameters

[in] collation The collation to check for a deprecation.

◆ warn_on_deprecated_collation() [2/2]

void Lex_input_stream::warn_on_deprecated_collation ( const CHARSET_INFO * collation ) const

inline

Outputs warnings on deprecated collations in complete SQL statements.

Parameters

[in] collation The collation to check for a deprecation.

◆ yyGet() [1/2]

unsigned char Lex_input_stream::yyGet ( )

inline

Get a character, and advance in the stream.

Returns: the next character to parse.

◆ yyGet() [2/2]

unsigned char Lex_input_stream::yyGet ( )

inline

Get a character, and advance in the stream.

Returns: the next character to parse.

◆ yyGetLast() [1/2]

unsigned char Lex_input_stream::yyGetLast ( ) const

inline

Get the last character accepted.

Returns: the last character accepted.

◆ yyGetLast() [2/2]

unsigned char Lex_input_stream::yyGetLast ( ) const

inline

Get the last character accepted.

Returns: the last character accepted.

◆ yyLength() [1/2]

uint Lex_input_stream::yyLength ( ) const

inline

Get the length of the current token, in the raw buffer.

◆ yyLength() [2/2]

uint Lex_input_stream::yyLength ( ) const

inline

Get the length of the current token, in the raw buffer.

◆ yyPeek() [1/2]

unsigned char Lex_input_stream::yyPeek ( ) const

inline

Look at the next character to parse, but do not accept it.

◆ yyPeek() [2/2]

unsigned char Lex_input_stream::yyPeek ( ) const

inline

Look at the next character to parse, but do not accept it.

◆ yyPeekn() [1/2]

unsigned char Lex_input_stream::yyPeekn ( int n ) const

inline

Look ahead at some character to parse.

Parameters

n	offset of the character to look up

◆ yyPeekn() [2/2]

unsigned char Lex_input_stream::yyPeekn ( int n ) const

inline

Look ahead at some character to parse.

Parameters

n	offset of the character to look up

◆ yySkip() [1/2]

void Lex_input_stream::yySkip ( )

inline

Accept a character, by advancing the input stream.

◆ yySkip() [2/2]

void Lex_input_stream::yySkip ( )

inline

Accept a character, by advancing the input stream.

◆ yySkipn() [1/2]

void Lex_input_stream::yySkipn ( int n )

inline

Accept multiple characters at once.

Parameters

n	the number of characters to accept.

◆ yySkipn() [2/2]

void Lex_input_stream::yySkipn ( int n )

inline

Accept multiple characters at once.

Parameters

n	the number of characters to accept.

◆ yyUnget() [1/2]

void Lex_input_stream::yyUnget ( )

inline

Cancel the effect of the last yyGet() or yySkip().

Note that the echo mode should not change between calls to yyGet / yySkip and yyUnget. The caller is responsible for ensuring that.

◆ yyUnget() [2/2]

void Lex_input_stream::yyUnget ( )

inline

Cancel the effect of the last yyGet() or yySkip().

Note that the echo mode should not change between calls to yyGet / yySkip and yyUnget. The caller is responsible for ensuring that.

◆ yyUnput() [1/2]

char * Lex_input_stream::yyUnput ( char ch )

inline

Puts a character back into the stream, canceling the effect of the last yyGet() or yySkip().

Note that the echo mode should not change between calls to unput, get, or skip from the stream.

◆ yyUnput() [2/2]

char * Lex_input_stream::yyUnput ( char ch )

inline

Puts a character back into the stream, canceling the effect of the last yyGet() or yySkip().

Note that the echo mode should not change between calls to unput, get, or skip from the stream.

Member Data Documentation

◆ found_semicolon

const char * Lex_input_stream::found_semicolon

Position of ';' in the stream, to delimit multiple queries.

This delimiter is in the raw buffer.

◆ grammar_selector_token

const int Lex_input_stream::grammar_selector_token

The synthetic 1st token to prepend token stream with.

This token value tricks the parser into simulating multiple starting points. Currently the grammar is aware of 4 such synthetic tokens:

GRAMMAR_SELECTOR_PART for partitioning stuff from DD,
GRAMMAR_SELECTOR_GCOL for generated column stuff from DD,
GRAMMAR_SELECTOR_EXPR for generic single expressions from DD/.frm.
GRAMMAR_SELECTOR_CTE for generic subquery expressions from CTEs.
-1 when parsing with the main grammar (no grammar selector available).

Note: yylex() is expected to return the value of type int: 0 is for EOF and everything else for real token numbers. Bison, in its turn, generates positive token numbers. So, the negative grammar_selector_token means "not a token". In other words, -1 is "empty value".

◆ ignore_space

bool Lex_input_stream::ignore_space

SQL_MODE = IGNORE_SPACE.

◆ in_comment

enum_comment_state Lex_input_stream::in_comment

State of the lexical analyser for comments.

◆ in_comment_saved

enum_comment_state Lex_input_stream::in_comment_saved

◆ lookahead_token

int Lex_input_stream::lookahead_token

LALR(2) resolution, look ahead token.

Value of the next token to return, if any, or -1, if no token was parsed in advance. Note: 0 is a legal token, and represents YYEOF.

◆ lookahead_yylval

Lexer_yystype * Lex_input_stream::lookahead_yylval

LALR(2) resolution, value of the look ahead token.

◆ m_body_utf8

char * Lex_input_stream::m_body_utf8

private

UTF8-body buffer created during parsing.

◆ m_body_utf8_ptr

char * Lex_input_stream::m_body_utf8_ptr

private

Pointer to the current position in the UTF8-body buffer.

◆ m_buf

const char * Lex_input_stream::m_buf

private

Beginning of the query text in the input stream, in the raw buffer.

Beginning of the query text in the input stream, in the raw buffer.

◆ m_buf_length

size_t Lex_input_stream::m_buf_length

private

Length of the raw buffer.

◆ m_cpp_buf

char * Lex_input_stream::m_cpp_buf

private

Pre-processed buffer.

◆ m_cpp_ptr

char * Lex_input_stream::m_cpp_ptr

private

Pointer to the current position in the pre-processed input stream.

◆ m_cpp_text_end

const char * Lex_input_stream::m_cpp_text_end

Ending position of the TEXT_STRING or IDENT in the pre-processed buffer.

NOTE: this member must be used within MYSQLlex() function only.

◆ m_cpp_text_start

const char * Lex_input_stream::m_cpp_text_start

Starting position of the TEXT_STRING or IDENT in the pre-processed buffer.

NOTE: this member must be used within MYSQLlex() function only.

◆ m_cpp_tok_end

const char * Lex_input_stream::m_cpp_tok_end

private

Ending position of the previous token parsed, in the pre-processed buffer.

◆ m_cpp_tok_start

const char * Lex_input_stream::m_cpp_tok_start

private

Starting position of the last token parsed, in the pre-processed buffer.

◆ m_cpp_utf8_processed_ptr

const char * Lex_input_stream::m_cpp_utf8_processed_ptr

private

Position in the pre-processed buffer.

The query from m_cpp_buf to m_cpp_utf8_processed_ptr is converted to UTF8-body.

◆ m_digest

sql_digest_state * Lex_input_stream::m_digest {nullptr}

Current statement digest instrumentation.

◆ m_echo

bool Lex_input_stream::m_echo

private

Echo the parsed stream to the pre-processed buffer.

◆ m_echo_saved

bool Lex_input_stream::m_echo_saved

private

◆ m_end_of_query

const char * Lex_input_stream::m_end_of_query

private

End of the query text in the input stream, in the raw buffer.

◆ m_ptr

char * Lex_input_stream::m_ptr

private

Pointer to the current position in the raw input stream.

◆ m_thd

THD * Lex_input_stream::m_thd

Current thread.

◆ m_tok_end

const char * Lex_input_stream::m_tok_end

private

Ending position of the previous token parsed, in the raw buffer.

◆ m_tok_start

const char * Lex_input_stream::m_tok_start

private

Starting position of the last token parsed, in the raw buffer.

◆ m_underscore_cs

const CHARSET_INFO * Lex_input_stream::m_underscore_cs

Character set specified by the character-set-introducer.

NOTE: this member must be used within MYSQLlex() function only.

◆ multi_statements

bool Lex_input_stream::multi_statements

true if we should allow multi-statements.

◆ next_state

enum my_lex_states Lex_input_stream::next_state

Current state of the lexical analyser.

◆ query_charset

const CHARSET_INFO * Lex_input_stream::query_charset

◆ skip_digest

bool Lex_input_stream::skip_digest

Skip adding of the current token's digest since it is already added.

Usually we calculate a digest token by token at the top-level function of the lexer: MYSQLlex(). However, some complex ("hintable") tokens break that data flow: for example, the SELECT /*+ HINT(t) */ is a single token from the main parser's point of view, and we add the "SELECT" keyword to the digest buffer right after the lex_one_token() call, but the "/*+ HINT(t) */" is a sequence of separate tokens from the hint parser's point of view, and we add those tokens to the digest buffer inside the lex_one_token() call. Thus, the usual data flow adds tokens from the "/*+ HINT(t) */" string first, and only then appends the "SELECT" keyword token to that stream: "/*+ HINT(t) */ SELECT". This is not acceptable, since we use the digest buffer to restore query strings in their normalized forms, so the order of added tokens is important. Thus, we add tokens of "hintable" keywords to the digest buffer right in the hint parser and skip adding them at the caller with the help of the skip_digest flag.

◆ stmt_prepare_mode

bool Lex_input_stream::stmt_prepare_mode

true if we're parsing a prepared statement: in this mode we should allow placeholders.

◆ tok_bitmap

uchar Lex_input_stream::tok_bitmap

Token character bitmaps, to detect 7bit strings.

◆ yylineno

uint Lex_input_stream::yylineno

Current line number.

◆ yylval

Lexer_yystype * Lex_input_stream::yylval

Interface with bison, value of the last token parsed.

◆ yytoklen

uint Lex_input_stream::yytoklen

Length of the last token parsed.

The documentation for this class was generated from the following files:

router/src/routing/src/sql_lexer_input_stream.h
sql/sql_lex.h
router/src/routing/src/sql_lexer.cc
sql/sql_lex.cc

Public Member Functions

Public Attributes

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ Lex_input_stream() [1/2]

◆ Lex_input_stream() [2/2]

Member Function Documentation

◆ add_digest_token() [1/2]

◆ add_digest_token() [2/2]

◆ body_utf8_append() [1/4]

◆ body_utf8_append() [2/4]

◆ body_utf8_append() [3/4]

◆ body_utf8_append() [4/4]

◆ body_utf8_append_literal() [1/2]

◆ body_utf8_append_literal() [2/2]

◆ body_utf8_start() [1/2]

◆ body_utf8_start() [2/2]

◆ cpp_inject() [1/2]

◆ cpp_inject() [2/2]

◆ eof() [1/4]

◆ eof() [2/4]

◆ eof() [3/4]

◆ eof() [4/4]

◆ get_body_utf8_length() [1/2]

◆ get_body_utf8_length() [2/2]

◆ get_body_utf8_str() [1/2]

◆ get_body_utf8_str() [2/2]

◆ get_buf() [1/2]

◆ get_buf() [2/2]

◆ get_cpp_buf() [1/2]

◆ get_cpp_buf() [2/2]

◆ get_cpp_ptr() [1/2]

◆ get_cpp_ptr() [2/2]

◆ get_cpp_tok_end() [1/2]

◆ get_cpp_tok_end() [2/2]

◆ get_cpp_tok_start() [1/2]

◆ get_cpp_tok_start() [2/2]

◆ get_end_of_query() [1/2]

◆ get_end_of_query() [2/2]

◆ get_lineno() [1/2]

◆ get_lineno() [2/2]

◆ get_ptr() [1/2]

◆ get_ptr() [2/2]

◆ get_tok_end() [1/2]

◆ get_tok_end() [2/2]

◆ get_tok_start() [1/2]

◆ get_tok_start() [2/2]

◆ init() [1/2]

◆ init() [2/2]

◆ is_partial_parser() [1/2]

◆ is_partial_parser() [2/2]

◆ reduce_digest_token() [1/2]

◆ reduce_digest_token() [2/2]

◆ reset() [1/2]

◆ reset() [2/2]

◆ restart_token() [1/2]

◆ restart_token() [2/2]

◆ restore_in_comment_state() [1/2]

◆ restore_in_comment_state() [2/2]

◆ save_in_comment_state() [1/2]

◆ save_in_comment_state() [2/2]

◆ set_echo() [1/2]

◆ set_echo() [2/2]

◆ skip_binary() [1/2]

◆ skip_binary() [2/2]

◆ start_token() [1/2]

◆ start_token() [2/2]

◆ text_string_is_7bit() [1/2]

◆ text_string_is_7bit() [2/2]

◆ warn_on_deprecated_charset() [1/2]

◆ warn_on_deprecated_charset() [2/2]

◆ warn_on_deprecated_collation() [1/2]

◆ warn_on_deprecated_collation() [2/2]

◆ yyGet() [1/2]

◆ yyGet() [2/2]

◆ yyGetLast() [1/2]

◆ yyGetLast() [2/2]

◆ yyLength() [1/2]

◆ yyLength() [2/2]