#include "sql/range_optimizer/tree.h"
#include <algorithm>
#include <set>
#include <utility>
#include "m_string.h"
#include "memory_debugging.h"
#include "my_dbug.h"
#include "my_sqlcommand.h"
#include "mysql/components/services/log_builtins.h"
#include "mysql/strings/m_ctype.h"
#include "mysqld_error.h"
#include "sql/handler.h"
#include "sql/key.h"
#include "sql/key_spec.h"
#include "sql/range_optimizer/internal.h"
#include "sql/range_optimizer/range_opt_param.h"
#include "sql/range_optimizer/range_optimizer.h"
#include "sql/sql_class.h"
#include "sql/sql_lex.h"
#include "sql/system_variables.h"
#include "sql/table.h"
#include "sql_string.h"

Namespaces
namespace	anonymous_namespace{tree.cc}

Functions
SEL_TREE *	tree_and (RANGE_OPT_PARAM param, SEL_TREE tree1, SEL_TREE *tree2)

SEL_TREE *	tree_or (RANGE_OPT_PARAM param, bool remove_jump_scans, SEL_TREE tree1, SEL_TREE *tree2)

SEL_ROOT *	key_or (RANGE_OPT_PARAM param, SEL_ROOT key1, SEL_ROOT *key2)
	Combine two range expression under a common OR. More...

SEL_ROOT *	key_and (RANGE_OPT_PARAM param, SEL_ROOT key1, SEL_ROOT *key2)

SEL_ARG *	rb_delete_fixup (SEL_ARG root, SEL_ARG key, SEL_ARG *par)

int	test_rb_tree (SEL_ARG element, SEL_ARG parent)

static bool	eq_tree (const SEL_ROOT a, const SEL_ROOT b)
	Compare if two trees are equal, recursively (not necessarily the same elements, but in terms of structure and values in each leaf). More...

static bool	eq_tree (const SEL_ARG a, const SEL_ARG b)

static bool	get_range (SEL_ARG e1, SEL_ARG e2, const SEL_ROOT *root1)

static bool	all_same (const SEL_ROOT sa1, const SEL_ROOT sa2)
	Helper function to compare two SEL_ROOTs. More...

void	imerge_list_and_list (List< SEL_IMERGE > im1, List< SEL_IMERGE > im2)

static int	imerge_list_or_list (RANGE_OPT_PARAM param, bool remove_jump_scans, List< SEL_IMERGE > im1, List< SEL_IMERGE > *im2)

static bool	imerge_list_or_tree (RANGE_OPT_PARAM param, bool remove_jump_scans, List< SEL_IMERGE > im1, SEL_TREE *tree)

int	sel_cmp (Field field, uchar a, uchar *b, uint8 a_flag, uint8 b_flag)

size_t	anonymous_namespace{tree.cc}::count_elements (const SEL_ARG *arg)

bool	sel_trees_can_be_ored (SEL_TREE tree1, SEL_TREE tree2, RANGE_OPT_PARAM *param)

static bool	remove_nonrange_trees (RANGE_OPT_PARAM param, SEL_TREE tree)

static SEL_ROOT *	and_all_keys (RANGE_OPT_PARAM param, SEL_ROOT key1, SEL_ROOT *key2)
	And key trees where key1->part < key2->part. More...

static void	left_rotate (SEL_ARG *root, SEL_ARG leaf)

static void	right_rotate (SEL_ARG *root, SEL_ARG leaf)

static ulong	count_key_part_usage (const SEL_ROOT root, const SEL_ROOT key, std::set< const SEL_ROOT * > *seen)
	Count how many times SEL_ARG graph "root" refers to its part "key" via transitive closure. More...

bool	get_sel_root_for_keypart (uint key_part_num, SEL_ROOT keypart_tree, SEL_ROOT *cur_range)

void	print_sel_tree (RANGE_OPT_PARAM param, SEL_TREE tree, Key_map tree_map, const char msg)

Function Documentation

◆ all_same()

static bool all_same	(	const SEL_ROOT *	sa1,
		const SEL_ROOT *	sa2
	)

static

Helper function to compare two SEL_ROOTs.

◆ and_all_keys()

static SEL_ROOT * and_all_keys	(	RANGE_OPT_PARAM *	param,
		SEL_ROOT *	key1,
		SEL_ROOT *	key2
	)

static

And key trees where key1->part < key2->part.

key2 will be connected to every key in key1, and thus have its use_count incremented many times. The returned node will not have its use_count increased; you are supposed to do that yourself when you connect it to a root.

Parameters

param	Range analysis context (needed to track if we have allocated too many SEL_ARGs)
key1	Root of first tree to AND together
key2	Root of second tree to AND together

Returns: Root of (key1 AND key2)

◆ count_key_part_usage()

static ulong count_key_part_usage	(	const SEL_ROOT *	root,
		const SEL_ROOT *	key,
		std::set< const SEL_ROOT * > *	seen
	)

static

Count how many times SEL_ARG graph "root" refers to its part "key" via transitive closure.

Parameters

root	An RB-Root node in a SEL_ARG graph.
key	Another RB-Root node in that SEL_ARG graph.
seen	Which SEL_ARGs we have already seen in this traversal. Used for deduplication, so that we only count each SEL_ARG once.

The passed "root" node may refer to "key" node via root->next_key_part, root->next->n

This function counts how many times the node "key" is referred (via SEL_ARG::next_key_part) by

intervals of RB-tree pointed by "root",
intervals of RB-trees that are pointed by SEL_ARG::next_key_part from intervals of RB-tree pointed by "root",
and so on.

Here is an example (horizontal links represent next_key_part pointers, vertical links - next/prev prev pointers):

   +----+               $
   |root|-----------------+
   +----+               $ |
     |                  $ |
     |                  $ |
   +----+       +---+   $ |     +---+    Here the return value
   |    |- ... -|   |---$-+--+->|key|    will be 4.
   +----+       +---+   $ |  |  +---+
     |                  $ |  |
    ...                 $ |  |
     |                  $ |  |
   +----+   +---+       $ |  |
   |    |---|   |---------+  |
   +----+   +---+       $    |
     |        |         $    |
    ...     +---+       $    |
            |   |------------+
            +---+       $

Returns: Number of links to "key" from nodes reachable from "root".

◆ eq_tree() [1/2]

static bool eq_tree	(	const SEL_ARG *	a,
		const SEL_ARG *	b
	)

static

◆ eq_tree() [2/2]

static bool eq_tree	(	const SEL_ROOT *	a,
		const SEL_ROOT *	b
	)

static

Compare if two trees are equal, recursively (not necessarily the same elements, but in terms of structure and values in each leaf).

NOTE: The demand for the same structure means that some trees that are equivalent could be deemed inequal by this function, depending on insertion order.

Parameters

a	First tree to compare.
b	Second tree to compare.

Returns: true iff they are equivalent.

◆ get_range()

static bool get_range	(	SEL_ARG **	e1,
		SEL_ARG **	e2,
		const SEL_ROOT *	root1
	)

static

◆ get_sel_root_for_keypart()

bool get_sel_root_for_keypart	(	uint	key_part_num,
		SEL_ROOT *	keypart_tree,
		SEL_ROOT **	cur_range
	)

Check if the SEL_ARG tree for 'field' is identical for all ranges in 'keypart_tree'.

◆ imerge_list_and_list()

void imerge_list_and_list	(	List< SEL_IMERGE > *	im1,
		List< SEL_IMERGE > *	im2
	)

inline

◆ imerge_list_or_list()

static int imerge_list_or_list	(	RANGE_OPT_PARAM *	param,
		bool	remove_jump_scans,
		List< SEL_IMERGE > *	im1,
		List< SEL_IMERGE > *	im2
	)

static

◆ imerge_list_or_tree()

static bool imerge_list_or_tree	(	RANGE_OPT_PARAM *	param,
		bool	remove_jump_scans,
		List< SEL_IMERGE > *	im1,
		SEL_TREE *	tree
	)

static

◆ key_and()

SEL_ROOT * key_and	(	RANGE_OPT_PARAM *	param,
		SEL_ROOT *	key1,
		SEL_ROOT *	key2
	)

◆ key_or()

SEL_ROOT * key_or	(	RANGE_OPT_PARAM *	param,
		SEL_ROOT *	key1,
		SEL_ROOT *	key2
	)

Combine two range expression under a common OR.

On a logical level, the transformation is key_or( expr1, expr2 ) => expr1 OR expr2.

Both expressions are assumed to be in the SEL_ARG format. In a logic sense, the format is reminiscent of DNF, since an expression such as the following

( 1 < kp1 < 10 AND p1 ) OR ( 10 <= kp2 < 20 AND p2 )

where there is a key consisting of keyparts ( kp1, kp2, ..., kpn ) and p1 and p2 are valid SEL_ARG expressions over keyparts kp2 ... kpn, is a valid SEL_ARG condition. The disjuncts appear ordered by the minimum endpoint of the first range and ranges must not overlap. It follows that they are also ordered by maximum endpoints. Thus

( 1 < kp1 <= 2 AND ( kp2 = 2 OR kp2 = 3 ) ) OR kp1 = 3

Is a a valid SER_ARG expression for a key of at least 2 keyparts.

For simplicity, we will assume that expr2 is a single range predicate, i.e. on the form ( a < x < b AND ... ). It is easy to generalize to a disjunction of several predicates by subsequently call key_or for each disjunct.

The algorithm iterates over each disjunct of expr1, and for each disjunct where the first keypart's range overlaps with the first keypart's range in expr2:

If the predicates are equal for the rest of the keyparts, or if there are no more, the range in expr2 has its endpoints copied in, and the SEL_ARG node in expr2 is deallocated. If more ranges became connected in expr1, the surplus is also dealocated. If they differ, two ranges are created.

The range leading up to the overlap. Empty if endpoints are equal.
The overlapping sub-range. May be the entire range if they are equal.

Finally, there may be one more range if expr2's first keypart's range has a greater maximum endpoint than the last range in expr1.

For the overlapping sub-range, we recursively call key_or. Thus in order to compute key_or of

(1) ( 1 < kp1 < 10 AND 1 < kp2 < 10 )

(2) ( 2 < kp1 < 20 AND 4 < kp2 < 20 )

We create the ranges 1 < kp <= 2, 2 < kp1 < 10, 10 <= kp1 < 20. For the first one, we simply hook on the condition for the second keypart from (1) : 1 < kp2 < 10. For the second range 2 < kp1 < 10, key_or( 1 < kp2 < 10, 4 < kp2 < 20 ) is called, yielding 1 < kp2 < 20. For the last range, we reuse the range 4 < kp2 < 20 from (2) for the second keypart. The result is thus

( 1 < kp1 <= 2 AND 1 < kp2 < 10 ) OR ( 2 < kp1 < 10 AND 1 < kp2 < 20 ) OR ( 10 <= kp1 < 20 AND 4 < kp2 < 20 )

key_or() does not modify key1 nor key2 if they are in use by other roots (although typical use is that key1 has been disconnected from its root and thus can be modified in-place). Thus, it does not change their use_count.

The returned node will not have its use_count increased; you are supposed to do that yourself when you connect it to a root.

Parameters

param	RANGE_OPT_PARAM from test_quick_select
key1	Root of RB-tree of SEL_ARGs to be ORed with key2
key2	Root of RB-tree of SEL_ARGs to be ORed with key1

◆ left_rotate()

static void left_rotate	(	SEL_ARG **	root,
		SEL_ARG *	leaf
	)

static

◆ print_sel_tree()

void print_sel_tree	(	RANGE_OPT_PARAM *	param,
		SEL_TREE *	tree,
		Key_map *	tree_map,
		const char *	msg
	)

◆ rb_delete_fixup()

SEL_ARG * rb_delete_fixup	(	SEL_ARG *	root,
		SEL_ARG *	key,
		SEL_ARG *	par
	)

◆ remove_nonrange_trees()

static bool remove_nonrange_trees	(	RANGE_OPT_PARAM *	param,
		SEL_TREE *	tree
	)

static

◆ right_rotate()

static void right_rotate	(	SEL_ARG **	root,
		SEL_ARG *	leaf
	)

static

◆ sel_cmp()

int sel_cmp	(	Field *	field,
		uchar *	a,
		uchar *	b,
		uint8	a_flag,
		uint8	b_flag
	)

◆ sel_trees_can_be_ored()

bool sel_trees_can_be_ored	(	SEL_TREE *	tree1,
		SEL_TREE *	tree2,
		RANGE_OPT_PARAM *	param
	)

◆ test_rb_tree()

int test_rb_tree	(	SEL_ARG *	element,
		SEL_ARG *	parent
	)

◆ tree_and()

SEL_TREE * tree_and	(	RANGE_OPT_PARAM *	param,
		SEL_TREE *	tree1,
		SEL_TREE *	tree2
	)

◆ tree_or()

SEL_TREE * tree_or	(	RANGE_OPT_PARAM *	param,
		bool	remove_jump_scans,
		SEL_TREE *	tree1,
		SEL_TREE *	tree2
	)

Namespaces

Functions

Function Documentation

◆ all_same()

◆ and_all_keys()

◆ count_key_part_usage()

◆ eq_tree() [1/2]

◆ eq_tree() [2/2]

◆ get_range()

◆ get_sel_root_for_keypart()

◆ imerge_list_and_list()

◆ imerge_list_or_list()

◆ imerge_list_or_tree()

◆ key_and()

◆ key_or()

◆ left_rotate()

◆ print_sel_tree()

◆ rb_delete_fixup()

◆ remove_nonrange_trees()

◆ right_rotate()

◆ sel_cmp()

◆ sel_trees_can_be_ored()

◆ test_rb_tree()

◆ tree_and()

◆ tree_or()